Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm: submit stacked requests in irq enabled context

Switch to having request-based DM enqueue all prep'ed requests into work
processed by another thread. This allows request-based DM to invoke
block APIs that assume interrupt enabled context (e.g. blk_get_request)
and is a prerequisite for adding blk-mq support to request-based DM.

The new kernel thread is only initialized for request-based DM devices.

multipath_map() is now always in irq enabled context so change multipath
spinlock (m->lock) locking to always disable interrupts.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

Authored by Keith Busch; committed by Mike Snitzer.
Commit IDs (as shown by the mirror): 2eb6e1e3 1ae49ea2

Diffstat (total): +47 −19

drivers/md/dm-mpath.c: +12 −8
```diff
@@ multipath_map() @@
 	struct multipath *m = (struct multipath *) ti->private;
 	int r = DM_MAPIO_REQUEUE;
 	size_t nr_bytes = blk_rq_bytes(clone);
-	unsigned long flags;
 	struct pgpath *pgpath;
 	struct block_device *bdev;
 	struct dm_mpath_io *mpio;

-	spin_lock_irqsave(&m->lock, flags);
+	spin_lock_irq(&m->lock);

 	/* Do we need to select a new pgpath? */
 	if (!m->current_pgpath ||
@@
 		/* ENOMEM, requeue */
 		goto out_unlock;

-	bdev = pgpath->path.dev->bdev;
-	clone->q = bdev_get_queue(bdev);
-	clone->rq_disk = bdev->bd_disk;
-	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
 	mpio = map_context->ptr;
 	mpio->pgpath = pgpath;
 	mpio->nr_bytes = nr_bytes;
+
+	bdev = pgpath->path.dev->bdev;
+
+	clone->q = bdev_get_queue(bdev);
+	clone->rq_disk = bdev->bd_disk;
+	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+
+	spin_unlock_irq(&m->lock);
+
 	if (pgpath->pg->ps.type->start_io)
 		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
 					      &pgpath->path,
 					      nr_bytes);
-	r = DM_MAPIO_REMAPPED;
+	return DM_MAPIO_REMAPPED;

 out_unlock:
-	spin_unlock_irqrestore(&m->lock, flags);
+	spin_unlock_irq(&m->lock);

 	return r;
 }
```
drivers/md/dm.c: +35 −11
```diff
@@ includes @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
+#include <linux/kthread.h>

 #include <trace/events/block.h>
@@ struct dm_rq_target_io @@
 	struct mapped_device *md;
 	struct dm_target *ti;
 	struct request *orig, *clone;
+	struct kthread_work work;
 	int error;
 	union map_info info;
 };
@@ struct mapped_device @@
 	struct bio flush_bio;

 	struct dm_stats stats;
+
+	struct kthread_worker kworker;
+	struct task_struct *kworker_task;
 };
@@ forward declaration before clone_rq() @@
 	return clone;
 }

+static void map_tio_request(struct kthread_work *work);
+
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 				gfp_t gfp_mask)
 {
@@ clone_rq() @@
 	tio->orig = rq;
 	tio->error = 0;
 	memset(&tio->info, 0, sizeof(tio->info));
+	init_kthread_work(&tio->work, map_tio_request);

 	clone = __clone_rq(rq, md, tio, GFP_ATOMIC);
 	if (!clone) {
@@ map_request() @@
 	int r, requeued = 0;
 	struct dm_rq_target_io *tio = clone->end_io_data;

-	tio->ti = ti;
 	r = ti->type->map_rq(ti, clone, &tio->info);
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
@@ map_request() tail; new map_tio_request() @@
 	}

 	return requeued;
+}
+
+static void map_tio_request(struct kthread_work *work)
+{
+	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+
+	map_request(tio->ti, tio->clone, tio->md);
 }

 static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
@@ request-function declarations @@
 	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 	struct dm_target *ti;
 	struct request *rq, *clone;
+	struct dm_rq_target_io *tio;
 	sector_t pos;

 	/*
@@ request dispatch loop @@

 		clone = dm_start_request(md, rq);

-		spin_unlock(q->queue_lock);
-		if (map_request(ti, clone, md))
-			goto requeued;
-
+		tio = rq->special;
+		/* Establish tio->ti before queuing work (map_tio_request) */
+		tio->ti = ti;
+		queue_kthread_work(&md->kworker, &tio->work);
 		BUG_ON(!irqs_disabled());
-		spin_lock(q->queue_lock);
 	}

 	goto out;
-
-requeued:
-	BUG_ON(!irqs_disabled());
-	spin_lock(q->queue_lock);

 delay_and_out:
 	blk_delay_queue(q, HZ / 10);
@@ device allocation @@
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
 	init_completion(&md->kobj_holder.completion);
+	md->kworker_task = NULL;

 	md->disk->major = _major;
 	md->disk->first_minor = minor;
@@ device teardown @@
 	unlock_fs(md);
 	bdput(md->bdev);
 	destroy_workqueue(md->wq);
+
+	if (md->kworker_task)
+		kthread_stop(md->kworker_task);
 	if (md->io_pool)
 		mempool_destroy(md->io_pool);
 	if (md->rq_pool)
@@ request-based queue init @@
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);

+	/* Also initialize the request-based DM worker thread */
+	init_kthread_worker(&md->kworker);
+	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+				       "kdmwork-%s", dm_device_name(md));
+
 	elv_register_queue(md->queue);

 	return 1;
@@ device destroy @@
 	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
+
+	if (dm_request_based(md))
+		flush_kthread_worker(&md->kworker);

 	if (!dm_suspended_md(md)) {
 		dm_table_presuspend_targets(map);
@@ suspend path @@
 	 * Stop md->queue before flushing md->wq in case request-based
 	 * dm defers requests to md->wq from md->queue.
 	 */
-	if (dm_request_based(md))
+	if (dm_request_based(md)) {
 		stop_queue(md->queue);
+		flush_kthread_worker(&md->kworker);
+	}

 	flush_workqueue(md->wq);
```