Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm: Fix deadlock when reloading a multipath table

Request-based devices (dm-multipath) queue I/O in blk-mq on noflush
suspends. Any queued IO will make it impossible to freeze the queue. If
a process attempts to update the queue limits while there is queued IO,
it can get stuck holding the limits lock, while unable to freeze the
queue. If device-mapper then attempts to update the limits during a
table swap, it will deadlock trying to grab the limits lock while making
it impossible to flush the IO.

Disallow updating the queue limits during a table swap, when updating an
immutable request-based dm device (dm-multipath) during a noflush
suspend. It is userspace's responsibility to make sure that the new
table uses the same limits as the existing table if it asks for a
noflush suspend.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

authored by

Benjamin Marzinski and committed by
Mikulas Patocka
be4addb1 4929ba5c

+29 -17
+4
drivers/md/dm-table.c
··· 2043 2043 return true; 2044 2044 } 2045 2045 2046 + /* 2047 + * This function will be skipped by noflush reloads of immutable request 2048 + * based devices (dm-mpath). 2049 + */ 2046 2050 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 2047 2051 struct queue_limits *limits) 2048 2052 {
+2 -5
drivers/md/dm-thin.c
··· 4383 4383 { 4384 4384 struct thin_c *tc = ti->private; 4385 4385 4386 - /* 4387 - * The dm_noflush_suspending flag has been cleared by now, so 4388 - * unfortunately we must always run this. 4389 - */ 4390 - noflush_work(tc, do_noflush_stop); 4386 + if (dm_noflush_suspending(ti)) 4387 + noflush_work(tc, do_noflush_stop); 4391 4388 } 4392 4389 4393 4390 static int thin_preresume(struct dm_target *ti)
+23 -12
drivers/md/dm.c
··· 2439 2439 { 2440 2440 struct dm_table *old_map; 2441 2441 sector_t size, old_size; 2442 - int ret; 2443 2442 2444 2443 lockdep_assert_held(&md->suspend_lock); 2445 2444 ··· 2453 2454 2454 2455 set_capacity(md->disk, size); 2455 2456 2456 - ret = dm_table_set_restrictions(t, md->queue, limits); 2457 - if (ret) { 2458 - set_capacity(md->disk, old_size); 2459 - old_map = ERR_PTR(ret); 2460 - goto out; 2457 + if (limits) { 2458 + int ret = dm_table_set_restrictions(t, md->queue, limits); 2459 + if (ret) { 2460 + set_capacity(md->disk, old_size); 2461 + old_map = ERR_PTR(ret); 2462 + goto out; 2463 + } 2461 2464 } 2462 2465 2463 2466 /* ··· 2837 2836 2838 2837 static void dm_queue_flush(struct mapped_device *md) 2839 2838 { 2839 + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 2840 2840 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2841 2841 smp_mb__after_atomic(); 2842 2842 queue_work(md->wq, &md->work); ··· 2850 2848 { 2851 2849 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); 2852 2850 struct queue_limits limits; 2851 + bool update_limits = true; 2853 2852 int r; 2854 2853 2855 2854 mutex_lock(&md->suspend_lock); ··· 2860 2857 goto out; 2861 2858 2862 2859 /* 2860 + * To avoid a potential deadlock locking the queue limits, disallow 2861 + * updating the queue limits during a table swap, when updating an 2862 + * immutable request-based dm device (dm-multipath) during a noflush 2863 + * suspend. It is userspace's responsibility to make sure that the new 2864 + * table uses the same limits as the existing table, if it asks for a 2865 + * noflush suspend. 2866 + */ 2867 + if (dm_request_based(md) && md->immutable_target && 2868 + __noflush_suspending(md)) 2869 + update_limits = false; 2870 + /* 2863 2871 * If the new table has no data devices, retain the existing limits. 2864 2872 * This helps multipath with queue_if_no_path if all paths disappear, 2865 2873 * then new I/O is queued based on these limits, and then some paths 2866 2874 * reappear. 
2867 2875 */ 2868 - if (dm_table_has_no_data_devices(table)) { 2876 + else if (dm_table_has_no_data_devices(table)) { 2869 2877 live_map = dm_get_live_table_fast(md); 2870 2878 if (live_map) 2871 2879 limits = md->queue->limits; 2872 2880 dm_put_live_table_fast(md); 2873 2881 } 2874 2882 2875 - if (!live_map) { 2883 + if (update_limits && !live_map) { 2876 2884 r = dm_calculate_queue_limits(table, &limits); 2877 2885 if (r) { 2878 2886 map = ERR_PTR(r); ··· 2891 2877 } 2892 2878 } 2893 2879 2894 - map = __bind(md, table, &limits); 2880 + map = __bind(md, table, update_limits ? &limits : NULL); 2895 2881 dm_issue_global_event(); 2896 2882 2897 2883 out: ··· 2944 2930 2945 2931 /* 2946 2932 * DMF_NOFLUSH_SUSPENDING must be set before presuspend. 2947 - * This flag is cleared before dm_suspend returns. 2948 2933 */ 2949 2934 if (noflush) 2950 2935 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); ··· 3006 2993 if (!r) 3007 2994 set_bit(dmf_suspended_flag, &md->flags); 3008 2995 3009 - if (noflush) 3010 - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 3011 2996 if (map) 3012 2997 synchronize_srcu(&md->io_barrier); 3013 2998