Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md/raid5: avoid races when changing cache size.

Cache size can grow or shrink due to various pressures at
any time. So when we resize the cache as part of a 'grow'
operation (i.e. change the size to allow more devices) we need
to block that automatic growing/shrinking.

So introduce a mutex. auto grow/shrink uses mutex_trylock()
and just doesn't bother if there is a blockage.
Resizing the whole cache holds the mutex to ensure that
the correct number of new stripes is allocated.

This bug can result in some stripes not being freed when an
array is stopped. This leads to the kmem_cache not being
freed and a subsequent array can try to use the same kmem_cache
and get confused.

Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.")
Cc: stable@vger.kernel.org (4.1 - please delay until 2 weeks after release of 4.2)
Signed-off-by: NeilBrown <neilb@suse.com>

NeilBrown 2d5b569b 6aaf0da8

+27 -7
+25 -6
drivers/md/raid5.c
··· 2162 2162 if (!sc) 2163 2163 return -ENOMEM; 2164 2164 2165 + /* Need to ensure auto-resizing doesn't interfere */ 2166 + mutex_lock(&conf->cache_size_mutex); 2167 + 2165 2168 for (i = conf->max_nr_stripes; i; i--) { 2166 2169 nsh = alloc_stripe(sc, GFP_KERNEL); 2167 2170 if (!nsh) ··· 2181 2178 kmem_cache_free(sc, nsh); 2182 2179 } 2183 2180 kmem_cache_destroy(sc); 2181 + mutex_unlock(&conf->cache_size_mutex); 2184 2182 return -ENOMEM; 2185 2183 } 2186 2184 /* Step 2 - Must use GFP_NOIO now. ··· 2228 2224 } else 2229 2225 err = -ENOMEM; 2230 2226 2227 + mutex_unlock(&conf->cache_size_mutex); 2231 2228 /* Step 4, return new stripes to service */ 2232 2229 while(!list_empty(&newstripes)) { 2233 2230 nsh = list_entry(newstripes.next, struct stripe_head, lru); ··· 5862 5857 pr_debug("%d stripes handled\n", handled); 5863 5858 5864 5859 spin_unlock_irq(&conf->device_lock); 5865 - if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) { 5860 + if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && 5861 + mutex_trylock(&conf->cache_size_mutex)) { 5866 5862 grow_one_stripe(conf, __GFP_NOWARN); 5867 5863 /* Set flag even if allocation failed. 
This helps 5868 5864 * slow down allocation requests when mem is short 5869 5865 */ 5870 5866 set_bit(R5_DID_ALLOC, &conf->cache_state); 5867 + mutex_unlock(&conf->cache_size_mutex); 5871 5868 } 5872 5869 5873 5870 async_tx_issue_pending_all(); ··· 5901 5894 return -EINVAL; 5902 5895 5903 5896 conf->min_nr_stripes = size; 5897 + mutex_lock(&conf->cache_size_mutex); 5904 5898 while (size < conf->max_nr_stripes && 5905 5899 drop_one_stripe(conf)) 5906 5900 ; 5901 + mutex_unlock(&conf->cache_size_mutex); 5907 5902 5908 5903 5909 5904 err = md_allow_write(mddev); 5910 5905 if (err) 5911 5906 return err; 5912 5907 5908 + mutex_lock(&conf->cache_size_mutex); 5913 5909 while (size > conf->max_nr_stripes) 5914 5910 if (!grow_one_stripe(conf, GFP_KERNEL)) 5915 5911 break; 5912 + mutex_unlock(&conf->cache_size_mutex); 5916 5913 5917 5914 return 0; 5918 5915 } ··· 6382 6371 struct shrink_control *sc) 6383 6372 { 6384 6373 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); 6385 - int ret = 0; 6386 - while (ret < sc->nr_to_scan) { 6387 - if (drop_one_stripe(conf) == 0) 6388 - return SHRINK_STOP; 6389 - ret++; 6374 + unsigned long ret = SHRINK_STOP; 6375 + 6376 + if (mutex_trylock(&conf->cache_size_mutex)) { 6377 + ret= 0; 6378 + while (ret < sc->nr_to_scan) { 6379 + if (drop_one_stripe(conf) == 0) { 6380 + ret = SHRINK_STOP; 6381 + break; 6382 + } 6383 + ret++; 6384 + } 6385 + mutex_unlock(&conf->cache_size_mutex); 6390 6386 } 6391 6387 return ret; 6392 6388 } ··· 6462 6444 goto abort; 6463 6445 spin_lock_init(&conf->device_lock); 6464 6446 seqcount_init(&conf->gen_lock); 6447 + mutex_init(&conf->cache_size_mutex); 6465 6448 init_waitqueue_head(&conf->wait_for_quiescent); 6466 6449 for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { 6467 6450 init_waitqueue_head(&conf->wait_for_stripe[i]);
+2 -1
drivers/md/raid5.h
··· 482 482 */ 483 483 int active_name; 484 484 char cache_name[2][32]; 485 - struct kmem_cache *slab_cache; /* for allocating stripes */ 485 + struct kmem_cache *slab_cache; /* for allocating stripes */ 486 + struct mutex cache_size_mutex; /* Protect changes to cache size */ 486 487 487 488 int seq_flush, seq_write; 488 489 int quiesce;