Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm cache: prevent corruption caused by discard_block_size > cache_block_size

If the discard block size is larger than the cache block size we will
not properly quiesce IO to a region that is about to be discarded. This
results in a race between a cache migration where no copy is needed, and
a write to an adjacent cache block that's within the same large discard
block.

Work around this by limiting the discard_block_size to cache_block_size.
Also limit the max_discard_sectors to cache_block_size.

A more comprehensive fix that introduces range locking support in the
bio_prison and proper quiescing of a discard range that spans multiple
cache blocks is already in development.

Reported-by: Morgan Mears <Morgan.Mears@netapp.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
Acked-by: Heinz Mauelshagen <heinzm@redhat.com>
Cc: stable@vger.kernel.org

+3 -34
drivers/md/dm-cache-target.c
··· 239 239 */ 240 240 dm_dblock_t discard_nr_blocks; 241 241 unsigned long *discard_bitset; 242 - uint32_t discard_block_size; /* a power of 2 times sectors per block */ 242 + uint32_t discard_block_size; 243 243 244 244 /* 245 245 * Rather than reconstructing the table line for the status we just ··· 2171 2171 return 0; 2172 2172 } 2173 2173 2174 - /* 2175 - * We want the discard block size to be a power of two, at least the size 2176 - * of the cache block size, and have no more than 2^14 discard blocks 2177 - * across the origin. 2178 - */ 2179 - #define MAX_DISCARD_BLOCKS (1 << 14) 2180 - 2181 - static bool too_many_discard_blocks(sector_t discard_block_size, 2182 - sector_t origin_size) 2183 - { 2184 - (void) sector_div(origin_size, discard_block_size); 2185 - 2186 - return origin_size > MAX_DISCARD_BLOCKS; 2187 - } 2188 - 2189 - static sector_t calculate_discard_block_size(sector_t cache_block_size, 2190 - sector_t origin_size) 2191 - { 2192 - sector_t discard_block_size; 2193 - 2194 - discard_block_size = roundup_pow_of_two(cache_block_size); 2195 - 2196 - if (origin_size) 2197 - while (too_many_discard_blocks(discard_block_size, origin_size)) 2198 - discard_block_size *= 2; 2199 - 2200 - return discard_block_size; 2201 - } 2202 - 2203 2174 #define DEFAULT_MIGRATION_THRESHOLD 2048 2204 2175 2205 2176 static int cache_create(struct cache_args *ca, struct cache **result) ··· 2292 2321 } 2293 2322 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2294 2323 2295 - cache->discard_block_size = 2296 - calculate_discard_block_size(cache->sectors_per_block, 2297 - cache->origin_sectors); 2324 + cache->discard_block_size = cache->sectors_per_block; 2298 2325 cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); 2299 2326 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); 2300 2327 if (!cache->discard_bitset) { ··· 3089 3120 /* 3090 3121 * FIXME: these limits may be incompatible with the cache device 3091 
3122 */ 3092 - limits->max_discard_sectors = cache->discard_block_size * 1024; 3123 + limits->max_discard_sectors = cache->discard_block_size; 3093 3124 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; 3094 3125 } 3095 3126