Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm vdo slab-depot: read refcount blocks in large chunks at load time

At startup, vdo loads all the reference count data before the device
reports that it is ready. Using a pool of large metadata vios can
improve the startup speed of vdo. The pool of large vios is released
after the device is ready.

During normal operation, reference counts are updated 4kB at a time,
as before.

Signed-off-by: Ken Raeburn <raeburn@redhat.com>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

Authored by Ken Raeburn; committed by Mikulas Patocka.
0ce46f4f f979da51

+59 -17
+47 -16
drivers/md/dm-vdo/slab-depot.c
··· 1170 1170 1171 1171 vio_record_metadata_io_error(vio); 1172 1172 return_vio_to_pool(vio_as_pooled_vio(vio)); 1173 - slab->active_count--; 1173 + slab->active_count -= vio->io_size / VDO_BLOCK_SIZE; 1174 1174 vdo_enter_read_only_mode(slab->allocator->depot->vdo, result); 1175 1175 check_if_slab_drained(slab); 1176 1176 } ··· 2239 2239 struct pooled_vio *pooled = vio_as_pooled_vio(vio); 2240 2240 struct reference_block *block = completion->parent; 2241 2241 struct vdo_slab *slab = block->slab; 2242 + unsigned int block_count = vio->io_size / VDO_BLOCK_SIZE; 2243 + unsigned int i; 2244 + char *data = vio->data; 2242 2245 2243 - unpack_reference_block((struct packed_reference_block *) vio->data, block); 2246 + for (i = 0; i < block_count; i++, block++, data += VDO_BLOCK_SIZE) { 2247 + struct packed_reference_block *packed = (struct packed_reference_block *) data; 2248 + 2249 + unpack_reference_block(packed, block); 2250 + clear_provisional_references(block); 2251 + slab->free_blocks -= block->allocated_count; 2252 + } 2244 2253 return_vio_to_pool(pooled); 2245 - slab->active_count--; 2246 - clear_provisional_references(block); 2254 + slab->active_count -= block_count; 2247 2255 2248 - slab->free_blocks -= block->allocated_count; 2249 2256 check_if_slab_drained(slab); 2250 2257 } 2251 2258 ··· 2266 2259 } 2267 2260 2268 2261 /** 2269 - * load_reference_block() - After a block waiter has gotten a VIO from the VIO pool, load the 2270 - * block. 2271 - * @waiter: The waiter of the block to load. 2262 + * load_reference_block_group() - After a block waiter has gotten a VIO from the VIO pool, load 2263 + * a set of blocks. 2264 + * @waiter: The waiter of the first block to load. 2272 2265 * @context: The VIO returned by the pool. 
2273 2266 */ 2274 - static void load_reference_block(struct vdo_waiter *waiter, void *context) 2267 + static void load_reference_block_group(struct vdo_waiter *waiter, void *context) 2275 2268 { 2276 2269 struct pooled_vio *pooled = context; 2277 2270 struct vio *vio = &pooled->vio; 2278 2271 struct reference_block *block = 2279 2272 container_of(waiter, struct reference_block, waiter); 2280 - size_t block_offset = (block - block->slab->reference_blocks); 2273 + u32 block_offset = block - block->slab->reference_blocks; 2274 + u32 max_block_count = block->slab->reference_block_count - block_offset; 2275 + u32 block_count = min_t(int, vio->block_count, max_block_count); 2281 2276 2282 2277 vio->completion.parent = block; 2283 - vdo_submit_metadata_vio(vio, block->slab->ref_counts_origin + block_offset, 2284 - load_reference_block_endio, handle_io_error, 2285 - REQ_OP_READ); 2278 + vdo_submit_metadata_vio_with_size(vio, block->slab->ref_counts_origin + block_offset, 2279 + load_reference_block_endio, handle_io_error, 2280 + REQ_OP_READ, block_count * VDO_BLOCK_SIZE); 2286 2281 } 2287 2282 2288 2283 /** ··· 2294 2285 static void load_reference_blocks(struct vdo_slab *slab) 2295 2286 { 2296 2287 block_count_t i; 2288 + u64 blocks_per_vio = slab->allocator->refcount_blocks_per_big_vio; 2289 + struct vio_pool *pool = slab->allocator->refcount_big_vio_pool; 2290 + 2291 + if (!pool) { 2292 + pool = slab->allocator->vio_pool; 2293 + blocks_per_vio = 1; 2294 + } 2297 2295 2298 2296 slab->free_blocks = slab->block_count; 2299 2297 slab->active_count = slab->reference_block_count; 2300 - for (i = 0; i < slab->reference_block_count; i++) { 2298 + for (i = 0; i < slab->reference_block_count; i += blocks_per_vio) { 2301 2299 struct vdo_waiter *waiter = &slab->reference_blocks[i].waiter; 2302 2300 2303 - waiter->callback = load_reference_block; 2304 - acquire_vio_from_pool(slab->allocator->vio_pool, waiter); 2301 + waiter->callback = load_reference_block_group; 2302 + 
acquire_vio_from_pool(pool, waiter); 2305 2303 } 2306 2304 } 2307 2305 ··· 2715 2699 vdo_log_info("VDO commencing normal operation"); 2716 2700 else if (prior_state == VDO_RECOVERING) 2717 2701 vdo_log_info("Exiting recovery mode"); 2702 + free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool)); 2718 2703 } 2719 2704 2720 2705 /* ··· 3999 3982 struct vdo *vdo = depot->vdo; 4000 3983 block_count_t max_free_blocks = depot->slab_config.data_blocks; 4001 3984 unsigned int max_priority = (2 + ilog2(max_free_blocks)); 3985 + u32 reference_block_count, refcount_reads_needed, refcount_blocks_per_vio; 4002 3986 4003 3987 *allocator = (struct block_allocator) { 4004 3988 .depot = depot, ··· 4020 4002 result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, 1, allocator->thread_id, 4021 4003 VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA, 4022 4004 allocator, &allocator->vio_pool); 4005 + if (result != VDO_SUCCESS) 4006 + return result; 4007 + 4008 + /* Initialize the refcount-reading vio pool. 
*/ 4009 + reference_block_count = vdo_get_saved_reference_count_size(depot->slab_config.slab_blocks); 4010 + refcount_reads_needed = DIV_ROUND_UP(reference_block_count, MAX_BLOCKS_PER_VIO); 4011 + refcount_blocks_per_vio = DIV_ROUND_UP(reference_block_count, refcount_reads_needed); 4012 + allocator->refcount_blocks_per_big_vio = refcount_blocks_per_vio; 4013 + result = make_vio_pool(vdo, BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE, 4014 + allocator->refcount_blocks_per_big_vio, allocator->thread_id, 4015 + VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA, 4016 + NULL, &allocator->refcount_big_vio_pool); 4023 4017 if (result != VDO_SUCCESS) 4024 4018 return result; 4025 4019 ··· 4252 4222 uninitialize_allocator_summary(allocator); 4253 4223 uninitialize_scrubber_vio(&allocator->scrubber); 4254 4224 free_vio_pool(vdo_forget(allocator->vio_pool)); 4225 + free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool)); 4255 4226 vdo_free_priority_table(vdo_forget(allocator->prioritized_slabs)); 4256 4227 } 4257 4228
+12 -1
drivers/md/dm-vdo/slab-depot.h
··· 45 45 enum { 46 46 /* The number of vios in the vio pool is proportional to the throughput of the VDO. */ 47 47 BLOCK_ALLOCATOR_VIO_POOL_SIZE = 128, 48 + 49 + /* 50 + * The number of vios in the vio pool used for loading reference count data. A slab's 51 + * refcounts is capped at ~8MB, and we process one at a time in a zone, so 9 should be 52 + * plenty. 53 + */ 54 + BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE = 9, 48 55 }; 49 56 50 57 /* ··· 255 248 256 249 /* A list of the dirty blocks waiting to be written out */ 257 250 struct vdo_wait_queue dirty_blocks; 258 - /* The number of blocks which are currently writing */ 251 + /* The number of blocks which are currently reading or writing */ 259 252 size_t active_count; 260 253 261 254 /* A waiter object for updating the slab summary */ ··· 432 425 433 426 /* The vio pool for reading and writing block allocator metadata */ 434 427 struct vio_pool *vio_pool; 428 + /* The vio pool for large initial reads of ref count areas */ 429 + struct vio_pool *refcount_big_vio_pool; 430 + /* How many ref count blocks are read per vio at initial load */ 431 + u32 refcount_blocks_per_big_vio; 435 432 /* The dm_kcopyd client for erasing slab journals */ 436 433 struct dm_kcopyd_client *eraser; 437 434 /* Iterator over the slabs to be erased */