Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm space maps: improve performance with inc/dec on ranges of blocks

When we break sharing on btree nodes we typically need to increment
the reference counts of every value held in the node. This can
cause a lot of repeated calls to the space maps. Fix this by changing
the interface to the space map inc/dec methods to take ranges of
adjacent blocks to be operated on.

For installations that are using a lot of snapshots this will reduce
the CPU overhead of fundamental operations, such as provisioning a new
block or deleting a snapshot, by as much as 10 times.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Joe Thornber and committed by
Mike Snitzer
be500ed7 5faafc77

+778 -249
+14 -10
drivers/md/dm-era-target.c
··· 363 363 core->root = le64_to_cpu(disk->root); 364 364 } 365 365 366 - static void ws_inc(void *context, const void *value) 366 + static void ws_inc(void *context, const void *value, unsigned count) 367 367 { 368 368 struct era_metadata *md = context; 369 369 struct writeset_disk ws_d; 370 370 dm_block_t b; 371 + unsigned i; 371 372 372 - memcpy(&ws_d, value, sizeof(ws_d)); 373 - b = le64_to_cpu(ws_d.root); 374 - 375 - dm_tm_inc(md->tm, b); 373 + for (i = 0; i < count; i++) { 374 + memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); 375 + b = le64_to_cpu(ws_d.root); 376 + dm_tm_inc(md->tm, b); 377 + } 376 378 } 377 379 378 - static void ws_dec(void *context, const void *value) 380 + static void ws_dec(void *context, const void *value, unsigned count) 379 381 { 380 382 struct era_metadata *md = context; 381 383 struct writeset_disk ws_d; 382 384 dm_block_t b; 385 + unsigned i; 383 386 384 - memcpy(&ws_d, value, sizeof(ws_d)); 385 - b = le64_to_cpu(ws_d.root); 386 - 387 - dm_bitset_del(&md->bitset_info, b); 387 + for (i = 0; i < count; i++) { 388 + memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); 389 + b = le64_to_cpu(ws_d.root); 390 + dm_bitset_del(&md->bitset_info, b); 391 + } 388 392 } 389 393 390 394 static int ws_eq(void *context, const void *value1, const void *value2)
+55 -40
drivers/md/dm-thin-metadata.c
··· 311 311 *t = v & ((1 << 24) - 1); 312 312 } 313 313 314 - static void data_block_inc(void *context, const void *value_le) 315 - { 316 - struct dm_space_map *sm = context; 317 - __le64 v_le; 318 - uint64_t b; 319 - uint32_t t; 314 + /* 315 + * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as 316 + * possible. 'with_runs' reads contiguous runs of blocks, and calls the 317 + * given sm function. 318 + */ 319 + typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t); 320 320 321 - memcpy(&v_le, value_le, sizeof(v_le)); 322 - unpack_block_time(le64_to_cpu(v_le), &b, &t); 323 - dm_sm_inc_block(sm, b); 321 + static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn) 322 + { 323 + uint64_t b, begin, end; 324 + uint32_t t; 325 + bool in_run = false; 326 + unsigned i; 327 + 328 + for (i = 0; i < count; i++, value_le++) { 329 + /* We know value_le is 8 byte aligned */ 330 + unpack_block_time(le64_to_cpu(*value_le), &b, &t); 331 + 332 + if (in_run) { 333 + if (b == end) { 334 + end++; 335 + } else { 336 + fn(sm, begin, end); 337 + begin = b; 338 + end = b + 1; 339 + } 340 + } else { 341 + in_run = true; 342 + begin = b; 343 + end = b + 1; 344 + } 345 + } 346 + 347 + if (in_run) 348 + fn(sm, begin, end); 324 349 } 325 350 326 - static void data_block_dec(void *context, const void *value_le) 351 + static void data_block_inc(void *context, const void *value_le, unsigned count) 327 352 { 328 - struct dm_space_map *sm = context; 329 - __le64 v_le; 330 - uint64_t b; 331 - uint32_t t; 353 + with_runs((struct dm_space_map *) context, 354 + (const __le64 *) value_le, count, dm_sm_inc_blocks); 355 + } 332 356 333 - memcpy(&v_le, value_le, sizeof(v_le)); 334 - unpack_block_time(le64_to_cpu(v_le), &b, &t); 335 - dm_sm_dec_block(sm, b); 357 + static void data_block_dec(void *context, const void *value_le, unsigned count) 358 + { 359 + with_runs((struct dm_space_map *) context, 360 + (const __le64 *) value_le, count, 
dm_sm_dec_blocks); 336 361 } 337 362 338 363 static int data_block_equal(void *context, const void *value1_le, const void *value2_le) ··· 374 349 return b1 == b2; 375 350 } 376 351 377 - static void subtree_inc(void *context, const void *value) 352 + static void subtree_inc(void *context, const void *value, unsigned count) 378 353 { 379 354 struct dm_btree_info *info = context; 380 - __le64 root_le; 381 - uint64_t root; 355 + const __le64 *root_le = value; 356 + unsigned i; 382 357 383 - memcpy(&root_le, value, sizeof(root_le)); 384 - root = le64_to_cpu(root_le); 385 - dm_tm_inc(info->tm, root); 358 + for (i = 0; i < count; i++, root_le++) 359 + dm_tm_inc(info->tm, le64_to_cpu(*root_le)); 386 360 } 387 361 388 - static void subtree_dec(void *context, const void *value) 362 + static void subtree_dec(void *context, const void *value, unsigned count) 389 363 { 390 364 struct dm_btree_info *info = context; 391 - __le64 root_le; 392 - uint64_t root; 365 + const __le64 *root_le = value; 366 + unsigned i; 393 367 394 - memcpy(&root_le, value, sizeof(root_le)); 395 - root = le64_to_cpu(root_le); 396 - if (dm_btree_del(info, root)) 397 - DMERR("btree delete failed"); 368 + for (i = 0; i < count; i++, root_le++) 369 + if (dm_btree_del(info, le64_to_cpu(*root_le))) 370 + DMERR("btree delete failed"); 398 371 } 399 372 400 373 static int subtree_equal(void *context, const void *value1_le, const void *value2_le) ··· 1784 1761 int r = 0; 1785 1762 1786 1763 pmd_write_lock(pmd); 1787 - for (; b != e; b++) { 1788 - r = dm_sm_inc_block(pmd->data_sm, b); 1789 - if (r) 1790 - break; 1791 - } 1764 + r = dm_sm_inc_blocks(pmd->data_sm, b, e); 1792 1765 pmd_write_unlock(pmd); 1793 1766 1794 1767 return r; ··· 1795 1776 int r = 0; 1796 1777 1797 1778 pmd_write_lock(pmd); 1798 - for (; b != e; b++) { 1799 - r = dm_sm_dec_block(pmd->data_sm, b); 1800 - if (r) 1801 - break; 1802 - } 1779 + r = dm_sm_dec_blocks(pmd->data_sm, b, e); 1803 1780 pmd_write_unlock(pmd); 1804 1781 1805 1782 return r;
+28 -24
drivers/md/persistent-data/dm-array.c
··· 108 108 * in an array block. 109 109 */ 110 110 static void on_entries(struct dm_array_info *info, struct array_block *ab, 111 - void (*fn)(void *, const void *)) 111 + void (*fn)(void *, const void *, unsigned)) 112 112 { 113 - unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); 114 - 115 - for (i = 0; i < nr_entries; i++) 116 - fn(info->value_type.context, element_at(info, ab, i)); 113 + unsigned nr_entries = le32_to_cpu(ab->nr_entries); 114 + fn(info->value_type.context, element_at(info, ab, 0), nr_entries); 117 115 } 118 116 119 117 /* ··· 173 175 static void fill_ablock(struct dm_array_info *info, struct array_block *ab, 174 176 const void *value, unsigned new_nr) 175 177 { 176 - unsigned i; 177 - uint32_t nr_entries; 178 + uint32_t nr_entries, delta, i; 178 179 struct dm_btree_value_type *vt = &info->value_type; 179 180 180 181 BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); 181 182 BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); 182 183 183 184 nr_entries = le32_to_cpu(ab->nr_entries); 184 - for (i = nr_entries; i < new_nr; i++) { 185 - if (vt->inc) 186 - vt->inc(vt->context, value); 185 + delta = new_nr - nr_entries; 186 + if (vt->inc) 187 + vt->inc(vt->context, value, delta); 188 + for (i = nr_entries; i < new_nr; i++) 187 189 memcpy(element_at(info, ab, i), value, vt->size); 188 - } 189 190 ab->nr_entries = cpu_to_le32(new_nr); 190 191 } 191 192 ··· 196 199 static void trim_ablock(struct dm_array_info *info, struct array_block *ab, 197 200 unsigned new_nr) 198 201 { 199 - unsigned i; 200 - uint32_t nr_entries; 202 + uint32_t nr_entries, delta; 201 203 struct dm_btree_value_type *vt = &info->value_type; 202 204 203 205 BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); 204 206 BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); 205 207 206 208 nr_entries = le32_to_cpu(ab->nr_entries); 207 - for (i = nr_entries; i > new_nr; i--) 208 - if (vt->dec) 209 - vt->dec(vt->context, element_at(info, ab, i - 1)); 209 + delta = nr_entries - new_nr; 210 + if (vt->dec) 211 + 
vt->dec(vt->context, element_at(info, ab, new_nr - 1), delta); 210 212 ab->nr_entries = cpu_to_le32(new_nr); 211 213 } 212 214 ··· 569 573 * These are the value_type functions for the btree elements, which point 570 574 * to array blocks. 571 575 */ 572 - static void block_inc(void *context, const void *value) 576 + static void block_inc(void *context, const void *value, unsigned count) 573 577 { 574 - __le64 block_le; 578 + const __le64 *block_le = value; 575 579 struct dm_array_info *info = context; 580 + unsigned i; 576 581 577 - memcpy(&block_le, value, sizeof(block_le)); 578 - dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); 582 + for (i = 0; i < count; i++, block_le++) 583 + dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le)); 579 584 } 580 585 581 - static void block_dec(void *context, const void *value) 586 + static void __block_dec(void *context, const void *value) 582 587 { 583 588 int r; 584 589 uint64_t b; ··· 616 619 } 617 620 618 621 dm_tm_dec(info->btree_info.tm, b); 622 + } 623 + 624 + static void block_dec(void *context, const void *value, unsigned count) 625 + { 626 + unsigned i; 627 + for (i = 0; i < count; i++, value += sizeof(__le64)) 628 + __block_dec(context, value); 619 629 } 620 630 621 631 static int block_equal(void *context, const void *value1, const void *value2) ··· 715 711 return r; 716 712 717 713 if (vt->inc) 718 - vt->inc(vt->context, element_at(info, ab, i)); 714 + vt->inc(vt->context, element_at(info, ab, i), 1); 719 715 } 720 716 721 717 ab->nr_entries = cpu_to_le32(new_nr); ··· 826 822 old_value = element_at(info, ab, entry); 827 823 if (vt->dec && 828 824 (!vt->equal || !vt->equal(vt->context, old_value, value))) { 829 - vt->dec(vt->context, old_value); 825 + vt->dec(vt->context, old_value, 1); 830 826 if (vt->inc) 831 - vt->inc(vt->context, value); 827 + vt->inc(vt->context, value, 1); 832 828 } 833 829 834 830 memcpy(old_value, value, info->value_type.size);
+13
drivers/md/persistent-data/dm-btree-internal.h
··· 144 144 extern void init_le64_type(struct dm_transaction_manager *tm, 145 145 struct dm_btree_value_type *vt); 146 146 147 + /* 148 + * This returns a shadowed btree leaf that you may modify. In practise 149 + * this means overwrites only, since an insert could cause a node to 150 + * be split. Useful if you need access to the old value to calculate the 151 + * new one. 152 + * 153 + * This only works with single level btrees. The given key must be present in 154 + * the tree, otherwise -EINVAL will be returned. 155 + */ 156 + int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, 157 + uint64_t key, int *index, 158 + dm_block_t *new_root, struct dm_block **leaf); 159 + 147 160 #endif /* DM_BTREE_INTERNAL_H */
+2 -2
drivers/md/persistent-data/dm-btree-remove.c
··· 544 544 545 545 if (info->value_type.dec) 546 546 info->value_type.dec(info->value_type.context, 547 - value_ptr(n, index)); 547 + value_ptr(n, index), 1); 548 548 549 549 delete_at(n, index); 550 550 } ··· 653 653 if (k >= keys[last_level] && k < end_key) { 654 654 if (info->value_type.dec) 655 655 info->value_type.dec(info->value_type.context, 656 - value_ptr(n, index)); 656 + value_ptr(n, index), 1); 657 657 658 658 delete_at(n, index); 659 659 keys[last_level] = k + 1ull;
+4 -12
drivers/md/persistent-data/dm-btree-spine.c
··· 236 236 return s->root; 237 237 } 238 238 239 - static void le64_inc(void *context, const void *value_le) 239 + static void le64_inc(void *context, const void *value_le, unsigned count) 240 240 { 241 - struct dm_transaction_manager *tm = context; 242 - __le64 v_le; 243 - 244 - memcpy(&v_le, value_le, sizeof(v_le)); 245 - dm_tm_inc(tm, le64_to_cpu(v_le)); 241 + dm_tm_with_runs(context, value_le, count, dm_tm_inc_range); 246 242 } 247 243 248 - static void le64_dec(void *context, const void *value_le) 244 + static void le64_dec(void *context, const void *value_le, unsigned count) 249 245 { 250 - struct dm_transaction_manager *tm = context; 251 - __le64 v_le; 252 - 253 - memcpy(&v_le, value_le, sizeof(v_le)); 254 - dm_tm_dec(tm, le64_to_cpu(v_le)); 246 + dm_tm_with_runs(context, value_le, count, dm_tm_dec_range); 255 247 } 256 248 257 249 static int le64_equal(void *context, const void *value1_le, const void *value2_le)
+78 -13
drivers/md/persistent-data/dm-btree.c
··· 71 71 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, 72 72 struct dm_btree_value_type *vt) 73 73 { 74 - unsigned i; 75 74 uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); 76 75 77 76 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) 78 - for (i = 0; i < nr_entries; i++) 79 - dm_tm_inc(tm, value64(n, i)); 77 + dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range); 78 + 80 79 else if (vt->inc) 81 - for (i = 0; i < nr_entries; i++) 82 - vt->inc(vt->context, value_ptr(n, i)); 80 + vt->inc(vt->context, value_ptr(n, 0), nr_entries); 83 81 } 84 82 85 83 static int insert_at(size_t value_size, struct btree_node *node, unsigned index, ··· 316 318 goto out; 317 319 318 320 } else { 319 - if (info->value_type.dec) { 320 - unsigned i; 321 - 322 - for (i = 0; i < f->nr_children; i++) 323 - info->value_type.dec(info->value_type.context, 324 - value_ptr(f->n, i)); 325 - } 321 + if (info->value_type.dec) 322 + info->value_type.dec(info->value_type.context, 323 + value_ptr(f->n, 0), f->nr_children); 326 324 pop_frame(s); 327 325 } 328 326 } ··· 1140 1146 return 0; 1141 1147 } 1142 1148 1149 + static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root, 1150 + uint64_t key, int *index) 1151 + { 1152 + int r, i = -1; 1153 + struct btree_node *node; 1154 + 1155 + *index = 0; 1156 + for (;;) { 1157 + r = shadow_step(s, root, &s->info->value_type); 1158 + if (r < 0) 1159 + return r; 1160 + 1161 + node = dm_block_data(shadow_current(s)); 1162 + 1163 + /* 1164 + * We have to patch up the parent node, ugly, but I don't 1165 + * see a way to do this automatically as part of the spine 1166 + * op. 
1167 + */ 1168 + if (shadow_has_parent(s) && i >= 0) { 1169 + __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); 1170 + 1171 + __dm_bless_for_disk(&location); 1172 + memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i), 1173 + &location, sizeof(__le64)); 1174 + } 1175 + 1176 + node = dm_block_data(shadow_current(s)); 1177 + i = lower_bound(node, key); 1178 + 1179 + BUG_ON(i < 0); 1180 + BUG_ON(i >= le32_to_cpu(node->header.nr_entries)); 1181 + 1182 + if (le32_to_cpu(node->header.flags) & LEAF_NODE) { 1183 + if (key != le64_to_cpu(node->keys[i])) 1184 + return -EINVAL; 1185 + break; 1186 + } 1187 + 1188 + root = value64(node, i); 1189 + } 1190 + 1191 + *index = i; 1192 + return 0; 1193 + } 1194 + 1195 + int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, 1196 + uint64_t key, int *index, 1197 + dm_block_t *new_root, struct dm_block **leaf) 1198 + { 1199 + int r; 1200 + struct shadow_spine spine; 1201 + 1202 + BUG_ON(info->levels > 1); 1203 + init_shadow_spine(&spine, info); 1204 + r = __btree_get_overwrite_leaf(&spine, root, key, index); 1205 + if (!r) { 1206 + *new_root = shadow_root(&spine); 1207 + *leaf = shadow_current(&spine); 1208 + 1209 + /* 1210 + * Decrement the count so exit_shadow_spine() doesn't 1211 + * unlock the leaf. 1212 + */ 1213 + spine.count--; 1214 + } 1215 + exit_shadow_spine(&spine); 1216 + 1217 + return r; 1218 + } 1219 + 1143 1220 static bool need_insert(struct btree_node *node, uint64_t *keys, 1144 1221 unsigned level, unsigned index) 1145 1222 { ··· 1287 1222 value_ptr(n, index), 1288 1223 value))) { 1289 1224 info->value_type.dec(info->value_type.context, 1290 - value_ptr(n, index)); 1225 + value_ptr(n, index), 1); 1291 1226 } 1292 1227 memcpy_disk(value_ptr(n, index), 1293 1228 value, info->value_type.size);
+5 -5
drivers/md/persistent-data/dm-btree.h
··· 51 51 */ 52 52 53 53 /* 54 - * The btree is making a duplicate of the value, for instance 54 + * The btree is making a duplicate of a run of values, for instance 55 55 * because previously-shared btree nodes have now diverged. 56 56 * @value argument is the new copy that the copy function may modify. 57 57 * (Probably it just wants to increment a reference count 58 58 * somewhere.) This method is _not_ called for insertion of a new 59 59 * value: It is assumed the ref count is already 1. 60 60 */ 61 - void (*inc)(void *context, const void *value); 61 + void (*inc)(void *context, const void *value, unsigned count); 62 62 63 63 /* 64 - * This value is being deleted. The btree takes care of freeing 64 + * These values are being deleted. The btree takes care of freeing 65 65 * the memory pointed to by @value. Often the del function just 66 - * needs to decrement a reference count somewhere. 66 + * needs to decrement a reference counts somewhere. 67 67 */ 68 - void (*dec)(void *context, const void *value); 68 + void (*dec)(void *context, const void *value, unsigned count); 69 69 70 70 /* 71 71 * A test for equality between two values. When a value is
+424 -28
drivers/md/persistent-data/dm-space-map-common.c
··· 6 6 7 7 #include "dm-space-map-common.h" 8 8 #include "dm-transaction-manager.h" 9 + #include "dm-btree-internal.h" 9 10 10 11 #include <linux/bitops.h> 11 12 #include <linux/device-mapper.h> ··· 410 409 return r; 411 410 } 412 411 413 - static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, 414 - int (*mutator)(void *context, uint32_t old, uint32_t *new), 415 - void *context, enum allocation_event *ev) 412 + /*----------------------------------------------------------------*/ 413 + 414 + int sm_ll_insert(struct ll_disk *ll, dm_block_t b, 415 + uint32_t ref_count, int32_t *nr_allocations) 416 416 { 417 417 int r; 418 - uint32_t bit, old, ref_count; 418 + uint32_t bit, old; 419 419 struct dm_block *nb; 420 420 dm_block_t index = b; 421 421 struct disk_index_entry ie_disk; ··· 435 433 return r; 436 434 } 437 435 ie_disk.blocknr = cpu_to_le64(dm_block_location(nb)); 438 - 439 436 bm_le = dm_bitmap_data(nb); 440 - old = sm_lookup_bitmap(bm_le, bit); 441 437 438 + old = sm_lookup_bitmap(bm_le, bit); 442 439 if (old > 2) { 443 440 r = sm_ll_lookup_big_ref_count(ll, b, &old); 444 441 if (r < 0) { ··· 446 445 } 447 446 } 448 447 449 - r = mutator(context, old, &ref_count); 450 448 if (r) { 451 449 dm_tm_unlock(ll->tm, nb); 452 450 return r; ··· 453 453 454 454 if (ref_count <= 2) { 455 455 sm_set_bitmap(bm_le, bit, ref_count); 456 - 457 456 dm_tm_unlock(ll->tm, nb); 458 457 459 458 if (old > 2) { ··· 479 480 } 480 481 481 482 if (ref_count && !old) { 482 - *ev = SM_ALLOC; 483 + *nr_allocations = 1; 483 484 ll->nr_allocated++; 484 485 le32_add_cpu(&ie_disk.nr_free, -1); 485 486 if (le32_to_cpu(ie_disk.none_free_before) == bit) 486 487 ie_disk.none_free_before = cpu_to_le32(bit + 1); 487 488 488 489 } else if (old && !ref_count) { 489 - *ev = SM_FREE; 490 + *nr_allocations = -1; 490 491 ll->nr_allocated--; 491 492 le32_add_cpu(&ie_disk.nr_free, 1); 492 493 ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); 493 494 } else 494 - *ev 
= SM_NONE; 495 + *nr_allocations = 0; 495 496 496 497 return ll->save_ie(ll, index, &ie_disk); 497 498 } 498 499 499 - static int set_ref_count(void *context, uint32_t old, uint32_t *new) 500 + /*----------------------------------------------------------------*/ 501 + 502 + /* 503 + * Holds useful intermediate results for the range based inc and dec 504 + * operations. 505 + */ 506 + struct inc_context { 507 + struct disk_index_entry ie_disk; 508 + struct dm_block *bitmap_block; 509 + void *bitmap; 510 + 511 + struct dm_block *overflow_leaf; 512 + }; 513 + 514 + static inline void init_inc_context(struct inc_context *ic) 500 515 { 501 - *new = *((uint32_t *) context); 502 - return 0; 516 + ic->bitmap_block = NULL; 517 + ic->bitmap = NULL; 518 + ic->overflow_leaf = NULL; 503 519 } 504 520 505 - int sm_ll_insert(struct ll_disk *ll, dm_block_t b, 506 - uint32_t ref_count, enum allocation_event *ev) 521 + static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic) 507 522 { 508 - return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev); 523 + if (ic->bitmap_block) 524 + dm_tm_unlock(ll->tm, ic->bitmap_block); 525 + if (ic->overflow_leaf) 526 + dm_tm_unlock(ll->tm, ic->overflow_leaf); 509 527 } 510 528 511 - static int inc_ref_count(void *context, uint32_t old, uint32_t *new) 529 + static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic) 512 530 { 513 - *new = old + 1; 514 - return 0; 531 + exit_inc_context(ll, ic); 532 + init_inc_context(ic); 515 533 } 516 534 517 - int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) 535 + /* 536 + * Confirms a btree node contains a particular key at an index. 
537 + */ 538 + static bool contains_key(struct btree_node *n, uint64_t key, int index) 518 539 { 519 - return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev); 540 + return index >= 0 && 541 + index < le32_to_cpu(n->header.nr_entries) && 542 + le64_to_cpu(n->keys[index]) == key; 520 543 } 521 544 522 - static int dec_ref_count(void *context, uint32_t old, uint32_t *new) 545 + static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) 523 546 { 524 - if (!old) { 525 - DMERR_LIMIT("unable to decrement a reference count below 0"); 547 + int r; 548 + int index; 549 + struct btree_node *n; 550 + __le32 *v_ptr; 551 + uint32_t rc; 552 + 553 + /* 554 + * bitmap_block needs to be unlocked because getting the 555 + * overflow_leaf may need to allocate, and thus use the space map. 556 + */ 557 + reset_inc_context(ll, ic); 558 + 559 + r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, 560 + b, &index, &ll->ref_count_root, &ic->overflow_leaf); 561 + if (r < 0) 562 + return r; 563 + 564 + n = dm_block_data(ic->overflow_leaf); 565 + 566 + if (!contains_key(n, b, index)) { 567 + DMERR("overflow btree is missing an entry"); 526 568 return -EINVAL; 527 569 } 528 570 529 - *new = old - 1; 571 + v_ptr = value_ptr(n, index); 572 + rc = le32_to_cpu(*v_ptr) + 1; 573 + *v_ptr = cpu_to_le32(rc); 574 + 530 575 return 0; 531 576 } 532 577 533 - int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) 578 + static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) 534 579 { 535 - return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev); 580 + int index; 581 + struct btree_node *n; 582 + __le32 *v_ptr; 583 + uint32_t rc; 584 + 585 + /* 586 + * Do we already have the correct overflow leaf? 
587 + */ 588 + if (ic->overflow_leaf) { 589 + n = dm_block_data(ic->overflow_leaf); 590 + index = lower_bound(n, b); 591 + if (contains_key(n, b, index)) { 592 + v_ptr = value_ptr(n, index); 593 + rc = le32_to_cpu(*v_ptr) + 1; 594 + *v_ptr = cpu_to_le32(rc); 595 + 596 + return 0; 597 + } 598 + } 599 + 600 + return __sm_ll_inc_overflow(ll, b, ic); 536 601 } 602 + 603 + static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic) 604 + { 605 + int r, inc; 606 + r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr), 607 + &dm_sm_bitmap_validator, &ic->bitmap_block, &inc); 608 + if (r < 0) { 609 + DMERR("dm_tm_shadow_block() failed"); 610 + return r; 611 + } 612 + ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block)); 613 + ic->bitmap = dm_bitmap_data(ic->bitmap_block); 614 + return 0; 615 + } 616 + 617 + /* 618 + * Once shadow_bitmap has been called, which always happens at the start of inc/dec, 619 + * we can reopen the bitmap with a simple write lock, rather than re calling 620 + * dm_tm_shadow_block(). 621 + */ 622 + static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic) 623 + { 624 + if (!ic->bitmap_block) { 625 + int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr), 626 + &dm_sm_bitmap_validator, &ic->bitmap_block); 627 + if (r) { 628 + DMERR("unable to re-get write lock for bitmap"); 629 + return r; 630 + } 631 + ic->bitmap = dm_bitmap_data(ic->bitmap_block); 632 + } 633 + 634 + return 0; 635 + } 636 + 637 + /* 638 + * Loops round incrementing entries in a single bitmap. 
639 + */ 640 + static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b, 641 + uint32_t bit, uint32_t bit_end, 642 + int32_t *nr_allocations, dm_block_t *new_b, 643 + struct inc_context *ic) 644 + { 645 + int r; 646 + __le32 le_rc; 647 + uint32_t old; 648 + 649 + for (; bit != bit_end; bit++, b++) { 650 + /* 651 + * We only need to drop the bitmap if we need to find a new btree 652 + * leaf for the overflow. So if it was dropped last iteration, 653 + * we now re-get it. 654 + */ 655 + r = ensure_bitmap(ll, ic); 656 + if (r) 657 + return r; 658 + 659 + old = sm_lookup_bitmap(ic->bitmap, bit); 660 + switch (old) { 661 + case 0: 662 + /* inc bitmap, adjust nr_allocated */ 663 + sm_set_bitmap(ic->bitmap, bit, 1); 664 + (*nr_allocations)++; 665 + ll->nr_allocated++; 666 + le32_add_cpu(&ic->ie_disk.nr_free, -1); 667 + if (le32_to_cpu(ic->ie_disk.none_free_before) == bit) 668 + ic->ie_disk.none_free_before = cpu_to_le32(bit + 1); 669 + break; 670 + 671 + case 1: 672 + /* inc bitmap */ 673 + sm_set_bitmap(ic->bitmap, bit, 2); 674 + break; 675 + 676 + case 2: 677 + /* inc bitmap and insert into overflow */ 678 + sm_set_bitmap(ic->bitmap, bit, 3); 679 + reset_inc_context(ll, ic); 680 + 681 + le_rc = cpu_to_le32(3); 682 + __dm_bless_for_disk(&le_rc); 683 + r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root, 684 + &b, &le_rc, &ll->ref_count_root); 685 + if (r < 0) { 686 + DMERR("ref count insert failed"); 687 + return r; 688 + } 689 + break; 690 + 691 + default: 692 + /* 693 + * inc within the overflow tree only. 694 + */ 695 + r = sm_ll_inc_overflow(ll, b, ic); 696 + if (r < 0) 697 + return r; 698 + } 699 + } 700 + 701 + *new_b = b; 702 + return 0; 703 + } 704 + 705 + /* 706 + * Finds a bitmap that contains entries in the block range, and increments 707 + * them. 
708 + */ 709 + static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, 710 + int32_t *nr_allocations, dm_block_t *new_b) 711 + { 712 + int r; 713 + struct inc_context ic; 714 + uint32_t bit, bit_end; 715 + dm_block_t index = b; 716 + 717 + init_inc_context(&ic); 718 + 719 + bit = do_div(index, ll->entries_per_block); 720 + r = ll->load_ie(ll, index, &ic.ie_disk); 721 + if (r < 0) 722 + return r; 723 + 724 + r = shadow_bitmap(ll, &ic); 725 + if (r) 726 + return r; 727 + 728 + bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); 729 + r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic); 730 + 731 + exit_inc_context(ll, &ic); 732 + 733 + if (r) 734 + return r; 735 + 736 + return ll->save_ie(ll, index, &ic.ie_disk); 737 + } 738 + 739 + int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, 740 + int32_t *nr_allocations) 741 + { 742 + *nr_allocations = 0; 743 + while (b != e) { 744 + int r = __sm_ll_inc(ll, b, e, nr_allocations, &b); 745 + if (r) 746 + return r; 747 + } 748 + 749 + return 0; 750 + } 751 + 752 + /*----------------------------------------------------------------*/ 753 + 754 + static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b, 755 + struct inc_context *ic) 756 + { 757 + reset_inc_context(ll, ic); 758 + return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root, 759 + &b, &ll->ref_count_root); 760 + } 761 + 762 + static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, 763 + struct inc_context *ic, uint32_t *old_rc) 764 + { 765 + int r; 766 + int index = -1; 767 + struct btree_node *n; 768 + __le32 *v_ptr; 769 + uint32_t rc; 770 + 771 + reset_inc_context(ll, ic); 772 + r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, 773 + b, &index, &ll->ref_count_root, &ic->overflow_leaf); 774 + if (r < 0) 775 + return r; 776 + 777 + n = dm_block_data(ic->overflow_leaf); 778 + 779 + if (!contains_key(n, b, index)) { 780 + DMERR("overflow btree is missing an entry"); 781 + 
return -EINVAL; 782 + } 783 + 784 + v_ptr = value_ptr(n, index); 785 + rc = le32_to_cpu(*v_ptr); 786 + *old_rc = rc; 787 + 788 + if (rc == 3) { 789 + return __sm_ll_del_overflow(ll, b, ic); 790 + } else { 791 + rc--; 792 + *v_ptr = cpu_to_le32(rc); 793 + return 0; 794 + } 795 + } 796 + 797 + static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, 798 + struct inc_context *ic, uint32_t *old_rc) 799 + { 800 + /* 801 + * Do we already have the correct overflow leaf? 802 + */ 803 + if (ic->overflow_leaf) { 804 + int index; 805 + struct btree_node *n; 806 + __le32 *v_ptr; 807 + uint32_t rc; 808 + 809 + n = dm_block_data(ic->overflow_leaf); 810 + index = lower_bound(n, b); 811 + if (contains_key(n, b, index)) { 812 + v_ptr = value_ptr(n, index); 813 + rc = le32_to_cpu(*v_ptr); 814 + *old_rc = rc; 815 + 816 + if (rc > 3) { 817 + rc--; 818 + *v_ptr = cpu_to_le32(rc); 819 + return 0; 820 + } else { 821 + return __sm_ll_del_overflow(ll, b, ic); 822 + } 823 + 824 + } 825 + } 826 + 827 + return __sm_ll_dec_overflow(ll, b, ic, old_rc); 828 + } 829 + 830 + /* 831 + * Loops round incrementing entries in a single bitmap. 832 + */ 833 + static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b, 834 + uint32_t bit, uint32_t bit_end, 835 + struct inc_context *ic, 836 + int32_t *nr_allocations, dm_block_t *new_b) 837 + { 838 + int r; 839 + uint32_t old; 840 + 841 + for (; bit != bit_end; bit++, b++) { 842 + /* 843 + * We only need to drop the bitmap if we need to find a new btree 844 + * leaf for the overflow. So if it was dropped last iteration, 845 + * we now re-get it. 
846 + */ 847 + r = ensure_bitmap(ll, ic); 848 + if (r) 849 + return r; 850 + 851 + old = sm_lookup_bitmap(ic->bitmap, bit); 852 + switch (old) { 853 + case 0: 854 + DMERR("unable to decrement block"); 855 + return -EINVAL; 856 + 857 + case 1: 858 + /* dec bitmap */ 859 + sm_set_bitmap(ic->bitmap, bit, 0); 860 + (*nr_allocations)--; 861 + ll->nr_allocated--; 862 + le32_add_cpu(&ic->ie_disk.nr_free, 1); 863 + ic->ie_disk.none_free_before = 864 + cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit)); 865 + break; 866 + 867 + case 2: 868 + /* dec bitmap and insert into overflow */ 869 + sm_set_bitmap(ic->bitmap, bit, 1); 870 + break; 871 + 872 + case 3: 873 + r = sm_ll_dec_overflow(ll, b, ic, &old); 874 + if (r < 0) 875 + return r; 876 + 877 + if (old == 3) { 878 + r = ensure_bitmap(ll, ic); 879 + if (r) 880 + return r; 881 + 882 + sm_set_bitmap(ic->bitmap, bit, 2); 883 + } 884 + break; 885 + } 886 + } 887 + 888 + *new_b = b; 889 + return 0; 890 + } 891 + 892 + static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, 893 + int32_t *nr_allocations, dm_block_t *new_b) 894 + { 895 + int r; 896 + uint32_t bit, bit_end; 897 + struct inc_context ic; 898 + dm_block_t index = b; 899 + 900 + init_inc_context(&ic); 901 + 902 + bit = do_div(index, ll->entries_per_block); 903 + r = ll->load_ie(ll, index, &ic.ie_disk); 904 + if (r < 0) 905 + return r; 906 + 907 + r = shadow_bitmap(ll, &ic); 908 + if (r) 909 + return r; 910 + 911 + bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); 912 + r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b); 913 + exit_inc_context(ll, &ic); 914 + 915 + if (r) 916 + return r; 917 + 918 + return ll->save_ie(ll, index, &ic.ie_disk); 919 + } 920 + 921 + int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, 922 + int32_t *nr_allocations) 923 + { 924 + *nr_allocations = 0; 925 + while (b != e) { 926 + int r = __sm_ll_dec(ll, b, e, nr_allocations, &b); 927 + if (r) 928 + return r; 929 + } 930 
+ 931 + return 0; 932 + } 933 + 934 + /*----------------------------------------------------------------*/ 537 935 538 936 int sm_ll_commit(struct ll_disk *ll) 539 937 {
+9 -9
drivers/md/persistent-data/dm-space-map-common.h
··· 96 96 __le64 blocknr; 97 97 } __attribute__ ((packed, aligned(8))); 98 98 99 - enum allocation_event { 100 - SM_NONE, 101 - SM_ALLOC, 102 - SM_FREE, 103 - }; 104 - 105 99 /*----------------------------------------------------------------*/ 106 100 107 101 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks); ··· 105 111 dm_block_t end, dm_block_t *result); 106 112 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, 107 113 dm_block_t begin, dm_block_t end, dm_block_t *result); 108 - int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); 109 - int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); 110 - int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); 114 + 115 + /* 116 + * The next three functions return (via nr_allocations) the net number of 117 + * allocations that were made. This number may be negative if there were 118 + * more frees than allocs. 119 + */ 120 + int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations); 121 + int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations); 122 + int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations); 111 123 int sm_ll_commit(struct ll_disk *ll); 112 124 113 125 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
+18 -56
drivers/md/persistent-data/dm-space-map-disk.c
··· 87 87 uint32_t count) 88 88 { 89 89 int r; 90 - uint32_t old_count; 91 - enum allocation_event ev; 90 + int32_t nr_allocations; 92 91 struct sm_disk *smd = container_of(sm, struct sm_disk, sm); 93 92 94 - r = sm_ll_insert(&smd->ll, b, count, &ev); 93 + r = sm_ll_insert(&smd->ll, b, count, &nr_allocations); 95 94 if (!r) { 96 - switch (ev) { 97 - case SM_NONE: 98 - break; 99 - 100 - case SM_ALLOC: 101 - /* 102 - * This _must_ be free in the prior transaction 103 - * otherwise we've lost atomicity. 104 - */ 105 - smd->nr_allocated_this_transaction++; 106 - break; 107 - 108 - case SM_FREE: 109 - /* 110 - * It's only free if it's also free in the last 111 - * transaction. 112 - */ 113 - r = sm_ll_lookup(&smd->old_ll, b, &old_count); 114 - if (r) 115 - return r; 116 - 117 - if (!old_count) 118 - smd->nr_allocated_this_transaction--; 119 - break; 120 - } 95 + smd->nr_allocated_this_transaction += nr_allocations; 121 96 } 122 97 123 98 return r; 124 99 } 125 100 126 - static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) 101 + static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 127 102 { 128 103 int r; 129 - enum allocation_event ev; 104 + int32_t nr_allocations; 130 105 struct sm_disk *smd = container_of(sm, struct sm_disk, sm); 131 106 132 - r = sm_ll_inc(&smd->ll, b, &ev); 133 - if (!r && (ev == SM_ALLOC)) 134 - /* 135 - * This _must_ be free in the prior transaction 136 - * otherwise we've lost atomicity. 
137 - */ 138 - smd->nr_allocated_this_transaction++; 107 + r = sm_ll_inc(&smd->ll, b, e, &nr_allocations); 108 + if (!r) 109 + smd->nr_allocated_this_transaction += nr_allocations; 139 110 140 111 return r; 141 112 } 142 113 143 - static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) 114 + static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 144 115 { 145 116 int r; 146 - uint32_t old_count; 147 - enum allocation_event ev; 117 + int32_t nr_allocations; 148 118 struct sm_disk *smd = container_of(sm, struct sm_disk, sm); 149 119 150 - r = sm_ll_dec(&smd->ll, b, &ev); 151 - if (!r && (ev == SM_FREE)) { 152 - /* 153 - * It's only free if it's also free in the last 154 - * transaction. 155 - */ 156 - r = sm_ll_lookup(&smd->old_ll, b, &old_count); 157 - if (!r && !old_count) 158 - smd->nr_allocated_this_transaction--; 159 - } 120 + r = sm_ll_dec(&smd->ll, b, e, &nr_allocations); 121 + if (!r) 122 + smd->nr_allocated_this_transaction += nr_allocations; 160 123 161 124 return r; 162 125 } ··· 127 164 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) 128 165 { 129 166 int r; 130 - enum allocation_event ev; 167 + int32_t nr_allocations; 131 168 struct sm_disk *smd = container_of(sm, struct sm_disk, sm); 132 169 133 170 /* ··· 146 183 return r; 147 184 148 185 smd->begin = *b + 1; 149 - r = sm_ll_inc(&smd->ll, *b, &ev); 186 + r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations); 150 187 if (!r) { 151 - BUG_ON(ev != SM_ALLOC); 152 - smd->nr_allocated_this_transaction++; 188 + smd->nr_allocated_this_transaction += nr_allocations; 153 189 } 154 190 155 191 return r; ··· 204 242 .get_count = sm_disk_get_count, 205 243 .count_is_more_than_one = sm_disk_count_is_more_than_one, 206 244 .set_count = sm_disk_set_count, 207 - .inc_block = sm_disk_inc_block, 208 - .dec_block = sm_disk_dec_block, 245 + .inc_blocks = sm_disk_inc_blocks, 246 + .dec_blocks = sm_disk_dec_blocks, 209 247 .new_block = sm_disk_new_block, 210 248 
.commit = sm_disk_commit, 211 249 .root_size = sm_disk_root_size,
+51 -45
drivers/md/persistent-data/dm-space-map-metadata.c
··· 89 89 90 90 struct block_op { 91 91 enum block_op_type type; 92 - dm_block_t block; 92 + dm_block_t b; 93 + dm_block_t e; 93 94 }; 94 95 95 96 struct bop_ring_buffer { ··· 117 116 } 118 117 119 118 static int brb_push(struct bop_ring_buffer *brb, 120 - enum block_op_type type, dm_block_t b) 119 + enum block_op_type type, dm_block_t b, dm_block_t e) 121 120 { 122 121 struct block_op *bop; 123 122 unsigned next = brb_next(brb, brb->end); ··· 131 130 132 131 bop = brb->bops + brb->end; 133 132 bop->type = type; 134 - bop->block = b; 133 + bop->b = b; 134 + bop->e = e; 135 135 136 136 brb->end = next; 137 137 ··· 147 145 return -ENODATA; 148 146 149 147 bop = brb->bops + brb->begin; 150 - result->type = bop->type; 151 - result->block = bop->block; 152 - 148 + memcpy(result, bop, sizeof(*result)); 153 149 return 0; 154 150 } 155 151 ··· 178 178 struct threshold threshold; 179 179 }; 180 180 181 - static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) 181 + static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e) 182 182 { 183 - int r = brb_push(&smm->uncommitted, type, b); 184 - 183 + int r = brb_push(&smm->uncommitted, type, b, e); 185 184 if (r) { 186 185 DMERR("too many recursive allocations"); 187 186 return -ENOMEM; ··· 192 193 static int commit_bop(struct sm_metadata *smm, struct block_op *op) 193 194 { 194 195 int r = 0; 195 - enum allocation_event ev; 196 + int32_t nr_allocations; 196 197 197 198 switch (op->type) { 198 199 case BOP_INC: 199 - r = sm_ll_inc(&smm->ll, op->block, &ev); 200 + r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations); 200 201 break; 201 202 202 203 case BOP_DEC: 203 - r = sm_ll_dec(&smm->ll, op->block, &ev); 204 + r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations); 204 205 break; 205 206 } 206 207 ··· 313 314 i = brb_next(&smm->uncommitted, i)) { 314 315 struct block_op *op = smm->uncommitted.bops + i; 315 316 316 - if (op->block != b) 317 + if (b < op->b || b 
>= op->e) 317 318 continue; 318 319 319 320 switch (op->type) { ··· 354 355 355 356 struct block_op *op = smm->uncommitted.bops + i; 356 357 357 - if (op->block != b) 358 + if (b < op->b || b >= op->e) 358 359 continue; 359 360 360 361 switch (op->type) { ··· 392 393 uint32_t count) 393 394 { 394 395 int r, r2; 395 - enum allocation_event ev; 396 + int32_t nr_allocations; 396 397 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 397 398 398 399 if (smm->recursion_count) { ··· 401 402 } 402 403 403 404 in(smm); 404 - r = sm_ll_insert(&smm->ll, b, count, &ev); 405 + r = sm_ll_insert(&smm->ll, b, count, &nr_allocations); 405 406 r2 = out(smm); 406 407 407 408 return combine_errors(r, r2); 408 409 } 409 410 410 - static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) 411 + static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 411 412 { 412 413 int r, r2 = 0; 413 - enum allocation_event ev; 414 + int32_t nr_allocations; 414 415 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 415 416 416 - if (recursing(smm)) 417 - r = add_bop(smm, BOP_INC, b); 418 - else { 417 + if (recursing(smm)) { 418 + r = add_bop(smm, BOP_INC, b, e); 419 + if (r) 420 + return r; 421 + } else { 419 422 in(smm); 420 - r = sm_ll_inc(&smm->ll, b, &ev); 423 + r = sm_ll_inc(&smm->ll, b, e, &nr_allocations); 421 424 r2 = out(smm); 422 425 } 423 426 424 427 return combine_errors(r, r2); 425 428 } 426 429 427 - static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) 430 + static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 428 431 { 429 432 int r, r2 = 0; 430 - enum allocation_event ev; 433 + int32_t nr_allocations; 431 434 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 432 435 433 436 if (recursing(smm)) 434 - r = add_bop(smm, BOP_DEC, b); 437 + r = add_bop(smm, BOP_DEC, b, e); 435 438 else { 436 439 in(smm); 437 - r = sm_ll_dec(&smm->ll, b, &ev); 
440 + r = sm_ll_dec(&smm->ll, b, e, &nr_allocations); 438 441 r2 = out(smm); 439 442 } 440 443 ··· 446 445 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 447 446 { 448 447 int r, r2 = 0; 449 - enum allocation_event ev; 448 + int32_t nr_allocations; 450 449 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 451 450 452 451 /* ··· 467 466 smm->begin = *b + 1; 468 467 469 468 if (recursing(smm)) 470 - r = add_bop(smm, BOP_INC, *b); 469 + r = add_bop(smm, BOP_INC, *b, *b + 1); 471 470 else { 472 471 in(smm); 473 - r = sm_ll_inc(&smm->ll, *b, &ev); 472 + r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations); 474 473 r2 = out(smm); 475 474 } 476 475 ··· 564 563 .get_count = sm_metadata_get_count, 565 564 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 566 565 .set_count = sm_metadata_set_count, 567 - .inc_block = sm_metadata_inc_block, 568 - .dec_block = sm_metadata_dec_block, 566 + .inc_blocks = sm_metadata_inc_blocks, 567 + .dec_blocks = sm_metadata_dec_blocks, 569 568 .new_block = sm_metadata_new_block, 570 569 .commit = sm_metadata_commit, 571 570 .root_size = sm_metadata_root_size, ··· 649 648 return 0; 650 649 } 651 650 652 - static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) 651 + static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 653 652 { 653 + int r; 654 654 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 655 655 656 - return add_bop(smm, BOP_INC, b); 656 + r = add_bop(smm, BOP_INC, b, e); 657 + if (r) 658 + return r; 659 + 660 + return 0; 657 661 } 658 662 659 - static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) 663 + static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 660 664 { 665 + int r; 661 666 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 662 667 663 - return add_bop(smm, BOP_DEC, b); 668 + r = add_bop(smm, BOP_DEC, b, e); 669 + if (r) 670 + 
return r; 671 + 672 + return 0; 664 673 } 665 674 666 675 static int sm_bootstrap_commit(struct dm_space_map *sm) ··· 701 690 .get_count = sm_bootstrap_get_count, 702 691 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 703 692 .set_count = sm_bootstrap_set_count, 704 - .inc_block = sm_bootstrap_inc_block, 705 - .dec_block = sm_bootstrap_dec_block, 693 + .inc_blocks = sm_bootstrap_inc_blocks, 694 + .dec_blocks = sm_bootstrap_dec_blocks, 706 695 .new_block = sm_bootstrap_new_block, 707 696 .commit = sm_bootstrap_commit, 708 697 .root_size = sm_bootstrap_root_size, ··· 714 703 715 704 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 716 705 { 717 - int r, i; 706 + int r; 718 707 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 719 708 dm_block_t old_len = smm->ll.nr_blocks; 720 709 ··· 736 725 * allocate any new blocks. 737 726 */ 738 727 do { 739 - for (i = old_len; !r && i < smm->begin; i++) 740 - r = add_bop(smm, BOP_INC, i); 741 - 728 + r = add_bop(smm, BOP_INC, old_len, smm->begin); 742 729 if (r) 743 730 goto out; 744 731 ··· 783 774 dm_block_t superblock) 784 775 { 785 776 int r; 786 - dm_block_t i; 787 777 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 788 778 789 779 smm->begin = superblock + 1; ··· 807 799 * Now we need to update the newly created data structures with the 808 800 * allocated blocks that they were built from. 809 801 */ 810 - for (i = superblock; !r && i < smm->begin; i++) 811 - r = add_bop(smm, BOP_INC, i); 812 - 802 + r = add_bop(smm, BOP_INC, superblock, smm->begin); 813 803 if (r) 814 804 return r; 815 805
+14 -4
drivers/md/persistent-data/dm-space-map.h
··· 46 46 47 47 int (*commit)(struct dm_space_map *sm); 48 48 49 - int (*inc_block)(struct dm_space_map *sm, dm_block_t b); 50 - int (*dec_block)(struct dm_space_map *sm, dm_block_t b); 49 + int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e); 50 + int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e); 51 51 52 52 /* 53 53 * new_block will increment the returned block. ··· 117 117 return sm->commit(sm); 118 118 } 119 119 120 + static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 121 + { 122 + return sm->inc_blocks(sm, b, e); 123 + } 124 + 120 125 static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b) 121 126 { 122 - return sm->inc_block(sm, b); 127 + return dm_sm_inc_blocks(sm, b, b + 1); 128 + } 129 + 130 + static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 131 + { 132 + return sm->dec_blocks(sm, b, e); 123 133 } 124 134 125 135 static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b) 126 136 { 127 - return sm->dec_block(sm, b); 137 + return dm_sm_dec_blocks(sm, b, b + 1); 128 138 } 129 139 130 140 static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
+52
drivers/md/persistent-data/dm-transaction-manager.c
··· 359 359 } 360 360 EXPORT_SYMBOL_GPL(dm_tm_inc); 361 361 362 + void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e) 363 + { 364 + /* 365 + * The non-blocking clone doesn't support this. 366 + */ 367 + BUG_ON(tm->is_clone); 368 + 369 + dm_sm_inc_blocks(tm->sm, b, e); 370 + } 371 + EXPORT_SYMBOL_GPL(dm_tm_inc_range); 372 + 362 373 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b) 363 374 { 364 375 /* ··· 380 369 dm_sm_dec_block(tm->sm, b); 381 370 } 382 371 EXPORT_SYMBOL_GPL(dm_tm_dec); 372 + 373 + void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e) 374 + { 375 + /* 376 + * The non-blocking clone doesn't support this. 377 + */ 378 + BUG_ON(tm->is_clone); 379 + 380 + dm_sm_dec_blocks(tm->sm, b, e); 381 + } 382 + EXPORT_SYMBOL_GPL(dm_tm_dec_range); 383 + 384 + void dm_tm_with_runs(struct dm_transaction_manager *tm, 385 + const __le64 *value_le, unsigned count, dm_tm_run_fn fn) 386 + { 387 + uint64_t b, begin, end; 388 + bool in_run = false; 389 + unsigned i; 390 + 391 + for (i = 0; i < count; i++, value_le++) { 392 + b = le64_to_cpu(*value_le); 393 + 394 + if (in_run) { 395 + if (b == end) 396 + end++; 397 + else { 398 + fn(tm, begin, end); 399 + begin = b; 400 + end = b + 1; 401 + } 402 + } else { 403 + in_run = true; 404 + begin = b; 405 + end = b + 1; 406 + } 407 + } 408 + 409 + if (in_run) 410 + fn(tm, begin, end); 411 + } 412 + EXPORT_SYMBOL_GPL(dm_tm_with_runs); 383 413 384 414 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, 385 415 uint32_t *result)
+11 -1
drivers/md/persistent-data/dm-transaction-manager.h
··· 100 100 * Functions for altering the reference count of a block directly. 101 101 */ 102 102 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b); 103 - 103 + void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e); 104 104 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b); 105 + void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e); 106 + 107 + /* 108 + * Builds up runs of adjacent blocks, and then calls the given fn 109 + * (typically dm_tm_inc/dec). Very useful when you have to perform 110 + * the same tm operation on all values in a btree leaf. 111 + */ 112 + typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t); 113 + void dm_tm_with_runs(struct dm_transaction_manager *tm, 114 + const __le64 *value_le, unsigned count, dm_tm_run_fn fn); 105 115 106 116 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result); 107 117