Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm snapshot: track snapshot reads

Whenever a snapshot read gets mapped through to the origin, track it in
a per-snapshot hash table indexed by chunk number, using memory allocated
from a new per-snapshot mempool.

We need to track these reads to avoid race conditions, which will be fixed
by patches that follow.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Authored by Mikulas Patocka and committed by Alasdair G Kergon.
cd45daff def052d2

+106 -10
+97 -10
drivers/md/dm-snap.c
··· 40 40 */ 41 41 #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 42 42 43 + /* 44 + * The size of the mempool used to track chunks in use. 45 + */ 46 + #define MIN_IOS 256 47 + 43 48 static struct workqueue_struct *ksnapd; 44 49 static void flush_queued_bios(struct work_struct *work); 45 50 ··· 97 92 static struct kmem_cache *exception_cache; 98 93 static struct kmem_cache *pending_cache; 99 94 static mempool_t *pending_pool; 95 + 96 + struct dm_snap_tracked_chunk { 97 + struct hlist_node node; 98 + chunk_t chunk; 99 + }; 100 + 101 + static struct kmem_cache *tracked_chunk_cache; 102 + 103 + static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 104 + chunk_t chunk) 105 + { 106 + struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 107 + GFP_NOIO); 108 + unsigned long flags; 109 + 110 + c->chunk = chunk; 111 + 112 + spin_lock_irqsave(&s->tracked_chunk_lock, flags); 113 + hlist_add_head(&c->node, 114 + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 115 + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 116 + 117 + return c; 118 + } 119 + 120 + static void stop_tracking_chunk(struct dm_snapshot *s, 121 + struct dm_snap_tracked_chunk *c) 122 + { 123 + unsigned long flags; 124 + 125 + spin_lock_irqsave(&s->tracked_chunk_lock, flags); 126 + hlist_del(&c->node); 127 + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 128 + 129 + mempool_free(c, s->tracked_chunk_pool); 130 + } 100 131 101 132 /* 102 133 * One of these per registered origin, held in the snapshot_origins hash ··· 523 482 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 524 483 { 525 484 struct dm_snapshot *s; 485 + int i; 526 486 int r = -EINVAL; 527 487 char persistent; 528 488 char *origin_path; ··· 606 564 goto bad5; 607 565 } 608 566 567 + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 568 + tracked_chunk_cache); 569 + if (!s->tracked_chunk_pool) {
570 + ti->error = "Could not allocate tracked_chunk mempool for " 571 + "tracking reads"; 572 + goto bad6; 573 + } 574 + 575 + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 576 + INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 577 + 578 + spin_lock_init(&s->tracked_chunk_lock); 579 + 609 580 /* Metadata must only be loaded into one table at once */ 610 581 r = s->store.read_metadata(&s->store); 611 582 if (r < 0) { 612 583 ti->error = "Failed to read snapshot metadata"; 613 - goto bad6; 584 + goto bad_load_and_register; 614 585 } else if (r > 0) { 615 586 s->valid = 0; 616 587 DMWARN("Snapshot is marked invalid."); ··· 637 582 if (register_snapshot(s)) { 638 583 r = -EINVAL; 639 584 ti->error = "Cannot register snapshot origin"; 640 - goto bad6; 585 + goto bad_load_and_register; 641 586 } 642 587 643 588 ti->private = s; 644 589 ti->split_io = s->chunk_size; 645 590 646 591 return 0; 592 + 593 + bad_load_and_register: 594 + mempool_destroy(s->tracked_chunk_pool); 647 595 648 596 bad6: 649 597 dm_kcopyd_client_destroy(s->kcopyd_client); ··· 682 624 683 625 static void snapshot_dtr(struct dm_target *ti) 684 626 { 627 + #ifdef CONFIG_DM_DEBUG 628 + int i; 629 + #endif 685 630 struct dm_snapshot *s = ti->private; 686 631 687 632 flush_workqueue(ksnapd); ··· 692 631 /* Prevent further origin writes from using this snapshot. */ 693 632 /* After this returns there can be no new kcopyd jobs. */ 694 633 unregister_snapshot(s); 634 + 635 + #ifdef CONFIG_DM_DEBUG 636 + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 637 + BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 638 + #endif 639 + 640 + mempool_destroy(s->tracked_chunk_pool); 695 641 696 642 __free_exceptions(s); 697 643 ··· 1042 974 start_copy(pe); 1043 975 goto out; 1044 976 } 1045 - } else 1046 - /* 1047 - * FIXME: this read path scares me because we 1048 - * always use the origin when we have a pending 1049 - * exception. However I can't think of a 1050 - * situation where this is wrong - ejt.
1051 - */ 977 + } else { 1052 978 bio->bi_bdev = s->origin->bdev; 979 + map_context->ptr = track_chunk(s, chunk); 980 + } 1053 981 1054 982 out_unlock: 1055 983 up_write(&s->lock); 1056 984 out: 1057 985 return r; 986 + } 987 + 988 + static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 989 + int error, union map_info *map_context) 990 + { 991 + struct dm_snapshot *s = ti->private; 992 + struct dm_snap_tracked_chunk *c = map_context->ptr; 993 + 994 + if (c) 995 + stop_tracking_chunk(s, c); 996 + 997 + return 0; 1058 998 } 1059 999 1060 1000 static void snapshot_resume(struct dm_target *ti) ··· 1342 1266 .ctr = snapshot_ctr, 1343 1267 .dtr = snapshot_dtr, 1344 1268 .map = snapshot_map, 1269 + .end_io = snapshot_end_io, 1345 1270 .resume = snapshot_resume, 1346 1271 .status = snapshot_status, 1347 1272 }; ··· 1383 1306 goto bad4; 1384 1307 } 1385 1308 1309 + tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 1310 + if (!tracked_chunk_cache) { 1311 + DMERR("Couldn't create cache to track chunks in use."); 1312 + r = -ENOMEM; 1313 + goto bad5; 1314 + } 1315 + 1386 1316 pending_pool = mempool_create_slab_pool(128, pending_cache); 1387 1317 if (!pending_pool) { 1388 1318 DMERR("Couldn't create pending pool."); 1389 1319 r = -ENOMEM; 1390 - goto bad5; 1320 + goto bad_pending_pool; 1391 1321 } 1392 1322 1393 1323 ksnapd = create_singlethread_workqueue("ksnapd"); ··· 1408 1324 1409 1325 bad6: 1410 1326 mempool_destroy(pending_pool); 1327 + bad_pending_pool: 1328 + kmem_cache_destroy(tracked_chunk_cache); 1411 1329 bad5: 1412 1330 kmem_cache_destroy(pending_cache); 1413 1331 bad4: ··· 1441 1355 mempool_destroy(pending_pool); 1442 1356 kmem_cache_destroy(pending_cache); 1443 1357 kmem_cache_destroy(exception_cache); 1358 + kmem_cache_destroy(tracked_chunk_cache); 1444 1359 } 1445 1360 1446 1361 /* Module hooks */
+9
drivers/md/dm-snap.h
··· 130 130 void *context; 131 131 }; 132 132 133 + #define DM_TRACKED_CHUNK_HASH_SIZE 16 134 + #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 135 + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 136 + 133 137 struct dm_snapshot { 134 138 struct rw_semaphore lock; 135 139 struct dm_target *ti; ··· 178 174 /* Queue of snapshot writes for ksnapd to flush */ 179 175 struct bio_list queued_bios; 180 176 struct work_struct queued_bios_work; 177 + 178 + /* Chunks with outstanding reads */ 179 + mempool_t *tracked_chunk_pool; 180 + spinlock_t tracked_chunk_lock; 181 + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 181 182 }; 182 183 183 184 /*