Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
dm crypt: add merge
dm table: remove merge_bvec sector restriction
dm: linear add merge
dm: introduce merge_bvec_fn
dm snapshot: use per device mempools
dm snapshot: fix race during exception creation
dm snapshot: track snapshot reads
dm mpath: fix test for reinstate_path
dm mpath: return parameter error
dm io: remove struct padding
dm log: make dm_dirty_log init and exit static
dm mpath: free path selector on invalid args

+270 -49
+17 -1
drivers/md/dm-crypt.c
··· 1216 1216 return -EINVAL; 1217 1217 } 1218 1218 1219 + static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 1220 + struct bio_vec *biovec, int max_size) 1221 + { 1222 + struct crypt_config *cc = ti->private; 1223 + struct request_queue *q = bdev_get_queue(cc->dev->bdev); 1224 + 1225 + if (!q->merge_bvec_fn) 1226 + return max_size; 1227 + 1228 + bvm->bi_bdev = cc->dev->bdev; 1229 + bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; 1230 + 1231 + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 1232 + } 1233 + 1219 1234 static struct target_type crypt_target = { 1220 1235 .name = "crypt", 1221 - .version= {1, 5, 0}, 1236 + .version= {1, 6, 0}, 1222 1237 .module = THIS_MODULE, 1223 1238 .ctr = crypt_ctr, 1224 1239 .dtr = crypt_dtr, ··· 1243 1228 .preresume = crypt_preresume, 1244 1229 .resume = crypt_resume, 1245 1230 .message = crypt_message, 1231 + .merge = crypt_merge, 1246 1232 }; 1247 1233 1248 1234 static int __init dm_crypt_init(void)
+33 -5
drivers/md/dm-linear.c
··· 69 69 kfree(lc); 70 70 } 71 71 72 + static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) 73 + { 74 + struct linear_c *lc = ti->private; 75 + 76 + return lc->start + (bi_sector - ti->begin); 77 + } 78 + 79 + static void linear_map_bio(struct dm_target *ti, struct bio *bio) 80 + { 81 + struct linear_c *lc = ti->private; 82 + 83 + bio->bi_bdev = lc->dev->bdev; 84 + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); 85 + } 86 + 72 87 static int linear_map(struct dm_target *ti, struct bio *bio, 73 88 union map_info *map_context) 74 89 { 75 - struct linear_c *lc = (struct linear_c *) ti->private; 76 - 77 - bio->bi_bdev = lc->dev->bdev; 78 - bio->bi_sector = lc->start + (bio->bi_sector - ti->begin); 90 + linear_map_bio(ti, bio); 79 91 80 92 return DM_MAPIO_REMAPPED; 81 93 } ··· 126 114 return blkdev_driver_ioctl(bdev->bd_inode, &fake_file, bdev->bd_disk, cmd, arg); 127 115 } 128 116 117 + static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 118 + struct bio_vec *biovec, int max_size) 119 + { 120 + struct linear_c *lc = ti->private; 121 + struct request_queue *q = bdev_get_queue(lc->dev->bdev); 122 + 123 + if (!q->merge_bvec_fn) 124 + return max_size; 125 + 126 + bvm->bi_bdev = lc->dev->bdev; 127 + bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); 128 + 129 + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 130 + } 131 + 129 132 static struct target_type linear_target = { 130 133 .name = "linear", 131 - .version= {1, 0, 2}, 134 + .version= {1, 0, 3}, 132 135 .module = THIS_MODULE, 133 136 .ctr = linear_ctr, 134 137 .dtr = linear_dtr, 135 138 .map = linear_map, 136 139 .status = linear_status, 137 140 .ioctl = linear_ioctl, 141 + .merge = linear_merge, 138 142 }; 139 143 140 144 int __init dm_linear_init(void)
+2 -2
drivers/md/dm-log.c
··· 831 831 .status = disk_status, 832 832 }; 833 833 834 - int __init dm_dirty_log_init(void) 834 + static int __init dm_dirty_log_init(void) 835 835 { 836 836 int r; 837 837 ··· 848 848 return r; 849 849 } 850 850 851 - void __exit dm_dirty_log_exit(void) 851 + static void __exit dm_dirty_log_exit(void) 852 852 { 853 853 dm_dirty_log_type_unregister(&_disk_type); 854 854 dm_dirty_log_type_unregister(&_core_type);
+7 -3
drivers/md/dm-mpath.c
··· 525 525 } 526 526 527 527 r = read_param(_params, shift(as), &ps_argc, &ti->error); 528 - if (r) 528 + if (r) { 529 + dm_put_path_selector(pst); 529 530 return -EINVAL; 531 + } 530 532 531 533 r = pst->create(&pg->ps, ps_argc, as->argv); 532 534 if (r) { ··· 625 623 struct pgpath *pgpath; 626 624 struct arg_set path_args; 627 625 628 - if (as->argc < nr_params) 626 + if (as->argc < nr_params) { 627 + ti->error = "not enough path parameters"; 629 628 goto bad; 629 + } 630 630 631 631 path_args.argc = nr_params; 632 632 path_args.argv = as->argv; ··· 871 867 if (pgpath->path.is_active) 872 868 goto out; 873 869 874 - if (!pgpath->pg->ps.type) { 870 + if (!pgpath->pg->ps.type->reinstate_path) { 875 871 DMWARN("Reinstate path not supported by path selector %s", 876 872 pgpath->pg->ps.type->name); 877 873 r = -EINVAL;
+141 -22
drivers/md/dm-snap.c
··· 40 40 */ 41 41 #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 42 42 43 + /* 44 + * The size of the mempool used to track chunks in use. 45 + */ 46 + #define MIN_IOS 256 47 + 43 48 static struct workqueue_struct *ksnapd; 44 49 static void flush_queued_bios(struct work_struct *work); 45 50 ··· 96 91 */ 97 92 static struct kmem_cache *exception_cache; 98 93 static struct kmem_cache *pending_cache; 99 - static mempool_t *pending_pool; 94 + 95 + struct dm_snap_tracked_chunk { 96 + struct hlist_node node; 97 + chunk_t chunk; 98 + }; 99 + 100 + static struct kmem_cache *tracked_chunk_cache; 101 + 102 + static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 103 + chunk_t chunk) 104 + { 105 + struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 106 + GFP_NOIO); 107 + unsigned long flags; 108 + 109 + c->chunk = chunk; 110 + 111 + spin_lock_irqsave(&s->tracked_chunk_lock, flags); 112 + hlist_add_head(&c->node, 113 + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 114 + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 115 + 116 + return c; 117 + } 118 + 119 + static void stop_tracking_chunk(struct dm_snapshot *s, 120 + struct dm_snap_tracked_chunk *c) 121 + { 122 + unsigned long flags; 123 + 124 + spin_lock_irqsave(&s->tracked_chunk_lock, flags); 125 + hlist_del(&c->node); 126 + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 127 + 128 + mempool_free(c, s->tracked_chunk_pool); 129 + } 130 + 131 + static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 132 + { 133 + struct dm_snap_tracked_chunk *c; 134 + struct hlist_node *hn; 135 + int found = 0; 136 + 137 + spin_lock_irq(&s->tracked_chunk_lock); 138 + 139 + hlist_for_each_entry(c, hn, 140 + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 141 + if (c->chunk == chunk) { 142 + found = 1; 143 + break; 144 + } 145 + } 146 + 147 + spin_unlock_irq(&s->tracked_chunk_lock); 148 + 149 + return found; 150 + } 100 151 101 152 /* 102 153 * One of these per registered origin, held in the snapshot_origins hash ··· 363 302 kmem_cache_free(exception_cache, e); 364 303 } 365 304 366 - static struct dm_snap_pending_exception *alloc_pending_exception(void) 305 + static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 367 306 { 368 - return mempool_alloc(pending_pool, GFP_NOIO); 307 + struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 308 + GFP_NOIO); 309 + 310 + pe->snap = s; 311 + 312 + return pe; 369 313 } 370 314 371 315 static void free_pending_exception(struct dm_snap_pending_exception *pe) 372 316 { 373 - mempool_free(pe, pending_pool); 317 + mempool_free(pe, pe->snap->pending_pool); 374 318 } 375 319 376 320 static void insert_completed_exception(struct dm_snapshot *s, ··· 548 482 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 549 483 { 550 484 struct dm_snapshot *s; 485 + int i; 551 486 int r = -EINVAL; 552 487 char persistent; 553 488 char *origin_path; ··· 631 564 goto bad5; 632 565 } 633 566 567 + s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 568 + if (!s->pending_pool) { 569 + ti->error = "Could not allocate mempool for pending exceptions"; 570 + goto bad6; 571 + } 572 + 573 + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 574 + tracked_chunk_cache); 575 + if (!s->tracked_chunk_pool) { 576 + ti->error = "Could not allocate tracked_chunk mempool for " 577 + "tracking reads"; 578 + goto bad_tracked_chunk_pool; 579 + } 580 + 581 + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 582 + INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 583 + 584 + spin_lock_init(&s->tracked_chunk_lock); 585 + 634 586 /* Metadata must only be loaded into one table at once */ 635 587 r = s->store.read_metadata(&s->store); 636 588 if (r < 0) { 637 589 ti->error = "Failed to read snapshot metadata"; 638 - goto bad6; 590 + goto bad_load_and_register; 639 591 } else if (r > 0) { 640 592 s->valid = 0; 641 593 DMWARN("Snapshot is marked invalid."); ··· 668 582 if (register_snapshot(s)) { 669 583 r = -EINVAL; 670 584 ti->error = "Cannot register snapshot origin"; 671 - goto bad6; 585 + goto bad_load_and_register; 672 586 } 673 587 674 588 ti->private = s; 675 589 ti->split_io = s->chunk_size; 676 590 677 591 return 0; 592 + 593 + bad_load_and_register: 594 + mempool_destroy(s->tracked_chunk_pool); 595 + 596 + bad_tracked_chunk_pool: 597 + mempool_destroy(s->pending_pool); 678 598 679 599 bad6: 680 600 dm_kcopyd_client_destroy(s->kcopyd_client); ··· 716 624 717 625 static void snapshot_dtr(struct dm_target *ti) 718 626 { 627 + #ifdef CONFIG_DM_DEBUG 628 + int i; 629 + #endif 719 630 struct dm_snapshot *s = ti->private; 720 631 721 632 flush_workqueue(ksnapd); ··· 727 632 /* After this returns there can be no new kcopyd jobs. */ 728 633 unregister_snapshot(s); 729 634 635 + #ifdef CONFIG_DM_DEBUG 636 + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 637 + BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 638 + #endif 639 + 640 + mempool_destroy(s->tracked_chunk_pool); 641 + 730 642 __free_exceptions(s); 643 + 644 + mempool_destroy(s->pending_pool); 731 645 732 646 dm_put_device(ti, s->origin); 733 647 dm_put_device(ti, s->cow); ··· 876 772 } 877 773 878 774 /* 775 + * Check for conflicting reads. This is extremely improbable, 776 + * so yield() is sufficient and there is no need for a wait queue. 777 + */ 778 + while (__chunk_is_tracked(s, pe->e.old_chunk)) 779 + yield(); 780 + 781 + /* 879 782 * Add a proper exception, and remove the 880 783 * in-flight exception from the list. 881 784 */ ··· 984 873 * to hold the lock while we do this. 985 874 */ 986 875 up_write(&s->lock); 987 - pe = alloc_pending_exception(); 876 + pe = alloc_pending_exception(s); 988 877 down_write(&s->lock); 989 878 990 879 if (!s->valid) { ··· 1004 893 bio_list_init(&pe->snapshot_bios); 1005 894 pe->primary_pe = NULL; 1006 895 atomic_set(&pe->ref_count, 0); 1007 - pe->snap = s; 1008 896 pe->started = 0; 1009 897 1010 898 if (s->store.prepare_exception(&s->store, &pe->e)) { ··· 1084 974 start_copy(pe); 1085 975 goto out; 1086 976 } 1087 - } else 1088 - /* 1089 - * FIXME: this read path scares me because we 1090 - * always use the origin when we have a pending 1091 - * exception. However I can't think of a 1092 - * situation where this is wrong - ejt. 1093 - */ 977 + } else { 1094 978 bio->bi_bdev = s->origin->bdev; 979 + map_context->ptr = track_chunk(s, chunk); 980 + } 1095 981 1096 982 out_unlock: 1097 983 up_write(&s->lock); 1098 984 out: 1099 985 return r; 986 + } 987 + 988 + static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 989 + int error, union map_info *map_context) 990 + { 991 + struct dm_snapshot *s = ti->private; 992 + struct dm_snap_tracked_chunk *c = map_context->ptr; 993 + 994 + if (c) 995 + stop_tracking_chunk(s, c); 996 + 997 + return 0; 1100 998 } 1101 999 1102 1000 static void snapshot_resume(struct dm_target *ti) ··· 1384 1266 .ctr = snapshot_ctr, 1385 1267 .dtr = snapshot_dtr, 1386 1268 .map = snapshot_map, 1269 + .end_io = snapshot_end_io, 1387 1270 .resume = snapshot_resume, 1388 1271 .status = snapshot_status, 1389 1272 }; ··· 1425 1306 goto bad4; 1426 1307 } 1427 1308 1428 - pending_pool = mempool_create_slab_pool(128, pending_cache); 1429 - if (!pending_pool) { 1430 - DMERR("Couldn't create pending pool."); 1309 + tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 1310 + if (!tracked_chunk_cache) { 1311 + DMERR("Couldn't create cache to track chunks in use."); 1431 1312 r = -ENOMEM; 1432 1313 goto bad5; 1433 1314 } ··· 1436 1317 if (!ksnapd) { 1437 1318 DMERR("Failed to create ksnapd workqueue."); 1438 1319 r = -ENOMEM; 1439 - goto bad6; 1320 + goto bad_pending_pool; 1440 1321 } 1441 1322 1442 1323 return 0; 1443 1324 1444 - bad6: 1445 - mempool_destroy(pending_pool); 1325 + bad_pending_pool: 1326 + kmem_cache_destroy(tracked_chunk_cache); 1446 1327 bad5: 1447 1328 kmem_cache_destroy(pending_cache); 1448 1329 bad4: ··· 1471 1352 DMERR("origin unregister failed %d", r); 1472 1353 1473 1354 exit_origin_hash(); 1474 - mempool_destroy(pending_pool); 1475 1355 kmem_cache_destroy(pending_cache); 1476 1356 kmem_cache_destroy(exception_cache); 1357 + kmem_cache_destroy(tracked_chunk_cache); 1477 1358 } 1478 1359 1479 1360 /* Module hooks */
+11
drivers/md/dm-snap.h
··· 130 130 void *context; 131 131 }; 132 132 133 + #define DM_TRACKED_CHUNK_HASH_SIZE 16 134 + #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 135 + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 136 + 133 137 struct dm_snapshot { 134 138 struct rw_semaphore lock; 135 139 struct dm_target *ti; ··· 161 157 /* The last percentage we notified */ 162 158 int last_percent; 163 159 160 + mempool_t *pending_pool; 161 + 164 162 struct exception_table pending; 165 163 struct exception_table complete; 166 164 ··· 180 174 /* Queue of snapshot writes for ksnapd to flush */ 181 175 struct bio_list queued_bios; 182 176 struct work_struct queued_bios_work; 177 + 178 + /* Chunks with outstanding reads */ 179 + mempool_t *tracked_chunk_pool; 180 + spinlock_t tracked_chunk_lock; 181 + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 183 182 }; 184 183 185 184 /*
+6 -7
drivers/md/dm-table.c
··· 506 506 rs->max_sectors = 507 507 min_not_zero(rs->max_sectors, q->max_sectors); 508 508 509 - /* FIXME: Device-Mapper on top of RAID-0 breaks because DM 510 - * currently doesn't honor MD's merge_bvec_fn routine. 511 - * In this case, we'll force DM to use PAGE_SIZE or 512 - * smaller I/O, just to be safe. A better fix is in the 513 - * works, but add this for the time being so it will at 514 - * least operate correctly. 509 + /* 510 + * Check if merge fn is supported. 511 + * If not we'll force DM to use PAGE_SIZE or 512 + * smaller I/O, just to be safe. 515 513 */ 516 - if (q->merge_bvec_fn) 514 + 515 + if (q->merge_bvec_fn && !ti->type->merge) 517 516 rs->max_sectors = 518 517 min_not_zero(rs->max_sectors, 519 518 (unsigned int) (PAGE_SIZE >> 9));
+45 -1
drivers/md/dm.c
··· 37 37 struct dm_io { 38 38 struct mapped_device *md; 39 39 int error; 40 - struct bio *bio; 41 40 atomic_t io_count; 41 + struct bio *bio; 42 42 unsigned long start_time; 43 43 }; 44 44 ··· 829 829 * CRUD END 830 830 *---------------------------------------------------------------*/ 831 831 832 + static int dm_merge_bvec(struct request_queue *q, 833 + struct bvec_merge_data *bvm, 834 + struct bio_vec *biovec) 835 + { 836 + struct mapped_device *md = q->queuedata; 837 + struct dm_table *map = dm_get_table(md); 838 + struct dm_target *ti; 839 + sector_t max_sectors; 840 + int max_size; 841 + 842 + if (unlikely(!map)) 843 + return 0; 844 + 845 + ti = dm_table_find_target(map, bvm->bi_sector); 846 + 847 + /* 848 + * Find maximum amount of I/O that won't need splitting 849 + */ 850 + max_sectors = min(max_io_len(md, bvm->bi_sector, ti), 851 + (sector_t) BIO_MAX_SECTORS); 852 + max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; 853 + if (max_size < 0) 854 + max_size = 0; 855 + 856 + /* 857 + * merge_bvec_fn() returns number of bytes 858 + * it can accept at this offset 859 + * max is precomputed maximal io size 860 + */ 861 + if (max_size && ti->type->merge) 862 + max_size = ti->type->merge(ti, bvm, biovec, max_size); 863 + 864 + /* 865 + * Always allow an entire first page 866 + */ 867 + if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) 868 + max_size = biovec->bv_len; 869 + 870 + dm_table_put(map); 871 + 872 + return max_size; 873 + } 874 + 832 875 /* 833 876 * The request function that just remaps the bio built up by 834 877 * dm_merge_bvec. ··· 1075 1032 blk_queue_make_request(md->queue, dm_request); 1076 1033 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1077 1034 md->queue->unplug_fn = dm_unplug_all; 1035 + blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1078 1036 1079 1037 md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); 1080 1038 if (!md->io_pool)
-6
drivers/md/dm.h
··· 100 100 101 101 void dm_kobject_uevent(struct mapped_device *md); 102 102 103 - /* 104 - * Dirty log 105 - */ 106 - int dm_dirty_log_init(void); 107 - void dm_dirty_log_exit(void); 108 - 109 103 int dm_kcopyd_init(void); 110 104 void dm_kcopyd_exit(void); 111 105
+6
include/linux/device-mapper.h
··· 9 9 #define _LINUX_DEVICE_MAPPER_H 10 10 11 11 #include <linux/bio.h> 12 + #include <linux/blkdev.h> 12 13 13 14 struct dm_target; 14 15 struct dm_table; 15 16 struct dm_dev; 16 17 struct mapped_device; 18 + struct bio_vec; 17 19 18 20 typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; 19 21 ··· 74 72 struct file *filp, unsigned int cmd, 75 73 unsigned long arg); 76 74 75 + typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 76 + struct bio_vec *biovec, int max_size); 77 + 77 78 void dm_error(const char *message); 78 79 79 80 /* ··· 112 107 dm_status_fn status; 113 108 dm_message_fn message; 114 109 dm_ioctl_fn ioctl; 110 + dm_merge_fn merge; 115 111 }; 116 112 117 113 struct io_restrictions {
+2 -2
include/linux/dm-ioctl.h
··· 256 256 #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 257 257 258 258 #define DM_VERSION_MAJOR 4 259 - #define DM_VERSION_MINOR 13 259 + #define DM_VERSION_MINOR 14 260 260 #define DM_VERSION_PATCHLEVEL 0 261 - #define DM_VERSION_EXTRA "-ioctl (2007-10-18)" 261 + #define DM_VERSION_EXTRA "-ioctl (2008-04-23)" 262 262 263 263 /* Status bits */ 264 264 #define DM_READONLY_FLAG (1 << 0) /* In/Out */