Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device-mapper fixes from Mike Snitzer:
"A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error
handling fixes for dm-multipath. A fix for the thin provisioning
target to not expose non-zero discard limits if discards are disabled.

Lastly, add two DM module parameters which allow users to tune the
emergency memory reserves that DM maintains per device -- this helps
fix a long-standing issue for dm-multipath. The conservative default
reserve for request-based dm-multipath devices (256) has proven
problematic for users with many multipathed SCSI devices but
relatively little memory. To responsibly select a smaller value users
should use the new nr_bios tracepoint info (via commit 75afb352
"block: Add nr_bios to block_rq_remap tracepoint") to determine the
peak number of bios their workloads create"

* tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm: add reserved_bio_based_ios module parameter
dm: add reserved_rq_based_ios module parameter
dm: lower bio-based mempool reservation
dm thin: do not expose non-zero discard limits if discards disabled
dm mpath: disable WRITE SAME if it fails
dm-snapshot: fix performance degradation due to small hash size
dm snapshot: workaround for a false positive lockdep warning
dm stats: fix possible counter corruption on 32-bit systems
dm mpath: do not fail path on -ENOSPC

+120 -26
+3 -4
drivers/md/dm-io.c
··· 19 19 #define DM_MSG_PREFIX "io" 20 20 21 21 #define DM_IO_MAX_REGIONS BITS_PER_LONG 22 - #define MIN_IOS 16 23 - #define MIN_BIOS 16 24 22 25 23 struct dm_io_client { 26 24 mempool_t *pool; ··· 48 50 struct dm_io_client *dm_io_client_create(void) 49 51 { 50 52 struct dm_io_client *client; 53 + unsigned min_ios = dm_get_reserved_bio_based_ios(); 51 54 52 55 client = kmalloc(sizeof(*client), GFP_KERNEL); 53 56 if (!client) 54 57 return ERR_PTR(-ENOMEM); 55 58 56 - client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); 59 + client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache); 57 60 if (!client->pool) 58 61 goto bad; 59 62 60 - client->bios = bioset_create(MIN_BIOS, 0); 63 + client->bios = bioset_create(min_ios, 0); 61 64 if (!client->bios) 62 65 goto bad; 63 66
+14 -4
drivers/md/dm-mpath.c
··· 7 7 8 8 #include <linux/device-mapper.h> 9 9 10 + #include "dm.h" 10 11 #include "dm-path-selector.h" 11 12 #include "dm-uevent.h" 12 13 ··· 117 116 118 117 typedef int (*action_fn) (struct pgpath *pgpath); 119 118 120 - #define MIN_IOS 256 /* Mempool size */ 121 - 122 119 static struct kmem_cache *_mpio_cache; 123 120 124 121 static struct workqueue_struct *kmultipathd, *kmpath_handlerd; ··· 189 190 static struct multipath *alloc_multipath(struct dm_target *ti) 190 191 { 191 192 struct multipath *m; 193 + unsigned min_ios = dm_get_reserved_rq_based_ios(); 192 194 193 195 m = kzalloc(sizeof(*m), GFP_KERNEL); 194 196 if (m) { ··· 202 202 INIT_WORK(&m->trigger_event, trigger_event); 203 203 init_waitqueue_head(&m->pg_init_wait); 204 204 mutex_init(&m->work_mutex); 205 - m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); 205 + m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); 206 206 if (!m->mpio_pool) { 207 207 kfree(m); 208 208 return NULL; ··· 1268 1268 case -EREMOTEIO: 1269 1269 case -EILSEQ: 1270 1270 case -ENODATA: 1271 + case -ENOSPC: 1271 1272 return 1; 1272 1273 } 1273 1274 ··· 1299 1298 if (!error && !clone->errors) 1300 1299 return 0; /* I/O complete */ 1301 1300 1302 - if (noretry_error(error)) 1301 + if (noretry_error(error)) { 1302 + if ((clone->cmd_flags & REQ_WRITE_SAME) && 1303 + !clone->q->limits.max_write_same_sectors) { 1304 + struct queue_limits *limits; 1305 + 1306 + /* device doesn't really support WRITE SAME, disable it */ 1307 + limits = dm_get_queue_limits(dm_table_get_md(m->ti->table)); 1308 + limits->max_write_same_sectors = 0; 1309 + } 1303 1310 return error; 1311 + } 1304 1312 1305 1313 if (mpio->pgpath) 1306 1314 fail_path(mpio->pgpath);
+1 -1
drivers/md/dm-snap-persistent.c
··· 256 256 */ 257 257 INIT_WORK_ONSTACK(&req.work, do_metadata); 258 258 queue_work(ps->metadata_wq, &req.work); 259 - flush_work(&req.work); 259 + flush_workqueue(ps->metadata_wq); 260 260 261 261 return req.result; 262 262 }
+2 -3
drivers/md/dm-snap.c
··· 725 725 */ 726 726 static int init_hash_tables(struct dm_snapshot *s) 727 727 { 728 - sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 728 + sector_t hash_size, cow_dev_size, max_buckets; 729 729 730 730 /* 731 731 * Calculate based on the size of the original volume or 732 732 * the COW volume... 733 733 */ 734 734 cow_dev_size = get_dev_size(s->cow->bdev); 735 - origin_dev_size = get_dev_size(s->origin->bdev); 736 735 max_buckets = calc_max_buckets(); 737 736 738 - hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 737 + hash_size = cow_dev_size >> s->store->chunk_shift; 739 738 hash_size = min(hash_size, max_buckets); 740 739 741 740 if (hash_size < 64)
+17 -6
drivers/md/dm-stats.c
··· 451 451 struct dm_stat_percpu *p; 452 452 453 453 /* 454 - * For strict correctness we should use local_irq_disable/enable 454 + * For strict correctness we should use local_irq_save/restore 455 455 * instead of preempt_disable/enable. 456 456 * 457 - * This is racy if the driver finishes bios from non-interrupt 458 - * context as well as from interrupt context or from more different 459 - * interrupts. 457 + * preempt_disable/enable is racy if the driver finishes bios 458 + * from non-interrupt context as well as from interrupt context 459 + * or from more different interrupts. 460 460 * 461 - * However, the race only results in not counting some events, 462 - * so it is acceptable. 461 + * On 64-bit architectures the race only results in not counting some 462 + * events, so it is acceptable. On 32-bit architectures the race could 463 + * cause the counter going off by 2^32, so we need to do proper locking 464 + * there. 463 465 * 464 466 * part_stat_lock()/part_stat_unlock() have this race too. 465 467 */ 468 + #if BITS_PER_LONG == 32 469 + unsigned long flags; 470 + local_irq_save(flags); 471 + #else 466 472 preempt_disable(); 473 + #endif 467 474 p = &s->stat_percpu[smp_processor_id()][entry]; 468 475 469 476 if (!end) { ··· 485 478 p->ticks[idx] += duration; 486 479 } 487 480 481 + #if BITS_PER_LONG == 32 482 + local_irq_restore(flags); 483 + #else 488 484 preempt_enable(); 485 + #endif 489 486 } 490 487 491 488 static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
+11 -3
drivers/md/dm-thin.c
··· 2095 2095 * them down to the data device. The thin device's discard 2096 2096 * processing will cause mappings to be removed from the btree. 2097 2097 */ 2098 + ti->discard_zeroes_data_unsupported = true; 2098 2099 if (pf.discard_enabled && pf.discard_passdown) { 2099 2100 ti->num_discard_bios = 1; 2100 2101 ··· 2105 2104 * thin devices' discard limits consistent). 2106 2105 */ 2107 2106 ti->discards_supported = true; 2108 - ti->discard_zeroes_data_unsupported = true; 2109 2107 } 2110 2108 ti->private = pt; 2111 2109 ··· 2689 2689 * They get transferred to the live pool in bind_control_target() 2690 2690 * called from pool_preresume(). 2691 2691 */ 2692 - if (!pt->adjusted_pf.discard_enabled) 2692 + if (!pt->adjusted_pf.discard_enabled) { 2693 + /* 2694 + * Must explicitly disallow stacking discard limits otherwise the 2695 + * block layer will stack them if pool's data device has support. 2696 + * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the 2697 + * user to see that, so make sure to set all discard limits to 0. 2698 + */ 2699 + limits->discard_granularity = 0; 2693 2700 return; 2701 + } 2694 2702 2695 2703 disable_passdown_if_not_supported(pt); 2696 2704 ··· 2834 2826 ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); 2835 2827 2836 2828 /* In case the pool supports discards, pass them on. */ 2829 + ti->discard_zeroes_data_unsupported = true; 2837 2830 if (tc->pool->pf.discard_enabled) { 2838 2831 ti->discards_supported = true; 2839 2832 ti->num_discard_bios = 1; 2840 - ti->discard_zeroes_data_unsupported = true; 2841 2833 /* Discard bios must be split on a block boundary */ 2842 2834 ti->split_discard_bios = true; 2843 2835 }
+67 -4
drivers/md/dm.c
··· 211 211 struct bio_set *bs; 212 212 }; 213 213 214 - #define MIN_IOS 256 214 + #define RESERVED_BIO_BASED_IOS 16 215 + #define RESERVED_REQUEST_BASED_IOS 256 216 + #define RESERVED_MAX_IOS 1024 215 217 static struct kmem_cache *_io_cache; 216 218 static struct kmem_cache *_rq_tio_cache; 219 + 220 + /* 221 + * Bio-based DM's mempools' reserved IOs set by the user. 222 + */ 223 + static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; 224 + 225 + /* 226 + * Request-based DM's mempools' reserved IOs set by the user. 227 + */ 228 + static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; 229 + 230 + static unsigned __dm_get_reserved_ios(unsigned *reserved_ios, 231 + unsigned def, unsigned max) 232 + { 233 + unsigned ios = ACCESS_ONCE(*reserved_ios); 234 + unsigned modified_ios = 0; 235 + 236 + if (!ios) 237 + modified_ios = def; 238 + else if (ios > max) 239 + modified_ios = max; 240 + 241 + if (modified_ios) { 242 + (void)cmpxchg(reserved_ios, ios, modified_ios); 243 + ios = modified_ios; 244 + } 245 + 246 + return ios; 247 + } 248 + 249 + unsigned dm_get_reserved_bio_based_ios(void) 250 + { 251 + return __dm_get_reserved_ios(&reserved_bio_based_ios, 252 + RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); 253 + } 254 + EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); 255 + 256 + unsigned dm_get_reserved_rq_based_ios(void) 257 + { 258 + return __dm_get_reserved_ios(&reserved_rq_based_ios, 259 + RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); 260 + } 261 + EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); 217 262 218 263 static int __init local_init(void) 219 264 { ··· 2323 2278 } 2324 2279 2325 2280 /* 2281 + * The queue_limits are only valid as long as you have a reference 2282 + * count on 'md'. 
2283 + */ 2284 + struct queue_limits *dm_get_queue_limits(struct mapped_device *md) 2285 + { 2286 + BUG_ON(!atomic_read(&md->holders)); 2287 + return &md->queue->limits; 2288 + } 2289 + EXPORT_SYMBOL_GPL(dm_get_queue_limits); 2290 + 2291 + /* 2326 2292 * Fully initialize a request-based queue (->elevator, ->request_fn, etc). 2327 2293 */ 2328 2294 static int dm_init_request_based_queue(struct mapped_device *md) ··· 2918 2862 2919 2863 if (type == DM_TYPE_BIO_BASED) { 2920 2864 cachep = _io_cache; 2921 - pool_size = 16; 2865 + pool_size = dm_get_reserved_bio_based_ios(); 2922 2866 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 2923 2867 } else if (type == DM_TYPE_REQUEST_BASED) { 2924 2868 cachep = _rq_tio_cache; 2925 - pool_size = MIN_IOS; 2869 + pool_size = dm_get_reserved_rq_based_ios(); 2926 2870 front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 2927 2871 /* per_bio_data_size is not used. See __bind_mempools(). */ 2928 2872 WARN_ON(per_bio_data_size != 0); 2929 2873 } else 2930 2874 goto out; 2931 2875 2932 - pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); 2876 + pools->io_pool = mempool_create_slab_pool(pool_size, cachep); 2933 2877 if (!pools->io_pool) 2934 2878 goto out; 2935 2879 ··· 2980 2924 2981 2925 module_param(major, uint, 0); 2982 2926 MODULE_PARM_DESC(major, "The major number of the device mapper"); 2927 + 2928 + module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); 2929 + MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 2930 + 2931 + module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); 2932 + MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); 2933 + 2983 2934 MODULE_DESCRIPTION(DM_NAME " driver"); 2984 2935 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 2985 2936 MODULE_LICENSE("GPL");
+3
drivers/md/dm.h
··· 184 184 /* 185 185 * Helpers that are used by DM core 186 186 */ 187 + unsigned dm_get_reserved_bio_based_ios(void); 188 + unsigned dm_get_reserved_rq_based_ios(void); 189 + 187 190 static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) 188 191 { 189 192 return !maxlen || strlen(result) + 1 >= maxlen;
+2 -1
include/linux/device-mapper.h
··· 406 406 union map_info *dm_get_mapinfo(struct bio *bio); 407 407 union map_info *dm_get_rq_mapinfo(struct request *rq); 408 408 409 + struct queue_limits *dm_get_queue_limits(struct mapped_device *md); 410 + 409 411 /* 410 412 * Geometry functions. 411 413 */ 412 414 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); 413 415 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); 414 - 415 416 416 417 /*----------------------------------------------------------------- 417 418 * Functions for manipulating device-mapper tables.