Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm: move request-based code out to dm-rq.[hc]

Add some separation between bio-based and request-based DM core code.

'struct mapped_device' and other DM core only structures and functions
have been moved to dm-core.h and all relevant DM core .c files have been
updated to include dm-core.h rather than dm.h

DM targets should _never_ include dm-core.h!

[block core merge conflict resolution from Stephen Rothwell]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>

+1200 -1131
+2 -1
drivers/md/Makefile
··· 3 3 # 4 4 5 5 dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ 6 - dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o 6 + dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ 7 + dm-rq.o 7 8 dm-multipath-y += dm-path-selector.o dm-mpath.o 8 9 dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ 9 10 dm-snap-persistent.o
+1 -1
drivers/md/dm-builtin.c
··· 1 - #include "dm.h" 1 + #include "dm-core.h" 2 2 3 3 /* 4 4 * The kobject release method must not be placed in the module itself,
+149
drivers/md/dm-core.h
··· 1 + /* 2 + * Internal header file _only_ for device mapper core 3 + * 4 + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. 5 + * 6 + * This file is released under the LGPL. 7 + */ 8 + 9 + #ifndef DM_CORE_INTERNAL_H 10 + #define DM_CORE_INTERNAL_H 11 + 12 + #include <linux/kthread.h> 13 + #include <linux/ktime.h> 14 + #include <linux/blk-mq.h> 15 + 16 + #include <trace/events/block.h> 17 + 18 + #include "dm.h" 19 + 20 + #define DM_RESERVED_MAX_IOS 1024 21 + 22 + struct dm_kobject_holder { 23 + struct kobject kobj; 24 + struct completion completion; 25 + }; 26 + 27 + /* 28 + * DM core internal structure that used directly by dm.c and dm-rq.c 29 + * DM targets must _not_ deference a mapped_device to directly access its members! 30 + */ 31 + struct mapped_device { 32 + struct srcu_struct io_barrier; 33 + struct mutex suspend_lock; 34 + 35 + /* 36 + * The current mapping (struct dm_table *). 37 + * Use dm_get_live_table{_fast} or take suspend_lock for 38 + * dereference. 39 + */ 40 + void __rcu *map; 41 + 42 + struct list_head table_devices; 43 + struct mutex table_devices_lock; 44 + 45 + unsigned long flags; 46 + 47 + struct request_queue *queue; 48 + int numa_node_id; 49 + 50 + unsigned type; 51 + /* Protect queue and type against concurrent access. */ 52 + struct mutex type_lock; 53 + 54 + atomic_t holders; 55 + atomic_t open_count; 56 + 57 + struct dm_target *immutable_target; 58 + struct target_type *immutable_target_type; 59 + 60 + struct gendisk *disk; 61 + char name[16]; 62 + 63 + void *interface_ptr; 64 + 65 + /* 66 + * A list of ios that arrived while we were suspended. 67 + */ 68 + atomic_t pending[2]; 69 + wait_queue_head_t wait; 70 + struct work_struct work; 71 + spinlock_t deferred_lock; 72 + struct bio_list deferred; 73 + 74 + /* 75 + * Event handling. 
76 + */ 77 + wait_queue_head_t eventq; 78 + atomic_t event_nr; 79 + atomic_t uevent_seq; 80 + struct list_head uevent_list; 81 + spinlock_t uevent_lock; /* Protect access to uevent_list */ 82 + 83 + /* the number of internal suspends */ 84 + unsigned internal_suspend_count; 85 + 86 + /* 87 + * Processing queue (flush) 88 + */ 89 + struct workqueue_struct *wq; 90 + 91 + /* 92 + * io objects are allocated from here. 93 + */ 94 + mempool_t *io_pool; 95 + mempool_t *rq_pool; 96 + 97 + struct bio_set *bs; 98 + 99 + /* 100 + * freeze/thaw support require holding onto a super block 101 + */ 102 + struct super_block *frozen_sb; 103 + 104 + /* forced geometry settings */ 105 + struct hd_geometry geometry; 106 + 107 + struct block_device *bdev; 108 + 109 + /* kobject and completion */ 110 + struct dm_kobject_holder kobj_holder; 111 + 112 + /* zero-length flush that will be cloned and submitted to targets */ 113 + struct bio flush_bio; 114 + 115 + struct dm_stats stats; 116 + 117 + struct kthread_worker kworker; 118 + struct task_struct *kworker_task; 119 + 120 + /* for request-based merge heuristic in dm_request_fn() */ 121 + unsigned seq_rq_merge_deadline_usecs; 122 + int last_rq_rw; 123 + sector_t last_rq_pos; 124 + ktime_t last_rq_start_time; 125 + 126 + /* for blk-mq request-based DM support */ 127 + struct blk_mq_tag_set *tag_set; 128 + bool use_blk_mq:1; 129 + bool init_tio_pdu:1; 130 + }; 131 + 132 + void dm_init_md_queue(struct mapped_device *md); 133 + void dm_init_normal_md_queue(struct mapped_device *md); 134 + int md_in_flight(struct mapped_device *md); 135 + void disable_write_same(struct mapped_device *md); 136 + 137 + static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) 138 + { 139 + return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; 140 + } 141 + 142 + unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max); 143 + 144 + static inline bool dm_message_test_buffer_overflow(char 
*result, unsigned maxlen) 145 + { 146 + return !maxlen || strlen(result) + 1 >= maxlen; 147 + } 148 + 149 + #endif
+1 -1
drivers/md/dm-io.c
··· 5 5 * This file is released under the GPL. 6 6 */ 7 7 8 - #include "dm.h" 8 + #include "dm-core.h" 9 9 10 10 #include <linux/device-mapper.h> 11 11
+1 -1
drivers/md/dm-ioctl.c
··· 5 5 * This file is released under the GPL. 6 6 */ 7 7 8 - #include "dm.h" 8 + #include "dm-core.h" 9 9 10 10 #include <linux/module.h> 11 11 #include <linux/vmalloc.h>
+1 -1
drivers/md/dm-kcopyd.c
··· 26 26 #include <linux/device-mapper.h> 27 27 #include <linux/dm-kcopyd.h> 28 28 29 - #include "dm.h" 29 + #include "dm-core.h" 30 30 31 31 #define SUB_JOB_SIZE 128 32 32 #define SPLIT_COUNT 8
+2 -2
drivers/md/dm-mpath.c
··· 7 7 8 8 #include <linux/device-mapper.h> 9 9 10 - #include "dm.h" 10 + #include "dm-rq.h" 11 11 #include "dm-path-selector.h" 12 12 #include "dm-uevent.h" 13 13 ··· 1328 1328 * during end I/O handling, since those clone requests don't have 1329 1329 * bio clones. If we queue them inside the multipath target, 1330 1330 * we need to make bio clones, that requires memory allocation. 1331 - * (See drivers/md/dm.c:end_clone_bio() about why the clone requests 1331 + * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests 1332 1332 * don't have bio clones.) 1333 1333 * Instead of queueing the clone request here, we queue the original 1334 1334 * request into dm core, which will remake a clone request and
+959
drivers/md/dm-rq.c
··· 1 + /* 2 + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. 3 + * 4 + * This file is released under the GPL. 5 + */ 6 + 7 + #include "dm-core.h" 8 + #include "dm-rq.h" 9 + 10 + #include <linux/elevator.h> /* for rq_end_sector() */ 11 + #include <linux/blk-mq.h> 12 + 13 + #define DM_MSG_PREFIX "core-rq" 14 + 15 + #define DM_MQ_NR_HW_QUEUES 1 16 + #define DM_MQ_QUEUE_DEPTH 2048 17 + static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; 18 + static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; 19 + 20 + /* 21 + * Request-based DM's mempools' reserved IOs set by the user. 22 + */ 23 + #define RESERVED_REQUEST_BASED_IOS 256 24 + static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; 25 + 26 + #ifdef CONFIG_DM_MQ_DEFAULT 27 + static bool use_blk_mq = true; 28 + #else 29 + static bool use_blk_mq = false; 30 + #endif 31 + 32 + bool dm_use_blk_mq_default(void) 33 + { 34 + return use_blk_mq; 35 + } 36 + 37 + bool dm_use_blk_mq(struct mapped_device *md) 38 + { 39 + return md->use_blk_mq; 40 + } 41 + EXPORT_SYMBOL_GPL(dm_use_blk_mq); 42 + 43 + unsigned dm_get_reserved_rq_based_ios(void) 44 + { 45 + return __dm_get_module_param(&reserved_rq_based_ios, 46 + RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS); 47 + } 48 + EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); 49 + 50 + static unsigned dm_get_blk_mq_nr_hw_queues(void) 51 + { 52 + return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32); 53 + } 54 + 55 + static unsigned dm_get_blk_mq_queue_depth(void) 56 + { 57 + return __dm_get_module_param(&dm_mq_queue_depth, 58 + DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH); 59 + } 60 + 61 + int dm_request_based(struct mapped_device *md) 62 + { 63 + return blk_queue_stackable(md->queue); 64 + } 65 + 66 + static void dm_old_start_queue(struct request_queue *q) 67 + { 68 + unsigned long flags; 69 + 70 + spin_lock_irqsave(q->queue_lock, flags); 71 + if (blk_queue_stopped(q)) 72 + blk_start_queue(q); 73 + spin_unlock_irqrestore(q->queue_lock, flags); 74 + } 75 + 76 + 
void dm_start_queue(struct request_queue *q) 77 + { 78 + if (!q->mq_ops) 79 + dm_old_start_queue(q); 80 + else { 81 + blk_mq_start_stopped_hw_queues(q, true); 82 + blk_mq_kick_requeue_list(q); 83 + } 84 + } 85 + 86 + static void dm_old_stop_queue(struct request_queue *q) 87 + { 88 + unsigned long flags; 89 + 90 + spin_lock_irqsave(q->queue_lock, flags); 91 + if (blk_queue_stopped(q)) { 92 + spin_unlock_irqrestore(q->queue_lock, flags); 93 + return; 94 + } 95 + 96 + blk_stop_queue(q); 97 + spin_unlock_irqrestore(q->queue_lock, flags); 98 + } 99 + 100 + void dm_stop_queue(struct request_queue *q) 101 + { 102 + if (!q->mq_ops) 103 + dm_old_stop_queue(q); 104 + else 105 + blk_mq_stop_hw_queues(q); 106 + } 107 + 108 + static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, 109 + gfp_t gfp_mask) 110 + { 111 + return mempool_alloc(md->io_pool, gfp_mask); 112 + } 113 + 114 + static void free_old_rq_tio(struct dm_rq_target_io *tio) 115 + { 116 + mempool_free(tio, tio->md->io_pool); 117 + } 118 + 119 + static struct request *alloc_old_clone_request(struct mapped_device *md, 120 + gfp_t gfp_mask) 121 + { 122 + return mempool_alloc(md->rq_pool, gfp_mask); 123 + } 124 + 125 + static void free_old_clone_request(struct mapped_device *md, struct request *rq) 126 + { 127 + mempool_free(rq, md->rq_pool); 128 + } 129 + 130 + /* 131 + * Partial completion handling for request-based dm 132 + */ 133 + static void end_clone_bio(struct bio *clone) 134 + { 135 + struct dm_rq_clone_bio_info *info = 136 + container_of(clone, struct dm_rq_clone_bio_info, clone); 137 + struct dm_rq_target_io *tio = info->tio; 138 + struct bio *bio = info->orig; 139 + unsigned int nr_bytes = info->orig->bi_iter.bi_size; 140 + int error = clone->bi_error; 141 + 142 + bio_put(clone); 143 + 144 + if (tio->error) 145 + /* 146 + * An error has already been detected on the request. 147 + * Once error occurred, just let clone->end_io() handle 148 + * the remainder. 
149 + */ 150 + return; 151 + else if (error) { 152 + /* 153 + * Don't notice the error to the upper layer yet. 154 + * The error handling decision is made by the target driver, 155 + * when the request is completed. 156 + */ 157 + tio->error = error; 158 + return; 159 + } 160 + 161 + /* 162 + * I/O for the bio successfully completed. 163 + * Notice the data completion to the upper layer. 164 + */ 165 + 166 + /* 167 + * bios are processed from the head of the list. 168 + * So the completing bio should always be rq->bio. 169 + * If it's not, something wrong is happening. 170 + */ 171 + if (tio->orig->bio != bio) 172 + DMERR("bio completion is going in the middle of the request"); 173 + 174 + /* 175 + * Update the original request. 176 + * Do not use blk_end_request() here, because it may complete 177 + * the original request before the clone, and break the ordering. 178 + */ 179 + blk_update_request(tio->orig, 0, nr_bytes); 180 + } 181 + 182 + static struct dm_rq_target_io *tio_from_request(struct request *rq) 183 + { 184 + return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); 185 + } 186 + 187 + static void rq_end_stats(struct mapped_device *md, struct request *orig) 188 + { 189 + if (unlikely(dm_stats_used(&md->stats))) { 190 + struct dm_rq_target_io *tio = tio_from_request(orig); 191 + tio->duration_jiffies = jiffies - tio->duration_jiffies; 192 + dm_stats_account_io(&md->stats, rq_data_dir(orig), 193 + blk_rq_pos(orig), tio->n_sectors, true, 194 + tio->duration_jiffies, &tio->stats_aux); 195 + } 196 + } 197 + 198 + /* 199 + * Don't touch any member of the md after calling this function because 200 + * the md may be freed in dm_put() at the end of this function. 201 + * Or do dm_get() before calling this function and dm_put() later. 
202 + */ 203 + static void rq_completed(struct mapped_device *md, int rw, bool run_queue) 204 + { 205 + atomic_dec(&md->pending[rw]); 206 + 207 + /* nudge anyone waiting on suspend queue */ 208 + if (!md_in_flight(md)) 209 + wake_up(&md->wait); 210 + 211 + /* 212 + * Run this off this callpath, as drivers could invoke end_io while 213 + * inside their request_fn (and holding the queue lock). Calling 214 + * back into ->request_fn() could deadlock attempting to grab the 215 + * queue lock again. 216 + */ 217 + if (!md->queue->mq_ops && run_queue) 218 + blk_run_queue_async(md->queue); 219 + 220 + /* 221 + * dm_put() must be at the end of this function. See the comment above 222 + */ 223 + dm_put(md); 224 + } 225 + 226 + static void free_rq_clone(struct request *clone) 227 + { 228 + struct dm_rq_target_io *tio = clone->end_io_data; 229 + struct mapped_device *md = tio->md; 230 + 231 + blk_rq_unprep_clone(clone); 232 + 233 + if (md->type == DM_TYPE_MQ_REQUEST_BASED) 234 + /* stacked on blk-mq queue(s) */ 235 + tio->ti->type->release_clone_rq(clone); 236 + else if (!md->queue->mq_ops) 237 + /* request_fn queue stacked on request_fn queue(s) */ 238 + free_old_clone_request(md, clone); 239 + 240 + if (!md->queue->mq_ops) 241 + free_old_rq_tio(tio); 242 + } 243 + 244 + /* 245 + * Complete the clone and the original request. 246 + * Must be called without clone's queue lock held, 247 + * see end_clone_request() for more details. 248 + */ 249 + static void dm_end_request(struct request *clone, int error) 250 + { 251 + int rw = rq_data_dir(clone); 252 + struct dm_rq_target_io *tio = clone->end_io_data; 253 + struct mapped_device *md = tio->md; 254 + struct request *rq = tio->orig; 255 + 256 + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 257 + rq->errors = clone->errors; 258 + rq->resid_len = clone->resid_len; 259 + 260 + if (rq->sense) 261 + /* 262 + * We are using the sense buffer of the original 263 + * request. 264 + * So setting the length of the sense data is enough. 
265 + */ 266 + rq->sense_len = clone->sense_len; 267 + } 268 + 269 + free_rq_clone(clone); 270 + rq_end_stats(md, rq); 271 + if (!rq->q->mq_ops) 272 + blk_end_request_all(rq, error); 273 + else 274 + blk_mq_end_request(rq, error); 275 + rq_completed(md, rw, true); 276 + } 277 + 278 + static void dm_unprep_request(struct request *rq) 279 + { 280 + struct dm_rq_target_io *tio = tio_from_request(rq); 281 + struct request *clone = tio->clone; 282 + 283 + if (!rq->q->mq_ops) { 284 + rq->special = NULL; 285 + rq->cmd_flags &= ~REQ_DONTPREP; 286 + } 287 + 288 + if (clone) 289 + free_rq_clone(clone); 290 + else if (!tio->md->queue->mq_ops) 291 + free_old_rq_tio(tio); 292 + } 293 + 294 + /* 295 + * Requeue the original request of a clone. 296 + */ 297 + static void dm_old_requeue_request(struct request *rq) 298 + { 299 + struct request_queue *q = rq->q; 300 + unsigned long flags; 301 + 302 + spin_lock_irqsave(q->queue_lock, flags); 303 + blk_requeue_request(q, rq); 304 + blk_run_queue_async(q); 305 + spin_unlock_irqrestore(q->queue_lock, flags); 306 + } 307 + 308 + static void dm_mq_requeue_request(struct request *rq) 309 + { 310 + struct request_queue *q = rq->q; 311 + unsigned long flags; 312 + 313 + blk_mq_requeue_request(rq); 314 + spin_lock_irqsave(q->queue_lock, flags); 315 + if (!blk_queue_stopped(q)) 316 + blk_mq_kick_requeue_list(q); 317 + spin_unlock_irqrestore(q->queue_lock, flags); 318 + } 319 + 320 + static void dm_requeue_original_request(struct mapped_device *md, 321 + struct request *rq) 322 + { 323 + int rw = rq_data_dir(rq); 324 + 325 + rq_end_stats(md, rq); 326 + dm_unprep_request(rq); 327 + 328 + if (!rq->q->mq_ops) 329 + dm_old_requeue_request(rq); 330 + else 331 + dm_mq_requeue_request(rq); 332 + 333 + rq_completed(md, rw, false); 334 + } 335 + 336 + static void dm_done(struct request *clone, int error, bool mapped) 337 + { 338 + int r = error; 339 + struct dm_rq_target_io *tio = clone->end_io_data; 340 + dm_request_endio_fn rq_end_io = NULL; 341 + 342 
+ if (tio->ti) { 343 + rq_end_io = tio->ti->type->rq_end_io; 344 + 345 + if (mapped && rq_end_io) 346 + r = rq_end_io(tio->ti, clone, error, &tio->info); 347 + } 348 + 349 + if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) && 350 + !clone->q->limits.max_write_same_sectors)) 351 + disable_write_same(tio->md); 352 + 353 + if (r <= 0) 354 + /* The target wants to complete the I/O */ 355 + dm_end_request(clone, r); 356 + else if (r == DM_ENDIO_INCOMPLETE) 357 + /* The target will handle the I/O */ 358 + return; 359 + else if (r == DM_ENDIO_REQUEUE) 360 + /* The target wants to requeue the I/O */ 361 + dm_requeue_original_request(tio->md, tio->orig); 362 + else { 363 + DMWARN("unimplemented target endio return value: %d", r); 364 + BUG(); 365 + } 366 + } 367 + 368 + /* 369 + * Request completion handler for request-based dm 370 + */ 371 + static void dm_softirq_done(struct request *rq) 372 + { 373 + bool mapped = true; 374 + struct dm_rq_target_io *tio = tio_from_request(rq); 375 + struct request *clone = tio->clone; 376 + int rw; 377 + 378 + if (!clone) { 379 + rq_end_stats(tio->md, rq); 380 + rw = rq_data_dir(rq); 381 + if (!rq->q->mq_ops) { 382 + blk_end_request_all(rq, tio->error); 383 + rq_completed(tio->md, rw, false); 384 + free_old_rq_tio(tio); 385 + } else { 386 + blk_mq_end_request(rq, tio->error); 387 + rq_completed(tio->md, rw, false); 388 + } 389 + return; 390 + } 391 + 392 + if (rq->cmd_flags & REQ_FAILED) 393 + mapped = false; 394 + 395 + dm_done(clone, tio->error, mapped); 396 + } 397 + 398 + /* 399 + * Complete the clone and the original request with the error status 400 + * through softirq context. 
401 + */ 402 + static void dm_complete_request(struct request *rq, int error) 403 + { 404 + struct dm_rq_target_io *tio = tio_from_request(rq); 405 + 406 + tio->error = error; 407 + if (!rq->q->mq_ops) 408 + blk_complete_request(rq); 409 + else 410 + blk_mq_complete_request(rq, error); 411 + } 412 + 413 + /* 414 + * Complete the not-mapped clone and the original request with the error status 415 + * through softirq context. 416 + * Target's rq_end_io() function isn't called. 417 + * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. 418 + */ 419 + static void dm_kill_unmapped_request(struct request *rq, int error) 420 + { 421 + rq->cmd_flags |= REQ_FAILED; 422 + dm_complete_request(rq, error); 423 + } 424 + 425 + /* 426 + * Called with the clone's queue lock held (in the case of .request_fn) 427 + */ 428 + static void end_clone_request(struct request *clone, int error) 429 + { 430 + struct dm_rq_target_io *tio = clone->end_io_data; 431 + 432 + if (!clone->q->mq_ops) { 433 + /* 434 + * For just cleaning up the information of the queue in which 435 + * the clone was dispatched. 436 + * The clone is *NOT* freed actually here because it is alloced 437 + * from dm own mempool (REQ_ALLOCED isn't set). 438 + */ 439 + __blk_put_request(clone->q, clone); 440 + } 441 + 442 + /* 443 + * Actual request completion is done in a softirq context which doesn't 444 + * hold the clone's queue lock. 
Otherwise, deadlock could occur because: 445 + * - another request may be submitted by the upper level driver 446 + * of the stacking during the completion 447 + * - the submission which requires queue lock may be done 448 + * against this clone's queue 449 + */ 450 + dm_complete_request(tio->orig, error); 451 + } 452 + 453 + static void dm_dispatch_clone_request(struct request *clone, struct request *rq) 454 + { 455 + int r; 456 + 457 + if (blk_queue_io_stat(clone->q)) 458 + clone->cmd_flags |= REQ_IO_STAT; 459 + 460 + clone->start_time = jiffies; 461 + r = blk_insert_cloned_request(clone->q, clone); 462 + if (r) 463 + /* must complete clone in terms of original request */ 464 + dm_complete_request(rq, r); 465 + } 466 + 467 + static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 468 + void *data) 469 + { 470 + struct dm_rq_target_io *tio = data; 471 + struct dm_rq_clone_bio_info *info = 472 + container_of(bio, struct dm_rq_clone_bio_info, clone); 473 + 474 + info->orig = bio_orig; 475 + info->tio = tio; 476 + bio->bi_end_io = end_clone_bio; 477 + 478 + return 0; 479 + } 480 + 481 + static int setup_clone(struct request *clone, struct request *rq, 482 + struct dm_rq_target_io *tio, gfp_t gfp_mask) 483 + { 484 + int r; 485 + 486 + r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, 487 + dm_rq_bio_constructor, tio); 488 + if (r) 489 + return r; 490 + 491 + clone->cmd = rq->cmd; 492 + clone->cmd_len = rq->cmd_len; 493 + clone->sense = rq->sense; 494 + clone->end_io = end_clone_request; 495 + clone->end_io_data = tio; 496 + 497 + tio->clone = clone; 498 + 499 + return 0; 500 + } 501 + 502 + static struct request *clone_old_rq(struct request *rq, struct mapped_device *md, 503 + struct dm_rq_target_io *tio, gfp_t gfp_mask) 504 + { 505 + /* 506 + * Create clone for use with .request_fn request_queue 507 + */ 508 + struct request *clone; 509 + 510 + clone = alloc_old_clone_request(md, gfp_mask); 511 + if (!clone) 512 + return NULL; 513 + 514 + 
blk_rq_init(NULL, clone); 515 + if (setup_clone(clone, rq, tio, gfp_mask)) { 516 + /* -ENOMEM */ 517 + free_old_clone_request(md, clone); 518 + return NULL; 519 + } 520 + 521 + return clone; 522 + } 523 + 524 + static void map_tio_request(struct kthread_work *work); 525 + 526 + static void init_tio(struct dm_rq_target_io *tio, struct request *rq, 527 + struct mapped_device *md) 528 + { 529 + tio->md = md; 530 + tio->ti = NULL; 531 + tio->clone = NULL; 532 + tio->orig = rq; 533 + tio->error = 0; 534 + /* 535 + * Avoid initializing info for blk-mq; it passes 536 + * target-specific data through info.ptr 537 + * (see: dm_mq_init_request) 538 + */ 539 + if (!md->init_tio_pdu) 540 + memset(&tio->info, 0, sizeof(tio->info)); 541 + if (md->kworker_task) 542 + init_kthread_work(&tio->work, map_tio_request); 543 + } 544 + 545 + static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, 546 + struct mapped_device *md, 547 + gfp_t gfp_mask) 548 + { 549 + struct dm_rq_target_io *tio; 550 + int srcu_idx; 551 + struct dm_table *table; 552 + 553 + tio = alloc_old_rq_tio(md, gfp_mask); 554 + if (!tio) 555 + return NULL; 556 + 557 + init_tio(tio, rq, md); 558 + 559 + table = dm_get_live_table(md, &srcu_idx); 560 + /* 561 + * Must clone a request if this .request_fn DM device 562 + * is stacked on .request_fn device(s). 563 + */ 564 + if (!dm_table_mq_request_based(table)) { 565 + if (!clone_old_rq(rq, md, tio, gfp_mask)) { 566 + dm_put_live_table(md, srcu_idx); 567 + free_old_rq_tio(tio); 568 + return NULL; 569 + } 570 + } 571 + dm_put_live_table(md, srcu_idx); 572 + 573 + return tio; 574 + } 575 + 576 + /* 577 + * Called with the queue lock held. 
578 + */ 579 + static int dm_old_prep_fn(struct request_queue *q, struct request *rq) 580 + { 581 + struct mapped_device *md = q->queuedata; 582 + struct dm_rq_target_io *tio; 583 + 584 + if (unlikely(rq->special)) { 585 + DMWARN("Already has something in rq->special."); 586 + return BLKPREP_KILL; 587 + } 588 + 589 + tio = dm_old_prep_tio(rq, md, GFP_ATOMIC); 590 + if (!tio) 591 + return BLKPREP_DEFER; 592 + 593 + rq->special = tio; 594 + rq->cmd_flags |= REQ_DONTPREP; 595 + 596 + return BLKPREP_OK; 597 + } 598 + 599 + /* 600 + * Returns: 601 + * 0 : the request has been processed 602 + * DM_MAPIO_REQUEUE : the original request needs to be requeued 603 + * < 0 : the request was completed due to failure 604 + */ 605 + static int map_request(struct dm_rq_target_io *tio, struct request *rq, 606 + struct mapped_device *md) 607 + { 608 + int r; 609 + struct dm_target *ti = tio->ti; 610 + struct request *clone = NULL; 611 + 612 + if (tio->clone) { 613 + clone = tio->clone; 614 + r = ti->type->map_rq(ti, clone, &tio->info); 615 + } else { 616 + r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); 617 + if (r < 0) { 618 + /* The target wants to complete the I/O */ 619 + dm_kill_unmapped_request(rq, r); 620 + return r; 621 + } 622 + if (r != DM_MAPIO_REMAPPED) 623 + return r; 624 + if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { 625 + /* -ENOMEM */ 626 + ti->type->release_clone_rq(clone); 627 + return DM_MAPIO_REQUEUE; 628 + } 629 + } 630 + 631 + switch (r) { 632 + case DM_MAPIO_SUBMITTED: 633 + /* The target has taken the I/O to submit by itself later */ 634 + break; 635 + case DM_MAPIO_REMAPPED: 636 + /* The target has remapped the I/O so dispatch it */ 637 + trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 638 + blk_rq_pos(rq)); 639 + dm_dispatch_clone_request(clone, rq); 640 + break; 641 + case DM_MAPIO_REQUEUE: 642 + /* The target wants to requeue the I/O */ 643 + dm_requeue_original_request(md, tio->orig); 644 + break; 645 + default: 646 + if (r > 
0) { 647 + DMWARN("unimplemented target map return value: %d", r); 648 + BUG(); 649 + } 650 + 651 + /* The target wants to complete the I/O */ 652 + dm_kill_unmapped_request(rq, r); 653 + return r; 654 + } 655 + 656 + return 0; 657 + } 658 + 659 + static void dm_start_request(struct mapped_device *md, struct request *orig) 660 + { 661 + if (!orig->q->mq_ops) 662 + blk_start_request(orig); 663 + else 664 + blk_mq_start_request(orig); 665 + atomic_inc(&md->pending[rq_data_dir(orig)]); 666 + 667 + if (md->seq_rq_merge_deadline_usecs) { 668 + md->last_rq_pos = rq_end_sector(orig); 669 + md->last_rq_rw = rq_data_dir(orig); 670 + md->last_rq_start_time = ktime_get(); 671 + } 672 + 673 + if (unlikely(dm_stats_used(&md->stats))) { 674 + struct dm_rq_target_io *tio = tio_from_request(orig); 675 + tio->duration_jiffies = jiffies; 676 + tio->n_sectors = blk_rq_sectors(orig); 677 + dm_stats_account_io(&md->stats, rq_data_dir(orig), 678 + blk_rq_pos(orig), tio->n_sectors, false, 0, 679 + &tio->stats_aux); 680 + } 681 + 682 + /* 683 + * Hold the md reference here for the in-flight I/O. 684 + * We can't rely on the reference count by device opener, 685 + * because the device may be closed during the request completion 686 + * when all bios are completed. 687 + * See the comment in rq_completed() too. 
688 + */ 689 + dm_get(md); 690 + } 691 + 692 + static void map_tio_request(struct kthread_work *work) 693 + { 694 + struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); 695 + struct request *rq = tio->orig; 696 + struct mapped_device *md = tio->md; 697 + 698 + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) 699 + dm_requeue_original_request(md, rq); 700 + } 701 + 702 + ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) 703 + { 704 + return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); 705 + } 706 + 707 + #define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 708 + 709 + ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, 710 + const char *buf, size_t count) 711 + { 712 + unsigned deadline; 713 + 714 + if (!dm_request_based(md) || md->use_blk_mq) 715 + return count; 716 + 717 + if (kstrtouint(buf, 10, &deadline)) 718 + return -EINVAL; 719 + 720 + if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) 721 + deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; 722 + 723 + md->seq_rq_merge_deadline_usecs = deadline; 724 + 725 + return count; 726 + } 727 + 728 + static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md) 729 + { 730 + ktime_t kt_deadline; 731 + 732 + if (!md->seq_rq_merge_deadline_usecs) 733 + return false; 734 + 735 + kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); 736 + kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); 737 + 738 + return !ktime_after(ktime_get(), kt_deadline); 739 + } 740 + 741 + /* 742 + * q->request_fn for old request-based dm. 743 + * Called with the queue lock held. 
744 + */ 745 + static void dm_old_request_fn(struct request_queue *q) 746 + { 747 + struct mapped_device *md = q->queuedata; 748 + struct dm_target *ti = md->immutable_target; 749 + struct request *rq; 750 + struct dm_rq_target_io *tio; 751 + sector_t pos = 0; 752 + 753 + if (unlikely(!ti)) { 754 + int srcu_idx; 755 + struct dm_table *map = dm_get_live_table(md, &srcu_idx); 756 + 757 + ti = dm_table_find_target(map, pos); 758 + dm_put_live_table(md, srcu_idx); 759 + } 760 + 761 + /* 762 + * For suspend, check blk_queue_stopped() and increment 763 + * ->pending within a single queue_lock not to increment the 764 + * number of in-flight I/Os after the queue is stopped in 765 + * dm_suspend(). 766 + */ 767 + while (!blk_queue_stopped(q)) { 768 + rq = blk_peek_request(q); 769 + if (!rq) 770 + return; 771 + 772 + /* always use block 0 to find the target for flushes for now */ 773 + pos = 0; 774 + if (req_op(rq) != REQ_OP_FLUSH) 775 + pos = blk_rq_pos(rq); 776 + 777 + if ((dm_old_request_peeked_before_merge_deadline(md) && 778 + md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && 779 + md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || 780 + (ti->type->busy && ti->type->busy(ti))) { 781 + blk_delay_queue(q, HZ / 100); 782 + return; 783 + } 784 + 785 + dm_start_request(md, rq); 786 + 787 + tio = tio_from_request(rq); 788 + /* Establish tio->ti before queuing work (map_tio_request) */ 789 + tio->ti = ti; 790 + queue_kthread_work(&md->kworker, &tio->work); 791 + BUG_ON(!irqs_disabled()); 792 + } 793 + } 794 + 795 + /* 796 + * Fully initialize a .request_fn request-based queue. 
797 + */ 798 + int dm_old_init_request_queue(struct mapped_device *md) 799 + { 800 + /* Fully initialize the queue */ 801 + if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL)) 802 + return -EINVAL; 803 + 804 + /* disable dm_old_request_fn's merge heuristic by default */ 805 + md->seq_rq_merge_deadline_usecs = 0; 806 + 807 + dm_init_normal_md_queue(md); 808 + blk_queue_softirq_done(md->queue, dm_softirq_done); 809 + blk_queue_prep_rq(md->queue, dm_old_prep_fn); 810 + 811 + /* Initialize the request-based DM worker thread */ 812 + init_kthread_worker(&md->kworker); 813 + md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, 814 + "kdmwork-%s", dm_device_name(md)); 815 + 816 + elv_register_queue(md->queue); 817 + 818 + return 0; 819 + } 820 + 821 + static int dm_mq_init_request(void *data, struct request *rq, 822 + unsigned int hctx_idx, unsigned int request_idx, 823 + unsigned int numa_node) 824 + { 825 + struct mapped_device *md = data; 826 + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 827 + 828 + /* 829 + * Must initialize md member of tio, otherwise it won't 830 + * be available in dm_mq_queue_rq. 
831 + */ 832 + tio->md = md; 833 + 834 + if (md->init_tio_pdu) { 835 + /* target-specific per-io data is immediately after the tio */ 836 + tio->info.ptr = tio + 1; 837 + } 838 + 839 + return 0; 840 + } 841 + 842 + static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, 843 + const struct blk_mq_queue_data *bd) 844 + { 845 + struct request *rq = bd->rq; 846 + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 847 + struct mapped_device *md = tio->md; 848 + struct dm_target *ti = md->immutable_target; 849 + 850 + if (unlikely(!ti)) { 851 + int srcu_idx; 852 + struct dm_table *map = dm_get_live_table(md, &srcu_idx); 853 + 854 + ti = dm_table_find_target(map, 0); 855 + dm_put_live_table(md, srcu_idx); 856 + } 857 + 858 + if (ti->type->busy && ti->type->busy(ti)) 859 + return BLK_MQ_RQ_QUEUE_BUSY; 860 + 861 + dm_start_request(md, rq); 862 + 863 + /* Init tio using md established in .init_request */ 864 + init_tio(tio, rq, md); 865 + 866 + /* 867 + * Establish tio->ti before calling map_request(). 
868 + */ 869 + tio->ti = ti; 870 + 871 + /* Direct call is fine since .queue_rq allows allocations */ 872 + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { 873 + /* Undo dm_start_request() before requeuing */ 874 + rq_end_stats(md, rq); 875 + rq_completed(md, rq_data_dir(rq), false); 876 + return BLK_MQ_RQ_QUEUE_BUSY; 877 + } 878 + 879 + return BLK_MQ_RQ_QUEUE_OK; 880 + } 881 + 882 + static struct blk_mq_ops dm_mq_ops = { 883 + .queue_rq = dm_mq_queue_rq, 884 + .map_queue = blk_mq_map_queue, 885 + .complete = dm_softirq_done, 886 + .init_request = dm_mq_init_request, 887 + }; 888 + 889 + int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt) 890 + { 891 + struct request_queue *q; 892 + int err; 893 + 894 + if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) { 895 + DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); 896 + return -EINVAL; 897 + } 898 + 899 + md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id); 900 + if (!md->tag_set) 901 + return -ENOMEM; 902 + 903 + md->tag_set->ops = &dm_mq_ops; 904 + md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); 905 + md->tag_set->numa_node = md->numa_node_id; 906 + md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; 907 + md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); 908 + md->tag_set->driver_data = md; 909 + 910 + md->tag_set->cmd_size = sizeof(struct dm_rq_target_io); 911 + if (immutable_tgt && immutable_tgt->per_io_data_size) { 912 + /* any target-specific per-io data is immediately after the tio */ 913 + md->tag_set->cmd_size += immutable_tgt->per_io_data_size; 914 + md->init_tio_pdu = true; 915 + } 916 + 917 + err = blk_mq_alloc_tag_set(md->tag_set); 918 + if (err) 919 + goto out_kfree_tag_set; 920 + 921 + q = blk_mq_init_allocated_queue(md->tag_set, md->queue); 922 + if (IS_ERR(q)) { 923 + err = PTR_ERR(q); 924 + goto out_tag_set; 925 + } 926 + dm_init_md_queue(md); 927 + 928 + /* backfill 'mq' sysfs 
registration normally done in blk_register_queue */ 929 + blk_mq_register_disk(md->disk); 930 + 931 + return 0; 932 + 933 + out_tag_set: 934 + blk_mq_free_tag_set(md->tag_set); 935 + out_kfree_tag_set: 936 + kfree(md->tag_set); 937 + 938 + return err; 939 + } 940 + 941 + void dm_mq_cleanup_mapped_device(struct mapped_device *md) 942 + { 943 + if (md->tag_set) { 944 + blk_mq_free_tag_set(md->tag_set); 945 + kfree(md->tag_set); 946 + } 947 + } 948 + 949 + module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); 950 + MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); 951 + 952 + module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); 953 + MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); 954 + 955 + module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR); 956 + MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices"); 957 + 958 + module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR); 959 + MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
+64
drivers/md/dm-rq.h
··· 1 + /* 2 + * Internal header file for device mapper 3 + * 4 + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. 5 + * 6 + * This file is released under the LGPL. 7 + */ 8 + 9 + #ifndef DM_RQ_INTERNAL_H 10 + #define DM_RQ_INTERNAL_H 11 + 12 + #include <linux/bio.h> 13 + #include <linux/kthread.h> 14 + 15 + #include "dm-stats.h" 16 + 17 + struct mapped_device; 18 + 19 + /* 20 + * One of these is allocated per request. 21 + */ 22 + struct dm_rq_target_io { 23 + struct mapped_device *md; 24 + struct dm_target *ti; 25 + struct request *orig, *clone; 26 + struct kthread_work work; 27 + int error; 28 + union map_info info; 29 + struct dm_stats_aux stats_aux; 30 + unsigned long duration_jiffies; 31 + unsigned n_sectors; 32 + }; 33 + 34 + /* 35 + * For request-based dm - the bio clones we allocate are embedded in these 36 + * structs. 37 + * 38 + * We allocate these with bio_alloc_bioset, using the front_pad parameter when 39 + * the bioset is created - this means the bio has to come at the end of the 40 + * struct. 41 + */ 42 + struct dm_rq_clone_bio_info { 43 + struct bio *orig; 44 + struct dm_rq_target_io *tio; 45 + struct bio clone; 46 + }; 47 + 48 + bool dm_use_blk_mq_default(void); 49 + bool dm_use_blk_mq(struct mapped_device *md); 50 + 51 + int dm_old_init_request_queue(struct mapped_device *md); 52 + int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt); 53 + void dm_mq_cleanup_mapped_device(struct mapped_device *md); 54 + 55 + void dm_start_queue(struct request_queue *q); 56 + void dm_stop_queue(struct request_queue *q); 57 + 58 + unsigned dm_get_reserved_rq_based_ios(void); 59 + 60 + ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); 61 + ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, 62 + const char *buf, size_t count); 63 + 64 + #endif
+1 -1
drivers/md/dm-stats.c
··· 10 10 #include <linux/module.h> 11 11 #include <linux/device-mapper.h> 12 12 13 - #include "dm.h" 13 + #include "dm-core.h" 14 14 #include "dm-stats.h" 15 15 16 16 #define DM_MSG_PREFIX "stats"
+2 -1
drivers/md/dm-sysfs.c
··· 6 6 7 7 #include <linux/sysfs.h> 8 8 #include <linux/dm-ioctl.h> 9 - #include "dm.h" 9 + #include "dm-core.h" 10 + #include "dm-rq.h" 10 11 11 12 struct dm_sysfs_attr { 12 13 struct attribute attr;
+1 -1
drivers/md/dm-table.c
··· 5 5 * This file is released under the GPL. 6 6 */ 7 7 8 - #include "dm.h" 8 + #include "dm-core.h" 9 9 10 10 #include <linux/module.h> 11 11 #include <linux/vmalloc.h>
+1 -1
drivers/md/dm-target.c
··· 4 4 * This file is released under the GPL. 5 5 */ 6 6 7 - #include "dm.h" 7 + #include "dm-core.h" 8 8 9 9 #include <linux/module.h> 10 10 #include <linux/init.h>
+13 -1097
drivers/md/dm.c
··· 5 5 * This file is released under the GPL. 6 6 */ 7 7 8 - #include "dm.h" 8 + #include "dm-core.h" 9 + #include "dm-rq.h" 9 10 #include "dm-uevent.h" 10 11 11 12 #include <linux/init.h> 12 13 #include <linux/module.h> 13 14 #include <linux/mutex.h> 14 - #include <linux/moduleparam.h> 15 15 #include <linux/blkpg.h> 16 16 #include <linux/bio.h> 17 17 #include <linux/mempool.h> ··· 20 20 #include <linux/hdreg.h> 21 21 #include <linux/delay.h> 22 22 #include <linux/wait.h> 23 - #include <linux/kthread.h> 24 - #include <linux/ktime.h> 25 - #include <linux/elevator.h> /* for rq_end_sector() */ 26 - #include <linux/blk-mq.h> 27 23 #include <linux/pr.h> 28 - 29 - #include <trace/events/block.h> 30 24 31 25 #define DM_MSG_PREFIX "core" 32 26 ··· 57 63 static struct workqueue_struct *deferred_remove_workqueue; 58 64 59 65 /* 60 - * For bio-based dm. 61 66 * One of these is allocated per bio. 62 67 */ 63 68 struct dm_io { ··· 67 74 unsigned long start_time; 68 75 spinlock_t endio_lock; 69 76 struct dm_stats_aux stats_aux; 70 - }; 71 - 72 - /* 73 - * For request-based dm. 74 - * One of these is allocated per request. 75 - */ 76 - struct dm_rq_target_io { 77 - struct mapped_device *md; 78 - struct dm_target *ti; 79 - struct request *orig, *clone; 80 - struct kthread_work work; 81 - int error; 82 - union map_info info; 83 - struct dm_stats_aux stats_aux; 84 - unsigned long duration_jiffies; 85 - unsigned n_sectors; 86 - }; 87 - 88 - /* 89 - * For request-based dm - the bio clones we allocate are embedded in these 90 - * structs. 91 - * 92 - * We allocate these with bio_alloc_bioset, using the front_pad parameter when 93 - * the bioset is created - this means the bio has to come at the end of the 94 - * struct. 
95 - */ 96 - struct dm_rq_clone_bio_info { 97 - struct bio *orig; 98 - struct dm_rq_target_io *tio; 99 - struct bio clone; 100 77 }; 101 78 102 79 #define MINOR_ALLOCED ((void *)-1) ··· 83 120 #define DMF_DEFERRED_REMOVE 6 84 121 #define DMF_SUSPENDED_INTERNALLY 7 85 122 86 - /* 87 - * Work processed by per-device workqueue. 88 - */ 89 - struct mapped_device { 90 - struct srcu_struct io_barrier; 91 - struct mutex suspend_lock; 92 - 93 - /* 94 - * The current mapping (struct dm_table *). 95 - * Use dm_get_live_table{_fast} or take suspend_lock for 96 - * dereference. 97 - */ 98 - void __rcu *map; 99 - 100 - struct list_head table_devices; 101 - struct mutex table_devices_lock; 102 - 103 - unsigned long flags; 104 - 105 - struct request_queue *queue; 106 - int numa_node_id; 107 - 108 - unsigned type; 109 - /* Protect queue and type against concurrent access. */ 110 - struct mutex type_lock; 111 - 112 - atomic_t holders; 113 - atomic_t open_count; 114 - 115 - struct dm_target *immutable_target; 116 - struct target_type *immutable_target_type; 117 - 118 - struct gendisk *disk; 119 - char name[16]; 120 - 121 - void *interface_ptr; 122 - 123 - /* 124 - * A list of ios that arrived while we were suspended. 125 - */ 126 - atomic_t pending[2]; 127 - wait_queue_head_t wait; 128 - struct work_struct work; 129 - spinlock_t deferred_lock; 130 - struct bio_list deferred; 131 - 132 - /* 133 - * Event handling. 134 - */ 135 - wait_queue_head_t eventq; 136 - atomic_t event_nr; 137 - atomic_t uevent_seq; 138 - struct list_head uevent_list; 139 - spinlock_t uevent_lock; /* Protect access to uevent_list */ 140 - 141 - /* the number of internal suspends */ 142 - unsigned internal_suspend_count; 143 - 144 - /* 145 - * Processing queue (flush) 146 - */ 147 - struct workqueue_struct *wq; 148 - 149 - /* 150 - * io objects are allocated from here. 
151 - */ 152 - mempool_t *io_pool; 153 - mempool_t *rq_pool; 154 - 155 - struct bio_set *bs; 156 - 157 - /* 158 - * freeze/thaw support require holding onto a super block 159 - */ 160 - struct super_block *frozen_sb; 161 - 162 - /* forced geometry settings */ 163 - struct hd_geometry geometry; 164 - 165 - struct block_device *bdev; 166 - 167 - /* kobject and completion */ 168 - struct dm_kobject_holder kobj_holder; 169 - 170 - /* zero-length flush that will be cloned and submitted to targets */ 171 - struct bio flush_bio; 172 - 173 - struct dm_stats stats; 174 - 175 - struct kthread_worker kworker; 176 - struct task_struct *kworker_task; 177 - 178 - /* for request-based merge heuristic in dm_request_fn() */ 179 - unsigned seq_rq_merge_deadline_usecs; 180 - int last_rq_rw; 181 - sector_t last_rq_pos; 182 - ktime_t last_rq_start_time; 183 - 184 - /* for blk-mq request-based DM support */ 185 - struct blk_mq_tag_set *tag_set; 186 - bool use_blk_mq:1; 187 - bool init_tio_pdu:1; 188 - }; 189 - 190 - #ifdef CONFIG_DM_MQ_DEFAULT 191 - static bool use_blk_mq = true; 192 - #else 193 - static bool use_blk_mq = false; 194 - #endif 195 - 196 - #define DM_MQ_NR_HW_QUEUES 1 197 - #define DM_MQ_QUEUE_DEPTH 2048 198 123 #define DM_NUMA_NODE NUMA_NO_NODE 199 - 200 - static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; 201 - static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; 202 124 static int dm_numa_node = DM_NUMA_NODE; 203 - 204 - bool dm_use_blk_mq(struct mapped_device *md) 205 - { 206 - return md->use_blk_mq; 207 - } 208 - EXPORT_SYMBOL_GPL(dm_use_blk_mq); 209 125 210 126 /* 211 127 * For mempools pre-allocation at the table loading time. 
··· 101 259 struct dm_dev dm_dev; 102 260 }; 103 261 104 - #define RESERVED_BIO_BASED_IOS 16 105 - #define RESERVED_REQUEST_BASED_IOS 256 106 - #define RESERVED_MAX_IOS 1024 107 262 static struct kmem_cache *_io_cache; 108 263 static struct kmem_cache *_rq_tio_cache; 109 264 static struct kmem_cache *_rq_cache; ··· 108 269 /* 109 270 * Bio-based DM's mempools' reserved IOs set by the user. 110 271 */ 272 + #define RESERVED_BIO_BASED_IOS 16 111 273 static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; 112 - 113 - /* 114 - * Request-based DM's mempools' reserved IOs set by the user. 115 - */ 116 - static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; 117 274 118 275 static int __dm_get_module_param_int(int *module_param, int min, int max) 119 276 { ··· 132 297 return param; 133 298 } 134 299 135 - static unsigned __dm_get_module_param(unsigned *module_param, 136 - unsigned def, unsigned max) 300 + unsigned __dm_get_module_param(unsigned *module_param, 301 + unsigned def, unsigned max) 137 302 { 138 303 unsigned param = ACCESS_ONCE(*module_param); 139 304 unsigned modified_param = 0; ··· 154 319 unsigned dm_get_reserved_bio_based_ios(void) 155 320 { 156 321 return __dm_get_module_param(&reserved_bio_based_ios, 157 - RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); 322 + RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS); 158 323 } 159 324 EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); 160 - 161 - unsigned dm_get_reserved_rq_based_ios(void) 162 - { 163 - return __dm_get_module_param(&reserved_rq_based_ios, 164 - RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); 165 - } 166 - EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); 167 - 168 - static unsigned dm_get_blk_mq_nr_hw_queues(void) 169 - { 170 - return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32); 171 - } 172 - 173 - static unsigned dm_get_blk_mq_queue_depth(void) 174 - { 175 - return __dm_get_module_param(&dm_mq_queue_depth, 176 - DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH); 177 - } 178 325 179 326 static 
unsigned dm_get_numa_node(void) 180 327 { ··· 496 679 bio_put(&tio->clone); 497 680 } 498 681 499 - static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, 500 - gfp_t gfp_mask) 501 - { 502 - return mempool_alloc(md->io_pool, gfp_mask); 503 - } 504 - 505 - static void free_old_rq_tio(struct dm_rq_target_io *tio) 506 - { 507 - mempool_free(tio, tio->md->io_pool); 508 - } 509 - 510 - static struct request *alloc_old_clone_request(struct mapped_device *md, 511 - gfp_t gfp_mask) 512 - { 513 - return mempool_alloc(md->rq_pool, gfp_mask); 514 - } 515 - 516 - static void free_old_clone_request(struct mapped_device *md, struct request *rq) 517 - { 518 - mempool_free(rq, md->rq_pool); 519 - } 520 - 521 - static int md_in_flight(struct mapped_device *md) 682 + int md_in_flight(struct mapped_device *md) 522 683 { 523 684 return atomic_read(&md->pending[READ]) + 524 685 atomic_read(&md->pending[WRITE]); ··· 814 1019 } 815 1020 } 816 1021 817 - static void disable_write_same(struct mapped_device *md) 1022 + void disable_write_same(struct mapped_device *md) 818 1023 { 819 1024 struct queue_limits *limits = dm_get_queue_limits(md); 820 1025 ··· 854 1059 855 1060 free_tio(tio); 856 1061 dec_pending(io, error); 857 - } 858 - 859 - /* 860 - * Partial completion handling for request-based dm 861 - */ 862 - static void end_clone_bio(struct bio *clone) 863 - { 864 - struct dm_rq_clone_bio_info *info = 865 - container_of(clone, struct dm_rq_clone_bio_info, clone); 866 - struct dm_rq_target_io *tio = info->tio; 867 - struct bio *bio = info->orig; 868 - unsigned int nr_bytes = info->orig->bi_iter.bi_size; 869 - int error = clone->bi_error; 870 - 871 - bio_put(clone); 872 - 873 - if (tio->error) 874 - /* 875 - * An error has already been detected on the request. 876 - * Once error occurred, just let clone->end_io() handle 877 - * the remainder. 878 - */ 879 - return; 880 - else if (error) { 881 - /* 882 - * Don't notice the error to the upper layer yet. 
883 - * The error handling decision is made by the target driver, 884 - * when the request is completed. 885 - */ 886 - tio->error = error; 887 - return; 888 - } 889 - 890 - /* 891 - * I/O for the bio successfully completed. 892 - * Notice the data completion to the upper layer. 893 - */ 894 - 895 - /* 896 - * bios are processed from the head of the list. 897 - * So the completing bio should always be rq->bio. 898 - * If it's not, something wrong is happening. 899 - */ 900 - if (tio->orig->bio != bio) 901 - DMERR("bio completion is going in the middle of the request"); 902 - 903 - /* 904 - * Update the original request. 905 - * Do not use blk_end_request() here, because it may complete 906 - * the original request before the clone, and break the ordering. 907 - */ 908 - blk_update_request(tio->orig, 0, nr_bytes); 909 - } 910 - 911 - static struct dm_rq_target_io *tio_from_request(struct request *rq) 912 - { 913 - return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); 914 - } 915 - 916 - static void rq_end_stats(struct mapped_device *md, struct request *orig) 917 - { 918 - if (unlikely(dm_stats_used(&md->stats))) { 919 - struct dm_rq_target_io *tio = tio_from_request(orig); 920 - tio->duration_jiffies = jiffies - tio->duration_jiffies; 921 - dm_stats_account_io(&md->stats, rq_data_dir(orig), 922 - blk_rq_pos(orig), tio->n_sectors, true, 923 - tio->duration_jiffies, &tio->stats_aux); 924 - } 925 - } 926 - 927 - /* 928 - * Don't touch any member of the md after calling this function because 929 - * the md may be freed in dm_put() at the end of this function. 930 - * Or do dm_get() before calling this function and dm_put() later. 
931 - */ 932 - static void rq_completed(struct mapped_device *md, int rw, bool run_queue) 933 - { 934 - atomic_dec(&md->pending[rw]); 935 - 936 - /* nudge anyone waiting on suspend queue */ 937 - if (!md_in_flight(md)) 938 - wake_up(&md->wait); 939 - 940 - /* 941 - * Run this off this callpath, as drivers could invoke end_io while 942 - * inside their request_fn (and holding the queue lock). Calling 943 - * back into ->request_fn() could deadlock attempting to grab the 944 - * queue lock again. 945 - */ 946 - if (!md->queue->mq_ops && run_queue) 947 - blk_run_queue_async(md->queue); 948 - 949 - /* 950 - * dm_put() must be at the end of this function. See the comment above 951 - */ 952 - dm_put(md); 953 - } 954 - 955 - static void free_rq_clone(struct request *clone) 956 - { 957 - struct dm_rq_target_io *tio = clone->end_io_data; 958 - struct mapped_device *md = tio->md; 959 - 960 - blk_rq_unprep_clone(clone); 961 - 962 - if (md->type == DM_TYPE_MQ_REQUEST_BASED) 963 - /* stacked on blk-mq queue(s) */ 964 - tio->ti->type->release_clone_rq(clone); 965 - else if (!md->queue->mq_ops) 966 - /* request_fn queue stacked on request_fn queue(s) */ 967 - free_old_clone_request(md, clone); 968 - 969 - if (!md->queue->mq_ops) 970 - free_old_rq_tio(tio); 971 - } 972 - 973 - /* 974 - * Complete the clone and the original request. 975 - * Must be called without clone's queue lock held, 976 - * see end_clone_request() for more details. 977 - */ 978 - static void dm_end_request(struct request *clone, int error) 979 - { 980 - int rw = rq_data_dir(clone); 981 - struct dm_rq_target_io *tio = clone->end_io_data; 982 - struct mapped_device *md = tio->md; 983 - struct request *rq = tio->orig; 984 - 985 - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 986 - rq->errors = clone->errors; 987 - rq->resid_len = clone->resid_len; 988 - 989 - if (rq->sense) 990 - /* 991 - * We are using the sense buffer of the original 992 - * request. 993 - * So setting the length of the sense data is enough. 
994 - */ 995 - rq->sense_len = clone->sense_len; 996 - } 997 - 998 - free_rq_clone(clone); 999 - rq_end_stats(md, rq); 1000 - if (!rq->q->mq_ops) 1001 - blk_end_request_all(rq, error); 1002 - else 1003 - blk_mq_end_request(rq, error); 1004 - rq_completed(md, rw, true); 1005 - } 1006 - 1007 - static void dm_unprep_request(struct request *rq) 1008 - { 1009 - struct dm_rq_target_io *tio = tio_from_request(rq); 1010 - struct request *clone = tio->clone; 1011 - 1012 - if (!rq->q->mq_ops) { 1013 - rq->special = NULL; 1014 - rq->cmd_flags &= ~REQ_DONTPREP; 1015 - } 1016 - 1017 - if (clone) 1018 - free_rq_clone(clone); 1019 - else if (!tio->md->queue->mq_ops) 1020 - free_old_rq_tio(tio); 1021 - } 1022 - 1023 - /* 1024 - * Requeue the original request of a clone. 1025 - */ 1026 - static void dm_old_requeue_request(struct request *rq) 1027 - { 1028 - struct request_queue *q = rq->q; 1029 - unsigned long flags; 1030 - 1031 - spin_lock_irqsave(q->queue_lock, flags); 1032 - blk_requeue_request(q, rq); 1033 - blk_run_queue_async(q); 1034 - spin_unlock_irqrestore(q->queue_lock, flags); 1035 - } 1036 - 1037 - static void dm_mq_requeue_request(struct request *rq) 1038 - { 1039 - struct request_queue *q = rq->q; 1040 - unsigned long flags; 1041 - 1042 - blk_mq_requeue_request(rq); 1043 - spin_lock_irqsave(q->queue_lock, flags); 1044 - if (!blk_queue_stopped(q)) 1045 - blk_mq_kick_requeue_list(q); 1046 - spin_unlock_irqrestore(q->queue_lock, flags); 1047 - } 1048 - 1049 - static void dm_requeue_original_request(struct mapped_device *md, 1050 - struct request *rq) 1051 - { 1052 - int rw = rq_data_dir(rq); 1053 - 1054 - rq_end_stats(md, rq); 1055 - dm_unprep_request(rq); 1056 - 1057 - if (!rq->q->mq_ops) 1058 - dm_old_requeue_request(rq); 1059 - else 1060 - dm_mq_requeue_request(rq); 1061 - 1062 - rq_completed(md, rw, false); 1063 - } 1064 - 1065 - static void dm_old_stop_queue(struct request_queue *q) 1066 - { 1067 - unsigned long flags; 1068 - 1069 - spin_lock_irqsave(q->queue_lock, 
flags); 1070 - if (blk_queue_stopped(q)) { 1071 - spin_unlock_irqrestore(q->queue_lock, flags); 1072 - return; 1073 - } 1074 - 1075 - blk_stop_queue(q); 1076 - spin_unlock_irqrestore(q->queue_lock, flags); 1077 - } 1078 - 1079 - static void dm_stop_queue(struct request_queue *q) 1080 - { 1081 - if (!q->mq_ops) 1082 - dm_old_stop_queue(q); 1083 - else 1084 - blk_mq_stop_hw_queues(q); 1085 - } 1086 - 1087 - static void dm_old_start_queue(struct request_queue *q) 1088 - { 1089 - unsigned long flags; 1090 - 1091 - spin_lock_irqsave(q->queue_lock, flags); 1092 - if (blk_queue_stopped(q)) 1093 - blk_start_queue(q); 1094 - spin_unlock_irqrestore(q->queue_lock, flags); 1095 - } 1096 - 1097 - static void dm_start_queue(struct request_queue *q) 1098 - { 1099 - if (!q->mq_ops) 1100 - dm_old_start_queue(q); 1101 - else { 1102 - blk_mq_start_stopped_hw_queues(q, true); 1103 - blk_mq_kick_requeue_list(q); 1104 - } 1105 - } 1106 - 1107 - static void dm_done(struct request *clone, int error, bool mapped) 1108 - { 1109 - int r = error; 1110 - struct dm_rq_target_io *tio = clone->end_io_data; 1111 - dm_request_endio_fn rq_end_io = NULL; 1112 - 1113 - if (tio->ti) { 1114 - rq_end_io = tio->ti->type->rq_end_io; 1115 - 1116 - if (mapped && rq_end_io) 1117 - r = rq_end_io(tio->ti, clone, error, &tio->info); 1118 - } 1119 - 1120 - if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) && 1121 - !clone->q->limits.max_write_same_sectors)) 1122 - disable_write_same(tio->md); 1123 - 1124 - if (r <= 0) 1125 - /* The target wants to complete the I/O */ 1126 - dm_end_request(clone, r); 1127 - else if (r == DM_ENDIO_INCOMPLETE) 1128 - /* The target will handle the I/O */ 1129 - return; 1130 - else if (r == DM_ENDIO_REQUEUE) 1131 - /* The target wants to requeue the I/O */ 1132 - dm_requeue_original_request(tio->md, tio->orig); 1133 - else { 1134 - DMWARN("unimplemented target endio return value: %d", r); 1135 - BUG(); 1136 - } 1137 - } 1138 - 1139 - /* 1140 - * Request completion 
handler for request-based dm 1141 - */ 1142 - static void dm_softirq_done(struct request *rq) 1143 - { 1144 - bool mapped = true; 1145 - struct dm_rq_target_io *tio = tio_from_request(rq); 1146 - struct request *clone = tio->clone; 1147 - int rw; 1148 - 1149 - if (!clone) { 1150 - rq_end_stats(tio->md, rq); 1151 - rw = rq_data_dir(rq); 1152 - if (!rq->q->mq_ops) { 1153 - blk_end_request_all(rq, tio->error); 1154 - rq_completed(tio->md, rw, false); 1155 - free_old_rq_tio(tio); 1156 - } else { 1157 - blk_mq_end_request(rq, tio->error); 1158 - rq_completed(tio->md, rw, false); 1159 - } 1160 - return; 1161 - } 1162 - 1163 - if (rq->cmd_flags & REQ_FAILED) 1164 - mapped = false; 1165 - 1166 - dm_done(clone, tio->error, mapped); 1167 - } 1168 - 1169 - /* 1170 - * Complete the clone and the original request with the error status 1171 - * through softirq context. 1172 - */ 1173 - static void dm_complete_request(struct request *rq, int error) 1174 - { 1175 - struct dm_rq_target_io *tio = tio_from_request(rq); 1176 - 1177 - tio->error = error; 1178 - if (!rq->q->mq_ops) 1179 - blk_complete_request(rq); 1180 - else 1181 - blk_mq_complete_request(rq, error); 1182 - } 1183 - 1184 - /* 1185 - * Complete the not-mapped clone and the original request with the error status 1186 - * through softirq context. 1187 - * Target's rq_end_io() function isn't called. 1188 - * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. 
1189 - */ 1190 - static void dm_kill_unmapped_request(struct request *rq, int error) 1191 - { 1192 - rq->cmd_flags |= REQ_FAILED; 1193 - dm_complete_request(rq, error); 1194 - } 1195 - 1196 - /* 1197 - * Called with the clone's queue lock held (in the case of .request_fn) 1198 - */ 1199 - static void end_clone_request(struct request *clone, int error) 1200 - { 1201 - struct dm_rq_target_io *tio = clone->end_io_data; 1202 - 1203 - if (!clone->q->mq_ops) { 1204 - /* 1205 - * For just cleaning up the information of the queue in which 1206 - * the clone was dispatched. 1207 - * The clone is *NOT* freed actually here because it is alloced 1208 - * from dm own mempool (REQ_ALLOCED isn't set). 1209 - */ 1210 - __blk_put_request(clone->q, clone); 1211 - } 1212 - 1213 - /* 1214 - * Actual request completion is done in a softirq context which doesn't 1215 - * hold the clone's queue lock. Otherwise, deadlock could occur because: 1216 - * - another request may be submitted by the upper level driver 1217 - * of the stacking during the completion 1218 - * - the submission which requires queue lock may be done 1219 - * against this clone's queue 1220 - */ 1221 - dm_complete_request(tio->orig, error); 1222 1062 } 1223 1063 1224 1064 /* ··· 1275 1845 return BLK_QC_T_NONE; 1276 1846 } 1277 1847 1278 - int dm_request_based(struct mapped_device *md) 1279 - { 1280 - return blk_queue_stackable(md->queue); 1281 - } 1282 - 1283 - static void dm_dispatch_clone_request(struct request *clone, struct request *rq) 1284 - { 1285 - int r; 1286 - 1287 - if (blk_queue_io_stat(clone->q)) 1288 - clone->cmd_flags |= REQ_IO_STAT; 1289 - 1290 - clone->start_time = jiffies; 1291 - r = blk_insert_cloned_request(clone->q, clone); 1292 - if (r) 1293 - /* must complete clone in terms of original request */ 1294 - dm_complete_request(rq, r); 1295 - } 1296 - 1297 - static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1298 - void *data) 1299 - { 1300 - struct dm_rq_target_io *tio = data; 
1301 - struct dm_rq_clone_bio_info *info = 1302 - container_of(bio, struct dm_rq_clone_bio_info, clone); 1303 - 1304 - info->orig = bio_orig; 1305 - info->tio = tio; 1306 - bio->bi_end_io = end_clone_bio; 1307 - 1308 - return 0; 1309 - } 1310 - 1311 - static int setup_clone(struct request *clone, struct request *rq, 1312 - struct dm_rq_target_io *tio, gfp_t gfp_mask) 1313 - { 1314 - int r; 1315 - 1316 - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, 1317 - dm_rq_bio_constructor, tio); 1318 - if (r) 1319 - return r; 1320 - 1321 - clone->cmd = rq->cmd; 1322 - clone->cmd_len = rq->cmd_len; 1323 - clone->sense = rq->sense; 1324 - clone->end_io = end_clone_request; 1325 - clone->end_io_data = tio; 1326 - 1327 - tio->clone = clone; 1328 - 1329 - return 0; 1330 - } 1331 - 1332 - static struct request *clone_old_rq(struct request *rq, struct mapped_device *md, 1333 - struct dm_rq_target_io *tio, gfp_t gfp_mask) 1334 - { 1335 - /* 1336 - * Create clone for use with .request_fn request_queue 1337 - */ 1338 - struct request *clone; 1339 - 1340 - clone = alloc_old_clone_request(md, gfp_mask); 1341 - if (!clone) 1342 - return NULL; 1343 - 1344 - blk_rq_init(NULL, clone); 1345 - if (setup_clone(clone, rq, tio, gfp_mask)) { 1346 - /* -ENOMEM */ 1347 - free_old_clone_request(md, clone); 1348 - return NULL; 1349 - } 1350 - 1351 - return clone; 1352 - } 1353 - 1354 - static void map_tio_request(struct kthread_work *work); 1355 - 1356 - static void init_tio(struct dm_rq_target_io *tio, struct request *rq, 1357 - struct mapped_device *md) 1358 - { 1359 - tio->md = md; 1360 - tio->ti = NULL; 1361 - tio->clone = NULL; 1362 - tio->orig = rq; 1363 - tio->error = 0; 1364 - /* 1365 - * Avoid initializing info for blk-mq; it passes 1366 - * target-specific data through info.ptr 1367 - * (see: dm_mq_init_request) 1368 - */ 1369 - if (!md->init_tio_pdu) 1370 - memset(&tio->info, 0, sizeof(tio->info)); 1371 - if (md->kworker_task) 1372 - init_kthread_work(&tio->work, map_tio_request); 
1373 - } 1374 - 1375 - static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, 1376 - struct mapped_device *md, 1377 - gfp_t gfp_mask) 1378 - { 1379 - struct dm_rq_target_io *tio; 1380 - int srcu_idx; 1381 - struct dm_table *table; 1382 - 1383 - tio = alloc_old_rq_tio(md, gfp_mask); 1384 - if (!tio) 1385 - return NULL; 1386 - 1387 - init_tio(tio, rq, md); 1388 - 1389 - table = dm_get_live_table(md, &srcu_idx); 1390 - /* 1391 - * Must clone a request if this .request_fn DM device 1392 - * is stacked on .request_fn device(s). 1393 - */ 1394 - if (!dm_table_mq_request_based(table)) { 1395 - if (!clone_old_rq(rq, md, tio, gfp_mask)) { 1396 - dm_put_live_table(md, srcu_idx); 1397 - free_old_rq_tio(tio); 1398 - return NULL; 1399 - } 1400 - } 1401 - dm_put_live_table(md, srcu_idx); 1402 - 1403 - return tio; 1404 - } 1405 - 1406 - /* 1407 - * Called with the queue lock held. 1408 - */ 1409 - static int dm_old_prep_fn(struct request_queue *q, struct request *rq) 1410 - { 1411 - struct mapped_device *md = q->queuedata; 1412 - struct dm_rq_target_io *tio; 1413 - 1414 - if (unlikely(rq->special)) { 1415 - DMWARN("Already has something in rq->special."); 1416 - return BLKPREP_KILL; 1417 - } 1418 - 1419 - tio = dm_old_prep_tio(rq, md, GFP_ATOMIC); 1420 - if (!tio) 1421 - return BLKPREP_DEFER; 1422 - 1423 - rq->special = tio; 1424 - rq->cmd_flags |= REQ_DONTPREP; 1425 - 1426 - return BLKPREP_OK; 1427 - } 1428 - 1429 - /* 1430 - * Returns: 1431 - * 0 : the request has been processed 1432 - * DM_MAPIO_REQUEUE : the original request needs to be requeued 1433 - * < 0 : the request was completed due to failure 1434 - */ 1435 - static int map_request(struct dm_rq_target_io *tio, struct request *rq, 1436 - struct mapped_device *md) 1437 - { 1438 - int r; 1439 - struct dm_target *ti = tio->ti; 1440 - struct request *clone = NULL; 1441 - 1442 - if (tio->clone) { 1443 - clone = tio->clone; 1444 - r = ti->type->map_rq(ti, clone, &tio->info); 1445 - } else { 1446 - r = 
ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); 1447 - if (r < 0) { 1448 - /* The target wants to complete the I/O */ 1449 - dm_kill_unmapped_request(rq, r); 1450 - return r; 1451 - } 1452 - if (r != DM_MAPIO_REMAPPED) 1453 - return r; 1454 - if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { 1455 - /* -ENOMEM */ 1456 - ti->type->release_clone_rq(clone); 1457 - return DM_MAPIO_REQUEUE; 1458 - } 1459 - } 1460 - 1461 - switch (r) { 1462 - case DM_MAPIO_SUBMITTED: 1463 - /* The target has taken the I/O to submit by itself later */ 1464 - break; 1465 - case DM_MAPIO_REMAPPED: 1466 - /* The target has remapped the I/O so dispatch it */ 1467 - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 1468 - blk_rq_pos(rq)); 1469 - dm_dispatch_clone_request(clone, rq); 1470 - break; 1471 - case DM_MAPIO_REQUEUE: 1472 - /* The target wants to requeue the I/O */ 1473 - dm_requeue_original_request(md, tio->orig); 1474 - break; 1475 - default: 1476 - if (r > 0) { 1477 - DMWARN("unimplemented target map return value: %d", r); 1478 - BUG(); 1479 - } 1480 - 1481 - /* The target wants to complete the I/O */ 1482 - dm_kill_unmapped_request(rq, r); 1483 - return r; 1484 - } 1485 - 1486 - return 0; 1487 - } 1488 - 1489 - static void map_tio_request(struct kthread_work *work) 1490 - { 1491 - struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); 1492 - struct request *rq = tio->orig; 1493 - struct mapped_device *md = tio->md; 1494 - 1495 - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) 1496 - dm_requeue_original_request(md, rq); 1497 - } 1498 - 1499 - static void dm_start_request(struct mapped_device *md, struct request *orig) 1500 - { 1501 - if (!orig->q->mq_ops) 1502 - blk_start_request(orig); 1503 - else 1504 - blk_mq_start_request(orig); 1505 - atomic_inc(&md->pending[rq_data_dir(orig)]); 1506 - 1507 - if (md->seq_rq_merge_deadline_usecs) { 1508 - md->last_rq_pos = rq_end_sector(orig); 1509 - md->last_rq_rw = rq_data_dir(orig); 1510 - 
md->last_rq_start_time = ktime_get(); 1511 - } 1512 - 1513 - if (unlikely(dm_stats_used(&md->stats))) { 1514 - struct dm_rq_target_io *tio = tio_from_request(orig); 1515 - tio->duration_jiffies = jiffies; 1516 - tio->n_sectors = blk_rq_sectors(orig); 1517 - dm_stats_account_io(&md->stats, rq_data_dir(orig), 1518 - blk_rq_pos(orig), tio->n_sectors, false, 0, 1519 - &tio->stats_aux); 1520 - } 1521 - 1522 - /* 1523 - * Hold the md reference here for the in-flight I/O. 1524 - * We can't rely on the reference count by device opener, 1525 - * because the device may be closed during the request completion 1526 - * when all bios are completed. 1527 - * See the comment in rq_completed() too. 1528 - */ 1529 - dm_get(md); 1530 - } 1531 - 1532 - #define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 1533 - 1534 - ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) 1535 - { 1536 - return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); 1537 - } 1538 - 1539 - ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, 1540 - const char *buf, size_t count) 1541 - { 1542 - unsigned deadline; 1543 - 1544 - if (!dm_request_based(md) || md->use_blk_mq) 1545 - return count; 1546 - 1547 - if (kstrtouint(buf, 10, &deadline)) 1548 - return -EINVAL; 1549 - 1550 - if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) 1551 - deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; 1552 - 1553 - md->seq_rq_merge_deadline_usecs = deadline; 1554 - 1555 - return count; 1556 - } 1557 - 1558 - static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md) 1559 - { 1560 - ktime_t kt_deadline; 1561 - 1562 - if (!md->seq_rq_merge_deadline_usecs) 1563 - return false; 1564 - 1565 - kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); 1566 - kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); 1567 - 1568 - return !ktime_after(ktime_get(), kt_deadline); 1569 - } 1570 - 1571 - /* 1572 - * q->request_fn for 
request-based dm. 1573 - * Called with the queue lock held. 1574 - */ 1575 - static void dm_request_fn(struct request_queue *q) 1576 - { 1577 - struct mapped_device *md = q->queuedata; 1578 - struct dm_target *ti = md->immutable_target; 1579 - struct request *rq; 1580 - struct dm_rq_target_io *tio; 1581 - sector_t pos = 0; 1582 - 1583 - if (unlikely(!ti)) { 1584 - int srcu_idx; 1585 - struct dm_table *map = dm_get_live_table(md, &srcu_idx); 1586 - 1587 - ti = dm_table_find_target(map, pos); 1588 - dm_put_live_table(md, srcu_idx); 1589 - } 1590 - 1591 - /* 1592 - * For suspend, check blk_queue_stopped() and increment 1593 - * ->pending within a single queue_lock not to increment the 1594 - * number of in-flight I/Os after the queue is stopped in 1595 - * dm_suspend(). 1596 - */ 1597 - while (!blk_queue_stopped(q)) { 1598 - rq = blk_peek_request(q); 1599 - if (!rq) 1600 - return; 1601 - 1602 - /* always use block 0 to find the target for flushes for now */ 1603 - pos = 0; 1604 - if (req_op(rq) != REQ_OP_FLUSH) 1605 - pos = blk_rq_pos(rq); 1606 - 1607 - if ((dm_request_peeked_before_merge_deadline(md) && 1608 - md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && 1609 - md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || 1610 - (ti->type->busy && ti->type->busy(ti))) { 1611 - blk_delay_queue(q, HZ / 100); 1612 - return; 1613 - } 1614 - 1615 - dm_start_request(md, rq); 1616 - 1617 - tio = tio_from_request(rq); 1618 - /* Establish tio->ti before queuing work (map_tio_request) */ 1619 - tio->ti = ti; 1620 - queue_kthread_work(&md->kworker, &tio->work); 1621 - BUG_ON(!irqs_disabled()); 1622 - } 1623 - } 1624 - 1625 1848 static int dm_any_congested(void *congested_data, int bdi_bits) 1626 1849 { 1627 1850 int r = bdi_bits; ··· 1352 2269 1353 2270 static void dm_wq_work(struct work_struct *work); 1354 2271 1355 - static void dm_init_md_queue(struct mapped_device *md) 2272 + void dm_init_md_queue(struct mapped_device *md) 1356 2273 { 1357 2274 /* 1358 2275 * 
Request-based dm devices cannot be stacked on top of bio-based dm ··· 1373 2290 md->queue->backing_dev_info.congested_data = md; 1374 2291 } 1375 2292 1376 - static void dm_init_normal_md_queue(struct mapped_device *md) 2293 + void dm_init_normal_md_queue(struct mapped_device *md) 1377 2294 { 1378 2295 md->use_blk_mq = false; 1379 2296 dm_init_md_queue(md); ··· 1413 2330 bdput(md->bdev); 1414 2331 md->bdev = NULL; 1415 2332 } 2333 + 2334 + dm_mq_cleanup_mapped_device(md); 1416 2335 } 1417 2336 1418 2337 /* ··· 1448 2363 goto bad_io_barrier; 1449 2364 1450 2365 md->numa_node_id = numa_node_id; 1451 - md->use_blk_mq = use_blk_mq; 2366 + md->use_blk_mq = dm_use_blk_mq_default(); 1452 2367 md->init_tio_pdu = false; 1453 2368 md->type = DM_TYPE_NONE; 1454 2369 mutex_init(&md->suspend_lock); ··· 1533 2448 unlock_fs(md); 1534 2449 1535 2450 cleanup_mapped_device(md); 1536 - if (md->tag_set) { 1537 - blk_mq_free_tag_set(md->tag_set); 1538 - kfree(md->tag_set); 1539 - } 1540 2451 1541 2452 free_table_devices(&md->table_devices); 1542 2453 dm_stats_cleanup(&md->stats); ··· 1737 2656 return &md->queue->limits; 1738 2657 } 1739 2658 EXPORT_SYMBOL_GPL(dm_get_queue_limits); 1740 - 1741 - static void dm_old_init_rq_based_worker_thread(struct mapped_device *md) 1742 - { 1743 - /* Initialize the request-based DM worker thread */ 1744 - init_kthread_worker(&md->kworker); 1745 - md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, 1746 - "kdmwork-%s", dm_device_name(md)); 1747 - } 1748 - 1749 - /* 1750 - * Fully initialize a .request_fn request-based queue. 
1751 - */ 1752 - static int dm_old_init_request_queue(struct mapped_device *md) 1753 - { 1754 - /* Fully initialize the queue */ 1755 - if (!blk_init_allocated_queue(md->queue, dm_request_fn, NULL)) 1756 - return -EINVAL; 1757 - 1758 - /* disable dm_request_fn's merge heuristic by default */ 1759 - md->seq_rq_merge_deadline_usecs = 0; 1760 - 1761 - dm_init_normal_md_queue(md); 1762 - blk_queue_softirq_done(md->queue, dm_softirq_done); 1763 - blk_queue_prep_rq(md->queue, dm_old_prep_fn); 1764 - 1765 - dm_old_init_rq_based_worker_thread(md); 1766 - 1767 - elv_register_queue(md->queue); 1768 - 1769 - return 0; 1770 - } 1771 - 1772 - static int dm_mq_init_request(void *data, struct request *rq, 1773 - unsigned int hctx_idx, unsigned int request_idx, 1774 - unsigned int numa_node) 1775 - { 1776 - struct mapped_device *md = data; 1777 - struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 1778 - 1779 - /* 1780 - * Must initialize md member of tio, otherwise it won't 1781 - * be available in dm_mq_queue_rq. 
1782 - */ 1783 - tio->md = md; 1784 - 1785 - if (md->init_tio_pdu) { 1786 - /* target-specific per-io data is immediately after the tio */ 1787 - tio->info.ptr = tio + 1; 1788 - } 1789 - 1790 - return 0; 1791 - } 1792 - 1793 - static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, 1794 - const struct blk_mq_queue_data *bd) 1795 - { 1796 - struct request *rq = bd->rq; 1797 - struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 1798 - struct mapped_device *md = tio->md; 1799 - struct dm_target *ti = md->immutable_target; 1800 - 1801 - if (unlikely(!ti)) { 1802 - int srcu_idx; 1803 - struct dm_table *map = dm_get_live_table(md, &srcu_idx); 1804 - 1805 - ti = dm_table_find_target(map, 0); 1806 - dm_put_live_table(md, srcu_idx); 1807 - } 1808 - 1809 - if (ti->type->busy && ti->type->busy(ti)) 1810 - return BLK_MQ_RQ_QUEUE_BUSY; 1811 - 1812 - dm_start_request(md, rq); 1813 - 1814 - /* Init tio using md established in .init_request */ 1815 - init_tio(tio, rq, md); 1816 - 1817 - /* 1818 - * Establish tio->ti before queuing work (map_tio_request) 1819 - * or making direct call to map_request(). 
1820 - */ 1821 - tio->ti = ti; 1822 - 1823 - /* Direct call is fine since .queue_rq allows allocations */ 1824 - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { 1825 - /* Undo dm_start_request() before requeuing */ 1826 - rq_end_stats(md, rq); 1827 - rq_completed(md, rq_data_dir(rq), false); 1828 - return BLK_MQ_RQ_QUEUE_BUSY; 1829 - } 1830 - 1831 - return BLK_MQ_RQ_QUEUE_OK; 1832 - } 1833 - 1834 - static struct blk_mq_ops dm_mq_ops = { 1835 - .queue_rq = dm_mq_queue_rq, 1836 - .map_queue = blk_mq_map_queue, 1837 - .complete = dm_softirq_done, 1838 - .init_request = dm_mq_init_request, 1839 - }; 1840 - 1841 - static int dm_mq_init_request_queue(struct mapped_device *md, 1842 - struct dm_target *immutable_tgt) 1843 - { 1844 - struct request_queue *q; 1845 - int err; 1846 - 1847 - if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) { 1848 - DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); 1849 - return -EINVAL; 1850 - } 1851 - 1852 - md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id); 1853 - if (!md->tag_set) 1854 - return -ENOMEM; 1855 - 1856 - md->tag_set->ops = &dm_mq_ops; 1857 - md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); 1858 - md->tag_set->numa_node = md->numa_node_id; 1859 - md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; 1860 - md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); 1861 - md->tag_set->driver_data = md; 1862 - 1863 - md->tag_set->cmd_size = sizeof(struct dm_rq_target_io); 1864 - if (immutable_tgt && immutable_tgt->per_io_data_size) { 1865 - /* any target-specific per-io data is immediately after the tio */ 1866 - md->tag_set->cmd_size += immutable_tgt->per_io_data_size; 1867 - md->init_tio_pdu = true; 1868 - } 1869 - 1870 - err = blk_mq_alloc_tag_set(md->tag_set); 1871 - if (err) 1872 - goto out_kfree_tag_set; 1873 - 1874 - q = blk_mq_init_allocated_queue(md->tag_set, md->queue); 1875 - if (IS_ERR(q)) { 1876 - err = PTR_ERR(q); 1877 - goto out_tag_set; 
1878 - } 1879 - dm_init_md_queue(md); 1880 - 1881 - /* backfill 'mq' sysfs registration normally done in blk_register_queue */ 1882 - blk_mq_register_disk(md->disk); 1883 - 1884 - return 0; 1885 - 1886 - out_tag_set: 1887 - blk_mq_free_tag_set(md->tag_set); 1888 - out_kfree_tag_set: 1889 - kfree(md->tag_set); 1890 - 1891 - return err; 1892 - } 1893 2659 1894 2660 static unsigned filter_md_type(unsigned type, struct mapped_device *md) 1895 2661 { ··· 2668 3740 2669 3741 module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); 2670 3742 MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 2671 - 2672 - module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); 2673 - MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); 2674 - 2675 - module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); 2676 - MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); 2677 - 2678 - module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR); 2679 - MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices"); 2680 - 2681 - module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR); 2682 - MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices"); 2683 3743 2684 3744 module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); 2685 3745 MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+2 -23
drivers/md/dm.h
··· 13 13 #include <linux/fs.h> 14 14 #include <linux/device-mapper.h> 15 15 #include <linux/list.h> 16 + #include <linux/moduleparam.h> 16 17 #include <linux/blkdev.h> 17 18 #include <linux/backing-dev.h> 18 19 #include <linux/hdreg.h> ··· 162 161 /* 163 162 * sysfs interface 164 163 */ 165 - struct dm_kobject_holder { 166 - struct kobject kobj; 167 - struct completion completion; 168 - }; 169 - 170 - static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) 171 - { 172 - return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; 173 - } 174 - 175 164 int dm_sysfs_init(struct mapped_device *md); 176 165 void dm_sysfs_exit(struct mapped_device *md); 177 166 struct kobject *dm_kobject(struct mapped_device *md); ··· 203 212 void dm_internal_suspend(struct mapped_device *md); 204 213 void dm_internal_resume(struct mapped_device *md); 205 214 206 - bool dm_use_blk_mq(struct mapped_device *md); 207 - 208 215 int dm_io_init(void); 209 216 void dm_io_exit(void); 210 217 ··· 217 228 void dm_free_md_mempools(struct dm_md_mempools *pools); 218 229 219 230 /* 220 - * Helpers that are used by DM core 231 + * Various helpers 221 232 */ 222 233 unsigned dm_get_reserved_bio_based_ios(void); 223 - unsigned dm_get_reserved_rq_based_ios(void); 224 - 225 - static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) 226 - { 227 - return !maxlen || strlen(result) + 1 >= maxlen; 228 - } 229 - 230 - ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); 231 - ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, 232 - const char *buf, size_t count); 233 234 234 235 #endif