Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"A smaller collection of fixes for the block core that would be nice to
have in -rc2. This pull request contains:

- Fixes for races in the wait/wakeup logic used in blk-mq from
Alexander. No issues have been observed, but it is definitely a
bit flaky currently. Alternatively, we may drop the cyclic
wakeups going forward, but that needs more testing.

- Some cleanups from Christoph.

- Fix for an oops in null_blk if queue_mode=1 and softirq completions
are used. From me.

- A fix for a regression caused by the chunk size setting. It
inadvertently used max_hw_sectors instead of max_sectors, which is
incorrect, and causes hangs on btrfs multi-disk setups (where hw
sectors apparently isn't set). From me.

- Removal of WQ_POWER_EFFICIENT in the kblockd creation. This was a
recent addition as well, but it actually breaks blk-mq which relies
on strict scheduling. If the workqueue power_efficient mode is
turned on, this breaks blk-mq. From Matias.

- null_blk module parameter description fix from Mike"

* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: bitmap tag: fix races in bt_get() function
blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
blk-mq: bitmap tag: fix races on shared ::wake_index fields
block: blk_max_size_offset() should check ->max_sectors
null_blk: fix softirq completions for queue_mode == 1
blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue
blk-mq: properly drain stopped queues
block: remove WQ_POWER_EFFICIENT from kblockd
null_blk: fix name and description of 'queue_mode' module parameter
block: remove elv_abort_queue and blk_abort_flushes

+51 -97
+1 -2
block/blk-core.c
··· 3312 3312 3313 3313 /* used for unplugging and affects IO latency/throughput - HIGHPRI */ 3314 3314 kblockd_workqueue = alloc_workqueue("kblockd", 3315 - WQ_MEM_RECLAIM | WQ_HIGHPRI | 3316 - WQ_POWER_EFFICIENT, 0); 3315 + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 3317 3316 if (!kblockd_workqueue) 3318 3317 panic("Failed to create kblockd\n"); 3319 3318
-38
block/blk-flush.c
··· 422 422 } 423 423 424 424 /** 425 - * blk_abort_flushes - @q is being aborted, abort flush requests 426 - * @q: request_queue being aborted 427 - * 428 - * To be called from elv_abort_queue(). @q is being aborted. Prepare all 429 - * FLUSH/FUA requests for abortion. 430 - * 431 - * CONTEXT: 432 - * spin_lock_irq(q->queue_lock) 433 - */ 434 - void blk_abort_flushes(struct request_queue *q) 435 - { 436 - struct request *rq, *n; 437 - int i; 438 - 439 - /* 440 - * Requests in flight for data are already owned by the dispatch 441 - * queue or the device driver. Just restore for normal completion. 442 - */ 443 - list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { 444 - list_del_init(&rq->flush.list); 445 - blk_flush_restore_request(rq); 446 - } 447 - 448 - /* 449 - * We need to give away requests on flush queues. Restore for 450 - * normal completion and put them on the dispatch queue. 451 - */ 452 - for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { 453 - list_for_each_entry_safe(rq, n, &q->flush_queue[i], 454 - flush.list) { 455 - list_del_init(&rq->flush.list); 456 - blk_flush_restore_request(rq); 457 - list_add_tail(&rq->queuelist, &q->queue_head); 458 - } 459 - } 460 - } 461 - 462 - /** 463 425 * blkdev_issue_flush - queue a flush 464 426 * @bdev: blockdev to issue flush for 465 427 * @gfp_mask: memory allocation flags (for bio_alloc)
+39 -22
block/blk-mq-tag.c
··· 43 43 return bt_has_free_tags(&tags->bitmap_tags); 44 44 } 45 45 46 - static inline void bt_index_inc(unsigned int *index) 46 + static inline int bt_index_inc(int index) 47 47 { 48 - *index = (*index + 1) & (BT_WAIT_QUEUES - 1); 48 + return (index + 1) & (BT_WAIT_QUEUES - 1); 49 + } 50 + 51 + static inline void bt_index_atomic_inc(atomic_t *index) 52 + { 53 + int old = atomic_read(index); 54 + int new = bt_index_inc(old); 55 + atomic_cmpxchg(index, old, new); 49 56 } 50 57 51 58 /* ··· 76 69 int i, wake_index; 77 70 78 71 bt = &tags->bitmap_tags; 79 - wake_index = bt->wake_index; 72 + wake_index = atomic_read(&bt->wake_index); 80 73 for (i = 0; i < BT_WAIT_QUEUES; i++) { 81 74 struct bt_wait_state *bs = &bt->bs[wake_index]; 82 75 83 76 if (waitqueue_active(&bs->wait)) 84 77 wake_up(&bs->wait); 85 78 86 - bt_index_inc(&wake_index); 79 + wake_index = bt_index_inc(wake_index); 87 80 } 88 81 } 89 82 ··· 219 212 struct blk_mq_hw_ctx *hctx) 220 213 { 221 214 struct bt_wait_state *bs; 215 + int wait_index; 222 216 223 217 if (!hctx) 224 218 return &bt->bs[0]; 225 219 226 - bs = &bt->bs[hctx->wait_index]; 227 - bt_index_inc(&hctx->wait_index); 220 + wait_index = atomic_read(&hctx->wait_index); 221 + bs = &bt->bs[wait_index]; 222 + bt_index_atomic_inc(&hctx->wait_index); 228 223 return bs; 229 224 } 230 225 ··· 248 239 249 240 bs = bt_wait_ptr(bt, hctx); 250 241 do { 251 - bool was_empty; 252 - 253 - was_empty = list_empty(&wait.task_list); 254 242 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); 255 243 256 244 tag = __bt_get(hctx, bt, last_tag); 257 245 if (tag != -1) 258 246 break; 259 - 260 - if (was_empty) 261 - atomic_set(&bs->wait_cnt, bt->wake_cnt); 262 247 263 248 blk_mq_put_ctx(data->ctx); 264 249 ··· 316 313 { 317 314 int i, wake_index; 318 315 319 - wake_index = bt->wake_index; 316 + wake_index = atomic_read(&bt->wake_index); 320 317 for (i = 0; i < BT_WAIT_QUEUES; i++) { 321 318 struct bt_wait_state *bs = &bt->bs[wake_index]; 322 319 323 320 if (waitqueue_active(&bs->wait)) { 324 - if (wake_index != bt->wake_index) 325 - bt->wake_index = wake_index; 321 + int o = atomic_read(&bt->wake_index); 322 + if (wake_index != o) 323 + atomic_cmpxchg(&bt->wake_index, o, wake_index); 326 324 327 325 return bs; 328 326 } 329 327 330 - bt_index_inc(&wake_index); 328 + wake_index = bt_index_inc(wake_index); 331 329 } 332 330 333 331 return NULL; ··· 338 334 { 339 335 const int index = TAG_TO_INDEX(bt, tag); 340 336 struct bt_wait_state *bs; 337 + int wait_cnt; 341 338 342 339 /* 343 340 * The unlock memory barrier need to order access to req in free ··· 347 342 clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); 348 343 349 344 bs = bt_wake_ptr(bt); 350 - if (bs && atomic_dec_and_test(&bs->wait_cnt)) { 351 - atomic_set(&bs->wait_cnt, bt->wake_cnt); 352 - bt_index_inc(&bt->wake_index); 345 + if (!bs) 346 + return; 347 + 348 + wait_cnt = atomic_dec_return(&bs->wait_cnt); 349 + if (wait_cnt == 0) { 350 + wake: 351 + atomic_add(bt->wake_cnt, &bs->wait_cnt); 352 + bt_index_atomic_inc(&bt->wake_index); 353 353 wake_up(&bs->wait); 354 + } else if (wait_cnt < 0) { 355 + wait_cnt = atomic_inc_return(&bs->wait_cnt); 356 + if (!wait_cnt) 357 + goto wake; 354 358 } 355 359 } 356 360 ··· 513 499 return -ENOMEM; 514 500 } 515 501 516 - for (i = 0; i < BT_WAIT_QUEUES; i++) 517 - init_waitqueue_head(&bt->bs[i].wait); 518 - 519 502 bt_update_count(bt, depth); 503 + 504 + for (i = 0; i < BT_WAIT_QUEUES; i++) { 505 + init_waitqueue_head(&bt->bs[i].wait); 506 + atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt); 507 + } 508 + 520 509 return 0; 521 510 } 522 511
+1 -1
block/blk-mq-tag.h
··· 24 24 unsigned int map_nr; 25 25 struct blk_align_bitmap *map; 26 26 27 - unsigned int wake_index; 27 + atomic_t wake_index; 28 28 struct bt_wait_state *bs; 29 29 }; 30 30
+3 -8
block/blk-mq.c
··· 109 109 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); 110 110 } 111 111 112 - static void __blk_mq_drain_queue(struct request_queue *q) 112 + void blk_mq_drain_queue(struct request_queue *q) 113 113 { 114 114 while (true) { 115 115 s64 count; ··· 120 120 121 121 if (count == 0) 122 122 break; 123 - blk_mq_run_queues(q, false); 123 + blk_mq_start_hw_queues(q); 124 124 msleep(10); 125 125 } 126 126 } ··· 139 139 spin_unlock_irq(q->queue_lock); 140 140 141 141 if (drain) 142 - __blk_mq_drain_queue(q); 143 - } 144 - 145 - void blk_mq_drain_queue(struct request_queue *q) 146 - { 147 - __blk_mq_drain_queue(q); 142 + blk_mq_drain_queue(q); 148 143 } 149 144 150 145 static void blk_mq_unfreeze_queue(struct request_queue *q)
-1
block/blk.h
··· 84 84 #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) 85 85 86 86 void blk_insert_flush(struct request *rq); 87 - void blk_abort_flushes(struct request_queue *q); 88 87 89 88 static inline struct request *__elv_next_request(struct request_queue *q) 90 89 {
-20
block/elevator.c
··· 729 729 return ELV_MQUEUE_MAY; 730 730 } 731 731 732 - void elv_abort_queue(struct request_queue *q) 733 - { 734 - struct request *rq; 735 - 736 - blk_abort_flushes(q); 737 - 738 - while (!list_empty(&q->queue_head)) { 739 - rq = list_entry_rq(q->queue_head.next); 740 - rq->cmd_flags |= REQ_QUIET; 741 - trace_block_rq_abort(q, rq); 742 - /* 743 - * Mark this request as started so we don't trigger 744 - * any debug logic in the end I/O path. 745 - */ 746 - blk_start_request(rq); 747 - __blk_end_request_all(rq, -EIO); 748 - } 749 - } 750 - EXPORT_SYMBOL(elv_abort_queue); 751 - 752 732 void elv_completed_request(struct request_queue *q, struct request *rq) 753 733 { 754 734 struct elevator_queue *e = q->elevator;
+5 -2
drivers/block/null_blk.c
··· 79 79 80 80 static int queue_mode = NULL_Q_MQ; 81 81 module_param(queue_mode, int, S_IRUGO); 82 - MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); 82 + MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); 83 83 84 84 static int gb = 250; 85 85 module_param(gb, int, S_IRUGO); ··· 227 227 228 228 static void null_softirq_done_fn(struct request *rq) 229 229 { 230 - end_cmd(blk_mq_rq_to_pdu(rq)); 230 + if (queue_mode == NULL_Q_MQ) 231 + end_cmd(blk_mq_rq_to_pdu(rq)); 232 + else 233 + end_cmd(rq->special); 231 234 } 232 235 233 236 static inline void null_handle_cmd(struct nullb_cmd *cmd)
+1 -1
include/linux/blk-mq.h
··· 42 42 unsigned int nr_ctx; 43 43 struct blk_mq_ctx **ctxs; 44 44 45 - unsigned int wait_index; 45 + atomic_t wait_index; 46 46 47 47 struct blk_mq_tags *tags; 48 48
+1 -1
include/linux/blkdev.h
··· 920 920 sector_t offset) 921 921 { 922 922 if (!q->limits.chunk_sectors) 923 - return q->limits.max_hw_sectors; 923 + return q->limits.max_sectors; 924 924 925 925 return q->limits.chunk_sectors - 926 926 (offset & (q->limits.chunk_sectors - 1));
-1
include/linux/elevator.h
··· 133 133 extern int elv_register_queue(struct request_queue *q); 134 134 extern void elv_unregister_queue(struct request_queue *q); 135 135 extern int elv_may_queue(struct request_queue *, int); 136 - extern void elv_abort_queue(struct request_queue *); 137 136 extern void elv_completed_request(struct request_queue *, struct request *); 138 137 extern int elv_set_request(struct request_queue *q, struct request *rq, 139 138 struct bio *bio, gfp_t gfp_mask);