Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"This contains a set of fixes for xen-blkback by way of Konrad, and a
performance regression fix for blk-mq for shared tags.

The latter could account for as much as a 50x reduction in
performance in the user's test case with 500 namespaces. A
more realistic setup on my end with 32 drives showed a 3.5x drop. The
fix has been thoroughly tested before being committed"

* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: fix performance regression with shared tags
xen-blkback: don't leak stack data via response ring
xen/blkback: don't use xen_blkif_get() in xen-blkback kthread
xen/blkback: don't free be structure too early
xen/blkback: fix disconnect while I/Os in flight

Changed files: +87 -65

block/blk-mq-sched.c (+46 -12)
···
 	__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
 }
 
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
+	}
+
+	return false;
+}
+
 struct request *blk_mq_sched_get_request(struct request_queue *q,
 					 struct bio *bio,
 					 unsigned int op,
···
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:	loop cursor.
···
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
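For readers following the change above: the trick is that a hardware queue's RESTART flag bumps the per-request-queue counter only on its 0 -> 1 transition and drops it only when the flag is actually cleared, so the restart path can skip walking every queue that shares the tag set whenever the counter reads zero. Below is a minimal user-space sketch of that counting idea, using C11 atomics and invented toy_* names; it is an illustration only, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins: one flag per hardware queue plus one shared counter
 * per request queue (mirrors q->shared_hctx_restart). */
struct toy_hctx {
	atomic_bool restart_marked;
};

static atomic_int shared_restart_count;

static void toy_mark_restart(struct toy_hctx *h)
{
	/* Only the 0 -> 1 transition bumps the shared counter. */
	if (!atomic_exchange(&h->restart_marked, true))
		atomic_fetch_add(&shared_restart_count, 1);
}

static void toy_clear_restart(struct toy_hctx *h)
{
	/* Decrement only if the flag really was set. */
	if (atomic_exchange(&h->restart_marked, false))
		atomic_fetch_sub(&shared_restart_count, 1);
}

static bool toy_any_restart_pending(void)
{
	/* One atomic read replaces a walk over all hardware queues. */
	return atomic_load(&shared_restart_count) != 0;
}

int main(void)
{
	struct toy_hctx a = { false };

	printf("pending: %d\n", toy_any_restart_pending());	/* 0 */
	toy_mark_restart(&a);
	toy_mark_restart(&a);	/* second mark leaves the counter at 1 */
	printf("pending: %d\n", toy_any_restart_pending());	/* 1 */
	toy_clear_restart(&a);
	printf("pending: %d\n", toy_any_restart_pending());	/* 0 */
	return 0;
}

The kernel variant does the same with test_and_set_bit()/test_and_clear_bit() on hctx->state and atomic_inc()/atomic_dec() on q->shared_hctx_restart.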

block/blk-mq-sched.h (-9)
···
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

block/blk-mq.c (+13 -3)
···
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 

drivers/block/xen-blkback/blkback.c (+12 -14)
···
 	unsigned long timeout;
 	int ret;
 
-	xen_blkif_get(blkif);
-
 	set_freezable();
 	while (!kthread_should_stop()) {
 		if (try_to_freeze())
···
 		print_stats(ring);
 
 	ring->xenblkd = NULL;
-	xen_blkif_put(blkif);
 
 	return 0;
 }
···
 static void make_response(struct xen_blkif_ring *ring, u64 id,
 			  unsigned short op, int st)
 {
-	struct blkif_response resp;
+	struct blkif_response *resp;
 	unsigned long flags;
 	union blkif_back_rings *blk_rings;
 	int notify;
-
-	resp.id = id;
-	resp.operation = op;
-	resp.status = st;
 
 	spin_lock_irqsave(&ring->blk_ring_lock, flags);
 	blk_rings = &ring->blk_rings;
 	/* Place on the response ring for the relevant domain. */
 	switch (ring->blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
-		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->native,
+					 blk_rings->native.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_32:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+					 blk_rings->x86_32.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_64:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+					 blk_rings->x86_64.rsp_prod_pvt);
 		break;
 	default:
 		BUG();
 	}
+
+	resp->id = id;
+	resp->operation = op;
+	resp->status = st;
+
 	blk_rings->common.rsp_prod_pvt++;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
 	spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
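The make_response() rework above is the stack-leak fix itself: the old code built a struct blkif_response on the kernel stack and memcpy()'d sizeof(resp) bytes into the shared ring, which also copies the compiler-inserted padding, i.e. stale stack bytes the frontend can read. Writing the named fields through a pointer into the ring slot never transfers those bytes. A rough user-space sketch of the two patterns follows (invented names, not the Xen ring API); on x86-64 the three fields cover 11 bytes of a 16-byte struct, so the whole-struct copy moves 5 bytes that were never assigned.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same field layout as blkif_response; on x86-64 the compiler inserts
 * one padding byte after "operation" and four after "status". */
struct resp {
	uint64_t id;		/* copied from request */
	uint8_t  operation;	/* copied from request */
	int16_t  status;	/* BLKIF_RSP_??? */
};

/* Stand-in for one slot of the shared response ring. */
static struct resp ring_slot;

/* Leaky pattern: the whole stack temporary is copied, padding bytes
 * (whatever happened to be on the stack there) included. */
static void respond_by_copy(uint64_t id, uint8_t op, int16_t st)
{
	struct resp tmp;

	tmp.id = id;
	tmp.operation = op;
	tmp.status = st;
	memcpy(&ring_slot, &tmp, sizeof(tmp));
}

/* Fixed pattern: only the named fields of the slot are written, so no
 * uninitialized stack data ever reaches the ring. */
static void respond_in_place(uint64_t id, uint8_t op, int16_t st)
{
	struct resp *slot = &ring_slot;

	slot->id = id;
	slot->operation = op;
	slot->status = st;
}

int main(void)
{
	respond_by_copy(1, 0, 0);
	respond_in_place(2, 0, 0);
	printf("sizeof(struct resp) = %zu, fields cover 11 bytes\n",
	       sizeof(struct resp));
	return 0;
}

In the backend the field stores now happen after the protocol-specific RING_GET_RESPONSE() lookup, still under blk_ring_lock, so only the defined fields of the ring slot are written from this path.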

drivers/block/xen-blkback/common.h (+6 -20)
···
 struct blkif_common_request {
 	char dummy;
 };
-struct blkif_common_response {
-	char dummy;
-};
+
+/* i386 protocol version */
 
 struct blkif_x86_32_request_rw {
 	uint8_t nr_segments;	/* number of segments */
···
 	} u;
 } __attribute__((__packed__));
 
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-	uint64_t id;		/* copied from request */
-	uint8_t operation;	/* copied from request */
-	int16_t status;		/* BLKIF_RSP_??? */
-};
-#pragma pack(pop)
 /* x86_64 protocol version */
 
 struct blkif_x86_64_request_rw {
···
 	} u;
 } __attribute__((__packed__));
 
-struct blkif_x86_64_response {
-	uint64_t __attribute__((__aligned__(8))) id;
-	uint8_t operation;	/* copied from request */
-	int16_t status;		/* BLKIF_RSP_??? */
-};
-
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-		  struct blkif_common_response);
+		  struct blkif_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-		  struct blkif_x86_32_response);
+		  struct blkif_response __packed);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-		  struct blkif_x86_64_response);
+		  struct blkif_response);
 
 union blkif_back_rings {
 	struct blkif_back_ring native;
···
 
 	wait_queue_head_t wq;
 	atomic_t inflight;
+	bool active;
 	/* One thread per blkif ring. */
 	struct task_struct *xenblkd;
 	unsigned int waiting_reqs;
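Collapsing the three per-ABI response structs into the single struct blkif_response works because their member offsets already coincided; only the trailing padding (and therefore sizeof) differed, which is presumably why the 32-bit ring definition gains the __packed annotation. A quick user-space check, with struct names invented here to reproduce the removed definitions, illustrates this on an x86-64 GCC/Clang build.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Unified layout (mirrors struct blkif_response). */
struct resp_native {
	uint64_t id;
	uint8_t  operation;
	int16_t  status;
};

/* Old x86_64 variant: the explicit 8-byte alignment on "id" matches the
 * natural layout anyway. */
struct resp_x86_64 {
	uint64_t __attribute__((__aligned__(8))) id;
	uint8_t  operation;
	int16_t  status;
};

/* Old i386 variant, wrapped in #pragma pack(push, 4). */
#pragma pack(push, 4)
struct resp_x86_32 {
	uint64_t id;
	uint8_t  operation;
	int16_t  status;
};
#pragma pack(pop)

int main(void)
{
	printf("native: op@%zu status@%zu size %zu\n",
	       offsetof(struct resp_native, operation),
	       offsetof(struct resp_native, status),
	       sizeof(struct resp_native));
	printf("x86_64: op@%zu status@%zu size %zu\n",
	       offsetof(struct resp_x86_64, operation),
	       offsetof(struct resp_x86_64, status),
	       sizeof(struct resp_x86_64));
	printf("x86_32: op@%zu status@%zu size %zu\n",
	       offsetof(struct resp_x86_32, operation),
	       offsetof(struct resp_x86_32, status),
	       sizeof(struct resp_x86_32));
	return 0;
}

On x86-64 this prints "operation" at offset 8 and "status" at offset 10 for all three layouts, with total sizes 16, 16 and 12; only the tail padding differs.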

drivers/block/xen-blkback/xenbus.c (+8 -7)
···
 		init_waitqueue_head(&ring->shutdown_wq);
 		ring->blkif = blkif;
 		ring->st_print = jiffies;
-		xen_blkif_get(blkif);
+		ring->active = true;
 	}
 
 	return 0;
···
 		struct xen_blkif_ring *ring = &blkif->rings[r];
 		unsigned int i = 0;
 
+		if (!ring->active)
+			continue;
+
 		if (ring->xenblkd) {
 			kthread_stop(ring->xenblkd);
 			wake_up(&ring->shutdown_wq);
-			ring->xenblkd = NULL;
 		}
 
 		/* The above kthread_stop() guarantees that at this point we
···
 		BUG_ON(ring->free_pages_num != 0);
 		BUG_ON(ring->persistent_gnt_c != 0);
 		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-		xen_blkif_put(blkif);
+		ring->active = false;
 	}
 	blkif->nr_ring_pages = 0;
 	/*
···
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-
-	xen_blkif_disconnect(blkif);
+	WARN_ON(xen_blkif_disconnect(blkif));
 	xen_vbd_free(&blkif->vbd);
+	kfree(blkif->be->mode);
+	kfree(blkif->be);
 
 	/* Make sure everything is drained before shutting down */
 	kmem_cache_free(xen_blkif_cachep, blkif);
···
 		xen_blkif_put(be->blkif);
 	}
 
-	kfree(be->mode);
-	kfree(be);
 	return 0;
 }
 

include/linux/blkdev.h (+2)
···
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;