Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.dk/linux-block into next

Pull block follow-up bits from Jens Axboe:
"A few minor (but important) fixes for blk-mq for the -rc1 window.

- Hot removal potential oops fix for single queue devices. From me.

- Two merged patches in late May meant that we accidentally lost a
fix for freeing an active queue. Fix that up. From me.

- A change of the blk_mq_tag_to_rq() API, passing in blk_mq_tags, to
make life considerably easier for scsi-mq. From me.

- A schedule-while-atomic fix from Ming Lei, which would hit if the
tag space was exhausted.

- Missing __percpu annotation in one place in blk-mq. Found by the
magic Wu compile bot due to code being moved around by the previous
patch, but it's actually an older issue. From Ming Lei.

- Clearing of tag of a flush request at end_io time. From Ming Lei"

* 'for-linus' of git://git.kernel.dk/linux-block:
block: mq flush: clear flush_rq's tag in flush_end_io()
blk-mq: let blk_mq_tag_to_rq() take blk_mq_tags as the main parameter
blk-mq: fix regression from commit 624dbe475416
blk-mq: handle NULL req return from blk_map_request in single queue mode
blk-mq: fix sparse warning on missed __percpu annotation
blk-mq: fix schedule from atomic context
blk-mq: move blk_mq_get_ctx/blk_mq_put_ctx to mq private header

+123 -65
+1 -1
block/blk-flush.c
@@ -225,7 +225,7 @@
 
 	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-		q->flush_rq->cmd_flags = 0;
+		q->flush_rq->tag = -1;
 	}
 
 	running = &q->flush_queue[q->flush_running_idx];
+31 -17
block/blk-mq-tag.c
@@ -221,8 +221,10 @@
 	return bs;
 }
 
-static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
-		unsigned int *last_tag, gfp_t gfp)
+static int bt_get(struct blk_mq_alloc_data *data,
+		struct blk_mq_bitmap_tags *bt,
+		struct blk_mq_hw_ctx *hctx,
+		unsigned int *last_tag)
 {
 	struct bt_wait_state *bs;
 	DEFINE_WAIT(wait);
@@ -234,7 +232,7 @@
 	if (tag != -1)
 		return tag;
 
-	if (!(gfp & __GFP_WAIT))
+	if (!(data->gfp & __GFP_WAIT))
 		return -1;
 
 	bs = bt_wait_ptr(bt, hctx);
@@ -251,50 +249,62 @@
 		if (was_empty)
 			atomic_set(&bs->wait_cnt, bt->wake_cnt);
 
+		blk_mq_put_ctx(data->ctx);
+
 		io_schedule();
+
+		data->ctx = blk_mq_get_ctx(data->q);
+		data->hctx = data->q->mq_ops->map_queue(data->q,
+				data->ctx->cpu);
+		if (data->reserved) {
+			bt = &data->hctx->tags->breserved_tags;
+		} else {
+			last_tag = &data->ctx->last_tag;
+			hctx = data->hctx;
+			bt = &hctx->tags->bitmap_tags;
+		}
+		finish_wait(&bs->wait, &wait);
+		bs = bt_wait_ptr(bt, hctx);
 	} while (1);
 
 	finish_wait(&bs->wait, &wait);
 	return tag;
 }
 
-static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags,
-		struct blk_mq_hw_ctx *hctx,
-		unsigned int *last_tag, gfp_t gfp)
+static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
 	int tag;
 
-	tag = bt_get(&tags->bitmap_tags, hctx, last_tag, gfp);
+	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
+			&data->ctx->last_tag);
 	if (tag >= 0)
-		return tag + tags->nr_reserved_tags;
+		return tag + data->hctx->tags->nr_reserved_tags;
 
 	return BLK_MQ_TAG_FAIL;
 }
 
-static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
-		gfp_t gfp)
+static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 {
 	int tag, zero = 0;
 
-	if (unlikely(!tags->nr_reserved_tags)) {
+	if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
 		WARN_ON_ONCE(1);
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = bt_get(&tags->breserved_tags, NULL, &zero, gfp);
+	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
-		gfp_t gfp, bool reserved)
+unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
-	if (!reserved)
-		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
+	if (!data->reserved)
+		return __blk_mq_get_tag(data);
 
-	return __blk_mq_get_reserved_tag(hctx->tags, gfp);
+	return __blk_mq_get_reserved_tag(data);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
+1 -1
block/blk-mq-tag.h
@@ -48,7 +48,7 @@
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
+40 -43
block/blk-mq.c
@@ -33,28 +33,6 @@
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
 
-static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
-					   unsigned int cpu)
-{
-	return per_cpu_ptr(q->queue_ctx, cpu);
-}
-
-/*
- * This assumes per-cpu software queueing queues. They could be per-node
- * as well, for instance. For now this is hardcoded as-is. Note that we don't
- * care about preemption, since we know the ctx's are persistent. This does
- * mean that we can't rely on ctx always matching the currently running CPU.
- */
-static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
-{
-	return __blk_mq_get_ctx(q, get_cpu());
-}
-
-static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
-{
-	put_cpu();
-}
-
 /*
  * Check if any of the ctx's have pending work in this hardware queue
  */
@@ -210,24 +232,23 @@
 }
 
 static struct request *
-__blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-		struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
 {
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
+	tag = blk_mq_get_tag(data);
 	if (tag != BLK_MQ_TAG_FAIL) {
-		rq = hctx->tags->rqs[tag];
+		rq = data->hctx->tags->rqs[tag];
 
 		rq->cmd_flags = 0;
-		if (blk_mq_tag_busy(hctx)) {
+		if (blk_mq_tag_busy(data->hctx)) {
 			rq->cmd_flags = REQ_MQ_INFLIGHT;
-			atomic_inc(&hctx->nr_active);
+			atomic_inc(&data->hctx->nr_active);
 		}
 
 		rq->tag = tag;
-		blk_mq_rq_ctx_init(q, ctx, rq, rw);
+		blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw);
 		return rq;
 	}
@@ -239,22 +262,27 @@
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
+	struct blk_mq_alloc_data alloc_data;
 
 	if (blk_mq_queue_enter(q))
 		return NULL;
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
+			reserved, ctx, hctx);
 
-	rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp & ~__GFP_WAIT,
-			reserved);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (!rq && (gfp & __GFP_WAIT)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved);
+		blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
+				hctx);
+		rq = __blk_mq_alloc_request(&alloc_data, rw);
+		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
 	return rq;
@@ -529,15 +547,20 @@
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static inline bool is_flush_request(struct request *rq, unsigned int tag)
 {
-	struct request_queue *q = hctx->queue;
+	return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
+			rq->q->flush_rq->tag == tag);
+}
 
-	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
-	    q->flush_rq->tag == tag)
-		return q->flush_rq;
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+	struct request *rq = tags->rqs[tag];
 
-	return hctx->tags->rqs[tag];
+	if (!is_flush_request(rq, tag))
+		return rq;
+
+	return rq->q->flush_rq;
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
@@ -571,7 +584,7 @@
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx, tag++);
+		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
@@ -1145,6 +1158,7 @@
 	struct blk_mq_ctx *ctx;
 	struct request *rq;
 	int rw = bio_data_dir(bio);
+	struct blk_mq_alloc_data alloc_data;
 
 	if (unlikely(blk_mq_queue_enter(q))) {
 		bio_endio(bio, -EIO);
@@ -1159,7 +1171,9 @@
 		rw |= REQ_SYNC;
 
 	trace_block_getrq(q, bio, rw);
-	rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false);
+	blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
+			hctx);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (unlikely(!rq)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
@@ -1169,8 +1179,11 @@
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		rq = __blk_mq_alloc_request(q, hctx, ctx, rw,
-				__GFP_WAIT|GFP_ATOMIC, false);
+		blk_mq_set_alloc_data(&alloc_data, q,
+				__GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
+		rq = __blk_mq_alloc_request(&alloc_data, rw);
+		ctx = alloc_data.ctx;
+		hctx = alloc_data.hctx;
 	}
 
 	hctx->queued++;
@@ -1281,6 +1288,8 @@
 		return;
 
 	rq = blk_mq_map_request(q, bio, &data);
+	if (unlikely(!rq))
+		return;
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
@@ -1557,6 +1562,8 @@
 		if (i == nr_queue)
 			break;
 
+		blk_mq_tag_idle(hctx);
+
 		if (set->ops->exit_hctx)
 			set->ops->exit_hctx(hctx, i);
@@ -1776,7 +1779,7 @@
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
-	struct blk_mq_ctx *ctx;
+	struct blk_mq_ctx __percpu *ctx;
 	struct request_queue *q;
 	unsigned int *map;
 	int i;
+45
block/blk-mq.h
@@ -69,4 +69,49 @@
 	unsigned long		depth;
 } ____cacheline_aligned_in_smp;
 
+static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
+					   unsigned int cpu)
+{
+	return per_cpu_ptr(q->queue_ctx, cpu);
+}
+
+/*
+ * This assumes per-cpu software queueing queues. They could be per-node
+ * as well, for instance. For now this is hardcoded as-is. Note that we don't
+ * care about preemption, since we know the ctx's are persistent. This does
+ * mean that we can't rely on ctx always matching the currently running CPU.
+ */
+static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
+{
+	return __blk_mq_get_ctx(q, get_cpu());
+}
+
+static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+	put_cpu();
+}
+
+struct blk_mq_alloc_data {
+	/* input parameter */
+	struct request_queue *q;
+	gfp_t gfp;
+	bool reserved;
+
+	/* input & output parameter */
+	struct blk_mq_ctx *ctx;
+	struct blk_mq_hw_ctx *hctx;
+};
+
+static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
+		struct request_queue *q, gfp_t gfp, bool reserved,
+		struct blk_mq_ctx *ctx,
+		struct blk_mq_hw_ctx *hctx)
+{
+	data->q = q;
+	data->gfp = gfp;
+	data->reserved = reserved;
+	data->ctx = ctx;
+	data->hctx = hctx;
+}
+
 #endif
+3 -1
drivers/block/mtip32xx/mtip32xx.c
@@ -193,7 +193,9 @@
 static struct request *mtip_rq_from_tag(struct driver_data *dd,
 					unsigned int tag)
 {
-	return blk_mq_tag_to_rq(dd->queue->queue_hw_ctx[0], tag);
+	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
+
+	return blk_mq_tag_to_rq(hctx->tags, tag);
 }
 
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
+1 -1
include/linux/blk-mq.h
@@ -155,7 +155,7 @@
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
+1 -1
include/linux/blkdev.h
@@ -335,7 +335,7 @@
 	unsigned int		*mq_map;
 
 	/* sw queues */
-	struct blk_mq_ctx	*queue_ctx;
+	struct blk_mq_ctx __percpu	*queue_ctx;
 	unsigned int		nr_queues;
 
 	/* hw dispatch queues */