Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: unify elevator tags and type xarrays into struct elv_change_ctx

Currently, the nr_hw_queues update path manages two disjoint xarrays —
one for elevator tags and another for elevator type — both used during
elevator switching. Maintaining these two parallel structures for the
same purpose adds unnecessary complexity and potential for mismatched
state.

This patch unifies both xarrays into a single structure, struct
elv_change_ctx, which holds all per-queue elevator change context. A
single xarray, named elv_tbl, now maps each queue (q->id) in a tagset
to its corresponding elv_change_ctx entry, encapsulating the elevator
tags, type and name references.

This unification simplifies the code, improves maintainability, and
clarifies ownership of per-queue elevator state.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Nilay Shroff and committed by Jens Axboe
232143b6 c3f42a6d

+116 -68
+59 -19
block/blk-mq-sched.c
··· 427 427 kfree(et); 428 428 } 429 429 430 - void blk_mq_free_sched_tags_batch(struct xarray *et_table, 430 + void blk_mq_free_sched_tags_batch(struct xarray *elv_tbl, 431 431 struct blk_mq_tag_set *set) 432 432 { 433 433 struct request_queue *q; 434 - struct elevator_tags *et; 434 + struct elv_change_ctx *ctx; 435 435 436 436 lockdep_assert_held_write(&set->update_nr_hwq_lock); 437 437 ··· 444 444 * concurrently. 445 445 */ 446 446 if (q->elevator) { 447 - et = xa_load(et_table, q->id); 448 - if (unlikely(!et)) 447 + ctx = xa_load(elv_tbl, q->id); 448 + if (!ctx || !ctx->et) { 449 449 WARN_ON_ONCE(1); 450 - else 451 - blk_mq_free_sched_tags(et, set); 450 + continue; 451 + } 452 + blk_mq_free_sched_tags(ctx->et, set); 453 + ctx->et = NULL; 452 454 } 453 455 } 456 + } 457 + 458 + void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl) 459 + { 460 + unsigned long i; 461 + struct elv_change_ctx *ctx; 462 + 463 + xa_for_each(elv_tbl, i, ctx) { 464 + xa_erase(elv_tbl, i); 465 + kfree(ctx); 466 + } 467 + } 468 + 469 + int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl, 470 + struct blk_mq_tag_set *set) 471 + { 472 + struct request_queue *q; 473 + struct elv_change_ctx *ctx; 474 + 475 + lockdep_assert_held_write(&set->update_nr_hwq_lock); 476 + 477 + list_for_each_entry(q, &set->tag_list, tag_set_list) { 478 + ctx = kzalloc(sizeof(struct elv_change_ctx), GFP_KERNEL); 479 + if (!ctx) 480 + return -ENOMEM; 481 + 482 + if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) { 483 + kfree(ctx); 484 + return -ENOMEM; 485 + } 486 + } 487 + return 0; 454 488 } 455 489 456 490 struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, ··· 531 497 return NULL; 532 498 } 533 499 534 - int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, 500 + int blk_mq_alloc_sched_tags_batch(struct xarray *elv_tbl, 535 501 struct blk_mq_tag_set *set, unsigned int nr_hw_queues) 536 502 { 503 + struct elv_change_ctx *ctx; 537 504 struct request_queue *q; 538 505 struct elevator_tags *et; 539 - gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; 506 + int ret = -ENOMEM; 540 507 541 508 lockdep_assert_held_write(&set->update_nr_hwq_lock); 542 509 ··· 550 515 * concurrently. 551 516 */ 552 517 if (q->elevator) { 553 - et = blk_mq_alloc_sched_tags(set, nr_hw_queues, 554 - blk_mq_default_nr_requests(set)); 555 - if (!et) 518 + ctx = xa_load(elv_tbl, q->id); 519 + if (WARN_ON_ONCE(!ctx)) { 520 + ret = -ENOENT; 556 521 goto out_unwind; 557 - if (xa_insert(et_table, q->id, et, gfp)) 558 - goto out_free_tags; 522 + } 523 + 524 + ctx->et = blk_mq_alloc_sched_tags(set, nr_hw_queues, 525 + blk_mq_default_nr_requests(set)); 526 + if (!ctx->et) 527 + goto out_unwind; 528 + 559 529 } 560 530 } 561 531 return 0; 562 - out_free_tags: 563 - blk_mq_free_sched_tags(et, set); 564 532 out_unwind: 565 533 list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) { 566 534 if (q->elevator) { 567 - et = xa_load(et_table, q->id); 568 - if (et) 569 - blk_mq_free_sched_tags(et, set); 535 + ctx = xa_load(elv_tbl, q->id); 536 + if (ctx && ctx->et) { 537 + blk_mq_free_sched_tags(ctx->et, set); 538 + ctx->et = NULL; 539 + } 570 540 } 571 541 } 572 - return -ENOMEM; 542 + return ret; 573 543 } 574 544 575 545 /* caller must have a reference to @e, will grab another one if successful */
+3
block/blk-mq-sched.h
··· 27 27 unsigned int nr_hw_queues, unsigned int nr_requests); 28 28 int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, 29 29 struct blk_mq_tag_set *set, unsigned int nr_hw_queues); 30 + int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl, 31 + struct blk_mq_tag_set *set); 32 + void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl); 30 33 void blk_mq_free_sched_tags(struct elevator_tags *et, 31 34 struct blk_mq_tag_set *set); 32 35 void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+29 -21
block/blk-mq.c
··· 4989 4989 * Switch back to the elevator type stored in the xarray. 4990 4990 */ 4991 4991 static void blk_mq_elv_switch_back(struct request_queue *q, 4992 - struct xarray *elv_tbl, struct xarray *et_tbl) 4992 + struct xarray *elv_tbl) 4993 4993 { 4994 - struct elevator_type *e = xa_load(elv_tbl, q->id); 4995 - struct elevator_tags *t = xa_load(et_tbl, q->id); 4994 + struct elv_change_ctx *ctx = xa_load(elv_tbl, q->id); 4995 + 4996 + if (WARN_ON_ONCE(!ctx)) 4997 + return; 4996 4998 4997 4999 /* The elv_update_nr_hw_queues unfreezes the queue. */ 4998 - elv_update_nr_hw_queues(q, e, t); 5000 + elv_update_nr_hw_queues(q, ctx); 4999 5001 5000 5002 /* Drop the reference acquired in blk_mq_elv_switch_none. */ 5001 - if (e) 5002 - elevator_put(e); 5003 + if (ctx->type) 5004 + elevator_put(ctx->type); 5003 5005 } 5004 5006 5005 5007 /* 5006 - * Stores elevator type in xarray and set current elevator to none. It uses 5007 - * q->id as an index to store the elevator type into the xarray. 5008 + * Stores elevator name and type in ctx and set current elevator to none. 5008 5009 */ 5009 5010 static int blk_mq_elv_switch_none(struct request_queue *q, 5010 5011 struct xarray *elv_tbl) 5011 5012 { 5012 - int ret = 0; 5013 + struct elv_change_ctx *ctx; 5013 5014 5014 5015 lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock); 5015 5016 ··· 5022 5021 * can't run concurrently. 5023 5022 */ 5024 5023 if (q->elevator) { 5024 + ctx = xa_load(elv_tbl, q->id); 5025 + if (WARN_ON_ONCE(!ctx)) 5026 + return -ENOENT; 5025 5027 5026 - ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL); 5027 - if (WARN_ON_ONCE(ret)) 5028 - return ret; 5028 + ctx->name = q->elevator->type->elevator_name; 5029 5029 5030 5030 /* 5031 5031 * Before we switch elevator to 'none', take a reference to ··· 5037 5035 */ 5038 5036 __elevator_get(q->elevator->type); 5039 5037 5038 + /* 5039 + * Store elevator type so that we can release the reference 5040 + * taken above later. 5041 + */ 5042 + ctx->type = q->elevator->type; 5040 5043 elevator_set_none(q); 5041 5044 } 5042 - return ret; 5045 + return 0; 5043 5046 } 5044 5047 5045 5048 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, ··· 5054 5047 int prev_nr_hw_queues = set->nr_hw_queues; 5055 5048 unsigned int memflags; 5056 5049 int i; 5057 - struct xarray elv_tbl, et_tbl; 5050 + struct xarray elv_tbl; 5058 5051 bool queues_frozen = false; 5059 5052 5060 5053 lockdep_assert_held(&set->tag_list_lock); ··· 5068 5061 5069 5062 memflags = memalloc_noio_save(); 5070 5063 5071 - xa_init(&et_tbl); 5072 - if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0) 5073 - goto out_memalloc_restore; 5074 - 5075 5064 xa_init(&elv_tbl); 5065 + if (blk_mq_alloc_sched_ctx_batch(&elv_tbl, set) < 0) 5066 + goto out_free_ctx; 5067 + 5068 + if (blk_mq_alloc_sched_tags_batch(&elv_tbl, set, nr_hw_queues) < 0) 5069 + goto out_free_ctx; 5076 5070 5077 5071 list_for_each_entry(q, &set->tag_list, tag_set_list) { 5078 5072 blk_mq_debugfs_unregister_hctxs(q); ··· 5119 5111 /* switch_back expects queue to be frozen */ 5120 5112 if (!queues_frozen) 5121 5113 blk_mq_freeze_queue_nomemsave(q); 5122 - blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); 5114 + blk_mq_elv_switch_back(q, &elv_tbl); 5123 5115 } 5124 5116 5125 5117 list_for_each_entry(q, &set->tag_list, tag_set_list) { ··· 5130 5122 blk_mq_add_hw_queues_cpuhp(q); 5131 5123 } 5132 5124 5125 + out_free_ctx: 5126 + blk_mq_free_sched_ctx_batch(&elv_tbl); 5133 5127 xa_destroy(&elv_tbl); 5134 - xa_destroy(&et_tbl); 5135 - out_memalloc_restore: 5136 5128 memalloc_noio_restore(memflags); 5137 5129 5138 5130 /* Free the excess tags when nr_hw_queues shrink. */
+3 -4
block/blk.h
··· 11 11 #include <xen/xen.h> 12 12 #include "blk-crypto-internal.h" 13 13 14 - struct elevator_type; 15 - struct elevator_tags; 14 + struct elv_change_ctx; 16 15 17 16 /* 18 17 * Default upper limit for the software max_sectors limit used for regular I/Os. ··· 332 333 333 334 bool blk_insert_flush(struct request *rq); 334 335 335 - void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, 336 - struct elevator_tags *t); 336 + void elv_update_nr_hw_queues(struct request_queue *q, 337 + struct elv_change_ctx *ctx); 337 338 void elevator_set_default(struct request_queue *q); 338 339 void elevator_set_none(struct request_queue *q); 339 340
+7 -24
block/elevator.c
··· 45 45 #include "blk-wbt.h" 46 46 #include "blk-cgroup.h" 47 47 48 - /* Holding context data for changing elevator */ 49 - struct elv_change_ctx { 50 - const char *name; 51 - bool no_uevent; 52 - 53 - /* for unregistering old elevator */ 54 - struct elevator_queue *old; 55 - /* for registering new elevator */ 56 - struct elevator_queue *new; 57 - /* holds sched tags data */ 58 - struct elevator_tags *et; 59 - }; 60 - 61 48 static DEFINE_SPINLOCK(elv_list_lock); 62 49 static LIST_HEAD(elv_list); 63 50 ··· 693 706 * The I/O scheduler depends on the number of hardware queues, this forces a 694 707 * reattachment when nr_hw_queues changes. 695 708 */ 696 - void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, 697 - struct elevator_tags *t) 709 + void elv_update_nr_hw_queues(struct request_queue *q, 710 + struct elv_change_ctx *ctx) 698 711 { 699 712 struct blk_mq_tag_set *set = q->tag_set; 700 - struct elv_change_ctx ctx = {}; 701 713 int ret = -ENODEV; 702 714 703 715 WARN_ON_ONCE(q->mq_freeze_depth == 0); 704 716 705 - if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { 706 - ctx.name = e->elevator_name; 707 - ctx.et = t; 708 - 717 + if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) { 709 718 mutex_lock(&q->elevator_lock); 710 719 /* force to reattach elevator after nr_hw_queue is updated */ 711 - ret = elevator_switch(q, &ctx); 720 + ret = elevator_switch(q, ctx); 712 721 mutex_unlock(&q->elevator_lock); 713 722 } 714 723 blk_mq_unfreeze_queue_nomemrestore(q); 715 724 if (!ret) 716 - WARN_ON_ONCE(elevator_change_done(q, &ctx)); 725 + WARN_ON_ONCE(elevator_change_done(q, ctx)); 717 726 /* 718 727 * Free sched tags if it's allocated but we couldn't switch elevator. 719 728 */ 720 - if (t && !ctx.new) 721 - blk_mq_free_sched_tags(t, set); 729 + if (ctx->et && !ctx->new) 730 + blk_mq_free_sched_tags(ctx->et, set); 722 731 } 723 732 724 733 /*
+15
block/elevator.h
··· 32 32 struct blk_mq_tags *tags[]; 33 33 }; 34 34 35 + /* Holding context data for changing elevator */ 36 + struct elv_change_ctx { 37 + const char *name; 38 + bool no_uevent; 39 + 40 + /* for unregistering old elevator */ 41 + struct elevator_queue *old; 42 + /* for registering new elevator */ 43 + struct elevator_queue *new; 44 + /* store elevator type */ 45 + struct elevator_type *type; 46 + /* holds sched tags data */ 47 + struct elevator_tags *et; 48 + }; 49 + 35 50 struct elevator_mq_ops { 36 51 int (*init_sched)(struct request_queue *, struct elevator_queue *); 37 52 void (*exit_sched)(struct elevator_queue *);