Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'elevator-switch-6.19' into for-6.19/block

Merge elevator switching improvements from Nilay:

"This patchset reorganizes the elevator switch path used during both
nr_hw_queues update and elv_iosched_store() operations to address a
recently reported lockdep splat [1].

The warning highlights a locking dependency between ->freeze_lock and
->elevator_lock on pcpu_alloc_mutex, triggered when the Kyber scheduler
dynamically allocates its private scheduling data. The fix is to ensure
that such allocations occur outside the locked sections, thus
eliminating the dependency chain.

While working on this, it also became evident that the nr_hw_queue
update code maintains two disjoint xarrays—one for elevator tags and
another for elevator type—both serving the same purpose. Unifying these
into a single elv_change_ctx structure improves clarity and
maintainability.

This series therefore implements five patches:
The first preparatory patch unifies elevator tags and type xarrays. It
combines both xarrays into a single struct elv_change_ctx, simplifying
per-queue elevator state management.

The second patch aims to group together all elevator-related
resources that share the same lifetime and as a first step we move the
elevator tags pointer from struct elv_change_ctx into the newly
introduced struct elevator_resources. The subsequent patch extends the
struct elevator_resources to include other elevator-related data.

The third patch introduces ->alloc_sched_data and ->free_sched_data
elevator ops which can then be used to safely allocate and free
scheduler data.

The fourth patch builds upon the previous one and starts using the
newly introduced alloc/free sched data methods during elevator switch
and nr_hw_queue update. While doing so, it's
ensured that sched data allocation and free happens before we acquire
->freeze_lock and ->elevator_lock thus preventing its dependency on
pcpu_alloc_mutex.

The last patch of this series converts Kyber scheduler to use the new
methods introduced in the previous patch. It moves Kyber’s scheduler
data allocation and teardown logic from ->init_sched and ->exit_sched
into the new methods, ensuring memory operations are performed outside
locked sections.

Together, these changes simplify the elevator switch logic and prevent
the reported lockdep splat."

Link: https://lore.kernel.org/linux-block/20251113090619.2030737-1-nilay@linux.ibm.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

* elevator-switch-6.19:
block: define alloc_sched_data and free_sched_data methods for kyber
block: use {alloc|free}_sched data methods
block: introduce alloc_sched_data and free_sched_data elevator methods
block: move elevator tags into struct elevator_resources
block: unify elevator tags and type xarrays into struct elv_change_ctx

+248 -102
+95 -22
block/blk-mq-sched.c
··· 427 427 kfree(et); 428 428 } 429 429 430 - void blk_mq_free_sched_tags_batch(struct xarray *et_table, 430 + void blk_mq_free_sched_res(struct elevator_resources *res, 431 + struct elevator_type *type, 432 + struct blk_mq_tag_set *set) 433 + { 434 + if (res->et) { 435 + blk_mq_free_sched_tags(res->et, set); 436 + res->et = NULL; 437 + } 438 + if (res->data) { 439 + blk_mq_free_sched_data(type, res->data); 440 + res->data = NULL; 441 + } 442 + } 443 + 444 + void blk_mq_free_sched_res_batch(struct xarray *elv_tbl, 431 445 struct blk_mq_tag_set *set) 432 446 { 433 447 struct request_queue *q; 434 - struct elevator_tags *et; 448 + struct elv_change_ctx *ctx; 435 449 436 450 lockdep_assert_held_write(&set->update_nr_hwq_lock); 437 451 ··· 458 444 * concurrently. 459 445 */ 460 446 if (q->elevator) { 461 - et = xa_load(et_table, q->id); 462 - if (unlikely(!et)) 447 + ctx = xa_load(elv_tbl, q->id); 448 + if (!ctx) { 463 449 WARN_ON_ONCE(1); 464 - else 465 - blk_mq_free_sched_tags(et, set); 450 + continue; 451 + } 452 + blk_mq_free_sched_res(&ctx->res, ctx->type, set); 466 453 } 467 454 } 455 + } 456 + 457 + void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl) 458 + { 459 + unsigned long i; 460 + struct elv_change_ctx *ctx; 461 + 462 + xa_for_each(elv_tbl, i, ctx) { 463 + xa_erase(elv_tbl, i); 464 + kfree(ctx); 465 + } 466 + } 467 + 468 + int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl, 469 + struct blk_mq_tag_set *set) 470 + { 471 + struct request_queue *q; 472 + struct elv_change_ctx *ctx; 473 + 474 + lockdep_assert_held_write(&set->update_nr_hwq_lock); 475 + 476 + list_for_each_entry(q, &set->tag_list, tag_set_list) { 477 + ctx = kzalloc(sizeof(struct elv_change_ctx), GFP_KERNEL); 478 + if (!ctx) 479 + return -ENOMEM; 480 + 481 + if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) { 482 + kfree(ctx); 483 + return -ENOMEM; 484 + } 485 + } 486 + return 0; 468 487 } 469 488 470 489 struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, ··· 544 
497 return NULL; 545 498 } 546 499 547 - int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, 500 + int blk_mq_alloc_sched_res(struct request_queue *q, 501 + struct elevator_type *type, 502 + struct elevator_resources *res, 503 + unsigned int nr_hw_queues) 504 + { 505 + struct blk_mq_tag_set *set = q->tag_set; 506 + 507 + res->et = blk_mq_alloc_sched_tags(set, nr_hw_queues, 508 + blk_mq_default_nr_requests(set)); 509 + if (!res->et) 510 + return -ENOMEM; 511 + 512 + res->data = blk_mq_alloc_sched_data(q, type); 513 + if (IS_ERR(res->data)) { 514 + blk_mq_free_sched_tags(res->et, set); 515 + return -ENOMEM; 516 + } 517 + 518 + return 0; 519 + } 520 + 521 + int blk_mq_alloc_sched_res_batch(struct xarray *elv_tbl, 548 522 struct blk_mq_tag_set *set, unsigned int nr_hw_queues) 549 523 { 524 + struct elv_change_ctx *ctx; 550 525 struct request_queue *q; 551 - struct elevator_tags *et; 552 - gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; 526 + int ret = -ENOMEM; 553 527 554 528 lockdep_assert_held_write(&set->update_nr_hwq_lock); 555 529 ··· 583 515 * concurrently. 
584 516 */ 585 517 if (q->elevator) { 586 - et = blk_mq_alloc_sched_tags(set, nr_hw_queues, 587 - blk_mq_default_nr_requests(set)); 588 - if (!et) 518 + ctx = xa_load(elv_tbl, q->id); 519 + if (WARN_ON_ONCE(!ctx)) { 520 + ret = -ENOENT; 589 521 goto out_unwind; 590 - if (xa_insert(et_table, q->id, et, gfp)) 591 - goto out_free_tags; 522 + } 523 + 524 + ret = blk_mq_alloc_sched_res(q, q->elevator->type, 525 + &ctx->res, nr_hw_queues); 526 + if (ret) 527 + goto out_unwind; 592 528 } 593 529 } 594 530 return 0; 595 - out_free_tags: 596 - blk_mq_free_sched_tags(et, set); 531 + 597 532 out_unwind: 598 533 list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) { 599 534 if (q->elevator) { 600 - et = xa_load(et_table, q->id); 601 - if (et) 602 - blk_mq_free_sched_tags(et, set); 535 + ctx = xa_load(elv_tbl, q->id); 536 + if (ctx) 537 + blk_mq_free_sched_res(&ctx->res, 538 + ctx->type, set); 603 539 } 604 540 } 605 - return -ENOMEM; 541 + return ret; 606 542 } 607 543 608 544 /* caller must have a reference to @e, will grab another one if successful */ 609 545 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, 610 - struct elevator_tags *et) 546 + struct elevator_resources *res) 611 547 { 612 548 unsigned int flags = q->tag_set->flags; 549 + struct elevator_tags *et = res->et; 613 550 struct blk_mq_hw_ctx *hctx; 614 551 struct elevator_queue *eq; 615 552 unsigned long i; 616 553 int ret; 617 554 618 - eq = elevator_alloc(q, e, et); 555 + eq = elevator_alloc(q, e, res); 619 556 if (!eq) 620 557 return -ENOMEM; 621 558
+37 -3
block/blk-mq-sched.h
··· 19 19 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); 20 20 21 21 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, 22 - struct elevator_tags *et); 22 + struct elevator_resources *res); 23 23 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); 24 24 void blk_mq_sched_free_rqs(struct request_queue *q); 25 25 26 26 struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, 27 27 unsigned int nr_hw_queues, unsigned int nr_requests); 28 - int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, 28 + int blk_mq_alloc_sched_res(struct request_queue *q, 29 + struct elevator_type *type, 30 + struct elevator_resources *res, 31 + unsigned int nr_hw_queues); 32 + int blk_mq_alloc_sched_res_batch(struct xarray *elv_tbl, 29 33 struct blk_mq_tag_set *set, unsigned int nr_hw_queues); 34 + int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl, 35 + struct blk_mq_tag_set *set); 36 + void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl); 30 37 void blk_mq_free_sched_tags(struct elevator_tags *et, 31 38 struct blk_mq_tag_set *set); 32 - void blk_mq_free_sched_tags_batch(struct xarray *et_table, 39 + void blk_mq_free_sched_res(struct elevator_resources *res, 40 + struct elevator_type *type, 33 41 struct blk_mq_tag_set *set); 42 + void blk_mq_free_sched_res_batch(struct xarray *et_table, 43 + struct blk_mq_tag_set *set); 44 + /* 45 + * blk_mq_alloc_sched_data() - Allocates scheduler specific data 46 + * Returns: 47 + * - Pointer to allocated data on success 48 + * - NULL if no allocation needed 49 + * - ERR_PTR(-ENOMEM) in case of failure 50 + */ 51 + static inline void *blk_mq_alloc_sched_data(struct request_queue *q, 52 + struct elevator_type *e) 53 + { 54 + void *sched_data; 55 + 56 + if (!e || !e->ops.alloc_sched_data) 57 + return NULL; 58 + 59 + sched_data = e->ops.alloc_sched_data(q); 60 + return (sched_data) ?: ERR_PTR(-ENOMEM); 61 + } 62 + 63 + static inline void 
blk_mq_free_sched_data(struct elevator_type *e, void *data) 64 + { 65 + if (e && e->ops.free_sched_data) 66 + e->ops.free_sched_data(data); 67 + } 34 68 35 69 static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) 36 70 {
+29 -21
block/blk-mq.c
··· 4989 4989 * Switch back to the elevator type stored in the xarray. 4990 4990 */ 4991 4991 static void blk_mq_elv_switch_back(struct request_queue *q, 4992 - struct xarray *elv_tbl, struct xarray *et_tbl) 4992 + struct xarray *elv_tbl) 4993 4993 { 4994 - struct elevator_type *e = xa_load(elv_tbl, q->id); 4995 - struct elevator_tags *t = xa_load(et_tbl, q->id); 4994 + struct elv_change_ctx *ctx = xa_load(elv_tbl, q->id); 4995 + 4996 + if (WARN_ON_ONCE(!ctx)) 4997 + return; 4996 4998 4997 4999 /* The elv_update_nr_hw_queues unfreezes the queue. */ 4998 - elv_update_nr_hw_queues(q, e, t); 5000 + elv_update_nr_hw_queues(q, ctx); 4999 5001 5000 5002 /* Drop the reference acquired in blk_mq_elv_switch_none. */ 5001 - if (e) 5002 - elevator_put(e); 5003 + if (ctx->type) 5004 + elevator_put(ctx->type); 5003 5005 } 5004 5006 5005 5007 /* 5006 - * Stores elevator type in xarray and set current elevator to none. It uses 5007 - * q->id as an index to store the elevator type into the xarray. 5008 + * Stores elevator name and type in ctx and set current elevator to none. 5008 5009 */ 5009 5010 static int blk_mq_elv_switch_none(struct request_queue *q, 5010 5011 struct xarray *elv_tbl) 5011 5012 { 5012 - int ret = 0; 5013 + struct elv_change_ctx *ctx; 5013 5014 5014 5015 lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock); 5015 5016 ··· 5022 5021 * can't run concurrently. 5023 5022 */ 5024 5023 if (q->elevator) { 5024 + ctx = xa_load(elv_tbl, q->id); 5025 + if (WARN_ON_ONCE(!ctx)) 5026 + return -ENOENT; 5025 5027 5026 - ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL); 5027 - if (WARN_ON_ONCE(ret)) 5028 - return ret; 5028 + ctx->name = q->elevator->type->elevator_name; 5029 5029 5030 5030 /* 5031 5031 * Before we switch elevator to 'none', take a reference to ··· 5037 5035 */ 5038 5036 __elevator_get(q->elevator->type); 5039 5037 5038 + /* 5039 + * Store elevator type so that we can release the reference 5040 + * taken above later. 
5041 + */ 5042 + ctx->type = q->elevator->type; 5040 5043 elevator_set_none(q); 5041 5044 } 5042 - return ret; 5045 + return 0; 5043 5046 } 5044 5047 5045 5048 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, ··· 5054 5047 int prev_nr_hw_queues = set->nr_hw_queues; 5055 5048 unsigned int memflags; 5056 5049 int i; 5057 - struct xarray elv_tbl, et_tbl; 5050 + struct xarray elv_tbl; 5058 5051 bool queues_frozen = false; 5059 5052 5060 5053 lockdep_assert_held(&set->tag_list_lock); ··· 5068 5061 5069 5062 memflags = memalloc_noio_save(); 5070 5063 5071 - xa_init(&et_tbl); 5072 - if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0) 5073 - goto out_memalloc_restore; 5074 - 5075 5064 xa_init(&elv_tbl); 5065 + if (blk_mq_alloc_sched_ctx_batch(&elv_tbl, set) < 0) 5066 + goto out_free_ctx; 5067 + 5068 + if (blk_mq_alloc_sched_res_batch(&elv_tbl, set, nr_hw_queues) < 0) 5069 + goto out_free_ctx; 5076 5070 5077 5071 list_for_each_entry(q, &set->tag_list, tag_set_list) { 5078 5072 blk_mq_debugfs_unregister_hctxs(q); ··· 5119 5111 /* switch_back expects queue to be frozen */ 5120 5112 if (!queues_frozen) 5121 5113 blk_mq_freeze_queue_nomemsave(q); 5122 - blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); 5114 + blk_mq_elv_switch_back(q, &elv_tbl); 5123 5115 } 5124 5116 5125 5117 list_for_each_entry(q, &set->tag_list, tag_set_list) { ··· 5130 5122 blk_mq_add_hw_queues_cpuhp(q); 5131 5123 } 5132 5124 5125 + out_free_ctx: 5126 + blk_mq_free_sched_ctx_batch(&elv_tbl); 5133 5127 xa_destroy(&elv_tbl); 5134 - xa_destroy(&et_tbl); 5135 - out_memalloc_restore: 5136 5128 memalloc_noio_restore(memflags); 5137 5129 5138 5130 /* Free the excess tags when nr_hw_queues shrink. */
+3 -4
block/blk.h
··· 11 11 #include <xen/xen.h> 12 12 #include "blk-crypto-internal.h" 13 13 14 - struct elevator_type; 15 - struct elevator_tags; 14 + struct elv_change_ctx; 16 15 17 16 /* 18 17 * Default upper limit for the software max_sectors limit used for regular I/Os. ··· 332 333 333 334 bool blk_insert_flush(struct request *rq); 334 335 335 - void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, 336 - struct elevator_tags *t); 336 + void elv_update_nr_hw_queues(struct request_queue *q, 337 + struct elv_change_ctx *ctx); 337 338 void elevator_set_default(struct request_queue *q); 338 339 void elevator_set_none(struct request_queue *q); 339 340
+37 -43
block/elevator.c
··· 45 45 #include "blk-wbt.h" 46 46 #include "blk-cgroup.h" 47 47 48 - /* Holding context data for changing elevator */ 49 - struct elv_change_ctx { 50 - const char *name; 51 - bool no_uevent; 52 - 53 - /* for unregistering old elevator */ 54 - struct elevator_queue *old; 55 - /* for registering new elevator */ 56 - struct elevator_queue *new; 57 - /* holds sched tags data */ 58 - struct elevator_tags *et; 59 - }; 60 - 61 48 static DEFINE_SPINLOCK(elv_list_lock); 62 49 static LIST_HEAD(elv_list); 63 50 ··· 121 134 static const struct kobj_type elv_ktype; 122 135 123 136 struct elevator_queue *elevator_alloc(struct request_queue *q, 124 - struct elevator_type *e, struct elevator_tags *et) 137 + struct elevator_type *e, struct elevator_resources *res) 125 138 { 126 139 struct elevator_queue *eq; 127 140 ··· 134 147 kobject_init(&eq->kobj, &elv_ktype); 135 148 mutex_init(&eq->sysfs_lock); 136 149 hash_init(eq->hash); 137 - eq->et = et; 150 + eq->et = res->et; 151 + eq->elevator_data = res->data; 138 152 139 153 return eq; 140 154 } ··· 581 593 } 582 594 583 595 if (new_e) { 584 - ret = blk_mq_init_sched(q, new_e, ctx->et); 596 + ret = blk_mq_init_sched(q, new_e, &ctx->res); 585 597 if (ret) 586 598 goto out_unfreeze; 587 599 ctx->new = q->elevator; ··· 605 617 return ret; 606 618 } 607 619 608 - static void elv_exit_and_release(struct request_queue *q) 620 + static void elv_exit_and_release(struct elv_change_ctx *ctx, 621 + struct request_queue *q) 609 622 { 610 623 struct elevator_queue *e; 611 624 unsigned memflags; ··· 618 629 mutex_unlock(&q->elevator_lock); 619 630 blk_mq_unfreeze_queue(q, memflags); 620 631 if (e) { 621 - blk_mq_free_sched_tags(e->et, q->tag_set); 632 + blk_mq_free_sched_res(&ctx->res, ctx->type, q->tag_set); 622 633 kobject_put(&e->kobj); 623 634 } 624 635 } ··· 629 640 int ret = 0; 630 641 631 642 if (ctx->old) { 643 + struct elevator_resources res = { 644 + .et = ctx->old->et, 645 + .data = ctx->old->elevator_data 646 + }; 632 647 bool 
enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, 633 648 &ctx->old->flags); 634 649 635 650 elv_unregister_queue(q, ctx->old); 636 - blk_mq_free_sched_tags(ctx->old->et, q->tag_set); 651 + blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set); 637 652 kobject_put(&ctx->old->kobj); 638 653 if (enable_wbt) 639 654 wbt_enable_default(q->disk); ··· 645 652 if (ctx->new) { 646 653 ret = elv_register_queue(q, ctx->new, !ctx->no_uevent); 647 654 if (ret) 648 - elv_exit_and_release(q); 655 + elv_exit_and_release(ctx, q); 649 656 } 650 657 return ret; 651 658 } ··· 662 669 lockdep_assert_held(&set->update_nr_hwq_lock); 663 670 664 671 if (strncmp(ctx->name, "none", 4)) { 665 - ctx->et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues, 666 - blk_mq_default_nr_requests(set)); 667 - if (!ctx->et) 668 - return -ENOMEM; 672 + ret = blk_mq_alloc_sched_res(q, ctx->type, &ctx->res, 673 + set->nr_hw_queues); 674 + if (ret) 675 + return ret; 669 676 } 670 677 671 678 memflags = blk_mq_freeze_queue(q); ··· 686 693 blk_mq_unfreeze_queue(q, memflags); 687 694 if (!ret) 688 695 ret = elevator_change_done(q, ctx); 696 + 689 697 /* 690 - * Free sched tags if it's allocated but we couldn't switch elevator. 698 + * Free sched resource if it's allocated but we couldn't switch elevator. 691 699 */ 692 - if (ctx->et && !ctx->new) 693 - blk_mq_free_sched_tags(ctx->et, set); 700 + if (!ctx->new) 701 + blk_mq_free_sched_res(&ctx->res, ctx->type, set); 694 702 695 703 return ret; 696 704 } ··· 700 706 * The I/O scheduler depends on the number of hardware queues, this forces a 701 707 * reattachment when nr_hw_queues changes. 
702 708 */ 703 - void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, 704 - struct elevator_tags *t) 709 + void elv_update_nr_hw_queues(struct request_queue *q, 710 + struct elv_change_ctx *ctx) 705 711 { 706 712 struct blk_mq_tag_set *set = q->tag_set; 707 - struct elv_change_ctx ctx = {}; 708 713 int ret = -ENODEV; 709 714 710 715 WARN_ON_ONCE(q->mq_freeze_depth == 0); 711 716 712 - if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { 713 - ctx.name = e->elevator_name; 714 - ctx.et = t; 715 - 717 + if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) { 716 718 mutex_lock(&q->elevator_lock); 717 719 /* force to reattach elevator after nr_hw_queue is updated */ 718 - ret = elevator_switch(q, &ctx); 720 + ret = elevator_switch(q, ctx); 719 721 mutex_unlock(&q->elevator_lock); 720 722 } 721 723 blk_mq_unfreeze_queue_nomemrestore(q); 722 724 if (!ret) 723 - WARN_ON_ONCE(elevator_change_done(q, &ctx)); 725 + WARN_ON_ONCE(elevator_change_done(q, ctx)); 726 + 724 727 /* 725 - * Free sched tags if it's allocated but we couldn't switch elevator. 728 + * Free sched resource if it's allocated but we couldn't switch elevator. 726 729 */ 727 - if (t && !ctx.new) 728 - blk_mq_free_sched_tags(t, set); 730 + if (!ctx->new) 731 + blk_mq_free_sched_res(&ctx->res, ctx->type, set); 729 732 } 730 733 731 734 /* ··· 736 745 .no_uevent = true, 737 746 }; 738 747 int err; 739 - struct elevator_type *e; 740 748 741 749 /* now we allow to switch elevator */ 742 750 blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q); ··· 748 758 * have multiple queues or mq-deadline is not available, default 749 759 * to "none". 
750 760 */ 751 - e = elevator_find_get(ctx.name); 752 - if (!e) 761 + ctx.type = elevator_find_get(ctx.name); 762 + if (!ctx.type) 753 763 return; 754 764 755 765 if ((q->nr_hw_queues == 1 || ··· 759 769 pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", 760 770 ctx.name, err); 761 771 } 762 - elevator_put(e); 772 + elevator_put(ctx.type); 763 773 } 764 774 765 775 void elevator_set_none(struct request_queue *q) ··· 808 818 ctx.name = strstrip(elevator_name); 809 819 810 820 elv_iosched_load_module(ctx.name); 821 + ctx.type = elevator_find_get(ctx.name); 811 822 812 823 down_read(&set->update_nr_hwq_lock); 813 824 if (!blk_queue_no_elv_switch(q)) { ··· 819 828 ret = -ENOENT; 820 829 } 821 830 up_read(&set->update_nr_hwq_lock); 831 + 832 + if (ctx.type) 833 + elevator_put(ctx.type); 822 834 return ret; 823 835 } 824 836
+25 -1
block/elevator.h
··· 32 32 struct blk_mq_tags *tags[]; 33 33 }; 34 34 35 + struct elevator_resources { 36 + /* holds elevator data */ 37 + void *data; 38 + /* holds elevator tags */ 39 + struct elevator_tags *et; 40 + }; 41 + 42 + /* Holding context data for changing elevator */ 43 + struct elv_change_ctx { 44 + const char *name; 45 + bool no_uevent; 46 + 47 + /* for unregistering old elevator */ 48 + struct elevator_queue *old; 49 + /* for registering new elevator */ 50 + struct elevator_queue *new; 51 + /* store elevator type */ 52 + struct elevator_type *type; 53 + /* store elevator resources */ 54 + struct elevator_resources res; 55 + }; 56 + 35 57 struct elevator_mq_ops { 36 58 int (*init_sched)(struct request_queue *, struct elevator_queue *); 37 59 void (*exit_sched)(struct elevator_queue *); 38 60 int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); 39 61 void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); 40 62 void (*depth_updated)(struct request_queue *); 63 + void *(*alloc_sched_data)(struct request_queue *); 64 + void (*free_sched_data)(void *); 41 65 42 66 bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); 43 67 bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); ··· 187 163 188 164 extern bool elv_bio_merge_ok(struct request *, struct bio *); 189 165 struct elevator_queue *elevator_alloc(struct request_queue *, 190 - struct elevator_type *, struct elevator_tags *); 166 + struct elevator_type *, struct elevator_resources *); 191 167 192 168 /* 193 169 * Helper functions.
+22 -8
block/kyber-iosched.c
··· 409 409 410 410 static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq) 411 411 { 412 - struct kyber_queue_data *kqd; 413 - 414 - kqd = kyber_queue_data_alloc(q); 415 - if (IS_ERR(kqd)) 416 - return PTR_ERR(kqd); 417 - 418 412 blk_stat_enable_accounting(q); 419 413 420 414 blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q); 421 415 422 - eq->elevator_data = kqd; 423 416 q->elevator = eq; 424 417 kyber_depth_updated(q); 425 418 426 419 return 0; 427 420 } 428 421 422 + static void *kyber_alloc_sched_data(struct request_queue *q) 423 + { 424 + struct kyber_queue_data *kqd; 425 + 426 + kqd = kyber_queue_data_alloc(q); 427 + if (IS_ERR(kqd)) 428 + return NULL; 429 + 430 + return kqd; 431 + } 432 + 429 433 static void kyber_exit_sched(struct elevator_queue *e) 430 434 { 431 435 struct kyber_queue_data *kqd = e->elevator_data; 432 - int i; 433 436 434 437 timer_shutdown_sync(&kqd->timer); 435 438 blk_stat_disable_accounting(kqd->q); 439 + } 440 + 441 + static void kyber_free_sched_data(void *elv_data) 442 + { 443 + struct kyber_queue_data *kqd = elv_data; 444 + int i; 445 + 446 + if (!kqd) 447 + return; 436 448 437 449 for (i = 0; i < KYBER_NUM_DOMAINS; i++) 438 450 sbitmap_queue_free(&kqd->domain_tokens[i]); ··· 1016 1004 .exit_sched = kyber_exit_sched, 1017 1005 .init_hctx = kyber_init_hctx, 1018 1006 .exit_hctx = kyber_exit_hctx, 1007 + .alloc_sched_data = kyber_alloc_sched_data, 1008 + .free_sched_data = kyber_free_sched_data, 1019 1009 .limit_depth = kyber_limit_depth, 1020 1010 .bio_merge = kyber_bio_merge, 1021 1011 .prepare_request = kyber_prepare_request,