Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: fix race between wbt_enable_default and IO submission

When wbt_enable_default() is moved out of queue freezing in elevator_change(),
it can cause the wbt inflight counter to become negative (-1), leading to hung
tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
is in an inconsistent state.

The issue occurs because wbt_enable_default() could race with IO submission,
allowing the counter to be decremented before proper initialization. This manifests
as:

rq_wait[0]:
inflight: -1
has_waiters: True

rwb_enabled() checks the state, which can be updated exactly between wbt_wait()
(rq_qos_throttle()) and wbt_track() (rq_qos_track()). When that happens, the
inflight counter becomes negative.

This results in hung task warnings like:
task:kworker/u24:39 state:D stack:0 pid:14767
Call Trace:
rq_qos_wait+0xb4/0x150
wbt_wait+0xa9/0x100
__rq_qos_throttle+0x24/0x40
blk_mq_submit_bio+0x672/0x7b0
...

Fix this by:

1. Splitting wbt_enable_default() into:
- __wbt_enable_default(): Returns true if wbt_init() should be called
- wbt_enable_default(): Wrapper for existing callers (no init)
- wbt_init_enable_default(): New function that checks and inits WBT

2. Using wbt_init_enable_default() in blk_register_queue() to ensure
proper initialization during queue registration

3. Moving wbt_init() out of wbt_enable_default(), which is only used to
re-enable disabled wbt from bfq and iocost, where wbt_init() isn't needed.
With that, the original lock warning is avoided.

4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
code since it's no longer needed

This ensures WBT is properly initialized before any IO can be submitted,
preventing the counter from going negative.

Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai@fnnas.com>
Cc: Guangwu Zhang <guazhang@redhat.com>
Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Ming Lei and committed by
Jens Axboe
9869d3a6 63276182

+23 -11
+1 -1
block/bfq-iosched.c
··· 7181 7181 7182 7182 blk_stat_disable_accounting(bfqd->queue); 7183 7183 blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue); 7184 - set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags); 7184 + wbt_enable_default(bfqd->queue->disk); 7185 7185 7186 7186 kfree(bfqd); 7187 7187 }
+1 -1
block/blk-sysfs.c
··· 932 932 elevator_set_default(q); 933 933 934 934 blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); 935 - wbt_enable_default(disk); 935 + wbt_init_enable_default(disk); 936 936 937 937 /* Now everything is ready and send out KOBJ_ADD uevent */ 938 938 kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
+16 -4
block/blk-wbt.c
··· 699 699 /* 700 700 * Enable wbt if defaults are configured that way 701 701 */ 702 - void wbt_enable_default(struct gendisk *disk) 702 + static bool __wbt_enable_default(struct gendisk *disk) 703 703 { 704 704 struct request_queue *q = disk->queue; 705 705 struct rq_qos *rqos; ··· 716 716 if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) 717 717 RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; 718 718 mutex_unlock(&disk->rqos_state_mutex); 719 - return; 719 + return false; 720 720 } 721 721 mutex_unlock(&disk->rqos_state_mutex); 722 722 723 723 /* Queue not registered? Maybe shutting down... */ 724 724 if (!blk_queue_registered(q)) 725 - return; 725 + return false; 726 726 727 727 if (queue_is_mq(q) && enable) 728 - wbt_init(disk); 728 + return true; 729 + return false; 730 + } 731 + 732 + void wbt_enable_default(struct gendisk *disk) 733 + { 734 + __wbt_enable_default(disk); 729 735 } 730 736 EXPORT_SYMBOL_GPL(wbt_enable_default); 737 + 738 + void wbt_init_enable_default(struct gendisk *disk) 739 + { 740 + if (__wbt_enable_default(disk)) 741 + WARN_ON_ONCE(wbt_init(disk)); 742 + } 731 743 732 744 u64 wbt_default_latency_nsec(struct request_queue *q) 733 745 {
+5
block/blk-wbt.h
··· 5 5 #ifdef CONFIG_BLK_WBT 6 6 7 7 int wbt_init(struct gendisk *disk); 8 + void wbt_init_enable_default(struct gendisk *disk); 8 9 void wbt_disable_default(struct gendisk *disk); 9 10 void wbt_enable_default(struct gendisk *disk); 10 11 ··· 16 15 u64 wbt_default_latency_nsec(struct request_queue *); 17 16 18 17 #else 18 + 19 + static inline void wbt_init_enable_default(struct gendisk *disk) 20 + { 21 + } 19 22 20 23 static inline void wbt_disable_default(struct gendisk *disk) 21 24 {
-4
block/elevator.c
··· 633 633 .et = ctx->old->et, 634 634 .data = ctx->old->elevator_data 635 635 }; 636 - bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, 637 - &ctx->old->flags); 638 636 639 637 elv_unregister_queue(q, ctx->old); 640 638 blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set); 641 639 kobject_put(&ctx->old->kobj); 642 - if (enable_wbt) 643 - wbt_enable_default(q->disk); 644 640 } 645 641 if (ctx->new) { 646 642 ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
-1
block/elevator.h
··· 156 156 157 157 #define ELEVATOR_FLAG_REGISTERED 0 158 158 #define ELEVATOR_FLAG_DYING 1 159 - #define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT 2 160 159 161 160 /* 162 161 * block elevator interface