Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blk-mq: fix potential uaf for 'queue_hw_ctx'

This just applies Kuai's patch in [1] with minor changes.

blk_mq_realloc_hw_ctxs() will free the 'queue_hw_ctx' (e.g. update
submit_queues through configfs for null_blk), while it might still be
used from other contexts (e.g. switch elevator to none):

t1 t2
elevator_switch
blk_mq_unquiesce_queue
blk_mq_run_hw_queues
queue_for_each_hw_ctx
// assembly code for hctx = (q)->queue_hw_ctx[i]
mov 0x48(%rbp),%rdx -> read old queue_hw_ctx

__blk_mq_update_nr_hw_queues
blk_mq_realloc_hw_ctxs
hctxs = q->queue_hw_ctx
q->queue_hw_ctx = new_hctxs
kfree(hctxs)
movslq %ebx,%rax
mov (%rdx,%rax,8),%rdi ->uaf

This problem was found by code review, and I confirmed that the concurrent
scenario does exist (specifically 'q->queue_hw_ctx' can be changed during
blk_mq_run_hw_queues()); however, the uaf problem hasn't been reproduced yet
without hacking the kernel.

Since the queue is frozen in __blk_mq_update_nr_hw_queues(), fix the
problem by protecting 'queue_hw_ctx' through rcu where it can be accessed
without grabbing 'q_usage_counter'.

[1] https://lore.kernel.org/all/20220225072053.2472431-1-yukuai3@huawei.com/

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Fengnan Chang and committed by
Jens Axboe
89e1fb7c d0c98769

+19 -3
+6 -1
block/blk-mq.c
··· 4535 4535 if (hctxs) 4536 4536 memcpy(new_hctxs, hctxs, q->nr_hw_queues * 4537 4537 sizeof(*hctxs)); 4538 - q->queue_hw_ctx = new_hctxs; 4538 + rcu_assign_pointer(q->queue_hw_ctx, new_hctxs); 4539 + /* 4540 + * Make sure reading the old queue_hw_ctx from other 4541 + * context concurrently won't trigger uaf. 4542 + */ 4543 + synchronize_rcu_expedited(); 4539 4544 kfree(hctxs); 4540 4545 hctxs = new_hctxs; 4541 4546 }
+12 -1
include/linux/blk-mq.h
··· 1015 1015 return rq + 1; 1016 1016 } 1017 1017 1018 + static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id) 1019 + { 1020 + struct blk_mq_hw_ctx *hctx; 1021 + 1022 + rcu_read_lock(); 1023 + hctx = rcu_dereference(q->queue_hw_ctx)[id]; 1024 + rcu_read_unlock(); 1025 + 1026 + return hctx; 1027 + } 1028 + 1018 1029 #define queue_for_each_hw_ctx(q, hctx, i) \ 1019 1030 for ((i) = 0; (i) < (q)->nr_hw_queues && \ 1020 - ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) 1031 + ({ hctx = queue_hctx((q), i); 1; }); (i)++) 1021 1032 1022 1033 #define hctx_for_each_ctx(hctx, ctx, i) \ 1023 1034 for ((i) = 0; (i) < (hctx)->nr_ctx && \
+1 -1
include/linux/blkdev.h
··· 503 503 504 504 /* hw dispatch queues */ 505 505 unsigned int nr_hw_queues; 506 - struct blk_mq_hw_ctx **queue_hw_ctx; 506 + struct blk_mq_hw_ctx * __rcu *queue_hw_ctx; 507 507 508 508 struct percpu_ref q_usage_counter; 509 509 struct lock_class_key io_lock_cls_key;