Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blk-mq: Move flush queue allocation into blk_mq_init_hctx()

Move flush queue allocation into blk_mq_init_hctx() and its release into
blk_mq_exit_hctx(), and prepare for replacing tags->lock with SRCU for
draining inflight request walking. blk_mq_exit_hctx() is the last chance
for us to get valid `tag_set` reference, and we need to add one SRCU to
`tag_set` for freeing flush request via call_srcu().

It is safe to move flush queue & request release into blk_mq_exit_hctx(),
because blk_mq_clear_flush_rq_mapping() clears the flush request
reference in the driver tags' inflight request table; meanwhile, inflight
request walking is drained.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Ming Lei and committed by
Jens Axboe
aba19ee7 ba28afbd

+13 -8
-1
block/blk-mq-sysfs.c
··· 34 34 struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, 35 35 kobj); 36 36 37 - blk_free_flush_queue(hctx->fq); 38 37 sbitmap_free(&hctx->ctx_map); 39 38 free_cpumask_var(hctx->cpumask); 40 39 kfree(hctx->ctxs);
+13 -7
block/blk-mq.c
··· 3939 3939 if (set->ops->exit_hctx) 3940 3940 set->ops->exit_hctx(hctx, hctx_idx); 3941 3941 3942 + blk_free_flush_queue(hctx->fq); 3943 + hctx->fq = NULL; 3944 + 3942 3945 xa_erase(&q->hctx_table, hctx_idx); 3943 3946 3944 3947 spin_lock(&q->unused_hctx_lock); ··· 3967 3964 struct blk_mq_tag_set *set, 3968 3965 struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) 3969 3966 { 3967 + gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY; 3968 + 3969 + hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); 3970 + if (!hctx->fq) 3971 + goto fail; 3972 + 3970 3973 hctx->queue_num = hctx_idx; 3971 3974 3972 3975 hctx->tags = set->tags[hctx_idx]; 3973 3976 3974 3977 if (set->ops->init_hctx && 3975 3978 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) 3976 - goto fail; 3979 + goto fail_free_fq; 3977 3980 3978 3981 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, 3979 3982 hctx->numa_node)) ··· 3996 3987 exit_hctx: 3997 3988 if (set->ops->exit_hctx) 3998 3989 set->ops->exit_hctx(hctx, hctx_idx); 3990 + fail_free_fq: 3991 + blk_free_flush_queue(hctx->fq); 3992 + hctx->fq = NULL; 3999 3993 fail: 4000 3994 return -1; 4001 3995 } ··· 4050 4038 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); 4051 4039 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); 4052 4040 4053 - hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); 4054 - if (!hctx->fq) 4055 - goto free_bitmap; 4056 - 4057 4041 blk_mq_hctx_kobj_init(hctx); 4058 4042 4059 4043 return hctx; 4060 4044 4061 - free_bitmap: 4062 - sbitmap_free(&hctx->ctx_map); 4063 4045 free_ctxs: 4064 4046 kfree(hctx->ctxs); 4065 4047 free_cpumask: