Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()

nvme_map_user_request() is called from both nvme_submit_user_cmd() and
nvme_uring_cmd_io(). But the ioucmd branch is only applicable to
nvme_uring_cmd_io(). Move it to nvme_uring_cmd_io() and just pass the
resulting iov_iter to nvme_map_user_request().

For NVMe passthru operations with fixed buffers, the fixed buffer lookup
happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can
return -EAGAIN first from nvme_alloc_user_request() if all tags in the
tag set are in use. This ordering difference is observable when using
UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If
the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF
to unregister the fixed buffer and the NVMe passthru goes async, the
fixed buffer lookup will fail because it happens after the unregister.

Userspace should not depend on the order in which io_uring issues SQEs
submitted in parallel, but it may try submitting the SQEs together and
fall back on a slow path if the fixed buffer lookup fails. To make the
fast path more likely, do the import before nvme_alloc_user_request().

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>

Authored by Caleb Sander Mateos and committed by Keith Busch.
38808af5 cd683de6

+24 -21
drivers/nvme/host/ioctl.c
··· 114 114 115 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 - struct io_uring_cmd *ioucmd, unsigned int flags, 118 - unsigned int iou_issue_flags) 117 + struct iov_iter *iter, unsigned int flags) 119 118 { 120 119 struct request_queue *q = req->q; 121 120 struct nvme_ns *ns = q->queuedata; ··· 136 137 "using unchecked metadata buffer\n"); 137 138 } 138 139 139 - if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 140 - struct iov_iter iter; 141 - 142 - /* fixedbufs is only for non-vectored io */ 143 - if (flags & NVME_IOCTL_VEC) 144 - return -EINVAL; 145 - 146 - ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 147 - rq_data_dir(req), &iter, ioucmd, 148 - iou_issue_flags); 149 - if (ret < 0) 150 - return ret; 151 - ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 152 - } else { 140 + if (iter) 141 + ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); 142 + else 153 143 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 154 144 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 155 145 0, rq_data_dir(req)); 156 - } 157 146 158 147 if (ret) 159 148 return ret; ··· 183 196 req->timeout = timeout; 184 197 if (ubuffer && bufflen) { 185 198 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 186 - meta_len, NULL, flags, 0); 199 + meta_len, NULL, flags); 187 200 if (ret) 188 201 goto out_free_req; 189 202 } ··· 455 468 struct request_queue *q = ns ? 
ns->queue : ctrl->admin_q; 456 469 struct nvme_uring_data d; 457 470 struct nvme_command c; 471 + struct iov_iter iter; 472 + struct iov_iter *map_iter = NULL; 458 473 struct request *req; 459 474 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 460 475 blk_mq_req_flags_t blk_flags = 0; ··· 492 503 d.metadata_len = READ_ONCE(cmd->metadata_len); 493 504 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 494 505 506 + if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 507 + /* fixedbufs is only for non-vectored io */ 508 + if (vec) 509 + return -EINVAL; 510 + 511 + ret = io_uring_cmd_import_fixed(d.addr, d.data_len, 512 + nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd, 513 + issue_flags); 514 + if (ret < 0) 515 + return ret; 516 + 517 + map_iter = &iter; 518 + } 519 + 495 520 if (issue_flags & IO_URING_F_NONBLOCK) { 496 521 rq_flags |= REQ_NOWAIT; 497 522 blk_flags = BLK_MQ_REQ_NOWAIT; ··· 519 516 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 520 517 521 518 if (d.data_len) { 522 - ret = nvme_map_user_request(req, d.addr, 523 - d.data_len, nvme_to_user_ptr(d.metadata), 524 - d.metadata_len, ioucmd, vec, issue_flags); 519 + ret = nvme_map_user_request(req, d.addr, d.data_len, 520 + nvme_to_user_ptr(d.metadata), d.metadata_len, 521 + map_iter, vec); 525 522 if (ret) 526 523 goto out_free_req; 527 524 }