Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

ublk: implement ->queue_rqs()

Implement ->queue_rqs() to improve performance in the multi-queue (MQ) case.

This way, io_uring_cmd_complete_in_task() needs to be called only once for the
whole IO batch, and both io_uring and the ublk server see the exact batch
coming from the ublk frontend.
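
As a rough illustration of that batching pattern, here is a minimal userspace
analogue (the struct and function names below are invented for this sketch;
the real kernel code chains struct request via rq->rq_next, as shown in the
diff further down):

#include <stdio.h>

/* Invented userspace stand-in for struct request chained via rq_next. */
struct req {
	int tag;
	struct req *next;
};

/* One task-work callback walks the whole batch, mirroring
 * ublk_cmd_list_tw_cb() in the diff below. */
static void dispatch_batch(struct req *rq)
{
	while (rq) {
		struct req *next = rq->next;

		rq->next = NULL;	/* detach before dispatch */
		printf("dispatch tag %d\n", rq->tag);
		rq = next;
	}
}

int main(void)
{
	struct req r[3] = { { .tag = 0 }, { .tag = 1 }, { .tag = 2 } };

	/* queue_rqs() links the batch once... */
	r[0].next = &r[1];
	r[1].next = &r[2];

	/* ...and a single callback consumes all of it. */
	dispatch_batch(&r[0]);
	return 0;
}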

The following IOPS improvement was measured:

- tests

tools/testing/selftests/ublk/kublk add -t null -q 2 [-z]

fio/t/io_uring -p0 /dev/ublkb0

- results:

more than 10% IOPS boost observed

All ublk selftests pass, in particular the io dispatch order test.

Cc: Uday Shankar <ushankar@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250327095123.179113-9-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Ming Lei and committed by Jens Axboe
d796cea7 17970209

+111 -20
drivers/block/ublk_drv.c
···
 };
 
 struct ublk_uring_cmd_pdu {
+	/*
+	 * Store requests in same batch temporarily for queuing them to
+	 * daemon context.
+	 *
+	 * It should have been stored to request payload, but we do want
+	 * to avoid extra pre-allocation, and uring_cmd payload is always
+	 * free for us
+	 */
+	struct request *req_list;
+
+	/*
+	 * The following two are valid in this cmd whole lifetime, and
+	 * setup in ublk uring_cmd handler
+	 */
 	struct ublk_queue *ubq;
 	u16 tag;
 };
···
 	blk_mq_end_request(rq, BLK_STS_IOERR);
 }
 
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
-				 unsigned int issue_flags)
+static void ublk_dispatch_req(struct ublk_queue *ubq,
+			      struct io_uring_cmd *cmd,
+			      struct request *req,
+			      unsigned int issue_flags)
 {
-	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-	struct ublk_queue *ubq = pdu->ubq;
-	int tag = pdu->tag;
-	struct request *req = blk_mq_tag_to_rq(
-		ubq->dev->tag_set.tags[ubq->q_id], tag);
+	int tag = req->tag;
 	struct ublk_io *io = &ubq->ios[tag];
 	unsigned int mapped_bytes;
···
 	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
 }
 
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
+				 unsigned int issue_flags)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct ublk_queue *ubq = pdu->ubq;
+	int tag = pdu->tag;
+	struct request *req = blk_mq_tag_to_rq(
+		ubq->dev->tag_set.tags[ubq->q_id], tag);
+
+	ublk_dispatch_req(ubq, cmd, req, issue_flags);
+}
+
 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 {
 	struct ublk_io *io = &ubq->ios[rq->tag];
 
 	io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
+}
+
+static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
+				unsigned int issue_flags)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct request *rq = pdu->req_list;
+	struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+	struct request *next;
+
+	while (rq) {
+		struct ublk_io *io = &ubq->ios[rq->tag];
+
+		next = rq->rq_next;
+		rq->rq_next = NULL;
+		ublk_dispatch_req(ubq, io->cmd, rq, issue_flags);
+		rq = next;
+	}
+}
+
+static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
+{
+	struct request *rq = rq_list_peek(l);
+	struct ublk_io *io = &ubq->ios[rq->tag];
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
+
+	pdu->req_list = rq;
+	rq_list_init(l);
+	io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb);
 }
 
 static enum blk_eh_timer_return ublk_timeout(struct request *rq)
···
 	return BLK_EH_RESET_TIMER;
 }
 
-static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
-		const struct blk_mq_queue_data *bd)
+static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq)
 {
-	struct ublk_queue *ubq = hctx->driver_data;
-	struct request *rq = bd->rq;
 	blk_status_t res;
 
-	if (unlikely(ubq->fail_io)) {
+	if (unlikely(ubq->fail_io))
 		return BLK_STS_TARGET;
-	}
-
-	/* fill iod to slot in io cmd buffer */
-	res = ublk_setup_iod(ubq, rq);
-	if (unlikely(res != BLK_STS_OK))
-		return BLK_STS_IOERR;
 
 	/* With recovery feature enabled, force_abort is set in
 	 * ublk_stop_dev() before calling del_gendisk(). We have to
···
 	if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
 		return BLK_STS_IOERR;
 
+	if (unlikely(ubq->canceling))
+		return BLK_STS_IOERR;
+
+	/* fill iod to slot in io cmd buffer */
+	res = ublk_setup_iod(ubq, rq);
+	if (unlikely(res != BLK_STS_OK))
+		return BLK_STS_IOERR;
+
+	blk_mq_start_request(rq);
+	return BLK_STS_OK;
+}
+
+static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+		const struct blk_mq_queue_data *bd)
+{
+	struct ublk_queue *ubq = hctx->driver_data;
+	struct request *rq = bd->rq;
+	blk_status_t res;
+
+	res = ublk_prep_req(ubq, rq);
+	if (res != BLK_STS_OK)
+		return res;
+
 	/*
 	 * ->canceling has to be handled after ->force_abort and ->fail_io
 	 * is dealt with, otherwise this request may not be failed in case
···
 		return BLK_STS_OK;
 	}
 
-	blk_mq_start_request(bd->rq);
 	ublk_queue_cmd(ubq, rq);
-
 	return BLK_STS_OK;
+}
+
+static void ublk_queue_rqs(struct rq_list *rqlist)
+{
+	struct rq_list requeue_list = { };
+	struct rq_list submit_list = { };
+	struct ublk_queue *ubq = NULL;
+	struct request *req;
+
+	while ((req = rq_list_pop(rqlist))) {
+		struct ublk_queue *this_q = req->mq_hctx->driver_data;
+
+		if (ubq && ubq != this_q && !rq_list_empty(&submit_list))
+			ublk_queue_cmd_list(ubq, &submit_list);
+		ubq = this_q;
+
+		if (ublk_prep_req(ubq, req) == BLK_STS_OK)
+			rq_list_add_tail(&submit_list, req);
+		else
+			rq_list_add_tail(&requeue_list, req);
+	}
+
+	if (ubq && !rq_list_empty(&submit_list))
+		ublk_queue_cmd_list(ubq, &submit_list);
+	*rqlist = requeue_list;
 }
 
 static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
···
 
 static const struct blk_mq_ops ublk_mq_ops = {
 	.queue_rq	= ublk_queue_rq,
+	.queue_rqs	= ublk_queue_rqs,
 	.init_hctx	= ublk_init_hctx,
 	.timeout	= ublk_timeout,
 };
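
For context on the ->queue_rqs() contract: the driver consumes requests from
*rqlist, and any requests it leaves behind (here, requeue_list) are issued
individually by the block layer, ending up in ->queue_rq(). Below is a rough
userspace analogue of the per-queue batching loop in ublk_queue_rqs() above;
all types and helper names are invented for this sketch and are not kernel API.

#include <stdio.h>

/* Invented stand-ins for struct request and struct rq_list; the real
 * rq_list helpers live in the kernel's blk-mq headers. */
struct req { int qid; int tag; struct req *next; };
struct list { struct req *head; struct req **tailp; };

static void list_init(struct list *l)
{
	l->head = NULL;
	l->tailp = &l->head;
}

static void list_add_tail(struct list *l, struct req *r)
{
	r->next = NULL;
	*l->tailp = r;
	l->tailp = &r->next;
}

static struct req *list_pop(struct list *l)
{
	struct req *r = l->head;

	if (r) {
		l->head = r->next;
		if (!l->head)
			l->tailp = &l->head;
	}
	return r;
}

/* Stands in for ublk_queue_cmd_list(): one notification per batch. */
static void flush_batch(int qid, struct list *batch)
{
	struct req *r;

	while ((r = list_pop(batch)))
		printf("queue %d: dispatch tag %d\n", qid, r->tag);
}

int main(void)
{
	struct req reqs[4] = {
		{ .qid = 0, .tag = 0 }, { .qid = 0, .tag = 1 },
		{ .qid = 1, .tag = 2 }, { .qid = 1, .tag = 3 },
	};
	struct list in, batch;
	struct req *r;
	int cur = -1;
	int i;

	list_init(&in);
	list_init(&batch);
	for (i = 0; i < 4; i++)
		list_add_tail(&in, &reqs[i]);

	/* Flush the pending batch whenever the queue changes, mirroring
	 * the "ubq != this_q" check in ublk_queue_rqs() above. */
	while ((r = list_pop(&in))) {
		if (cur >= 0 && r->qid != cur)
			flush_batch(cur, &batch);
		cur = r->qid;
		list_add_tail(&batch, r);
	}
	if (cur >= 0)
		flush_batch(cur, &batch);
	return 0;
}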