Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'io_uring-6.15-20250509' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

- Fix for linked timeouts arming and firing with respect to prep and
issue of the request being managed by the linked timeout

- Fix for a CQE ordering issue between requests with multishot and
using the same buffer group. This is a simplified version for this
release and for stable; it'll be improved for v6.16

- Tweak the SQPOLL submit batch size. A previous commit made SQPOLL
manage its own task_work and chose a tiny batch size; bump it from 8
to 32 to fix a performance regression caused by that change

* tag 'io_uring-6.15-20250509' of git://git.kernel.dk/linux:
io_uring/sqpoll: Increase task_work submission batch size
io_uring: ensure deferred completions are flushed for multishot
io_uring: always arm linked timeouts prior to issue

+24 -36
+23 -35
io_uring/io_uring.c
··· 448 448 return req->link; 449 449 } 450 450 451 - static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) 452 - { 453 - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) 454 - return NULL; 455 - return __io_prep_linked_timeout(req); 456 - } 457 - 458 - static noinline void __io_arm_ltimeout(struct io_kiocb *req) 459 - { 460 - io_queue_linked_timeout(__io_prep_linked_timeout(req)); 461 - } 462 - 463 - static inline void io_arm_ltimeout(struct io_kiocb *req) 464 - { 465 - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) 466 - __io_arm_ltimeout(req); 467 - } 468 - 469 451 static void io_prep_async_work(struct io_kiocb *req) 470 452 { 471 453 const struct io_issue_def *def = &io_issue_defs[req->opcode]; ··· 500 518 501 519 static void io_queue_iowq(struct io_kiocb *req) 502 520 { 503 - struct io_kiocb *link = io_prep_linked_timeout(req); 504 521 struct io_uring_task *tctx = req->tctx; 505 522 506 523 BUG_ON(!tctx); ··· 524 543 525 544 trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); 526 545 io_wq_enqueue(tctx->io_wq, &req->work); 527 - if (link) 528 - io_queue_linked_timeout(link); 529 546 } 530 547 531 548 static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw) ··· 847 868 { 848 869 struct io_ring_ctx *ctx = req->ctx; 849 870 bool posted; 871 + 872 + /* 873 + * If multishot has already posted deferred completions, ensure that 874 + * those are flushed first before posting this one. If not, CQEs 875 + * could get reordered. 
876 + */ 877 + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) 878 + __io_submit_flush_completions(ctx); 850 879 851 880 lockdep_assert(!io_wq_current_is_worker()); 852 881 lockdep_assert_held(&ctx->uring_lock); ··· 1711 1724 return !!req->file; 1712 1725 } 1713 1726 1727 + #define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) 1728 + 1714 1729 static inline int __io_issue_sqe(struct io_kiocb *req, 1715 1730 unsigned int issue_flags, 1716 1731 const struct io_issue_def *def) 1717 1732 { 1718 1733 const struct cred *creds = NULL; 1734 + struct io_kiocb *link = NULL; 1719 1735 int ret; 1720 1736 1721 - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) 1722 - creds = override_creds(req->creds); 1737 + if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { 1738 + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) 1739 + creds = override_creds(req->creds); 1740 + if (req->flags & REQ_F_ARM_LTIMEOUT) 1741 + link = __io_prep_linked_timeout(req); 1742 + } 1723 1743 1724 1744 if (!def->audit_skip) 1725 1745 audit_uring_entry(req->opcode); ··· 1736 1742 if (!def->audit_skip) 1737 1743 audit_uring_exit(!ret, ret); 1738 1744 1739 - if (creds) 1740 - revert_creds(creds); 1745 + if (unlikely(creds || link)) { 1746 + if (creds) 1747 + revert_creds(creds); 1748 + if (link) 1749 + io_queue_linked_timeout(link); 1750 + } 1741 1751 1742 1752 return ret; 1743 1753 } ··· 1767 1769 1768 1770 if (ret == IOU_ISSUE_SKIP_COMPLETE) { 1769 1771 ret = 0; 1770 - io_arm_ltimeout(req); 1771 1772 1772 1773 /* If the op doesn't have a file, we're not polling for it */ 1773 1774 if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) ··· 1820 1823 __io_req_set_refcount(req, 2); 1821 1824 else 1822 1825 req_ref_get(req); 1823 - 1824 - io_arm_ltimeout(req); 1825 1826 1826 1827 /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ 1827 1828 if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { ··· 1936 1941 static void 
io_queue_async(struct io_kiocb *req, int ret) 1937 1942 __must_hold(&req->ctx->uring_lock) 1938 1943 { 1939 - struct io_kiocb *linked_timeout; 1940 - 1941 1944 if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { 1942 1945 io_req_defer_failed(req, ret); 1943 1946 return; 1944 1947 } 1945 - 1946 - linked_timeout = io_prep_linked_timeout(req); 1947 1948 1948 1949 switch (io_arm_poll_handler(req, 0)) { 1949 1950 case IO_APOLL_READY: ··· 1953 1962 case IO_APOLL_OK: 1954 1963 break; 1955 1964 } 1956 - 1957 - if (linked_timeout) 1958 - io_queue_linked_timeout(linked_timeout); 1959 1965 } 1960 1966 1961 1967 static inline void io_queue_sqe(struct io_kiocb *req)
+1 -1
io_uring/sqpoll.c
··· 20 20 #include "sqpoll.h" 21 21 22 22 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 23 - #define IORING_TW_CAP_ENTRIES_VALUE 8 23 + #define IORING_TW_CAP_ENTRIES_VALUE 32 24 24 25 25 enum { 26 26 IO_SQ_THREAD_SHOULD_STOP = 0,