Merge tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

- Fix for a regression introduced in the io-wq worker creation logic.

- Remove the allocation cache for msg_ring io_kiocb allocations. I
  have a suspicion that there's a bug there, and since we just fixed
  one in that area, let's just yank the use of that cache entirely.
  It's not that important, and removing it kills some code.

- Treat a closed ring like a task that is exiting: any request whose
  task_work triggers after that point just gets canceled. This doesn't
  fix any known issue, but it does give tasks a guarantee they can
  rely on.

- Fix a bug in the network zero-copy notification mechanism, where
  the check that consecutive notifications share the same tctx/ctx
  didn't actually compare against the previous notification.

* tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux:
io_uring: fix incorrect io_kiocb reference in io_link_skb
io_uring/msg_ring: kill alloc_cache for io_kiocb allocations
io_uring: include dying ring in task_work "should cancel" state
io_uring/io-wq: fix `max_workers` breakage and `nr_workers` underflow

Diffstat: 9 files changed, 15 insertions(+), 40 deletions(-)

include/linux/io_uring_types.h (+0 -3)

···
 	struct list_head	defer_list;
 	unsigned		nr_drained;
 
-	struct io_alloc_cache	msg_cache;
-	spinlock_t		msg_lock;
-
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */

io_uring/io-wq.c (+3 -3)

···
 	struct io_wq *wq;
 
 	struct io_wq_acct *acct;
-	bool do_create = false;
+	bool activated_free_worker, do_create = false;
 
 	worker = container_of(cb, struct io_worker, create_work);
 	wq = worker->wq;
 	acct = worker->acct;
 
 	rcu_read_lock();
-	do_create = !io_acct_activate_free_worker(acct);
+	activated_free_worker = io_acct_activate_free_worker(acct);
 	rcu_read_unlock();
-	if (!do_create)
+	if (activated_free_worker)
 		goto no_need_create;
 
 	raw_spin_lock(&acct->workers_lock);

io_uring/io_uring.c (+4 -6)

···
 	io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
 	io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
 	io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free);
-	io_alloc_cache_free(&ctx->msg_cache, kfree);
 	io_futex_cache_free(ctx);
 	io_rsrc_cache_free(ctx);
 }
···
 	ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX,
 				   sizeof(struct io_async_cmd),
 				   sizeof(struct io_async_cmd));
-	spin_lock_init(&ctx->msg_lock);
-	ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
-				   sizeof(struct io_kiocb), 0);
 	ret |= io_futex_cache_init(ctx);
 	ret |= io_rsrc_cache_init(ctx);
 	if (ret)
···
 
 void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
 {
-	io_tw_lock(req->ctx, tw);
-	if (unlikely(io_should_terminate_tw()))
+	struct io_ring_ctx *ctx = req->ctx;
+
+	io_tw_lock(ctx, tw);
+	if (unlikely(io_should_terminate_tw(ctx)))
 		io_req_defer_failed(req, -EFAULT);
 	else if (req->flags & REQ_F_FORCE_ASYNC)
 		io_queue_iowq(req);

io_uring/io_uring.h (+2 -2)

···
  * 2) PF_KTHREAD is set, in which case the invoker of the task_work is
  * our fallback task_work.
  */
-static inline bool io_should_terminate_tw(void)
+static inline bool io_should_terminate_tw(struct io_ring_ctx *ctx)
 {
-	return current->flags & (PF_KTHREAD | PF_EXITING);
+	return (current->flags & (PF_KTHREAD | PF_EXITING)) || percpu_ref_is_dying(&ctx->refs);
 }
 
 static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
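
For illustration only, here is a tiny self-contained C model of the contract
this change establishes for task_work handlers. The struct and function names
below are simplified userspace stand-ins, not the kernel's types: a handler
bails out (and the request is canceled) either because the task is exiting or,
after this change, because the ring itself is already dying.

#include <errno.h>
#include <stdbool.h>

struct model_ring_ctx {
	bool refs_dying;	/* stands in for percpu_ref_is_dying(&ctx->refs) */
};

struct model_task {
	bool exiting;		/* stands in for PF_EXITING / the PF_KTHREAD fallback */
};

static bool model_should_terminate_tw(const struct model_task *t,
				      const struct model_ring_ctx *ctx)
{
	/* same shape as the updated io_should_terminate_tw() above */
	return t->exiting || ctx->refs_dying;
}

/* Caller pattern mirroring io_poll_check_events() in the poll.c hunk below */
static int model_run_tw(const struct model_task *t, struct model_ring_ctx *ctx)
{
	if (model_should_terminate_tw(t, ctx))
		return -ECANCELED;	/* request is canceled, not executed */
	/* ... normal task_work processing would happen here ... */
	return 0;
}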

io_uring/msg_ring.c (+2 -22)

···
 #include "io_uring.h"
 #include "rsrc.h"
 #include "filetable.h"
-#include "alloc_cache.h"
 #include "msg_ring.h"
 
 /* All valid masks for MSG_RING */
···
 	struct io_ring_ctx *ctx = req->ctx;
 
 	io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
-	if (spin_trylock(&ctx->msg_lock)) {
-		if (io_alloc_cache_put(&ctx->msg_cache, req))
-			req = NULL;
-		spin_unlock(&ctx->msg_lock);
-	}
-	if (req)
-		kfree_rcu(req, rcu_head);
+	kfree_rcu(req, rcu_head);
 	percpu_ref_put(&ctx->refs);
 }
···
 	return 0;
 }
 
-static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
-{
-	struct io_kiocb *req = NULL;
-
-	if (spin_trylock(&ctx->msg_lock)) {
-		req = io_alloc_cache_get(&ctx->msg_cache);
-		spin_unlock(&ctx->msg_lock);
-		if (req)
-			return req;
-	}
-	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
-}
-
 static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
 			      struct io_msg *msg)
 {
 	struct io_kiocb *target;
 	u32 flags = 0;
 
-	target = io_msg_get_kiocb(target_ctx);
+	target = kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!target))
 		return -ENOMEM;
 
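
For orientation, a minimal userspace sketch of what MSG_RING does, assuming
liburing 2.2 or newer for io_uring_prep_msg_ring() and with error handling
mostly trimmed: one ring posts a CQE into another. The io_kiocb allocation
changed above sits on the kernel's remote-posting path (io_msg_data_remote())
and is invisible to userspace.

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring src, dst;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(8, &src, 0) || io_uring_queue_init(8, &dst, 0))
		return 1;

	/* Queue a MSG_RING request on 'src' targeting the fd of 'dst' */
	sqe = io_uring_get_sqe(&src);
	io_uring_prep_msg_ring(sqe, dst.ring_fd, /*len=*/42, /*data=*/0xcafe, 0);
	io_uring_submit(&src);

	/* The target ring sees a CQE with res == 42 and user_data == 0xcafe */
	if (!io_uring_wait_cqe(&dst, &cqe)) {
		printf("dst cqe: res=%d user_data=0x%llx\n",
		       cqe->res, (unsigned long long)cqe->user_data);
		io_uring_cqe_seen(&dst, cqe);
	}

	io_uring_queue_exit(&src);
	io_uring_queue_exit(&dst);
	return 0;
}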

io_uring/notif.c (+1 -1)

···
 		return -EEXIST;
 
 	prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
-	prev_notif = cmd_to_io_kiocb(nd);
+	prev_notif = cmd_to_io_kiocb(prev_nd);
 
 	/* make sure all noifications can be finished in the same task_work */
 	if (unlikely(notif->ctx != prev_notif->ctx ||
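
For context on the notification mechanism this touches, here is a hedged
userspace sketch (assuming liburing 2.3+ for io_uring_prep_send_zc() and an
already-connected socket 'sockfd') of how zero-copy send completions arrive:
the send result CQE carries IORING_CQE_F_MORE, and the buffer may only be
reused once the follow-up CQE with IORING_CQE_F_NOTIF shows up.

#include <liburing.h>

static int send_zc_and_wait(struct io_uring *ring, int sockfd,
			    const void *buf, size_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int done = 0, res = 0;

	if (!sqe)
		return -1;
	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
	sqe->user_data = 1;
	io_uring_submit(ring);

	while (!done) {
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		if (cqe->flags & IORING_CQE_F_NOTIF) {
			done = 1;	/* buffer can be reused from here on */
		} else {
			res = cqe->res;	/* bytes sent, or -errno */
			if (!(cqe->flags & IORING_CQE_F_MORE))
				done = 1;	/* no notification will follow */
		}
		io_uring_cqe_seen(ring, cqe);
	}
	return res;
}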

io_uring/poll.c (+1 -1)

···
 {
 	int v;
 
-	if (unlikely(io_should_terminate_tw()))
+	if (unlikely(io_should_terminate_tw(req->ctx)))
 		return -ECANCELED;
 
 	do {

io_uring/timeout.c (+1 -1)

···
 	int ret;
 
 	if (prev) {
-		if (!io_should_terminate_tw()) {
+		if (!io_should_terminate_tw(req->ctx)) {
 			struct io_cancel_data cd = {
 				.ctx		= req->ctx,
 				.data		= prev->cqe.user_data,

io_uring/uring_cmd.c (+1 -1)

···
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	unsigned int flags = IO_URING_F_COMPLETE_DEFER;
 
-	if (io_should_terminate_tw())
+	if (io_should_terminate_tw(req->ctx))
 		flags |= IO_URING_F_TASK_DEAD;
 
 	/* task_work executor checks the deffered list completion */