Merge tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

- Fix for a regression introduced in the io-wq worker creation logic.

- Remove the allocation cache for the msg_ring io_kiocb allocations. I
have a suspicion that there's a bug there, and since we just fixed
one in that area, let's just yank the use of that cache entirely.
It's not that important, and it kills some code.

- Treat a closed (dying) ring like a task that is exiting, so that any
  request whose task_work runs after that point just gets canceled.
  Doesn't fix any known issue, beyond giving tasks a guarantee they can
  rely on.

- Fix a bug in the network zero-copy notification linking, where the
  check for matching tctx/ctx between notifications looked up the
  current notification again instead of the previous one, so it never
  actually compared against the previous notification.

* tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux:
io_uring: fix incorrect io_kiocb reference in io_link_skb
io_uring/msg_ring: kill alloc_cache for io_kiocb allocations
io_uring: include dying ring in task_work "should cancel" state
io_uring/io-wq: fix `max_workers` breakage and `nr_workers` underflow

 include/linux/io_uring_types.h |  3 ---
 io_uring/io-wq.c               |  6 +++---
 io_uring/io_uring.c            | 10 ++++------
 io_uring/io_uring.h            |  4 ++--
 io_uring/msg_ring.c            | 24 ++----------------------
 io_uring/notif.c               |  2 +-
 io_uring/poll.c                |  2 +-
 io_uring/timeout.c             |  2 +-
 io_uring/uring_cmd.c           |  2 +-
 9 files changed, 15 insertions(+), 40 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -420,9 +420,6 @@
 	struct list_head	defer_list;
 	unsigned		nr_drained;
 
-	struct io_alloc_cache	msg_cache;
-	spinlock_t		msg_lock;
-
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -352,16 +352,16 @@
 	struct io_wq *wq;
 
 	struct io_wq_acct *acct;
-	bool do_create = false;
+	bool activated_free_worker, do_create = false;
 
 	worker = container_of(cb, struct io_worker, create_work);
 	wq = worker->wq;
 	acct = worker->acct;
 
 	rcu_read_lock();
-	do_create = !io_acct_activate_free_worker(acct);
+	activated_free_worker = io_acct_activate_free_worker(acct);
 	rcu_read_unlock();
-	if (!do_create)
+	if (activated_free_worker)
 		goto no_need_create;
 
 	raw_spin_lock(&acct->workers_lock);
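
For context, the worker-creation accounting fixed above is what enforces the
io-wq worker limits that user space can tune through a ring. The sketch below
is not part of the patch; it is a minimal user-space example, assuming
liburing 2.2+ and a kernel with IORING_REGISTER_IOWQ_MAX_WORKERS (5.15+), of
querying and capping those limits:

/* query and set the bounded/unbounded io-wq worker limits for a ring */
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	unsigned int vals[2] = { 0, 0 };	/* 0 == query, don't change */
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	/* passing zeroes returns the current bounded/unbounded limits */
	ret = io_uring_register_iowq_max_workers(&ring, vals);
	if (!ret)
		printf("bounded=%u unbounded=%u\n", vals[0], vals[1]);

	/* cap both classes of io-wq workers at 4 for this ring */
	vals[0] = vals[1] = 4;
	ret = io_uring_register_iowq_max_workers(&ring, vals);
	if (ret)
		fprintf(stderr, "set max workers: %d\n", ret);

	io_uring_queue_exit(&ring);
	return 0;
}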
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -290,7 +290,6 @@
 	io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
 	io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
 	io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free);
-	io_alloc_cache_free(&ctx->msg_cache, kfree);
 	io_futex_cache_free(ctx);
 	io_rsrc_cache_free(ctx);
 }
@@ -336,9 +335,6 @@
 	ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX,
 				   sizeof(struct io_async_cmd),
 				   sizeof(struct io_async_cmd));
-	spin_lock_init(&ctx->msg_lock);
-	ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
-				   sizeof(struct io_kiocb), 0);
 	ret |= io_futex_cache_init(ctx);
 	ret |= io_rsrc_cache_init(ctx);
 	if (ret)
@@ -1402,8 +1398,10 @@
 
 void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
 {
-	io_tw_lock(req->ctx, tw);
-	if (unlikely(io_should_terminate_tw()))
+	struct io_ring_ctx *ctx = req->ctx;
+
+	io_tw_lock(ctx, tw);
+	if (unlikely(io_should_terminate_tw(ctx)))
 		io_req_defer_failed(req, -EFAULT);
 	else if (req->flags & REQ_F_FORCE_ASYNC)
 		io_queue_iowq(req);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -476,9 +476,9 @@
  * 2) PF_KTHREAD is set, in which case the invoker of the task_work is
  *    our fallback task_work.
  */
-static inline bool io_should_terminate_tw(void)
+static inline bool io_should_terminate_tw(struct io_ring_ctx *ctx)
 {
-	return current->flags & (PF_KTHREAD | PF_EXITING);
+	return (current->flags & (PF_KTHREAD | PF_EXITING)) || percpu_ref_is_dying(&ctx->refs);
 }
 
 static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -11,7 +11,6 @@
 #include "io_uring.h"
 #include "rsrc.h"
 #include "filetable.h"
-#include "alloc_cache.h"
 #include "msg_ring.h"
 
 /* All valid masks for MSG_RING */
@@ -75,12 +74,6 @@
 	struct io_ring_ctx *ctx = req->ctx;
 
 	io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
-	if (spin_trylock(&ctx->msg_lock)) {
-		if (io_alloc_cache_put(&ctx->msg_cache, req))
-			req = NULL;
-		spin_unlock(&ctx->msg_lock);
-	}
-	if (req)
-		kfree_rcu(req, rcu_head);
+	kfree_rcu(req, rcu_head);
 	percpu_ref_put(&ctx->refs);
 }
@@ -97,26 +90,13 @@
 	return 0;
 }
 
-static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
-{
-	struct io_kiocb *req = NULL;
-
-	if (spin_trylock(&ctx->msg_lock)) {
-		req = io_alloc_cache_get(&ctx->msg_cache);
-		spin_unlock(&ctx->msg_lock);
-		if (req)
-			return req;
-	}
-	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
-}
-
 static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
 			      struct io_msg *msg)
 {
 	struct io_kiocb *target;
 	u32 flags = 0;
 
-	target = io_msg_get_kiocb(target_ctx);
+	target = kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!target))
 		return -ENOMEM;
 
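
For context on what the removed cache was backing: MSG_RING lets one ring post
a completion directly into another ring, and each message needs a temporary
io_kiocb on the target side, which after this change comes straight from the
slab and is freed with kfree_rcu(). The sketch below is not part of the patch;
it is a minimal user-space example of the mechanism, assuming liburing 2.2+
and a kernel with IORING_OP_MSG_RING (5.18+):

/* post a CQE from one ring into another via MSG_RING */
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring src, dst;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(8, &src, 0) || io_uring_queue_init(8, &dst, 0))
		return 1;

	/* post a CQE with user_data 0xcafe and res 42 into the target ring */
	sqe = io_uring_get_sqe(&src);
	io_uring_prep_msg_ring(sqe, dst.ring_fd, 42, 0xcafe, 0);
	io_uring_submit(&src);

	/*
	 * The message shows up as a completion on the *other* ring; the
	 * source ring also gets its own CQE for the MSG_RING request, which
	 * this sketch does not bother reaping.
	 */
	if (!io_uring_wait_cqe(&dst, &cqe)) {
		printf("user_data=0x%llx res=%d\n",
		       (unsigned long long) cqe->user_data, cqe->res);
		io_uring_cqe_seen(&dst, cqe);
	}

	io_uring_queue_exit(&src);
	io_uring_queue_exit(&dst);
	return 0;
}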
diff --git a/io_uring/notif.c b/io_uring/notif.c
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -85,7 +85,7 @@
 		return -EEXIST;
 
 	prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
-	prev_notif = cmd_to_io_kiocb(nd);
+	prev_notif = cmd_to_io_kiocb(prev_nd);
 
 	/* make sure all noifications can be finished in the same task_work */
 	if (unlikely(notif->ctx != prev_notif->ctx ||
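
For context, the notifications being linked here are the second CQEs that a
zero-copy send posts once the kernel stops referencing the user buffer;
io_link_skb() chains skbs onto an existing notification, and the check fixed
above ensures chained notifications share the same ring and task so they can
complete in the same task_work. The sketch below is not part of the patch; it
is a minimal user-space example of that two-CQE flow, assuming liburing 2.3+
and a kernel with IORING_OP_SEND_ZC (6.0+):

/* observe the send completion and the later zero-copy notification */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <liburing.h>

int main(void)
{
	static char buf[4096] = "zero-copy payload";
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	socklen_t alen = sizeof(addr);
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int rx, tx, i;

	/* pair of UDP sockets on the loopback address */
	rx = socket(AF_INET, SOCK_DGRAM, 0);
	tx = socket(AF_INET, SOCK_DGRAM, 0);
	if (rx < 0 || tx < 0 || bind(rx, (void *) &addr, alen) ||
	    getsockname(rx, (void *) &addr, &alen) ||
	    connect(tx, (void *) &addr, alen))
		return 1;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_send_zc(sqe, tx, buf, sizeof(buf), 0, 0);
	io_uring_submit(&ring);

	/*
	 * Two completions: the send result (flagged IORING_CQE_F_MORE), then
	 * a notification (flagged IORING_CQE_F_NOTIF) once the pages holding
	 * the data may be reused.
	 */
	for (i = 0; i < 2; i++) {
		if (io_uring_wait_cqe(&ring, &cqe))
			break;
		printf("res=%d more=%d notif=%d\n", cqe->res,
		       !!(cqe->flags & IORING_CQE_F_MORE),
		       !!(cqe->flags & IORING_CQE_F_NOTIF));
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}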
diff --git a/io_uring/poll.c b/io_uring/poll.c
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -224,7 +224,7 @@
 {
 	int v;
 
-	if (unlikely(io_should_terminate_tw()))
+	if (unlikely(io_should_terminate_tw(req->ctx)))
 		return -ECANCELED;
 
 	do {
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -324,7 +324,7 @@
 	int ret;
 
 	if (prev) {
-		if (!io_should_terminate_tw()) {
+		if (!io_should_terminate_tw(req->ctx)) {
 			struct io_cancel_data cd = {
 				.ctx = req->ctx,
 				.data = prev->cqe.user_data,
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -118,7 +118,7 @@
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	unsigned int flags = IO_URING_F_COMPLETE_DEFER;
 
-	if (io_should_terminate_tw())
+	if (io_should_terminate_tw(req->ctx))
 		flags |= IO_URING_F_TASK_DEAD;
 
 	/* task_work executor checks the deffered list completion */