Merge tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
"This one looks a bit bigger than it is, but that's mainly because 2/3
of it is enabling IORING_OP_CLOSE to close direct file descriptors.

We've had a few folks using them and finding it confusing that the way
to close them is through a file update with fd -1; this just brings
API symmetry for direct descriptors. Hence I think we should just do
this now and have a better API for the 5.15 release. There's some room for
de-duplicating the close code, but we're leaving that for the next
merge window.
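
As a rough userspace sketch of that symmetry (assuming liburing for the
ring plumbing; close_direct() is a hypothetical helper, not an existing
API), closing a direct descriptor becomes an ordinary close request with
the fixed-file slot carried in sqe->file_index. Per the io_close_prep()
change in the diff below, fd must stay 0 and the slot is encoded as
slot + 1 so that 0 still means "no slot given":

    #include <liburing.h>
    #include <string.h>

    /* Close the direct (fixed) file descriptor installed in 'slot'. */
    static int close_direct(struct io_uring *ring, unsigned slot)
    {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            struct io_uring_cqe *cqe;
            int ret;

            if (!sqe)
                    return -EAGAIN;
            memset(sqe, 0, sizeof(*sqe));
            sqe->opcode = IORING_OP_CLOSE;
            sqe->fd = 0;                    /* must be 0 when closing by slot */
            sqe->file_index = slot + 1;     /* 0 would mean "close a normal fd" */

            io_uring_submit(ring);
            ret = io_uring_wait_cqe(ring, &cqe);
            if (ret < 0)
                    return ret;
            ret = cqe->res;                 /* 0 on success, -errno on failure */
            io_uring_cqe_seen(ring, cqe);
            return ret;
    }

The pre-existing route is still there: an IORING_OP_FILES_UPDATE (or
io_uring_register_files_update() from liburing) that points fd -1 at the
slot. The new opcode path just makes close look the same for direct and
regular descriptors.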

Outside of that, just small fixes:

- Poll race fixes (Hao)

- io-wq core dump exit fix (me)

- Reschedule around potentially intensive tctx and buffer iterators
on teardown (me); see the loop sketch right after this list

- Fix for always ending up punting files update to io-wq (me)

- Put the provided buffer meta data under memcg accounting (me)

- Tweak for io_write(), removing dead code that was added with the
iterator changes in this release (Pavel)"
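
The teardown reschedule item above amounts to adding a voluntary
preemption point per iteration, so a ring holding a huge number of
provided buffers (or tctx nodes) cannot monopolise the CPU while being
torn down. The shape of it, lifted from the io_uring.c diff below and
shown out of context:

    xa_for_each(&ctx->io_buffers, index, buf) {
            __io_remove_buffers(ctx, buf, index, -1U);
            cond_resched();         /* let the scheduler run between entries */
    }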

* tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
io_uring: make OP_CLOSE consistent with direct open
io_uring: kill extra checks in io_write()
io_uring: don't punt files update to io-wq unconditionally
io_uring: put provided buffer meta data under memcg accounting
io_uring: allow conditional reschedule for intensive iterators
io_uring: fix potential req refcount underflow
io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow
io_uring: fix race between poll completion and cancel_hash insertion
io-wq: ensure we exit if thread group is exiting

 fs/io-wq.c    |  +2  -1
 fs/io_uring.c | +70 -15
 2 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -584,7 +584,8 @@
 
                         if (!get_signal(&ksig))
                                 continue;
-                        if (fatal_signal_pending(current))
+                        if (fatal_signal_pending(current) ||
+                            signal_group_exit(current->signal))
                                 break;
                         continue;
                 }

diff --git a/fs/io_uring.c b/fs/io_uring.c
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -502,6 +502,7 @@
 struct io_close {
         struct file *file;
         int fd;
+        u32 file_slot;
 };
 
 struct io_timeout_data {
@@ -1099,6 +1098,8 @@
 
 static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                  unsigned int issue_flags, u32 slot_index);
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 
 static struct kmem_cache *req_cachep;
@@ -3608,7 +3605,6 @@
                 iov_iter_save_state(iter, state);
         }
         req->result = iov_iter_count(iter);
-        ret2 = 0;
 
         /* Ensure we clear previously set non-block flag */
         if (!force_nonblock)
@@ -3672,8 +3670,6 @@
         } else {
 copy_iov:
                 iov_iter_restore(iter, state);
-                if (ret2 > 0)
-                        iov_iter_advance(iter, ret2);
                 ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
                 return ret ?: -EAGAIN;
         }
@@ -4387,7 +4387,7 @@
         int i, bid = pbuf->bid;
 
         for (i = 0; i < pbuf->nbufs; i++) {
-                buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+                buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
                 if (!buf)
                         break;
 
@@ -4594,11 +4594,15 @@
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
         if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-            sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+            sqe->rw_flags || sqe->buf_index)
                 return -EINVAL;
         if (req->flags & REQ_F_FIXED_FILE)
                 return -EBADF;
 
         req->close.fd = READ_ONCE(sqe->fd);
+        req->close.file_slot = READ_ONCE(sqe->file_index);
+        if (req->close.file_slot && req->close.fd)
+                return -EINVAL;
+
         return 0;
 }
@@ -4614,6 +4610,11 @@
         struct fdtable *fdt;
         struct file *file = NULL;
         int ret = -EBADF;
+
+        if (req->close.file_slot) {
+                ret = io_close_fixed(req, issue_flags);
+                goto err;
+        }
 
         spin_lock(&files->file_lock);
         fdt = files_fdtable(files);
@@ -5347,7 +5338,7 @@
         if (req->poll.events & EPOLLONESHOT)
                 flags = 0;
         if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-                req->poll.done = true;
+                req->poll.events |= EPOLLONESHOT;
                 flags = 0;
         }
         if (flags & IORING_CQE_F_MORE)
@@ -5376,10 +5367,15 @@
         } else {
                 bool done;
 
+                if (req->poll.done) {
+                        spin_unlock(&ctx->completion_lock);
+                        return;
+                }
                 done = __io_poll_complete(req, req->result);
                 if (done) {
                         io_poll_remove_double(req);
                         hash_del(&req->hash_node);
+                        req->poll.done = true;
                 } else {
                         req->result = 0;
                         add_wait_queue(req->poll.head, &req->poll.wait);
@@ -5522,6 +5508,7 @@
 
         hash_del(&req->hash_node);
         io_poll_remove_double(req);
+        apoll->poll.done = true;
         spin_unlock(&ctx->completion_lock);
 
         if (!READ_ONCE(apoll->poll.canceled))
@@ -5843,6 +5828,7 @@
         struct io_ring_ctx *ctx = req->ctx;
         struct io_poll_table ipt;
         __poll_t mask;
+        bool done;
 
         ipt.pt._qproc = io_poll_queue_proc;
 
@@ -5852,13 +5836,13 @@
 
         if (mask) { /* no async, we'd stolen it */
                 ipt.error = 0;
-                io_poll_complete(req, mask);
+                done = io_poll_complete(req, mask);
         }
         spin_unlock(&ctx->completion_lock);
 
         if (mask) {
                 io_cqring_ev_posted(ctx);
-                if (poll->events & EPOLLONESHOT)
+                if (done)
                         io_put_req(req);
         }
         return ipt.error;
@@ -6349,19 +6333,16 @@
         struct io_uring_rsrc_update2 up;
         int ret;
 
-        if (issue_flags & IO_URING_F_NONBLOCK)
-                return -EAGAIN;
-
         up.offset = req->rsrc_update.offset;
         up.data = req->rsrc_update.arg;
         up.nr = 0;
         up.tags = 0;
         up.resv = 0;
 
-        mutex_lock(&ctx->uring_lock);
+        io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
         ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
                                         &up, req->rsrc_update.nr_args);
-        mutex_unlock(&ctx->uring_lock);
+        io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
 
         if (ret < 0)
                 req_set_fail(req);
@@ -8413,6 +8400,44 @@
         return ret;
 }
 
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+        unsigned int offset = req->close.file_slot - 1;
+        struct io_ring_ctx *ctx = req->ctx;
+        struct io_fixed_file *file_slot;
+        struct file *file;
+        int ret, i;
+
+        io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+        ret = -ENXIO;
+        if (unlikely(!ctx->file_data))
+                goto out;
+        ret = -EINVAL;
+        if (offset >= ctx->nr_user_files)
+                goto out;
+        ret = io_rsrc_node_switch_start(ctx);
+        if (ret)
+                goto out;
+
+        i = array_index_nospec(offset, ctx->nr_user_files);
+        file_slot = io_fixed_file_slot(&ctx->file_table, i);
+        ret = -EBADF;
+        if (!file_slot->file_ptr)
+                goto out;
+
+        file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+        ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+        if (ret)
+                goto out;
+
+        file_slot->file_ptr = 0;
+        io_rsrc_node_switch(ctx, ctx->file_data);
+        ret = 0;
+out:
+        io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+        return ret;
+}
+
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                  struct io_uring_rsrc_update2 *up,
                                  unsigned nr_args)
@@ -9217,8 +9166,10 @@
         struct io_buffer *buf;
         unsigned long index;
 
-        xa_for_each(&ctx->io_buffers, index, buf)
+        xa_for_each(&ctx->io_buffers, index, buf) {
                 __io_remove_buffers(ctx, buf, index, -1U);
+                cond_resched();
+        }
 }
 
 static void io_req_cache_free(struct list_head *list)
@@ -9718,8 +9665,10 @@
         struct io_tctx_node *node;
         unsigned long index;
 
-        xa_for_each(&tctx->xa, index, node)
+        xa_for_each(&tctx->xa, index, node) {
                 io_uring_del_tctx_node(index);
+                cond_resched();
+        }
         if (wq) {
                 /*
                  * Must be after io_uring_del_task_file() (removes nodes under