Merge tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
"This one looks a bit bigger than it is, but that's mainly because 2/3
of it is enabling IORING_OP_CLOSE to close direct file descriptors.

We've had a few folks using direct descriptors and finding it confusing
that the way to close one is through a files update with -1; this just
brings API symmetry for direct descriptors. Hence I think we should do
this now and have a better API for the 5.15 release. There's some room
for de-duplicating the close code, but we're leaving that for the next
merge window.

Outside of that, just small fixes:

- Poll race fixes (Hao)

- io-wq core dump exit fix (me)

- Reschedule around potentially intensive tctx and buffer iterators
on teardown (me)

- Fix for always ending up punting files update to io-wq (me)

- Put the provided buffer metadata under memcg accounting (me)

- Tweak for io_write(), removing dead code that was added with the
iterator changes in this release (Pavel)"

* tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
io_uring: make OP_CLOSE consistent with direct open
io_uring: kill extra checks in io_write()
io_uring: don't punt files update to io-wq unconditionally
io_uring: put provided buffer meta data under memcg accounting
io_uring: allow conditional reschedule for intensive iterators
io_uring: fix potential req refcount underflow
io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow
io_uring: fix race between poll completion and cancel_hash insertion
io-wq: ensure we exit if thread group is exiting
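
For illustration, a minimal userspace sketch of the direct-descriptor
close that the OP_CLOSE change above enables. This assumes liburing plus
a 5.15-era io_uring.h that exposes sqe->file_index; the helper name
close_direct_slot() is made up for the example, and the slot is passed
in the SQE offset by one (0 means a regular close by fd):

#include <liburing.h>

/* Close the direct descriptor installed in fixed-file slot 'slot'. */
static int close_direct_slot(struct io_uring *ring, unsigned int slot)
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        struct io_uring_cqe *cqe;
        int ret;

        if (!sqe)
                return -EBUSY;

        /* fd stays 0 when a fixed slot is given */
        io_uring_prep_close(sqe, 0);
        sqe->file_index = slot + 1;

        ret = io_uring_submit(ring);
        if (ret < 0)
                return ret;
        ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0)
                return ret;
        ret = cqe->res;         /* 0 on success, -errno on failure */
        io_uring_cqe_seen(ring, cqe);
        return ret;
}

Previously the same effect required an IORING_OP_FILES_UPDATE with an fd
of -1 aimed at the slot, e.g. io_uring_prep_files_update() with a single
-1 entry and the slot index as the offset.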

2 files changed, 72 insertions(+), 16 deletions(-)

fs/io-wq.c (+2 -1)
···
                         if (!get_signal(&ksig))
                                 continue;
-                        if (fatal_signal_pending(current))
+                        if (fatal_signal_pending(current) ||
+                            signal_group_exit(current->signal))
                                 break;
                         continue;
                 }
···

fs/io_uring.c (+70 -15)
···
 struct io_close {
         struct file                     *file;
         int                             fd;
+        u32                             file_slot;
 };
 
 struct io_timeout_data {
···
 static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                  unsigned int issue_flags, u32 slot_index);
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 
 static struct kmem_cache *req_cachep;
···
                 iov_iter_save_state(iter, state);
         }
         req->result = iov_iter_count(iter);
-        ret2 = 0;
 
         /* Ensure we clear previously set non-block flag */
         if (!force_nonblock)
···
         } else {
 copy_iov:
                 iov_iter_restore(iter, state);
-                if (ret2 > 0)
-                        iov_iter_advance(iter, ret2);
                 ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
                 return ret ?: -EAGAIN;
         }
···
         int i, bid = pbuf->bid;
 
         for (i = 0; i < pbuf->nbufs; i++) {
-                buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+                buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
                 if (!buf)
                         break;
···
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
         if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-            sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+            sqe->rw_flags || sqe->buf_index)
                 return -EINVAL;
         if (req->flags & REQ_F_FIXED_FILE)
                 return -EBADF;
 
         req->close.fd = READ_ONCE(sqe->fd);
+        req->close.file_slot = READ_ONCE(sqe->file_index);
+        if (req->close.file_slot && req->close.fd)
+                return -EINVAL;
+
         return 0;
 }
···
         struct fdtable *fdt;
         struct file *file = NULL;
         int ret = -EBADF;
+
+        if (req->close.file_slot) {
+                ret = io_close_fixed(req, issue_flags);
+                goto err;
+        }
 
         spin_lock(&files->file_lock);
         fdt = files_fdtable(files);
···
         if (req->poll.events & EPOLLONESHOT)
                 flags = 0;
         if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-                req->poll.done = true;
+                req->poll.events |= EPOLLONESHOT;
                 flags = 0;
         }
         if (flags & IORING_CQE_F_MORE)
···
         } else {
                 bool done;
 
+                if (req->poll.done) {
+                        spin_unlock(&ctx->completion_lock);
+                        return;
+                }
                 done = __io_poll_complete(req, req->result);
                 if (done) {
                         io_poll_remove_double(req);
                         hash_del(&req->hash_node);
+                        req->poll.done = true;
                 } else {
                         req->result = 0;
                         add_wait_queue(req->poll.head, &req->poll.wait);
···
         hash_del(&req->hash_node);
         io_poll_remove_double(req);
+        apoll->poll.done = true;
         spin_unlock(&ctx->completion_lock);
 
         if (!READ_ONCE(apoll->poll.canceled))
···
         struct io_ring_ctx *ctx = req->ctx;
         struct io_poll_table ipt;
         __poll_t mask;
+        bool done;
 
         ipt.pt._qproc = io_poll_queue_proc;
···
         if (mask) { /* no async, we'd stolen it */
                 ipt.error = 0;
-                io_poll_complete(req, mask);
+                done = io_poll_complete(req, mask);
         }
         spin_unlock(&ctx->completion_lock);
 
         if (mask) {
                 io_cqring_ev_posted(ctx);
-                if (poll->events & EPOLLONESHOT)
+                if (done)
                         io_put_req(req);
         }
         return ipt.error;
···
         struct io_uring_rsrc_update2 up;
         int ret;
 
-        if (issue_flags & IO_URING_F_NONBLOCK)
-                return -EAGAIN;
-
         up.offset = req->rsrc_update.offset;
         up.data = req->rsrc_update.arg;
         up.nr = 0;
         up.tags = 0;
         up.resv = 0;
 
-        mutex_lock(&ctx->uring_lock);
+        io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
         ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
                                         &up, req->rsrc_update.nr_args);
-        mutex_unlock(&ctx->uring_lock);
+        io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
 
         if (ret < 0)
                 req_set_fail(req);
···
         return ret;
 }
 
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+        unsigned int offset = req->close.file_slot - 1;
+        struct io_ring_ctx *ctx = req->ctx;
+        struct io_fixed_file *file_slot;
+        struct file *file;
+        int ret, i;
+
+        io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+        ret = -ENXIO;
+        if (unlikely(!ctx->file_data))
+                goto out;
+        ret = -EINVAL;
+        if (offset >= ctx->nr_user_files)
+                goto out;
+        ret = io_rsrc_node_switch_start(ctx);
+        if (ret)
+                goto out;
+
+        i = array_index_nospec(offset, ctx->nr_user_files);
+        file_slot = io_fixed_file_slot(&ctx->file_table, i);
+        ret = -EBADF;
+        if (!file_slot->file_ptr)
+                goto out;
+
+        file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+        ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+        if (ret)
+                goto out;
+
+        file_slot->file_ptr = 0;
+        io_rsrc_node_switch(ctx, ctx->file_data);
+        ret = 0;
+out:
+        io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+        return ret;
+}
+
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                  struct io_uring_rsrc_update2 *up,
                                  unsigned nr_args)
···
         struct io_buffer *buf;
         unsigned long index;
 
-        xa_for_each(&ctx->io_buffers, index, buf)
+        xa_for_each(&ctx->io_buffers, index, buf) {
                 __io_remove_buffers(ctx, buf, index, -1U);
+                cond_resched();
+        }
 }
 
 static void io_req_cache_free(struct list_head *list)
···
         struct io_tctx_node *node;
         unsigned long index;
 
-        xa_for_each(&tctx->xa, index, node)
+        xa_for_each(&tctx->xa, index, node) {
                 io_uring_del_tctx_node(index);
+                cond_resched();
+        }
         if (wq) {
                 /*
                  * Must be after io_uring_del_task_file() (removes nodes under