Merge tag 'io_uring-5.11-2021-01-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
"Still need a final cancelation fix that isn't quite done done,
expected in the next day or two. That said, this contains:

- Wakeup fix for IOPOLL requests

- SQPOLL split close op handling fix

- Ensure that any use of io_uring fd itself is marked as inflight

- Short non-regular file read fix (Pavel)

- Fix up bad false positive warning (Pavel)

- SQPOLL fixes (Pavel)

- In-flight removal fix (Pavel)"

* tag 'io_uring-5.11-2021-01-24' of git://git.kernel.dk/linux-block:
io_uring: account io_uring internal files as REQ_F_INFLIGHT
io_uring: fix sleeping under spin in __io_clean_op
io_uring: fix short read retries for non-reg files
io_uring: fix SQPOLL IORING_OP_CLOSE cancelation state
io_uring: fix skipping disabling sqo on exec
io_uring: fix uring_flush in exit_files() warning
io_uring: fix false positive sqo warning on flush
io_uring: iopoll requests should also wake task ->in_idle state
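
The "short read retries for non-reg files" fix above changes a userspace-visible
behaviour: on non-regular files (pipes, sockets) a short read is a normal
completion, not something the kernel should keep retrying until the buffer is
full. Below is a minimal, illustrative liburing sketch (not part of the patch;
assumes a liburing recent enough to provide io_uring_prep_read(); error handling
trimmed) that exercises that path by reading a 4 KiB buffer from a pipe holding
only 5 bytes:

    /* Illustration only: a short read on a pipe submitted via io_uring
     * should complete with the bytes that were available. */
    #include <liburing.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            struct io_uring ring;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;
            char buf[4096];
            int fds[2];

            if (pipe(fds) < 0 || io_uring_queue_init(8, &ring, 0) < 0)
                    return 1;

            /* Only 5 bytes are available, but the request asks for 4096. */
            if (write(fds[1], "hello", 5) != 5)
                    return 1;

            sqe = io_uring_get_sqe(&ring);
            io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
            io_uring_submit(&ring);

            io_uring_wait_cqe(&ring, &cqe);
            /* Expect res == 5: a short result is the normal outcome on a
             * pipe, not a reason for the kernel to retry the read. */
            printf("read completed with res=%d\n", cqe->res);
            io_uring_cqe_seen(&ring, cqe);

            io_uring_queue_exit(&ring);
            return 0;
    }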

Changed files: fs/io_uring.c (+47, -20)

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1025,6 +1025,7 @@
 static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
                              const struct iovec *fast_iov,
                              struct iov_iter *iter, bool force);
+static void io_req_drop_files(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
 
@@ -1049,8 +1048,7 @@
 
 static inline void io_clean_op(struct io_kiocb *req)
 {
-        if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
-                          REQ_F_INFLIGHT))
+        if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
                 __io_clean_op(req);
 }
 
@@ -1075,8 +1075,11 @@
                 return true;
 
         io_for_each_link(req, head) {
-                if ((req->flags & REQ_F_WORK_INITIALIZED) &&
-                    (req->work.flags & IO_WQ_WORK_FILES) &&
+                if (!(req->flags & REQ_F_WORK_INITIALIZED))
+                        continue;
+                if (req->file && req->file->f_op == &io_uring_fops)
+                        return true;
+                if ((req->work.flags & IO_WQ_WORK_FILES) &&
                     req->work.identity->files == files)
                         return true;
         }
@@ -1397,6 +1394,8 @@
                 free_fs_struct(fs);
                 req->work.flags &= ~IO_WQ_WORK_FS;
         }
+        if (req->flags & REQ_F_INFLIGHT)
+                io_req_drop_files(req);
 
         io_put_identity(req->task->io_uring, req);
 }
@@ -1508,11 +1503,14 @@
                         return false;
                 atomic_inc(&id->files->count);
                 get_nsproxy(id->nsproxy);
-                req->flags |= REQ_F_INFLIGHT;
 
-                spin_lock_irq(&ctx->inflight_lock);
-                list_add(&req->inflight_entry, &ctx->inflight_list);
-                spin_unlock_irq(&ctx->inflight_lock);
+                if (!(req->flags & REQ_F_INFLIGHT)) {
+                        req->flags |= REQ_F_INFLIGHT;
+
+                        spin_lock_irq(&ctx->inflight_lock);
+                        list_add(&req->inflight_entry, &ctx->inflight_list);
+                        spin_unlock_irq(&ctx->inflight_lock);
+                }
                 req->work.flags |= IO_WQ_WORK_FILES;
         }
         if (!(req->work.flags & IO_WQ_WORK_MM) &&
@@ -2278,6 +2270,8 @@
                 struct io_uring_task *tctx = rb->task->io_uring;
 
                 percpu_counter_sub(&tctx->inflight, rb->task_refs);
+                if (atomic_read(&tctx->in_idle))
+                        wake_up(&tctx->wait);
                 put_task_struct_many(rb->task, rb->task_refs);
                 rb->task = NULL;
         }
@@ -2298,6 +2288,8 @@
                         struct io_uring_task *tctx = rb->task->io_uring;
 
                         percpu_counter_sub(&tctx->inflight, rb->task_refs);
+                        if (atomic_read(&tctx->in_idle))
+                                wake_up(&tctx->wait);
                         put_task_struct_many(rb->task, rb->task_refs);
                 }
                 rb->task = req->task;
@@ -3560,7 +3548,7 @@
 
         /* read it all, or we did blocking attempt. no retry. */
         if (!iov_iter_count(iter) || !force_nonblock ||
-            (req->file->f_flags & O_NONBLOCK))
+            (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
                 goto done;
 
         io_size -= ret;
@@ -4480,7 +4468,6 @@
          * io_wq_work.flags, so initialize io_wq_work firstly.
          */
         io_req_init_async(req);
-        req->work.flags |= IO_WQ_WORK_NO_CANCEL;
 
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
@@ -4512,6 +4501,8 @@
 
         /* if the file has a flush method, be safe and punt to async */
         if (close->put_file->f_op->flush && force_nonblock) {
+                /* not safe to cancel at this point */
+                req->work.flags |= IO_WQ_WORK_NO_CANCEL;
                 /* was never set, but play safe */
                 req->flags &= ~REQ_F_NOWAIT;
                 /* avoid grabbing files - we don't need the files */
@@ -6170,8 +6157,10 @@
         struct io_uring_task *tctx = req->task->io_uring;
         unsigned long flags;
 
-        put_files_struct(req->work.identity->files);
-        put_nsproxy(req->work.identity->nsproxy);
+        if (req->work.flags & IO_WQ_WORK_FILES) {
+                put_files_struct(req->work.identity->files);
+                put_nsproxy(req->work.identity->nsproxy);
+        }
         spin_lock_irqsave(&ctx->inflight_lock, flags);
         list_del(&req->inflight_entry);
         spin_unlock_irqrestore(&ctx->inflight_lock, flags);
@@ -6240,9 +6225,6 @@
                 }
                 req->flags &= ~REQ_F_NEED_CLEANUP;
         }
-
-        if (req->flags & REQ_F_INFLIGHT)
-                io_req_drop_files(req);
 }
 
 static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
@@ -6456,6 +6444,15 @@
         } else {
                 trace_io_uring_file_get(ctx, fd);
                 file = __io_file_get(state, fd);
+        }
+
+        if (file && file->f_op == &io_uring_fops) {
+                io_req_init_async(req);
+                req->flags |= REQ_F_INFLIGHT;
+
+                spin_lock_irq(&ctx->inflight_lock);
+                list_add(&req->inflight_entry, &ctx->inflight_list);
+                spin_unlock_irq(&ctx->inflight_lock);
         }
 
         return file;
@@ -8877,8 +8856,7 @@
 
                 spin_lock_irq(&ctx->inflight_lock);
                 list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
-                        if (req->task != task ||
-                            req->work.identity->files != files)
+                        if (!io_match_task(req, task, files))
                                 continue;
                         found = true;
                         break;
@@ -8894,6 +8874,7 @@
                 io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
                 io_poll_remove_all(ctx, task, files);
                 io_kill_timeouts(ctx, task, files);
+                io_cqring_overflow_flush(ctx, true, task, files);
                 /* cancellations _may_ trigger task work */
                 io_run_task_work();
                 schedule();
@@ -8935,8 +8914,6 @@
 
 static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
 {
-        WARN_ON_ONCE(ctx->sqo_task != current);
-
         mutex_lock(&ctx->uring_lock);
         ctx->sqo_dead = 1;
         mutex_unlock(&ctx->uring_lock);
@@ -8956,6 +8937,7 @@
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 /* for SQPOLL only sqo_task has task notes */
+                WARN_ON_ONCE(ctx->sqo_task != current);
                 io_disable_sqo_submit(ctx);
                 task = ctx->sq_data->thread;
                 atomic_inc(&task->io_uring->in_idle);
@@ -9102,6 +9082,10 @@
         /* make sure overflow events are dropped */
         atomic_inc(&tctx->in_idle);
 
+        /* trigger io_disable_sqo_submit() */
+        if (tctx->sqpoll)
+                __io_uring_files_cancel(NULL);
+
         do {
                 /* read completions before cancelations */
                 inflight = tctx_inflight(tctx);
@@ -9152,7 +9128,10 @@
 
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 /* there is only one file note, which is owned by sqo_task */
-                WARN_ON_ONCE((ctx->sqo_task == current) ==
+                WARN_ON_ONCE(ctx->sqo_task != current &&
+                             xa_load(&tctx->xa, (unsigned long)file));
+                /* sqo_dead check is for when this happens after cancellation */
+                WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
                              !xa_load(&tctx->xa, (unsigned long)file));
 
                 io_disable_sqo_submit(ctx);
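
For context on the REQ_F_INFLIGHT hunks above (io_match_task() and io_file_get()
now treating a request whose file is an io_uring fd as inflight): the pattern
they cover is a request that operates on an io_uring file itself, for example
one ring polling another ring's fd for completions. Such requests must be
findable and cancellable when the task's files go away. Below is a minimal,
illustrative liburing sketch of that userspace pattern (not part of the patch;
error handling trimmed):

    /* Illustration only: ring B polls ring A's fd, so B holds a request
     * whose file is an io_uring file - the case the inflight accounting
     * above is meant to track. */
    #include <liburing.h>
    #include <poll.h>
    #include <stdio.h>

    int main(void)
    {
            struct io_uring a, b;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;

            if (io_uring_queue_init(8, &a, 0) < 0 ||
                io_uring_queue_init(8, &b, 0) < 0)
                    return 1;

            /* Ring B waits for ring A's fd to become readable, i.e. for a
             * completion to appear in A's CQ ring. */
            sqe = io_uring_get_sqe(&b);
            io_uring_prep_poll_add(sqe, a.ring_fd, POLLIN);
            io_uring_submit(&b);

            /* A no-op on ring A produces a completion, waking the poll. */
            sqe = io_uring_get_sqe(&a);
            io_uring_prep_nop(sqe);
            io_uring_submit(&a);

            io_uring_wait_cqe(&b, &cqe);
            printf("poll on ring A's fd completed: res=%d\n", cqe->res);
            io_uring_cqe_seen(&b, cqe);

            io_uring_queue_exit(&a);
            io_uring_queue_exit(&b);
            return 0;
    }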