Merge tag 'io_uring-5.14-2021-07-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- Fix a memory leak due to a race condition in io_init_wq_offload (Yang)

- Poll error handling fixes (Pavel)

- Fix early fdput() regression (me)

- Don't reissue iopoll requests off release path (me)

- Add a safety check for io-wq queue off wrong path (me)

* tag 'io_uring-5.14-2021-07-24' of git://git.kernel.dk/linux-block:
io_uring: explicitly catch any illegal async queue attempt
io_uring: never attempt iopoll reissue from release path
io_uring: fix early fdput() of file
io_uring: fix memleak in io_init_wq_offload()
io_uring: remove double poll entry on arm failure
io_uring: explicitly count entries for poll reqs
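
The hash_map leak fixed by the io_init_wq_offload() change (first bullet above) is a check-then-allocate race: two tasks sharing a ring can both observe ctx->hash_map as NULL, both allocate, and the allocation whose store loses is never freed. As a rough illustration only, here is a minimal user-space pthreads sketch of the same pattern and of the lock-based fix the patch applies with ctx->uring_lock; this is not the kernel implementation, and names such as get_map_locked() are made up:

/*
 * Illustrative user-space sketch (not kernel code): the check-then-allocate
 * race and its lock-based fix. Without the lock, two threads can both see
 * shared_map == NULL, both allocate, and one allocation is leaked when the
 * other thread's store overwrites it.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct hash_map {
        int refs;
};

static struct hash_map *shared_map;
static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

/* Racy variant, analogous to the pre-fix code path. */
struct hash_map *get_map_racy(void)
{
        if (!shared_map) {
                struct hash_map *map = calloc(1, sizeof(*map));

                if (!map)
                        return NULL;
                /* a map installed by a concurrent thread is overwritten and leaked */
                shared_map = map;
        }
        return shared_map;
}

/* Fixed variant: the check and the install happen under one lock. */
struct hash_map *get_map_locked(void)
{
        struct hash_map *map;

        pthread_mutex_lock(&map_lock);
        map = shared_map;
        if (!map) {
                map = calloc(1, sizeof(*map));
                if (map)
                        shared_map = map;
        }
        pthread_mutex_unlock(&map_lock);
        return map;
}

static void *worker(void *arg)
{
        (void)arg;
        return get_map_locked();
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, worker, NULL);
        pthread_create(&b, NULL, worker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("shared_map = %p\n", (void *)shared_map);
        free(shared_map);
        return 0;
}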

 fs/io-wq.c    |  7 ++++++-
 fs/io_uring.c | 55 +++++++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -731,7 +731,12 @@
         int work_flags;
         unsigned long flags;
 
-        if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+        /*
+         * If io-wq is exiting for this task, or if the request has explicitly
+         * been marked as one that should not get executed, cancel it here.
+         */
+        if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+            (work->flags & IO_WQ_WORK_CANCEL)) {
                 io_run_cancel(work, wqe);
                 return;
         }
diff --git a/fs/io_uring.c b/fs/io_uring.c
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1294,6 +1294,17 @@
 
         /* init ->work of the whole link before punting */
         io_prep_async_link(req);
+
+        /*
+         * Not expected to happen, but if we do have a bug where this _can_
+         * happen, catch it here and ensure the request is marked as
+         * canceled. That will make io-wq go through the usual work cancel
+         * procedure rather than attempt to run this request (or create a new
+         * worker for it).
+         */
+        if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+                req->work.flags |= IO_WQ_WORK_CANCEL;
+
         trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                         &req->work, req->flags);
         io_wq_enqueue(tctx->io_wq, &req->work);
@@ -2216,7 +2205,7 @@
  * Find and free completed poll iocbs
  */
 static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                               struct list_head *done)
+                               struct list_head *done, bool resubmit)
 {
         struct req_batch rb;
         struct io_kiocb *req;
@@ -2231,7 +2220,7 @@
                 req = list_first_entry(done, struct io_kiocb, inflight_entry);
                 list_del(&req->inflight_entry);
 
-                if (READ_ONCE(req->result) == -EAGAIN &&
+                if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
                     !(req->flags & REQ_F_DONT_REISSUE)) {
                         req->iopoll_completed = 0;
                         req_ref_get(req);
@@ -2255,7 +2244,7 @@
 }
 
 static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                        long min)
+                        long min, bool resubmit)
 {
         struct io_kiocb *req, *tmp;
         LIST_HEAD(done);
@@ -2298,7 +2287,7 @@
         }
 
         if (!list_empty(&done))
-                io_iopoll_complete(ctx, nr_events, &done);
+                io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
         return ret;
 }
@@ -2316,7 +2305,7 @@
         while (!list_empty(&ctx->iopoll_list)) {
                 unsigned int nr_events = 0;
 
-                io_do_iopoll(ctx, &nr_events, 0);
+                io_do_iopoll(ctx, &nr_events, 0, false);
 
                 /* let it sleep and repeat later if can't complete a request */
                 if (nr_events == 0)
@@ -2378,7 +2367,7 @@
                             list_empty(&ctx->iopoll_list))
                                 break;
                 }
-                ret = io_do_iopoll(ctx, &nr_events, min);
+                ret = io_do_iopoll(ctx, &nr_events, min, true);
         } while (!ret && nr_events < min && !need_resched());
 out:
         mutex_unlock(&ctx->uring_lock);
@@ -4813,6 +4802,7 @@
 struct io_poll_table {
         struct poll_table_struct pt;
         struct io_kiocb *req;
+        int nr_entries;
         int error;
 };
 
@@ -5007,11 +4995,11 @@
         struct io_kiocb *req = pt->req;
 
         /*
-         * If poll->head is already set, it's because the file being polled
-         * uses multiple waitqueues for poll handling (eg one for read, one
-         * for write). Setup a separate io_poll_iocb if this happens.
+         * The file being polled uses multiple waitqueues for poll handling
+         * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+         * if this happens.
          */
-        if (unlikely(poll->head)) {
+        if (unlikely(pt->nr_entries)) {
                 struct io_poll_iocb *poll_one = poll;
 
                 /* already have a 2nd entry, fail a third attempt */
@@ -5039,7 +5027,7 @@
                 *poll_ptr = poll;
         }
 
-        pt->error = 0;
+        pt->nr_entries++;
         poll->head = head;
 
         if (poll->events & EPOLLEXCLUSIVE)
@@ -5116,11 +5104,16 @@
 
         ipt->pt._key = mask;
         ipt->req = req;
-        ipt->error = -EINVAL;
+        ipt->error = 0;
+        ipt->nr_entries = 0;
 
         mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+        if (unlikely(!ipt->nr_entries) && !ipt->error)
+                ipt->error = -EINVAL;
 
         spin_lock_irq(&ctx->completion_lock);
+        if (ipt->error)
+                io_poll_remove_double(req);
         if (likely(poll->head)) {
                 spin_lock(&poll->head->lock);
                 if (unlikely(list_empty(&poll->wait.entry))) {
@@ -6809,7 +6792,7 @@
 
         mutex_lock(&ctx->uring_lock);
         if (!list_empty(&ctx->iopoll_list))
-                io_do_iopoll(ctx, &nr_events, 0);
+                io_do_iopoll(ctx, &nr_events, 0, true);
 
         /*
          * Don't submit if refs are dying, good for io_uring_register(),
@@ -7916,15 +7899,19 @@
         struct io_wq_data data;
         unsigned int concurrency;
 
+        mutex_lock(&ctx->uring_lock);
         hash = ctx->hash_map;
         if (!hash) {
                 hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-                if (!hash)
+                if (!hash) {
+                        mutex_unlock(&ctx->uring_lock);
                         return ERR_PTR(-ENOMEM);
+                }
                 refcount_set(&hash->refs, 1);
                 init_waitqueue_head(&hash->wait);
                 ctx->hash_map = hash;
         }
+        mutex_unlock(&ctx->uring_lock);
 
         data.hash = hash;
         data.task = task;
@@ -8002,9 +7981,11 @@
                 f = fdget(p->wq_fd);
                 if (!f.file)
                         return -ENXIO;
-                fdput(f);
-                if (f.file->f_op != &io_uring_fops)
+                if (f.file->f_op != &io_uring_fops) {
+                        fdput(f);
                         return -EINVAL;
+                }
+                fdput(f);
         }
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 struct task_struct *tsk;