Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"

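/*
 * Per-request command data for IORING_OP_ASYNC_CANCEL, parsed out of the SQE
 * by io_async_cancel_prep() and consumed by io_async_cancel().
 */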
struct io_cancel {
	struct file *file;
	u64 addr;
	u32 flags;
	s32 fd;
	u8 opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

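/* io-wq callback: does this queued work item match the cancel criteria? */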
static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

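/*
 * Try to cancel matching requests on one task's io-wq. Returns 0 if a
 * pending work item was cancelled, -EALREADY if a match is already running,
 * and -ENOENT if nothing matched.
 */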
static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}

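/*
 * Attempt cancelation of one request: first via io-wq, then through the
 * poll, waitid and futex lookups, and finally the timeout list. Timeout
 * cancelation is only attempted when not matching by file descriptor.
 */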
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall through even for -EALREADY, as we may have a poll request
	 * armed that needs unarming.
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

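/*
 * Parse an async cancel request from the SQE: the user_data to match in
 * ->addr, the IORING_ASYNC_CANCEL_* flags, and optionally a file descriptor
 * and/or opcode to match against.
 */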
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->opcode = READ_ONCE(sqe->len);
	}

	return 0;
}

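/*
 * Core of async cancelation: keep trying the given task's context until
 * nothing more matches, then fall back to walking every task attached to
 * the ring and trying their io-wq. With ALL/ANY set, the return value is
 * the number of requests cancelled instead of an error code.
 */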
static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	io_ring_submit_lock(ctx, issue_flags);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}

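/*
 * Issue handler for IORING_OP_ASYNC_CANCEL. Resolves the target file when
 * matching by descriptor (fixed or normal), runs the cancelation, and posts
 * the result as this request's CQE.
 */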
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx = req->ctx,
		.data = cancel->addr,
		.flags = cancel->flags,
		.opcode = cancel->opcode,
		.seq = atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
						      issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

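/*
 * One pass of synchronous cancelation on behalf of io_sync_cancel(). A fixed
 * file target is re-resolved on every pass, since the uring_lock is dropped
 * between passes.
 */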
static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

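/*
 * Synchronous cancelation, invoked through io_uring_register() with
 * IORING_REGISTER_SYNC_CANCEL. Keeps retrying while matching requests are
 * still running (-EALREADY), dropping the uring_lock and sleeping on cq_wait
 * between passes, until the optional timeout expires or a signal is pending.
 */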
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx = ctx,
		.seq = atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec = sc.timeout.tv_sec,
			.tv_nsec = sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time a
	 * request completes and will retry the cancelation.
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

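/*
 * Remove and cancel every request on @list that matches @tctx according to
 * io_match_task_safe(), invoking the opcode-specific @cancel callback on
 * each. Returns true if any request was cancelled.
 */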
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

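/*
 * Targeted cancelation over a hashed request list: cancel requests matching
 * @cd, stopping after the first hit unless IORING_ASYNC_CANCEL_ALL is set.
 * Returns the number of requests cancelled, or -ENOENT if none matched.
 */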
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}

static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

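/*
 * Entry point for task exit/exec: drop any registered ring fds, then cancel
 * this task's requests.
 */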
void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

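/*
 * Cancel matching requests that are still sitting on the deferred (drain)
 * list, completing them with -ECANCELED.
 */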
static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->uring_lock);

	return ret;
}

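/*
 * One cancelation pass over a single ring: cancel io-wq work, deferred
 * requests, poll, waitid, futex and uring_cmd requests, plus timeouts, for
 * @tctx (or for every task if @tctx is NULL). Returns true if anything was
 * cancelled or any work was run, so the caller knows to loop again.
 */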
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!wq_list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}

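/* Number of requests this task has in flight, optionally only tracked ones */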
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and cancel
 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
								     current->io_uring,
								     cancel_all,
								     false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
								     current->io_uring,
								     cancel_all,
								     true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}