// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"

struct io_cancel {
	struct file			*file;
	u64				addr;
	u32				flags;
	s32				fd;
	u8				opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}

int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall-through even for -EALREADY, as we may have poll requests
	 * armed that need unarming.
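	 * If io-wq didn't have it, fall through to the other cancelation
	 * domains below: poll, waitid, futex and, last, timeouts.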
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->opcode = READ_ONCE(sqe->len);
	}

	return 0;
}

static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	__set_current_state(TASK_RUNNING);
	io_ring_submit_lock(ctx, issue_flags);
	mutex_lock(&ctx->tctx_lock);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}

int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx	= req->ctx,
		.data	= cancel->addr,
		.flags	= cancel->flags,
		.opcode	= cancel->opcode,
		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
							issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx	= ctx,
		.seq	= atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec		= sc.timeout.tv_sec,
			.tv_nsec	= sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time
	 * a request completes and will retry the cancelation.
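	 * A completion wakes ctx->cq_wait, and the wait below is bounded
	 * by the caller-supplied timeout (KTIME_MAX if none was given).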
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}

static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
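 * The lock is only taken when the head has REQ_F_LINK_TIMEOUT set, as that
 * is the only case where a linked timeout can race with the link walk.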
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return ret;
}

__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
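		 * The io-wq is per task, so io_cancel_task_cb() matches work
		 * by task context rather than by ring.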
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!wq_list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}

static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and cancel
 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
								     current->io_uring,
								     cancel_all,
								     false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
								     current->io_uring,
								     cancel_all,
								     true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
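		 * For the cancel_all case the counter is dropped here instead,
		 * just before the tctx itself is freed below.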
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}