Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- Fix an off-by-one in the BUILD_BUG_ON() check of __REQ_F_LAST_BIT. Not a
real issue right now as we have plenty of flags left, but could become one. (Hao)

- Fix lockdep issue introduced in this merge window (Jens)

- Fix a few issues with the worker creation (Jens, Pavel, Qiang)

- Fix regression with wq_has_sleeper() for IOPOLL (Pavel)

- Timeout link error propagation fix (Pavel)

* tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block:
io_uring: fix off-by-one in BUILD_BUG_ON check of __REQ_F_LAST_BIT
io_uring: fail links of cancelled timeouts
io-wq: fix memory leak in create_io_worker()
io-wq: fix silly logic error in io_task_work_match()
io_uring: drop ctx->uring_lock before acquiring sqd->lock
io_uring: fix missing mb() before waitqueue_active
io-wq: fix cancellation on create-worker failure

+44 -13
+30 -11
fs/io-wq.c
··· 709 709 } 710 710 raw_spin_unlock(&wqe->lock); 711 711 io_worker_ref_put(wqe->wq); 712 + kfree(worker); 712 713 return; 713 714 } 714 715 ··· 726 725 if (!io_queue_worker_create(worker, acct, create_worker_cont)) { 727 726 clear_bit_unlock(0, &worker->create_state); 728 727 io_worker_release(worker); 728 + kfree(worker); 729 729 } 730 730 } 731 731 ··· 761 759 if (!IS_ERR(tsk)) { 762 760 io_init_new_worker(wqe, worker, tsk); 763 761 } else if (!io_should_retry_thread(PTR_ERR(tsk))) { 762 + kfree(worker); 764 763 goto fail; 765 764 } else { 766 765 INIT_WORK(&worker->work, io_workqueue_create); ··· 835 832 wq_list_add_after(&work->list, &tail->list, &acct->work_list); 836 833 } 837 834 835 + static bool io_wq_work_match_item(struct io_wq_work *work, void *data) 836 + { 837 + return work == data; 838 + } 839 + 838 840 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) 839 841 { 840 842 struct io_wqe_acct *acct = io_work_get_acct(wqe, work); ··· 852 844 */ 853 845 if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || 854 846 (work->flags & IO_WQ_WORK_CANCEL)) { 855 - run_cancel: 856 847 io_run_cancel(work, wqe); 857 848 return; 858 849 } ··· 871 864 bool did_create; 872 865 873 866 did_create = io_wqe_create_worker(wqe, acct); 874 - if (unlikely(!did_create)) { 875 - raw_spin_lock(&wqe->lock); 876 - /* fatal condition, failed to create the first worker */ 877 - if (!acct->nr_workers) { 878 - raw_spin_unlock(&wqe->lock); 879 - goto run_cancel; 880 - } 881 - raw_spin_unlock(&wqe->lock); 867 + if (likely(did_create)) 868 + return; 869 + 870 + raw_spin_lock(&wqe->lock); 871 + /* fatal condition, failed to create the first worker */ 872 + if (!acct->nr_workers) { 873 + struct io_cb_cancel_data match = { 874 + .fn = io_wq_work_match_item, 875 + .data = work, 876 + .cancel_all = false, 877 + }; 878 + 879 + if (io_acct_cancel_pending_work(wqe, acct, &match)) 880 + raw_spin_lock(&wqe->lock); 882 881 } 882 + raw_spin_unlock(&wqe->lock); 883 883 } 884 884 } 
885 885 ··· 1136 1122 { 1137 1123 struct io_worker *worker; 1138 1124 1139 - if (cb->func != create_worker_cb || cb->func != create_worker_cont) 1125 + if (cb->func != create_worker_cb && cb->func != create_worker_cont) 1140 1126 return false; 1141 1127 worker = container_of(cb, struct io_worker, create_work); 1142 1128 return worker->wqe->wq == data; ··· 1157 1143 1158 1144 while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { 1159 1145 struct io_worker *worker; 1146 + struct io_wqe_acct *acct; 1160 1147 1161 1148 worker = container_of(cb, struct io_worker, create_work); 1162 - atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); 1149 + acct = io_wqe_get_acct(worker); 1150 + atomic_dec(&acct->nr_running); 1151 + raw_spin_lock(&worker->wqe->lock); 1152 + acct->nr_workers--; 1153 + raw_spin_unlock(&worker->wqe->lock); 1163 1154 io_worker_ref_put(wq); 1164 1155 clear_bit_unlock(0, &worker->create_state); 1165 1156 io_worker_release(worker);
+14 -2
fs/io_uring.c
··· 1482 1482 struct io_timeout_data *io = req->async_data; 1483 1483 1484 1484 if (hrtimer_try_to_cancel(&io->timer) != -1) { 1485 + if (status) 1486 + req_set_fail(req); 1485 1487 atomic_set(&req->ctx->cq_timeouts, 1486 1488 atomic_read(&req->ctx->cq_timeouts) + 1); 1487 1489 list_del_init(&req->timeout.list); ··· 1621 1619 1622 1620 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) 1623 1621 { 1622 + /* see waitqueue_active() comment */ 1623 + smp_mb(); 1624 + 1624 1625 if (ctx->flags & IORING_SETUP_SQPOLL) { 1625 - if (wq_has_sleeper(&ctx->cq_wait)) 1626 + if (waitqueue_active(&ctx->cq_wait)) 1626 1627 wake_up_all(&ctx->cq_wait); 1627 1628 } 1628 1629 if (io_should_trigger_evfd(ctx)) ··· 10555 10550 if (ctx->flags & IORING_SETUP_SQPOLL) { 10556 10551 sqd = ctx->sq_data; 10557 10552 if (sqd) { 10553 + /* 10554 + * Observe the correct sqd->lock -> ctx->uring_lock 10555 + * ordering. Fine to drop uring_lock here, we hold 10556 + * a ref to the ctx. 10557 + */ 10558 + mutex_unlock(&ctx->uring_lock); 10558 10559 mutex_lock(&sqd->lock); 10560 + mutex_lock(&ctx->uring_lock); 10559 10561 tctx = sqd->thread->io_uring; 10560 10562 } 10561 10563 } else { ··· 10865 10853 BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); 10866 10854 10867 10855 BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); 10868 - BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int)); 10856 + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); 10869 10857 10870 10858 req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | 10871 10859 SLAB_ACCOUNT);