Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
"Some later fixes for io_uring:

- Small cleanup series from Pavel

- Belt and suspenders build time check of sqe size and layout
(Stefan)

- Addition of ->show_fdinfo() on request of Jann Horn, to aid in
understanding mapped personalities

- eventfd recursion/deadlock fix, for both io_uring and aio

- Fixup for send/recv handling

- Fixup for double deferral of read/write request

- Fix for potential double completion event for close request

- Adjust fadvise advice async/inline behavior

- Fix for shutdown hang with SQPOLL thread

- Fix for potential use-after-free of fixed file table"

* tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block:
io_uring: cleanup fixed file data table references
io_uring: spin for sq thread to idle on shutdown
aio: prevent potential eventfd recursion on poll
io_uring: put the flag changing code in the same spot
io_uring: iterate req cache backwards
io_uring: punt even fadvise() WILLNEED to async context
io_uring: fix sporadic double CQE entry for close
io_uring: remove extra ->file check
io_uring: don't map read/write iovec potentially twice
io_uring: use the proper helpers for io_send/recv
io_uring: prevent potential eventfd recursion on poll
eventfd: track eventfd_signal() recursion depth
io_uring: add BUILD_BUG_ON() to assert the layout of struct io_uring_sqe
io_uring: add ->show_fdinfo() for the io_uring file descriptor

+251 -52
+18 -2
fs/aio.c
··· 1610 1610 return 0; 1611 1611 } 1612 1612 1613 + static void aio_poll_put_work(struct work_struct *work) 1614 + { 1615 + struct poll_iocb *req = container_of(work, struct poll_iocb, work); 1616 + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); 1617 + 1618 + iocb_put(iocb); 1619 + } 1620 + 1613 1621 static void aio_poll_complete_work(struct work_struct *work) 1614 1622 { 1615 1623 struct poll_iocb *req = container_of(work, struct poll_iocb, work); ··· 1682 1674 list_del_init(&req->wait.entry); 1683 1675 1684 1676 if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { 1677 + struct kioctx *ctx = iocb->ki_ctx; 1678 + 1685 1679 /* 1686 1680 * Try to complete the iocb inline if we can. Use 1687 1681 * irqsave/irqrestore because not all filesystems (e.g. fuse) ··· 1693 1683 list_del(&iocb->ki_list); 1694 1684 iocb->ki_res.res = mangle_poll(mask); 1695 1685 req->done = true; 1696 - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); 1697 - iocb_put(iocb); 1686 + if (iocb->ki_eventfd && eventfd_signal_count()) { 1687 + iocb = NULL; 1688 + INIT_WORK(&req->work, aio_poll_put_work); 1689 + schedule_work(&req->work); 1690 + } 1691 + spin_unlock_irqrestore(&ctx->ctx_lock, flags); 1692 + if (iocb) 1693 + iocb_put(iocb); 1698 1694 } else { 1699 1695 schedule_work(&req->work); 1700 1696 }
+15
fs/eventfd.c
··· 24 24 #include <linux/seq_file.h> 25 25 #include <linux/idr.h> 26 26 27 + DEFINE_PER_CPU(int, eventfd_wake_count); 28 + 27 29 static DEFINE_IDA(eventfd_ida); 28 30 29 31 struct eventfd_ctx { ··· 62 60 { 63 61 unsigned long flags; 64 62 63 + /* 64 + * Deadlock or stack overflow issues can happen if we recurse here 65 + * through waitqueue wakeup handlers. If the caller uses potentially 66 + * nested waitqueues with custom wakeup handlers, then it should 67 + * check eventfd_signal_count() before calling this function. If 68 + * it returns true, the eventfd_signal() call should be deferred to a 69 + * safe context. 70 + */ 71 + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) 72 + return 0; 73 + 65 74 spin_lock_irqsave(&ctx->wqh.lock, flags); 75 + this_cpu_inc(eventfd_wake_count); 66 76 if (ULLONG_MAX - ctx->count < n) 67 77 n = ULLONG_MAX - ctx->count; 68 78 ctx->count += n; 69 79 if (waitqueue_active(&ctx->wqh)) 70 80 wake_up_locked_poll(&ctx->wqh, EPOLLIN); 81 + this_cpu_dec(eventfd_wake_count); 71 82 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 72 83 73 84 return n;
+204 -50
fs/io_uring.c
··· 585 585 * io_kiocb alloc cache 586 586 */ 587 587 void *reqs[IO_IOPOLL_BATCH]; 588 - unsigned int free_reqs; 589 - unsigned int cur_req; 588 + unsigned int free_reqs; 590 589 591 590 /* 592 591 * File reference cache ··· 753 754 struct io_uring_files_update *ip, 754 755 unsigned nr_args); 755 756 static int io_grab_files(struct io_kiocb *req); 757 + static void io_ring_file_ref_flush(struct fixed_file_data *data); 756 758 757 759 static struct kmem_cache *req_cachep; 758 760 ··· 1020 1020 1021 1021 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) 1022 1022 { 1023 + if (!ctx->cq_ev_fd) 1024 + return false; 1023 1025 if (!ctx->eventfd_async) 1024 1026 return true; 1025 1027 return io_wq_current_is_worker() || in_interrupt(); 1026 1028 } 1027 1029 1028 - static void io_cqring_ev_posted(struct io_ring_ctx *ctx) 1030 + static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev) 1029 1031 { 1030 1032 if (waitqueue_active(&ctx->wait)) 1031 1033 wake_up(&ctx->wait); 1032 1034 if (waitqueue_active(&ctx->sqo_wait)) 1033 1035 wake_up(&ctx->sqo_wait); 1034 - if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx)) 1036 + if (trigger_ev) 1035 1037 eventfd_signal(ctx->cq_ev_fd, 1); 1038 + } 1039 + 1040 + static void io_cqring_ev_posted(struct io_ring_ctx *ctx) 1041 + { 1042 + __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx)); 1036 1043 } 1037 1044 1038 1045 /* Returns true if there are no backlogged entries after the flush */ ··· 1190 1183 ret = 1; 1191 1184 } 1192 1185 state->free_reqs = ret - 1; 1193 - state->cur_req = 1; 1194 - req = state->reqs[0]; 1186 + req = state->reqs[ret - 1]; 1195 1187 } else { 1196 - req = state->reqs[state->cur_req]; 1197 1188 state->free_reqs--; 1198 - state->cur_req++; 1189 + req = state->reqs[state->free_reqs]; 1199 1190 } 1200 1191 1201 1192 got_it: ··· 1860 1855 unsigned ioprio; 1861 1856 int ret; 1862 1857 1863 - if (!req->file) 1864 - return -EBADF; 1865 - 1866 1858 if 
(S_ISREG(file_inode(req->file)->i_mode)) 1867 1859 req->flags |= REQ_F_ISREG; 1868 1860 ··· 1868 1866 req->flags |= REQ_F_CUR_POS; 1869 1867 kiocb->ki_pos = req->file->f_pos; 1870 1868 } 1871 - kiocb->ki_flags = iocb_flags(kiocb->ki_filp); 1872 1869 kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); 1870 + kiocb->ki_flags = iocb_flags(kiocb->ki_filp); 1871 + ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); 1872 + if (unlikely(ret)) 1873 + return ret; 1873 1874 1874 1875 ioprio = READ_ONCE(sqe->ioprio); 1875 1876 if (ioprio) { ··· 1883 1878 kiocb->ki_ioprio = ioprio; 1884 1879 } else 1885 1880 kiocb->ki_ioprio = get_current_ioprio(); 1886 - 1887 - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); 1888 - if (unlikely(ret)) 1889 - return ret; 1890 1881 1891 1882 /* don't allow async punt if RWF_NOWAIT was requested */ 1892 1883 if ((kiocb->ki_flags & IOCB_NOWAIT) || ··· 2165 2164 { 2166 2165 if (!io_op_defs[req->opcode].async_ctx) 2167 2166 return 0; 2168 - if (!req->io && io_alloc_async_ctx(req)) 2169 - return -ENOMEM; 2167 + if (!req->io) { 2168 + if (io_alloc_async_ctx(req)) 2169 + return -ENOMEM; 2170 2170 2171 - io_req_map_rw(req, io_size, iovec, fast_iov, iter); 2171 + io_req_map_rw(req, io_size, iovec, fast_iov, iter); 2172 + } 2172 2173 req->work.func = io_rw_async; 2173 2174 return 0; 2174 2175 } ··· 2727 2724 struct io_fadvise *fa = &req->fadvise; 2728 2725 int ret; 2729 2726 2730 - /* DONTNEED may block, others _should_ not */ 2731 - if (fa->advice == POSIX_FADV_DONTNEED && force_nonblock) 2732 - return -EAGAIN; 2727 + if (force_nonblock) { 2728 + switch (fa->advice) { 2729 + case POSIX_FADV_NORMAL: 2730 + case POSIX_FADV_RANDOM: 2731 + case POSIX_FADV_SEQUENTIAL: 2732 + break; 2733 + default: 2734 + return -EAGAIN; 2735 + } 2736 + } 2733 2737 2734 2738 ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); 2735 2739 if (ret < 0) ··· 2847 2837 int ret; 2848 2838 2849 2839 ret = filp_close(req->close.put_file, 
req->work.files); 2850 - if (ret < 0) { 2840 + if (ret < 0) 2851 2841 req_set_fail_links(req); 2852 - } 2853 2842 io_cqring_add_event(req, ret); 2854 2843 } 2855 2844 2856 2845 fput(req->close.put_file); 2857 2846 2858 - /* we bypassed the re-issue, drop the submission reference */ 2859 - io_put_req(req); 2860 2847 io_put_req_find_next(req, &nxt); 2861 2848 if (nxt) 2862 2849 io_wq_assign_next(workptr, nxt); ··· 2895 2888 2896 2889 eagain: 2897 2890 req->work.func = io_close_finish; 2898 - return -EAGAIN; 2891 + /* 2892 + * Do manual async queue here to avoid grabbing files - we don't 2893 + * need the files, and it'll cause io_close_finish() to close 2894 + * the file again and cause a double CQE entry for this request 2895 + */ 2896 + io_queue_async_work(req); 2897 + return 0; 2899 2898 } 2900 2899 2901 2900 static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) ··· 3096 3083 else if (force_nonblock) 3097 3084 flags |= MSG_DONTWAIT; 3098 3085 3099 - ret = __sys_sendmsg_sock(sock, &msg, flags); 3086 + msg.msg_flags = flags; 3087 + ret = sock_sendmsg(sock, &msg); 3100 3088 if (force_nonblock && ret == -EAGAIN) 3101 3089 return -EAGAIN; 3102 3090 if (ret == -ERESTARTSYS) ··· 3123 3109 3124 3110 sr->msg_flags = READ_ONCE(sqe->msg_flags); 3125 3111 sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 3112 + sr->len = READ_ONCE(sqe->len); 3126 3113 3127 3114 if (!io || req->opcode == IORING_OP_RECV) 3128 3115 return 0; ··· 3242 3227 else if (force_nonblock) 3243 3228 flags |= MSG_DONTWAIT; 3244 3229 3245 - ret = __sys_recvmsg_sock(sock, &msg, NULL, NULL, flags); 3230 + ret = sock_recvmsg(sock, &msg, flags); 3246 3231 if (force_nonblock && ret == -EAGAIN) 3247 3232 return -EAGAIN; 3248 3233 if (ret == -ERESTARTSYS) ··· 3576 3561 __io_poll_flush(req->ctx, nodes); 3577 3562 } 3578 3563 3564 + static void io_poll_trigger_evfd(struct io_wq_work **workptr) 3565 + { 3566 + struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); 3567 + 3568 + 
eventfd_signal(req->ctx->cq_ev_fd, 1); 3569 + io_put_req(req); 3570 + } 3571 + 3579 3572 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, 3580 3573 void *key) 3581 3574 { ··· 3609 3586 3610 3587 if (llist_empty(&ctx->poll_llist) && 3611 3588 spin_trylock_irqsave(&ctx->completion_lock, flags)) { 3589 + bool trigger_ev; 3590 + 3612 3591 hash_del(&req->hash_node); 3613 3592 io_poll_complete(req, mask, 0); 3614 - req->flags |= REQ_F_COMP_LOCKED; 3615 - io_put_req(req); 3616 - spin_unlock_irqrestore(&ctx->completion_lock, flags); 3617 3593 3618 - io_cqring_ev_posted(ctx); 3619 - req = NULL; 3594 + trigger_ev = io_should_trigger_evfd(ctx); 3595 + if (trigger_ev && eventfd_signal_count()) { 3596 + trigger_ev = false; 3597 + req->work.func = io_poll_trigger_evfd; 3598 + } else { 3599 + req->flags |= REQ_F_COMP_LOCKED; 3600 + io_put_req(req); 3601 + req = NULL; 3602 + } 3603 + spin_unlock_irqrestore(&ctx->completion_lock, flags); 3604 + __io_cqring_ev_posted(ctx, trigger_ev); 3620 3605 } else { 3621 3606 req->result = mask; 3622 3607 req->llist_node.next = NULL; ··· 4846 4815 blk_finish_plug(&state->plug); 4847 4816 io_file_put(state); 4848 4817 if (state->free_reqs) 4849 - kmem_cache_free_bulk(req_cachep, state->free_reqs, 4850 - &state->reqs[state->cur_req]); 4818 + kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); 4851 4819 } 4852 4820 4853 4821 /* ··· 5071 5041 * reap events and wake us up. 
5072 5042 */ 5073 5043 if (inflight || 5074 - (!time_after(jiffies, timeout) && ret != -EBUSY)) { 5044 + (!time_after(jiffies, timeout) && ret != -EBUSY && 5045 + !percpu_ref_is_dying(&ctx->refs))) { 5075 5046 cond_resched(); 5076 5047 continue; 5077 5048 } ··· 5262 5231 if (!data) 5263 5232 return -ENXIO; 5264 5233 5265 - /* protect against inflight atomic switch, which drops the ref */ 5266 - percpu_ref_get(&data->refs); 5267 - /* wait for existing switches */ 5268 - flush_work(&data->ref_work); 5269 5234 percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); 5270 - wait_for_completion(&data->done); 5271 - percpu_ref_put(&data->refs); 5272 - /* flush potential new switch */ 5273 5235 flush_work(&data->ref_work); 5236 + wait_for_completion(&data->done); 5237 + io_ring_file_ref_flush(data); 5274 5238 percpu_ref_exit(&data->refs); 5275 5239 5276 5240 __io_sqe_files_unregister(ctx); ··· 5503 5477 struct completion *done; 5504 5478 }; 5505 5479 5506 - static void io_ring_file_ref_switch(struct work_struct *work) 5480 + static void io_ring_file_ref_flush(struct fixed_file_data *data) 5507 5481 { 5508 5482 struct io_file_put *pfile, *tmp; 5509 - struct fixed_file_data *data; 5510 5483 struct llist_node *node; 5511 - 5512 - data = container_of(work, struct fixed_file_data, ref_work); 5513 5484 5514 5485 while ((node = llist_del_all(&data->put_llist)) != NULL) { 5515 5486 llist_for_each_entry_safe(pfile, tmp, node, llist) { ··· 5517 5494 kfree(pfile); 5518 5495 } 5519 5496 } 5497 + } 5520 5498 5499 + static void io_ring_file_ref_switch(struct work_struct *work) 5500 + { 5501 + struct fixed_file_data *data; 5502 + 5503 + data = container_of(work, struct fixed_file_data, ref_work); 5504 + io_ring_file_ref_flush(data); 5521 5505 percpu_ref_get(&data->refs); 5522 5506 percpu_ref_switch_to_percpu(&data->refs); 5523 5507 } ··· 5535 5505 5536 5506 data = container_of(ref, struct fixed_file_data, refs); 5537 5507 5538 - /* we can't safely switch from inside this context, 
punt to wq */ 5539 - queue_work(system_wq, &data->ref_work); 5508 + /* 5509 + * We can't safely switch from inside this context, punt to wq. If 5510 + * the table ref is going away, the table is being unregistered. 5511 + * Don't queue up the async work for that case, the caller will 5512 + * handle it. 5513 + */ 5514 + if (!percpu_ref_is_dying(&data->refs)) 5515 + queue_work(system_wq, &data->ref_work); 5540 5516 } 5541 5517 5542 5518 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, ··· 6331 6295 percpu_ref_kill(&ctx->refs); 6332 6296 mutex_unlock(&ctx->uring_lock); 6333 6297 6298 + /* 6299 + * Wait for sq thread to idle, if we have one. It won't spin on new 6300 + * work after we've killed the ctx ref above. This is important to do 6301 + * before we cancel existing commands, as the thread could otherwise 6302 + * be queueing new work post that. If that's work we need to cancel, 6303 + * it could cause shutdown to hang. 6304 + */ 6305 + while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) 6306 + cpu_relax(); 6307 + 6334 6308 io_kill_timeouts(ctx); 6335 6309 io_poll_remove_all(ctx); 6336 6310 ··· 6547 6501 return submitted ? 
submitted : ret; 6548 6502 } 6549 6503 6504 + static int io_uring_show_cred(int id, void *p, void *data) 6505 + { 6506 + const struct cred *cred = p; 6507 + struct seq_file *m = data; 6508 + struct user_namespace *uns = seq_user_ns(m); 6509 + struct group_info *gi; 6510 + kernel_cap_t cap; 6511 + unsigned __capi; 6512 + int g; 6513 + 6514 + seq_printf(m, "%5d\n", id); 6515 + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); 6516 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); 6517 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); 6518 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); 6519 + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); 6520 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); 6521 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); 6522 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); 6523 + seq_puts(m, "\n\tGroups:\t"); 6524 + gi = cred->group_info; 6525 + for (g = 0; g < gi->ngroups; g++) { 6526 + seq_put_decimal_ull(m, g ? 
" " : "", 6527 + from_kgid_munged(uns, gi->gid[g])); 6528 + } 6529 + seq_puts(m, "\n\tCapEff:\t"); 6530 + cap = cred->cap_effective; 6531 + CAP_FOR_EACH_U32(__capi) 6532 + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); 6533 + seq_putc(m, '\n'); 6534 + return 0; 6535 + } 6536 + 6537 + static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) 6538 + { 6539 + int i; 6540 + 6541 + mutex_lock(&ctx->uring_lock); 6542 + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); 6543 + for (i = 0; i < ctx->nr_user_files; i++) { 6544 + struct fixed_file_table *table; 6545 + struct file *f; 6546 + 6547 + table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; 6548 + f = table->files[i & IORING_FILE_TABLE_MASK]; 6549 + if (f) 6550 + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); 6551 + else 6552 + seq_printf(m, "%5u: <none>\n", i); 6553 + } 6554 + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); 6555 + for (i = 0; i < ctx->nr_user_bufs; i++) { 6556 + struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; 6557 + 6558 + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, 6559 + (unsigned int) buf->len); 6560 + } 6561 + if (!idr_is_empty(&ctx->personality_idr)) { 6562 + seq_printf(m, "Personalities:\n"); 6563 + idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); 6564 + } 6565 + mutex_unlock(&ctx->uring_lock); 6566 + } 6567 + 6568 + static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) 6569 + { 6570 + struct io_ring_ctx *ctx = f->private_data; 6571 + 6572 + if (percpu_ref_tryget(&ctx->refs)) { 6573 + __io_uring_show_fdinfo(ctx, m); 6574 + percpu_ref_put(&ctx->refs); 6575 + } 6576 + } 6577 + 6550 6578 static const struct file_operations io_uring_fops = { 6551 6579 .release = io_uring_release, 6552 6580 .flush = io_uring_flush, ··· 6631 6511 #endif 6632 6512 .poll = io_uring_poll, 6633 6513 .fasync = io_uring_fasync, 6514 + .show_fdinfo = io_uring_show_fdinfo, 6634 6515 }; 6635 6516 6636 6517 static int 
io_allocate_scq_urings(struct io_ring_ctx *ctx, ··· 7084 6963 7085 6964 static int __init io_uring_init(void) 7086 6965 { 6966 + #define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ 6967 + BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ 6968 + BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ 6969 + } while (0) 6970 + 6971 + #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ 6972 + __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) 6973 + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); 6974 + BUILD_BUG_SQE_ELEM(0, __u8, opcode); 6975 + BUILD_BUG_SQE_ELEM(1, __u8, flags); 6976 + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); 6977 + BUILD_BUG_SQE_ELEM(4, __s32, fd); 6978 + BUILD_BUG_SQE_ELEM(8, __u64, off); 6979 + BUILD_BUG_SQE_ELEM(8, __u64, addr2); 6980 + BUILD_BUG_SQE_ELEM(16, __u64, addr); 6981 + BUILD_BUG_SQE_ELEM(24, __u32, len); 6982 + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); 6983 + BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); 6984 + BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); 6985 + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); 6986 + BUILD_BUG_SQE_ELEM(28, __u16, poll_events); 6987 + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); 6988 + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); 6989 + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); 6990 + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); 6991 + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); 6992 + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); 6993 + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); 6994 + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); 6995 + BUILD_BUG_SQE_ELEM(32, __u64, user_data); 6996 + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); 6997 + BUILD_BUG_SQE_ELEM(42, __u16, personality); 6998 + 7087 6999 BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); 7088 7000 req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC); 7089 7001 return 0;
+14
include/linux/eventfd.h
··· 12 12 #include <linux/fcntl.h> 13 13 #include <linux/wait.h> 14 14 #include <linux/err.h> 15 + #include <linux/percpu-defs.h> 16 + #include <linux/percpu.h> 15 17 16 18 /* 17 19 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining ··· 42 40 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, 43 41 __u64 *cnt); 44 42 43 + DECLARE_PER_CPU(int, eventfd_wake_count); 44 + 45 + static inline bool eventfd_signal_count(void) 46 + { 47 + return this_cpu_read(eventfd_wake_count); 48 + } 49 + 45 50 #else /* CONFIG_EVENTFD */ 46 51 47 52 /* ··· 75 66 wait_queue_entry_t *wait, __u64 *cnt) 76 67 { 77 68 return -ENOSYS; 69 + } 70 + 71 + static inline bool eventfd_signal_count(void) 72 + { 73 + return false; 78 74 } 79 75 80 76 #endif