// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * Check again if ev_fd exists in case an io_eventfd_unregister call
	 * completed between the NULL check of ctx->io_ev_fd at the start of
	 * the function and rcu_read_lock.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and number of CQEs posted to
		 * the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}
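Not part of eventfd.c: the sketch below shows how userspace would exercise io_eventfd_register()/io_eventfd_signal() from the other side, assuming liburing's io_uring_register_eventfd() wrapper (which issues IORING_REGISTER_EVENTFD) and a plain eventfd(2). It is a minimal illustration, not a reference usage; error handling is abbreviated.

/* Userspace sketch (assumes liburing): register an eventfd with a ring so
 * that posted CQEs bump the eventfd counter, then block on read(2). */
#include <sys/eventfd.h>
#include <liburing.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	uint64_t count;
	int efd;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0)
		return 1;

	/* Handled in the kernel by io_eventfd_register() above */
	if (io_uring_register_eventfd(&ring, efd) < 0) {
		fprintf(stderr, "register_eventfd failed\n");
		return 1;
	}

	/* Queue a no-op request; its completion signals the eventfd */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	/* read(2) blocks until at least one CQE has been posted */
	if (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("eventfd signalled, count=%llu\n",
		       (unsigned long long)count);

	io_uring_unregister_eventfd(&ring);	/* io_eventfd_unregister() path */
	io_uring_queue_exit(&ring);
	close(efd);
	return 0;
}

The last_cq_tail bookkeeping in io_eventfd_signal() is what keeps the read() above meaningful: the eventfd count only moves when the CQ tail has actually advanced, so a wakeup always corresponds to at least one new CQE.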