Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include <linux/eventfd.h>
7#include <linux/eventpoll.h>
8#include <linux/io_uring.h>
9#include <linux/io_uring_types.h>
10
11#include "io-wq.h"
12#include "eventfd.h"
13
14struct io_ev_fd {
15 struct eventfd_ctx *cq_ev_fd;
16 unsigned int eventfd_async;
17 /* protected by ->completion_lock */
18 unsigned last_cq_tail;
19 refcount_t refs;
20 atomic_t ops;
21 struct rcu_head rcu;
22};
23
/*
 * Bit numbers used in io_ev_fd->ops.
 */
enum {
	/* set by __io_eventfd_signal() when it queues a deferred signal */
	IO_EVENTFD_OP_SIGNAL_BIT = 0,
};
27
28static void io_eventfd_free(struct rcu_head *rcu)
29{
30 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
31
32 eventfd_ctx_put(ev_fd->cq_ev_fd);
33 kfree(ev_fd);
34}
35
36static void io_eventfd_do_signal(struct rcu_head *rcu)
37{
38 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
39
40 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
41
42 if (refcount_dec_and_test(&ev_fd->refs))
43 io_eventfd_free(rcu);
44}
45
46static void io_eventfd_put(struct io_ev_fd *ev_fd)
47{
48 if (refcount_dec_and_test(&ev_fd->refs))
49 call_rcu(&ev_fd->rcu, io_eventfd_free);
50}
51
52static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
53{
54 if (put_ref)
55 io_eventfd_put(ev_fd);
56 rcu_read_unlock();
57}
58
59/*
60 * Returns true if the caller should put the ev_fd reference, false if not.
61 */
62static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
63{
64 if (eventfd_signal_allowed()) {
65 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
66 return true;
67 }
68 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
69 call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
70 return false;
71 }
72 return true;
73}
74
75/*
76 * Trigger if eventfd_async isn't set, or if it's set and the caller is
77 * an async worker. If ev_fd isn't valid, obviously return false.
78 */
79static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
80{
81 if (ev_fd)
82 return !ev_fd->eventfd_async || io_wq_current_is_worker();
83 return false;
84}
85
86/*
87 * On success, returns with an ev_fd reference grabbed and the RCU read
88 * lock held.
89 */
90static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
91{
92 struct io_ev_fd *ev_fd;
93
94 if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
95 return NULL;
96
97 rcu_read_lock();
98
99 /*
100 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
101 * and eventfd_signal
102 */
103 ev_fd = rcu_dereference(ctx->io_ev_fd);
104
105 /*
106 * Check again if ev_fd exists in case an io_eventfd_unregister call
107 * completed between the NULL check of ctx->io_ev_fd at the start of
108 * the function and rcu_read_lock.
109 */
110 if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
111 return ev_fd;
112
113 rcu_read_unlock();
114 return NULL;
115}
116
117void io_eventfd_signal(struct io_ring_ctx *ctx)
118{
119 struct io_ev_fd *ev_fd;
120
121 ev_fd = io_eventfd_grab(ctx);
122 if (ev_fd)
123 io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
124}
125
126void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
127{
128 struct io_ev_fd *ev_fd;
129
130 ev_fd = io_eventfd_grab(ctx);
131 if (ev_fd) {
132 bool skip, put_ref = true;
133
134 /*
135 * Eventfd should only get triggered when at least one event
136 * has been posted. Some applications rely on the eventfd
137 * notification count only changing IFF a new CQE has been
138 * added to the CQ ring. There's no dependency on 1:1
139 * relationship between how many times this function is called
140 * (and hence the eventfd count) and number of CQEs posted to
141 * the CQ ring.
142 */
143 spin_lock(&ctx->completion_lock);
144 skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
145 ev_fd->last_cq_tail = ctx->cached_cq_tail;
146 spin_unlock(&ctx->completion_lock);
147
148 if (!skip)
149 put_ref = __io_eventfd_signal(ev_fd);
150
151 io_eventfd_release(ev_fd, put_ref);
152 }
153}
154
155int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
156 unsigned int eventfd_async)
157{
158 struct io_ev_fd *ev_fd;
159 __s32 __user *fds = arg;
160 int fd;
161
162 ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
163 lockdep_is_held(&ctx->uring_lock));
164 if (ev_fd)
165 return -EBUSY;
166
167 if (copy_from_user(&fd, fds, sizeof(*fds)))
168 return -EFAULT;
169
170 ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
171 if (!ev_fd)
172 return -ENOMEM;
173
174 ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
175 if (IS_ERR(ev_fd->cq_ev_fd)) {
176 int ret = PTR_ERR(ev_fd->cq_ev_fd);
177
178 kfree(ev_fd);
179 return ret;
180 }
181
182 spin_lock(&ctx->completion_lock);
183 ev_fd->last_cq_tail = ctx->cached_cq_tail;
184 spin_unlock(&ctx->completion_lock);
185
186 ev_fd->eventfd_async = eventfd_async;
187 ctx->has_evfd = true;
188 refcount_set(&ev_fd->refs, 1);
189 atomic_set(&ev_fd->ops, 0);
190 rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
191 return 0;
192}
193
194int io_eventfd_unregister(struct io_ring_ctx *ctx)
195{
196 struct io_ev_fd *ev_fd;
197
198 ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
199 lockdep_is_held(&ctx->uring_lock));
200 if (ev_fd) {
201 ctx->has_evfd = false;
202 rcu_assign_pointer(ctx->io_ev_fd, NULL);
203 io_eventfd_put(ev_fd);
204 return 0;
205 }
206
207 return -ENXIO;
208}