/* SPDX-License-Identifier: MIT */

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

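/*
 * Per-field pointer views of the submission and completion rings, matching
 * the layout the kernel exposes through the mmap()ed ring memory.
 */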
struct io_sq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
};

struct io_cq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	struct io_uring_cqe *cqes;
};

struct io_uring_sq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *kflags;
	unsigned int *kdropped;
	unsigned int *array;
	struct io_uring_sqe *sqes;

	unsigned int sqe_head;
	unsigned int sqe_tail;

	size_t ring_sz;
};

struct io_uring_cq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
};

struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	int ring_fd;
	unsigned flags;
};

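/*
 * Memory barriers for synchronizing with the kernel side of the rings.
 * On x86 a compiler barrier is sufficient thanks to the architecture's
 * strong load/store ordering; other architectures fall back to a full
 * memory barrier.
 */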
#if defined(__x86_64) || defined(__i386__)
#define read_barrier()	__asm__ __volatile__("":::"memory")
#define write_barrier()	__asm__ __volatile__("":::"memory")
#else
#define read_barrier()	__sync_synchronize()
#define write_barrier()	__sync_synchronize()
#endif

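/*
 * Map the ring structures the kernel set up for this ring fd: the SQ ring
 * metadata (plus the SQ index array unless IORING_SETUP_NO_SQARRAY is set),
 * the SQE array, and the CQ ring with its CQEs.
 */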
static inline int io_uring_mmap(int fd, struct io_uring_params *p,
				struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	void *ptr;
	int ret;

	if (p->flags & IORING_SETUP_NO_SQARRAY) {
		sq->ring_sz = p->cq_off.cqes;
		sq->ring_sz += p->cq_entries * sizeof(struct io_uring_cqe);
	} else {
		sq->ring_sz = p->sq_off.array;
		sq->ring_sz += p->sq_entries * sizeof(unsigned int);
	}

	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (ptr == MAP_FAILED)
		return -errno;
	sq->khead = ptr + p->sq_off.head;
	sq->ktail = ptr + p->sq_off.tail;
	sq->kring_mask = ptr + p->sq_off.ring_mask;
	sq->kring_entries = ptr + p->sq_off.ring_entries;
	sq->kflags = ptr + p->sq_off.flags;
	sq->kdropped = ptr + p->sq_off.dropped;
	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
		sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		munmap(sq->khead, sq->ring_sz);
		return ret;
	}

	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (ptr == MAP_FAILED) {
		ret = -errno;
		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
		goto err;
	}
	cq->khead = ptr + p->cq_off.head;
	cq->ktail = ptr + p->cq_off.tail;
	cq->kring_mask = ptr + p->cq_off.ring_mask;
	cq->kring_entries = ptr + p->cq_off.ring_entries;
	cq->koverflow = ptr + p->cq_off.overflow;
	cq->cqes = ptr + p->cq_off.cqes;
	return 0;
}

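/* Thin wrappers around the raw io_uring_setup(2) and io_uring_enter(2) syscalls. */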
static inline int io_uring_setup(unsigned int entries,
				 struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

static inline int io_uring_enter(int fd, unsigned int to_submit,
				 unsigned int min_complete,
				 unsigned int flags, sigset_t *sig)
{
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
		       flags, sig, _NSIG / 8);
}

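/*
 * Create an io_uring instance with the given parameters and map its rings.
 * On success the ring fd and flags are stored in the ring structure; on
 * failure the fd (if any) is closed and a negative error is returned.
 */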
static inline int io_uring_queue_init_params(unsigned int entries,
					     struct io_uring *ring,
					     struct io_uring_params *p)
{
	int fd, ret;

	memset(ring, 0, sizeof(*ring));

	fd = io_uring_setup(entries, p);
	if (fd < 0)
		return fd;
	ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
	if (!ret) {
		ring->ring_fd = fd;
		ring->flags = p->flags;
	} else {
		close(fd);
	}
	return ret;
}

static inline int io_uring_queue_init(unsigned int entries,
				      struct io_uring *ring,
				      unsigned int flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags;

	return io_uring_queue_init_params(entries, ring, &p);
}

/* Get a free SQE from the submission queue, or NULL if the queue is full. */
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
		return NULL;
	return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
}

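/*
 * Wait for at least one completion: if the CQ ring is empty, block in
 * io_uring_enter(IORING_ENTER_GETEVENTS). The returned CQE is not consumed;
 * call io_uring_cqe_seen() once it has been processed.
 */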
static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned int mask = *cq->kring_mask;
	unsigned int head = *cq->khead;
	int ret;

	*cqe_ptr = NULL;
	do {
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		ret = io_uring_enter(ring->ring_fd, 0, 1,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}

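/*
 * Submit the SQEs queued up via io_uring_get_sqe(): fill the SQ index array
 * (unless IORING_SETUP_NO_SQARRAY is set), publish the new SQ tail, and tell
 * the kernel how many entries to consume. Returns the io_uring_enter()
 * result or a negative errno.
 */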
static inline int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned int mask = *sq->kring_mask;
	unsigned int ktail, submitted, to_submit;
	int ret;

	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;

	if (!(ring->flags & IORING_SETUP_NO_SQARRAY)) {
		for (submitted = 0; submitted < to_submit; submitted++) {
			read_barrier();
			sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
		}
	} else {
		ktail += to_submit;
		sq->sqe_head += to_submit;
		submitted = to_submit;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}
submit:
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
			     IORING_ENTER_GETEVENTS, NULL);
	return ret < 0 ? -errno : ret;
}

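/* Unmap the SQE array and the ring memory, and close the ring fd. */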
static inline void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	munmap(sq->khead, sq->ring_sz);
	close(ring->ring_fd);
}

/*
 * Prepare an IORING_OP_URING_CMD SQE with socket-option style arguments
 * (level/optname/optval/optlen). The SQE is only filled in here; it still
 * has to be submitted.
 */
static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
				     int sockfd,
				     int level, int optname,
				     const void *optval,
				     int optlen)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_URING_CMD;
	sqe->fd = sockfd;
	sqe->cmd_op = op;

	sqe->level = level;
	sqe->optname = optname;
	sqe->optval = (unsigned long long)optval;
	sqe->optlen = optlen;
}

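/* Register fixed buffers with the ring via io_uring_register(2). */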
static inline int io_uring_register_buffers(struct io_uring *ring,
					    const struct iovec *iovecs,
					    unsigned int nr_iovecs)
{
	int ret;

	ret = syscall(__NR_io_uring_register, ring->ring_fd,
		      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
	return (ret < 0) ? -errno : ret;
}

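/* Prepare a plain IORING_OP_SEND SQE for the given socket. */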
static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_SEND;
	sqe->fd = sockfd;
	sqe->addr = (unsigned long)buf;
	sqe->len = len;
	sqe->msg_flags = (__u32)flags;
}

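/*
 * Prepare a zero-copy send (IORING_OP_SEND_ZC). Zero-copy specific flags
 * (such as IORING_RECVSEND_FIXED_BUF) are passed through the ioprio field.
 */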
static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
					const void *buf, size_t len, int flags,
					unsigned int zc_flags)
{
	io_uring_prep_send(sqe, sockfd, buf, len, flags);
	sqe->opcode = (__u8)IORING_OP_SEND_ZC;
	sqe->ioprio = zc_flags;
}

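/* Mark one CQE as consumed by advancing the CQ head. */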
static inline void io_uring_cqe_seen(struct io_uring *ring)
{
	*ring->cq.khead += 1;
	write_barrier();
}
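
/*
 * Illustrative usage sketch (not part of the original header): send a buffer
 * on an already-connected socket "sockfd" (assumed to be set up elsewhere)
 * using the helpers above, with error handling trimmed for brevity.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	const char msg[] = "hello";
 *
 *	if (io_uring_queue_init(16, &ring, 0) < 0)
 *		return -1;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_send(sqe, sockfd, msg, sizeof(msg), 0);
 *	if (io_uring_submit(&ring) >= 0 &&
 *	    io_uring_wait_cqe(&ring, &cqe) == 0) {
 *		(cqe->res holds the byte count or a negative error)
 *		io_uring_cqe_seen(&ring);
 *	}
 *	io_uring_queue_exit(&ring);
 */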