Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/file.h>
5#include <linux/io_uring/cmd.h>
6#include <linux/io_uring/net.h>
7#include <linux/security.h>
8#include <linux/nospec.h>
9#include <net/sock.h>
10
11#include <uapi/linux/io_uring.h>
12#include <asm/ioctls.h>
13
14#include "io_uring.h"
15#include "alloc_cache.h"
16#include "rsrc.h"
17#include "uring_cmd.h"
18
19void io_cmd_cache_free(const void *entry)
20{
21 struct io_async_cmd *ac = (struct io_async_cmd *)entry;
22
23 io_vec_free(&ac->vec);
24 kfree(ac);
25}
26
27static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
28{
29 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
30 struct io_async_cmd *ac = req->async_data;
31 struct io_uring_cmd_data *cache = &ac->data;
32
33 if (cache->op_data) {
34 kfree(cache->op_data);
35 cache->op_data = NULL;
36 }
37
38 if (issue_flags & IO_URING_F_UNLOCKED)
39 return;
40
41 io_alloc_cache_vec_kasan(&ac->vec);
42 if (ac->vec.nr > IO_VEC_CACHE_SOFT_CAP)
43 io_vec_free(&ac->vec);
44
45 if (io_alloc_cache_put(&req->ctx->cmd_cache, cache)) {
46 ioucmd->sqe = NULL;
47 req->async_data = NULL;
48 req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP);
49 }
50}
51
/*
 * Cleanup entry point for uring_cmd requests: runs the common cleanup
 * helper with no issue flags set.
 */
void io_uring_cmd_cleanup(struct io_kiocb *req)
{
	const unsigned int no_issue_flags = 0;

	io_req_uring_cleanup(req, no_issue_flags);
}
56
57bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
58 struct io_uring_task *tctx, bool cancel_all)
59{
60 struct hlist_node *tmp;
61 struct io_kiocb *req;
62 bool ret = false;
63
64 lockdep_assert_held(&ctx->uring_lock);
65
66 hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
67 hash_node) {
68 struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
69 struct io_uring_cmd);
70 struct file *file = req->file;
71
72 if (!cancel_all && req->tctx != tctx)
73 continue;
74
75 if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
76 file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
77 IO_URING_F_COMPLETE_DEFER);
78 ret = true;
79 }
80 }
81 io_submit_flush_completions(ctx);
82 return ret;
83}
84
85static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd,
86 unsigned int issue_flags)
87{
88 struct io_kiocb *req = cmd_to_io_kiocb(cmd);
89 struct io_ring_ctx *ctx = req->ctx;
90
91 if (!(cmd->flags & IORING_URING_CMD_CANCELABLE))
92 return;
93
94 cmd->flags &= ~IORING_URING_CMD_CANCELABLE;
95 io_ring_submit_lock(ctx, issue_flags);
96 hlist_del(&req->hash_node);
97 io_ring_submit_unlock(ctx, issue_flags);
98}
99
100/*
101 * Mark this command as concelable, then io_uring_try_cancel_uring_cmd()
102 * will try to cancel this issued command by sending ->uring_cmd() with
103 * issue_flags of IO_URING_F_CANCEL.
104 *
105 * The command is guaranteed to not be done when calling ->uring_cmd()
106 * with IO_URING_F_CANCEL, but it is driver's responsibility to deal
107 * with race between io_uring canceling and normal completion.
108 */
109void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
110 unsigned int issue_flags)
111{
112 struct io_kiocb *req = cmd_to_io_kiocb(cmd);
113 struct io_ring_ctx *ctx = req->ctx;
114
115 if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) {
116 cmd->flags |= IORING_URING_CMD_CANCELABLE;
117 io_ring_submit_lock(ctx, issue_flags);
118 hlist_add_head(&req->hash_node, &ctx->cancelable_uring_cmd);
119 io_ring_submit_unlock(ctx, issue_flags);
120 }
121}
122EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
123
124static void io_uring_cmd_work(struct io_kiocb *req, io_tw_token_t tw)
125{
126 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
127 unsigned int flags = IO_URING_F_COMPLETE_DEFER;
128
129 if (io_should_terminate_tw())
130 flags |= IO_URING_F_TASK_DEAD;
131
132 /* task_work executor checks the deffered list completion */
133 ioucmd->task_work_cb(ioucmd, flags);
134}
135
136void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
137 void (*task_work_cb)(struct io_uring_cmd *, unsigned),
138 unsigned flags)
139{
140 struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
141
142 ioucmd->task_work_cb = task_work_cb;
143 req->io_task_work.func = io_uring_cmd_work;
144 __io_req_task_work_add(req, flags);
145}
146EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
147
148static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
149 u64 extra1, u64 extra2)
150{
151 req->big_cqe.extra1 = extra1;
152 req->big_cqe.extra2 = extra2;
153}
154
155/*
156 * Called by consumers of io_uring_cmd, if they originally returned
157 * -EIOCBQUEUED upon receiving the command.
158 */
159void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
160 unsigned issue_flags)
161{
162 struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
163
164 io_uring_cmd_del_cancelable(ioucmd, issue_flags);
165
166 if (ret < 0)
167 req_set_fail(req);
168
169 io_req_set_res(req, ret, 0);
170 if (req->ctx->flags & IORING_SETUP_CQE32)
171 io_req_set_cqe32_extra(req, res2, 0);
172 io_req_uring_cleanup(req, issue_flags);
173 if (req->ctx->flags & IORING_SETUP_IOPOLL) {
174 /* order with io_iopoll_req_issued() checking ->iopoll_complete */
175 smp_store_release(&req->iopoll_completed, 1);
176 } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
177 if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
178 return;
179 io_req_complete_defer(req);
180 } else {
181 req->io_task_work.func = io_req_task_complete;
182 io_req_task_work_add(req);
183 }
184}
185EXPORT_SYMBOL_GPL(io_uring_cmd_done);
186
187static int io_uring_cmd_prep_setup(struct io_kiocb *req,
188 const struct io_uring_sqe *sqe)
189{
190 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
191 struct io_async_cmd *ac;
192
193 /* see io_uring_cmd_get_async_data() */
194 BUILD_BUG_ON(offsetof(struct io_async_cmd, data) != 0);
195
196 ac = io_uring_alloc_async_data(&req->ctx->cmd_cache, req);
197 if (!ac)
198 return -ENOMEM;
199 ac->data.op_data = NULL;
200
201 /*
202 * Unconditionally cache the SQE for now - this is only needed for
203 * requests that go async, but prep handlers must ensure that any
204 * sqe data is stable beyond prep. Since uring_cmd is special in
205 * that it doesn't read in per-op data, play it safe and ensure that
206 * any SQE data is stable beyond prep. This can later get relaxed.
207 */
208 memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
209 ioucmd->sqe = ac->sqes;
210 return 0;
211}
212
213int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
214{
215 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
216
217 if (sqe->__pad1)
218 return -EINVAL;
219
220 ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
221 if (ioucmd->flags & ~IORING_URING_CMD_MASK)
222 return -EINVAL;
223
224 if (ioucmd->flags & IORING_URING_CMD_FIXED)
225 req->buf_index = READ_ONCE(sqe->buf_index);
226
227 ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
228
229 return io_uring_cmd_prep_setup(req, sqe);
230}
231
232int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
233{
234 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
235 struct io_ring_ctx *ctx = req->ctx;
236 struct file *file = req->file;
237 int ret;
238
239 if (!file->f_op->uring_cmd)
240 return -EOPNOTSUPP;
241
242 ret = security_uring_cmd(ioucmd);
243 if (ret)
244 return ret;
245
246 if (ctx->flags & IORING_SETUP_SQE128)
247 issue_flags |= IO_URING_F_SQE128;
248 if (ctx->flags & IORING_SETUP_CQE32)
249 issue_flags |= IO_URING_F_CQE32;
250 if (io_is_compat(ctx))
251 issue_flags |= IO_URING_F_COMPAT;
252 if (ctx->flags & IORING_SETUP_IOPOLL) {
253 if (!file->f_op->uring_cmd_iopoll)
254 return -EOPNOTSUPP;
255 issue_flags |= IO_URING_F_IOPOLL;
256 req->iopoll_completed = 0;
257 }
258
259 ret = file->f_op->uring_cmd(ioucmd, issue_flags);
260 if (ret == -EAGAIN || ret == -EIOCBQUEUED)
261 return ret;
262 if (ret < 0)
263 req_set_fail(req);
264 io_req_uring_cleanup(req, issue_flags);
265 io_req_set_res(req, ret, 0);
266 return IOU_OK;
267}
268
269int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
270 struct iov_iter *iter,
271 struct io_uring_cmd *ioucmd,
272 unsigned int issue_flags)
273{
274 struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
275
276 return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags);
277}
278EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
279
280int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
281 const struct iovec __user *uvec,
282 size_t uvec_segs,
283 int ddir, struct iov_iter *iter,
284 unsigned issue_flags)
285{
286 struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
287 struct io_async_cmd *ac = req->async_data;
288 int ret;
289
290 ret = io_prep_reg_iovec(req, &ac->vec, uvec, uvec_segs);
291 if (ret)
292 return ret;
293
294 return io_import_reg_vec(ddir, iter, req, &ac->vec, uvec_segs,
295 issue_flags);
296}
297EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed_vec);
298
/* Queue the request backing @ioucmd to io-wq via io_req_queue_iowq(). */
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
{
	io_req_queue_iowq(cmd_to_io_kiocb(ioucmd));
}
305
306static inline int io_uring_cmd_getsockopt(struct socket *sock,
307 struct io_uring_cmd *cmd,
308 unsigned int issue_flags)
309{
310 const struct io_uring_sqe *sqe = cmd->sqe;
311 bool compat = !!(issue_flags & IO_URING_F_COMPAT);
312 int optlen, optname, level, err;
313 void __user *optval;
314
315 level = READ_ONCE(sqe->level);
316 if (level != SOL_SOCKET)
317 return -EOPNOTSUPP;
318
319 optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
320 optname = READ_ONCE(sqe->optname);
321 optlen = READ_ONCE(sqe->optlen);
322
323 err = do_sock_getsockopt(sock, compat, level, optname,
324 USER_SOCKPTR(optval),
325 KERNEL_SOCKPTR(&optlen));
326 if (err)
327 return err;
328
329 /* On success, return optlen */
330 return optlen;
331}
332
333static inline int io_uring_cmd_setsockopt(struct socket *sock,
334 struct io_uring_cmd *cmd,
335 unsigned int issue_flags)
336{
337 const struct io_uring_sqe *sqe = cmd->sqe;
338 bool compat = !!(issue_flags & IO_URING_F_COMPAT);
339 int optname, optlen, level;
340 void __user *optval;
341 sockptr_t optval_s;
342
343 optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
344 optname = READ_ONCE(sqe->optname);
345 optlen = READ_ONCE(sqe->optlen);
346 level = READ_ONCE(sqe->level);
347 optval_s = USER_SOCKPTR(optval);
348
349 return do_sock_setsockopt(sock, compat, level, optname, optval_s,
350 optlen);
351}
352
#if defined(CONFIG_NET)
/*
 * uring_cmd handler for sockets; dispatches on cmd->cmd_op.
 *
 * SIOCINQ/SIOCOUTQ are forwarded to the protocol's ->ioctl() and return
 * the queried value on success. GETSOCKOPT/SETSOCKOPT go through the
 * helpers above. Unknown ops, or a protocol without an ->ioctl() handler,
 * yield -EOPNOTSUPP.
 */
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct socket *sock = cmd->file->private_data;
	struct sock *sk = sock->sk;
	struct proto *proto = READ_ONCE(sk->sk_prot);
	int val = 0;
	int rc;

	if (!proto || !proto->ioctl)
		return -EOPNOTSUPP;

	switch (cmd->cmd_op) {
	case SOCKET_URING_OP_SIOCINQ:
		rc = proto->ioctl(sk, SIOCINQ, &val);
		return rc ? rc : val;
	case SOCKET_URING_OP_SIOCOUTQ:
		rc = proto->ioctl(sk, SIOCOUTQ, &val);
		return rc ? rc : val;
	case SOCKET_URING_OP_GETSOCKOPT:
		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
	case SOCKET_URING_OP_SETSOCKOPT:
		return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
	default:
		return -EOPNOTSUPP;
	}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
#endif