Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-5.19/io_uring-socket' into for-5.19/io_uring-passthrough

* for-5.19/io_uring-socket:
io_uring: use the text representation of ops in trace
io_uring: rename op -> opcode
io_uring: add io_uring_get_opcode
io_uring: add type to op enum
io_uring: add socket(2) support
net: add __sys_socket_file()
io_uring: fix trace for reduced sqe padding
io_uring: add fgetxattr and getxattr support
io_uring: add fsetxattr and setxattr support
fs: split off do_getxattr from getxattr
fs: split off setxattr_copy and do_setxattr function from setxattr

+681 -78
+29
fs/internal.h
··· 191 191 struct pipe_inode_info *opipe, 192 192 loff_t *offset, 193 193 size_t len, unsigned int flags); 194 + 195 + /* 196 + * fs/xattr.c: 197 + */ 198 + struct xattr_name { 199 + char name[XATTR_NAME_MAX + 1]; 200 + }; 201 + 202 + struct xattr_ctx { 203 + /* Value of attribute */ 204 + union { 205 + const void __user *cvalue; 206 + void __user *value; 207 + }; 208 + void *kvalue; 209 + size_t size; 210 + /* Attribute name */ 211 + struct xattr_name *kname; 212 + unsigned int flags; 213 + }; 214 + 215 + 216 + ssize_t do_getxattr(struct user_namespace *mnt_userns, 217 + struct dentry *d, 218 + struct xattr_ctx *ctx); 219 + 220 + int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); 221 + int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, 222 + struct xattr_ctx *ctx);
+471
fs/io_uring.c
··· 80 80 #include <linux/io_uring.h> 81 81 #include <linux/audit.h> 82 82 #include <linux/security.h> 83 + #include <linux/xattr.h> 83 84 84 85 #define CREATE_TRACE_POINTS 85 86 #include <trace/events/io_uring.h> ··· 579 578 unsigned long nofile; 580 579 }; 581 580 581 + struct io_socket { 582 + struct file *file; 583 + int domain; 584 + int type; 585 + int protocol; 586 + int flags; 587 + u32 file_slot; 588 + unsigned long nofile; 589 + }; 590 + 582 591 struct io_sync { 583 592 struct file *file; 584 593 loff_t len; ··· 793 782 struct wait_page_queue wpq; 794 783 }; 795 784 785 + struct io_xattr { 786 + struct file *file; 787 + struct xattr_ctx ctx; 788 + struct filename *filename; 789 + }; 790 + 796 791 enum { 797 792 REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, 798 793 REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, ··· 963 946 struct io_symlink symlink; 964 947 struct io_hardlink hardlink; 965 948 struct io_msg msg; 949 + struct io_xattr xattr; 950 + struct io_socket sock; 966 951 }; 967 952 968 953 u8 opcode; ··· 1265 1246 .needs_file = 1, 1266 1247 .iopoll = 1, 1267 1248 }, 1249 + [IORING_OP_FSETXATTR] = { 1250 + .needs_file = 1 1251 + }, 1252 + [IORING_OP_SETXATTR] = {}, 1253 + [IORING_OP_FGETXATTR] = { 1254 + .needs_file = 1 1255 + }, 1256 + [IORING_OP_GETXATTR] = {}, 1257 + [IORING_OP_SOCKET] = { 1258 + .audit_skip = 1, 1259 + }, 1268 1260 }; 1269 1261 1270 1262 /* requests with any of those set should undergo io_disarm_next() */ ··· 1319 1289 static struct kmem_cache *req_cachep; 1320 1290 1321 1291 static const struct file_operations io_uring_fops; 1292 + 1293 + const char *io_uring_get_opcode(u8 opcode) 1294 + { 1295 + switch ((enum io_uring_op)opcode) { 1296 + case IORING_OP_NOP: 1297 + return "NOP"; 1298 + case IORING_OP_READV: 1299 + return "READV"; 1300 + case IORING_OP_WRITEV: 1301 + return "WRITEV"; 1302 + case IORING_OP_FSYNC: 1303 + return "FSYNC"; 1304 + case IORING_OP_READ_FIXED: 1305 + return "READ_FIXED"; 1306 + case IORING_OP_WRITE_FIXED: 1307 + return "WRITE_FIXED"; 1308 + case IORING_OP_POLL_ADD: 1309 + return "POLL_ADD"; 1310 + case IORING_OP_POLL_REMOVE: 1311 + return "POLL_REMOVE"; 1312 + case IORING_OP_SYNC_FILE_RANGE: 1313 + return "SYNC_FILE_RANGE"; 1314 + case IORING_OP_SENDMSG: 1315 + return "SENDMSG"; 1316 + case IORING_OP_RECVMSG: 1317 + return "RECVMSG"; 1318 + case IORING_OP_TIMEOUT: 1319 + return "TIMEOUT"; 1320 + case IORING_OP_TIMEOUT_REMOVE: 1321 + return "TIMEOUT_REMOVE"; 1322 + case IORING_OP_ACCEPT: 1323 + return "ACCEPT"; 1324 + case IORING_OP_ASYNC_CANCEL: 1325 + return "ASYNC_CANCEL"; 1326 + case IORING_OP_LINK_TIMEOUT: 1327 + return "LINK_TIMEOUT"; 1328 + case IORING_OP_CONNECT: 1329 + return "CONNECT"; 1330 + case IORING_OP_FALLOCATE: 1331 + return "FALLOCATE"; 1332 + case IORING_OP_OPENAT: 1333 + return "OPENAT"; 1334 + case IORING_OP_CLOSE: 1335 + return "CLOSE"; 1336 + case IORING_OP_FILES_UPDATE: 1337 + return "FILES_UPDATE"; 1338 + case IORING_OP_STATX: 1339 + return "STATX"; 1340 + case IORING_OP_READ: 1341 + return "READ"; 1342 + case IORING_OP_WRITE: 1343 + return "WRITE"; 1344 + case IORING_OP_FADVISE: 1345 + return "FADVISE"; 1346 + case IORING_OP_MADVISE: 1347 + return "MADVISE"; 1348 + case IORING_OP_SEND: 1349 + return "SEND"; 1350 + case IORING_OP_RECV: 1351 + return "RECV"; 1352 + case IORING_OP_OPENAT2: 1353 + return "OPENAT2"; 1354 + case IORING_OP_EPOLL_CTL: 1355 + return "EPOLL_CTL"; 1356 + case IORING_OP_SPLICE: 1357 + return "SPLICE"; 1358 + case IORING_OP_PROVIDE_BUFFERS: 1359 + return "PROVIDE_BUFFERS"; 1360 + case IORING_OP_REMOVE_BUFFERS: 1361 + return "REMOVE_BUFFERS"; 1362 + case IORING_OP_TEE: 1363 + return "TEE"; 1364 + case IORING_OP_SHUTDOWN: 1365 + return "SHUTDOWN"; 1366 + case IORING_OP_RENAMEAT: 1367 + return "RENAMEAT"; 1368 + case IORING_OP_UNLINKAT: 1369 + return "UNLINKAT"; 1370 + case IORING_OP_MKDIRAT: 1371 + return "MKDIRAT"; 1372 + case IORING_OP_SYMLINKAT: 1373 + return "SYMLINKAT"; 1374 + case IORING_OP_LINKAT: 1375 + return "LINKAT"; 1376 + case IORING_OP_MSG_RING: 1377 + return "MSG_RING"; 1378 + case IORING_OP_FSETXATTR: 1379 + return "FSETXATTR"; 1380 + case IORING_OP_SETXATTR: 1381 + return "SETXATTR"; 1382 + case IORING_OP_FGETXATTR: 1383 + return "FGETXATTR"; 1384 + case IORING_OP_GETXATTR: 1385 + return "GETXATTR"; 1386 + case IORING_OP_SOCKET: 1387 + return "SOCKET"; 1388 + case IORING_OP_LAST: 1389 + return "INVALID"; 1390 + } 1391 + return "INVALID"; 1392 + } 1322 1393 1323 1394 struct sock *io_uring_get_socket(struct file *file) 1324 1395 { ··· 4336 4205 return 0; 4337 4206 } 4338 4207 4208 + static inline void __io_xattr_finish(struct io_kiocb *req) 4209 + { 4210 + struct io_xattr *ix = &req->xattr; 4211 + 4212 + if (ix->filename) 4213 + putname(ix->filename); 4214 + 4215 + kfree(ix->ctx.kname); 4216 + kvfree(ix->ctx.kvalue); 4217 + } 4218 + 4219 + static void io_xattr_finish(struct io_kiocb *req, int ret) 4220 + { 4221 + req->flags &= ~REQ_F_NEED_CLEANUP; 4222 + 4223 + __io_xattr_finish(req); 4224 + if (ret < 0) 4225 + req_set_fail(req); 4226 + 4227 + io_req_complete(req, ret); 4228 + } 4229 + 4230 + static int __io_getxattr_prep(struct io_kiocb *req, 4231 + const struct io_uring_sqe *sqe) 4232 + { 4233 + struct io_xattr *ix = &req->xattr; 4234 + const char __user *name; 4235 + int ret; 4236 + 4237 + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 4238 + return -EINVAL; 4239 + if (unlikely(sqe->ioprio)) 4240 + return -EINVAL; 4241 + if (unlikely(req->flags & REQ_F_FIXED_FILE)) 4242 + return -EBADF; 4243 + 4244 + ix->filename = NULL; 4245 + ix->ctx.kvalue = NULL; 4246 + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); 4247 + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 4248 + ix->ctx.size = READ_ONCE(sqe->len); 4249 + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); 4250 + 4251 + if (ix->ctx.flags) 4252 + return -EINVAL; 4253 + 4254 + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); 4255 + if (!ix->ctx.kname) 4256 + return -ENOMEM; 4257 + 4258 + ret = strncpy_from_user(ix->ctx.kname->name, name, 4259 + sizeof(ix->ctx.kname->name)); 4260 + if (!ret || ret == sizeof(ix->ctx.kname->name)) 4261 + ret = -ERANGE; 4262 + if (ret < 0) { 4263 + kfree(ix->ctx.kname); 4264 + return ret; 4265 + } 4266 + 4267 + req->flags |= REQ_F_NEED_CLEANUP; 4268 + return 0; 4269 + } 4270 + 4271 + static int io_fgetxattr_prep(struct io_kiocb *req, 4272 + const struct io_uring_sqe *sqe) 4273 + { 4274 + return __io_getxattr_prep(req, sqe); 4275 + } 4276 + 4277 + static int io_getxattr_prep(struct io_kiocb *req, 4278 + const struct io_uring_sqe *sqe) 4279 + { 4280 + struct io_xattr *ix = &req->xattr; 4281 + const char __user *path; 4282 + int ret; 4283 + 4284 + ret = __io_getxattr_prep(req, sqe); 4285 + if (ret) 4286 + return ret; 4287 + 4288 + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); 4289 + 4290 + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); 4291 + if (IS_ERR(ix->filename)) { 4292 + ret = PTR_ERR(ix->filename); 4293 + ix->filename = NULL; 4294 + } 4295 + 4296 + return ret; 4297 + } 4298 + 4299 + static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags) 4300 + { 4301 + struct io_xattr *ix = &req->xattr; 4302 + int ret; 4303 + 4304 + if (issue_flags & IO_URING_F_NONBLOCK) 4305 + return -EAGAIN; 4306 + 4307 + ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt), 4308 + req->file->f_path.dentry, 4309 + &ix->ctx); 4310 + 4311 + io_xattr_finish(req, ret); 4312 + return 0; 4313 + } 4314 + 4315 + static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) 4316 + { 4317 + struct io_xattr *ix = &req->xattr; 4318 + unsigned int lookup_flags = LOOKUP_FOLLOW; 4319 + struct path path; 4320 + int ret; 4321 + 4322 + if (issue_flags & IO_URING_F_NONBLOCK) 4323 + return -EAGAIN; 4324 + 4325 + retry: 4326 + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); 4327 + if (!ret) { 4328 + ret = do_getxattr(mnt_user_ns(path.mnt), 4329 + path.dentry, 4330 + &ix->ctx); 4331 + 4332 + path_put(&path); 4333 + if (retry_estale(ret, lookup_flags)) { 4334 + lookup_flags |= LOOKUP_REVAL; 4335 + goto retry; 4336 + } 4337 + } 4338 + 4339 + io_xattr_finish(req, ret); 4340 + return 0; 4341 + } 4342 + 4343 + static int __io_setxattr_prep(struct io_kiocb *req, 4344 + const struct io_uring_sqe *sqe) 4345 + { 4346 + struct io_xattr *ix = &req->xattr; 4347 + const char __user *name; 4348 + int ret; 4349 + 4350 + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 4351 + return -EINVAL; 4352 + if (unlikely(sqe->ioprio)) 4353 + return -EINVAL; 4354 + if (unlikely(req->flags & REQ_F_FIXED_FILE)) 4355 + return -EBADF; 4356 + 4357 + ix->filename = NULL; 4358 + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); 4359 + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 4360 + ix->ctx.kvalue = NULL; 4361 + ix->ctx.size = READ_ONCE(sqe->len); 4362 + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); 4363 + 4364 + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); 4365 + if (!ix->ctx.kname) 4366 + return -ENOMEM; 4367 + 4368 + ret = setxattr_copy(name, &ix->ctx); 4369 + if (ret) { 4370 + kfree(ix->ctx.kname); 4371 + return ret; 4372 + } 4373 + 4374 + req->flags |= REQ_F_NEED_CLEANUP; 4375 + return 0; 4376 + } 4377 + 4378 + static int io_setxattr_prep(struct io_kiocb *req, 4379 + const struct io_uring_sqe *sqe) 4380 + { 4381 + struct io_xattr *ix = &req->xattr; 4382 + const char __user *path; 4383 + int ret; 4384 + 4385 + ret = __io_setxattr_prep(req, sqe); 4386 + if (ret) 4387 + return ret; 4388 + 4389 + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); 4390 + 4391 + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); 4392 + if (IS_ERR(ix->filename)) { 4393 + ret = PTR_ERR(ix->filename); 4394 + ix->filename = NULL; 4395 + } 4396 + 4397 + return ret; 4398 + } 4399 + 4400 + static int io_fsetxattr_prep(struct io_kiocb *req, 4401 + const struct io_uring_sqe *sqe) 4402 + { 4403 + return __io_setxattr_prep(req, sqe); 4404 + } 4405 + 4406 + static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, 4407 + struct path *path) 4408 + { 4409 + struct io_xattr *ix = &req->xattr; 4410 + int ret; 4411 + 4412 + ret = mnt_want_write(path->mnt); 4413 + if (!ret) { 4414 + ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx); 4415 + mnt_drop_write(path->mnt); 4416 + } 4417 + 4418 + return ret; 4419 + } 4420 + 4421 + static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags) 4422 + { 4423 + int ret; 4424 + 4425 + if (issue_flags & IO_URING_F_NONBLOCK) 4426 + return -EAGAIN; 4427 + 4428 + ret = __io_setxattr(req, issue_flags, &req->file->f_path); 4429 + io_xattr_finish(req, ret); 4430 + 4431 + return 0; 4432 + } 4433 + 4434 + static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags) 4435 + { 4436 + struct io_xattr *ix = &req->xattr; 4437 + unsigned int lookup_flags = LOOKUP_FOLLOW; 4438 + struct path path; 4439 + int ret; 4440 + 4441 + if (issue_flags & IO_URING_F_NONBLOCK) 4442 + return -EAGAIN; 4443 + 4444 + retry: 4445 + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); 4446 + if (!ret) { 4447 + ret = __io_setxattr(req, issue_flags, &path); 4448 + path_put(&path); 4449 + if (retry_estale(ret, lookup_flags)) { 4450 + lookup_flags |= LOOKUP_REVAL; 4451 + goto retry; 4452 + } 4453 + } 4454 + 4455 + io_xattr_finish(req, ret); 4456 + return 0; 4457 + } 4458 + 4339 4459 static int io_unlinkat_prep(struct io_kiocb *req, 4340 4460 const struct io_uring_sqe *sqe) 4341 4461 { ··· 6142 5760 return 0; 6143 5761 } 6144 5762 5763 + static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 5764 + { 5765 + struct io_socket *sock = &req->sock; 5766 + 5767 + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 5768 + return -EINVAL; 5769 + if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index) 5770 + return -EINVAL; 5771 + 5772 + sock->domain = READ_ONCE(sqe->fd); 5773 + sock->type = READ_ONCE(sqe->off); 5774 + sock->protocol = READ_ONCE(sqe->len); 5775 + sock->file_slot = READ_ONCE(sqe->file_index); 5776 + sock->nofile = rlimit(RLIMIT_NOFILE); 5777 + 5778 + sock->flags = sock->type & ~SOCK_TYPE_MASK; 5779 + if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 5780 + return -EINVAL; 5781 + if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 5782 + return -EINVAL; 5783 + return 0; 5784 + } 5785 + 5786 + static int io_socket(struct io_kiocb *req, unsigned int issue_flags) 5787 + { 5788 + struct io_socket *sock = &req->sock; 5789 + bool fixed = !!sock->file_slot; 5790 + struct file *file; 5791 + int ret, fd; 5792 + 5793 + if (!fixed) { 5794 + fd = __get_unused_fd_flags(sock->flags, sock->nofile); 5795 + if (unlikely(fd < 0)) 5796 + return fd; 5797 + } 5798 + file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 5799 + if (IS_ERR(file)) { 5800 + if (!fixed) 5801 + put_unused_fd(fd); 5802 + ret = PTR_ERR(file); 5803 + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 5804 + return -EAGAIN; 5805 + if (ret == -ERESTARTSYS) 5806 + ret = -EINTR; 5807 + req_set_fail(req); 5808 + } else if (!fixed) { 5809 + fd_install(fd, file); 5810 + ret = fd; 5811 + } else { 5812 + ret = io_install_fixed_file(req, file, issue_flags, 5813 + sock->file_slot - 1); 5814 + } 5815 + __io_req_complete(req, issue_flags, ret, 0); 5816 + return 0; 5817 + } 5818 + 6145 5819 static int io_connect_prep_async(struct io_kiocb *req) 6146 5820 { 6147 5821 struct io_async_connect *io = req->async_data; ··· 6283 5845 IO_NETOP_PREP_ASYNC(recvmsg); 6284 5846 IO_NETOP_PREP_ASYNC(connect); 6285 5847 IO_NETOP_PREP(accept); 5848 + IO_NETOP_PREP(socket); 6286 5849 IO_NETOP_FN(send); 6287 5850 IO_NETOP_FN(recv); 6288 5851 #endif /* CONFIG_NET */ ··· 7586 7147 return io_linkat_prep(req, sqe); 7587 7148 case IORING_OP_MSG_RING: 7588 7149 return io_msg_ring_prep(req, sqe); 7150 + case IORING_OP_FSETXATTR: 7151 + return io_fsetxattr_prep(req, sqe); 7152 + case IORING_OP_SETXATTR: 7153 + return io_setxattr_prep(req, sqe); 7154 + case IORING_OP_FGETXATTR: 7155 + return io_fgetxattr_prep(req, sqe); 7156 + case IORING_OP_GETXATTR: 7157 + return io_getxattr_prep(req, sqe); 7158 + case IORING_OP_SOCKET: 7159 + return io_socket_prep(req, sqe); 7589 7160 } 7590 7161 7591 7162 printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", ··· 7744 7295 case IORING_OP_STATX: 7745 7296 if (req->statx.filename) 7746 7297 putname(req->statx.filename); 7298 + break; 7299 + case IORING_OP_SETXATTR: 7300 + case IORING_OP_FSETXATTR: 7301 + case IORING_OP_GETXATTR: 7302 + case IORING_OP_FGETXATTR: 7303 + __io_xattr_finish(req); 7747 7304 break; 7748 7305 } 7749 7306 } ··· 7906 7451 break; 7907 7452 case IORING_OP_MSG_RING: 7908 7453 ret = io_msg_ring(req, issue_flags); 7454 + break; 7455 + case IORING_OP_FSETXATTR: 7456 + ret = io_fsetxattr(req, issue_flags); 7457 + break; 7458 + case IORING_OP_SETXATTR: 7459 + ret = io_setxattr(req, issue_flags); 7460 + break; 7461 + case IORING_OP_FGETXATTR: 7462 + ret = io_fgetxattr(req, issue_flags); 7463 + break; 7464 + case IORING_OP_GETXATTR: 7465 + ret = io_getxattr(req, issue_flags); 7466 + break; 7467 + case IORING_OP_SOCKET: 7468 + ret = io_socket(req, issue_flags); 7909 7469 break; 7910 7470 default: 7911 7471 ret = -EINVAL; ··· 12495 12025 BUILD_BUG_SQE_ELEM(42, __u16, personality); 12496 12026 BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); 12497 12027 BUILD_BUG_SQE_ELEM(44, __u32, file_index); 12028 + BUILD_BUG_SQE_ELEM(48, __u64, addr3); 12498 12029 12499 12030 BUILD_BUG_ON(sizeof(struct io_uring_files_update) != 12500 12031 sizeof(struct io_uring_rsrc_update));
+97 -46
fs/xattr.c
··· 25 25 26 26 #include <linux/uaccess.h> 27 27 28 + #include "internal.h" 29 + 28 30 static const char * 29 31 strcmp_prefix(const char *a, const char *a_prefix) 30 32 { ··· 541 539 /* 542 540 * Extended attribute SET operations 543 541 */ 544 - static long 545 - setxattr(struct user_namespace *mnt_userns, struct dentry *d, 546 - const char __user *name, const void __user *value, size_t size, 547 - int flags) 542 + 543 + int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) 548 544 { 549 545 int error; 550 - void *kvalue = NULL; 551 - char kname[XATTR_NAME_MAX + 1]; 552 546 553 - if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) 547 + if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) 554 548 return -EINVAL; 555 549 556 - error = strncpy_from_user(kname, name, sizeof(kname)); 557 - if (error == 0 || error == sizeof(kname)) 558 - error = -ERANGE; 550 + error = strncpy_from_user(ctx->kname->name, name, 551 + sizeof(ctx->kname->name)); 552 + if (error == 0 || error == sizeof(ctx->kname->name)) 553 + return -ERANGE; 559 554 if (error < 0) 560 555 return error; 561 556 562 - if (size) { 563 - if (size > XATTR_SIZE_MAX) 557 + error = 0; 558 + if (ctx->size) { 559 + if (ctx->size > XATTR_SIZE_MAX) 564 560 return -E2BIG; 565 - kvalue = kvmalloc(size, GFP_KERNEL); 566 - if (!kvalue) 567 - return -ENOMEM; 568 - if (copy_from_user(kvalue, value, size)) { 569 - error = -EFAULT; 570 - goto out; 561 + 562 + ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size); 563 + if (IS_ERR(ctx->kvalue)) { 564 + error = PTR_ERR(ctx->kvalue); 565 + ctx->kvalue = NULL; 571 566 } 572 - if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || 573 - (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) 574 - posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), 575 - kvalue, size); 576 567 } 577 568 578 - error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); 579 - out: 580 - kvfree(kvalue); 569 + return error; 570 + } 581 571 572 + static void setxattr_convert(struct user_namespace *mnt_userns, 573 + struct dentry *d, struct xattr_ctx *ctx) 574 + { 575 + if (ctx->size && 576 + ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || 577 + (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) 578 + posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), 579 + ctx->kvalue, ctx->size); 580 + } 581 + 582 + int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, 583 + struct xattr_ctx *ctx) 584 + { 585 + setxattr_convert(mnt_userns, dentry, ctx); 586 + return vfs_setxattr(mnt_userns, dentry, ctx->kname->name, 587 + ctx->kvalue, ctx->size, ctx->flags); 588 + } 589 + 590 + static long 591 + setxattr(struct user_namespace *mnt_userns, struct dentry *d, 592 + const char __user *name, const void __user *value, size_t size, 593 + int flags) 594 + { 595 + struct xattr_name kname; 596 + struct xattr_ctx ctx = { 597 + .cvalue = value, 598 + .kvalue = NULL, 599 + .size = size, 600 + .kname = &kname, 601 + .flags = flags, 602 + }; 603 + int error; 604 + 605 + error = setxattr_copy(name, &ctx); 606 + if (error) 607 + return error; 608 + 609 + error = do_setxattr(mnt_userns, d, &ctx); 610 + 611 + kvfree(ctx.kvalue); 582 612 return error; 583 613 } 584 614 ··· 676 642 /* 677 643 * Extended attribute GET operations 678 644 */ 679 - static ssize_t 680 - getxattr(struct user_namespace *mnt_userns, struct dentry *d, 681 - const char __user *name, void __user *value, size_t size) 645 + ssize_t 646 + do_getxattr(struct user_namespace *mnt_userns, struct dentry *d, 647 + struct xattr_ctx *ctx) 682 648 { 683 649 ssize_t error; 684 - void *kvalue = NULL; 685 - char kname[XATTR_NAME_MAX + 1]; 650 + char *kname = ctx->kname->name; 686 651 687 - error = strncpy_from_user(kname, name, sizeof(kname)); 688 - if (error == 0 || error == sizeof(kname)) 689 - error = -ERANGE; 690 - if (error < 0) 691 - return error; 692 - 693 - if (size) { 694 - if (size > XATTR_SIZE_MAX) 695 - size = XATTR_SIZE_MAX; 696 - kvalue = kvzalloc(size, GFP_KERNEL); 697 - if (!kvalue) 652 + if (ctx->size) { 653 + if (ctx->size > XATTR_SIZE_MAX) 654 + ctx->size = XATTR_SIZE_MAX; 655 + ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL); 656 + if (!ctx->kvalue) 698 657 return -ENOMEM; 699 658 } 700 659 701 - error = vfs_getxattr(mnt_userns, d, kname, kvalue, size); 660 + error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size); 702 661 if (error > 0) { 703 662 if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || 704 663 (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) 705 664 posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), 706 - kvalue, error); 707 - if (size && copy_to_user(value, kvalue, error)) 665 + ctx->kvalue, error); 666 + if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) 708 667 error = -EFAULT; 709 - } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { 668 + } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) { 710 669 /* The file system tried to returned a value bigger 711 670 than XATTR_SIZE_MAX bytes. Not possible. */ 712 671 error = -E2BIG; 713 672 } 714 673 715 - kvfree(kvalue); 674 + return error; 675 + } 716 676 677 + static ssize_t 678 + getxattr(struct user_namespace *mnt_userns, struct dentry *d, 679 + const char __user *name, void __user *value, size_t size) 680 + { 681 + ssize_t error; 682 + struct xattr_name kname; 683 + struct xattr_ctx ctx = { 684 + .value = value, 685 + .kvalue = NULL, 686 + .size = size, 687 + .kname = &kname, 688 + .flags = 0, 689 + }; 690 + 691 + error = strncpy_from_user(kname.name, name, sizeof(kname.name)); 692 + if (error == 0 || error == sizeof(kname.name)) 693 + error = -ERANGE; 694 + if (error < 0) 695 + return error; 696 + 697 + error = do_getxattr(mnt_userns, d, &ctx); 698 + 699 + kvfree(ctx.kvalue); 717 700 return error; 718 701 } 719 702
+5
include/linux/io_uring.h
··· 10 10 void __io_uring_cancel(bool cancel_all); 11 11 void __io_uring_free(struct task_struct *tsk); 12 12 void io_uring_unreg_ringfd(void); 13 + const char *io_uring_get_opcode(u8 opcode); 13 14 14 15 static inline void io_uring_files_cancel(void) 15 16 { ··· 42 41 } 43 42 static inline void io_uring_free(struct task_struct *tsk) 44 43 { 44 + } 45 + static inline const char *io_uring_get_opcode(u8 opcode) 46 + { 47 + return ""; 45 48 } 46 49 #endif 47 50
+1
include/linux/socket.h
··· 434 434 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, 435 435 int __user *upeer_addrlen, int flags); 436 436 extern int __sys_socket(int family, int type, int protocol); 437 + extern struct file *__sys_socket_file(int family, int type, int protocol); 437 438 extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); 438 439 extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, 439 440 int addrlen, int file_flags);
+26 -19
include/trace/events/io_uring.h
··· 7 7 8 8 #include <linux/tracepoint.h> 9 9 #include <uapi/linux/io_uring.h> 10 + #include <linux/io_uring.h> 10 11 11 12 struct io_wq_work; 12 13 ··· 170 169 __entry->rw = rw; 171 170 ), 172 171 173 - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p", 174 - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, 172 + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", 173 + __entry->ctx, __entry->req, __entry->user_data, 174 + io_uring_get_opcode(__entry->opcode), 175 175 __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) 176 176 ); 177 177 ··· 207 205 __entry->opcode = opcode; 208 206 ), 209 207 210 - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d", 211 - __entry->ctx, __entry->req, __entry->data, __entry->opcode) 208 + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s", 209 + __entry->ctx, __entry->req, __entry->data, 210 + io_uring_get_opcode(__entry->opcode)) 212 211 ); 213 212 214 213 /** ··· 308 305 __entry->link = link; 309 306 ), 310 307 311 - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p", 312 - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, 313 - __entry->link) 308 + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p", 309 + __entry->ctx, __entry->req, __entry->user_data, 310 + io_uring_get_opcode(__entry->opcode), __entry->link) 314 311 ); 315 312 316 313 /** ··· 392 389 __entry->sq_thread = sq_thread; 393 390 ), 394 391 395 - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, " 392 + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " 396 393 "non block %d, sq_thread %d", __entry->ctx, __entry->req, 397 - __entry->user_data, __entry->opcode, 394 + __entry->user_data, io_uring_get_opcode(__entry->opcode), 398 395 __entry->flags, __entry->force_nonblock, __entry->sq_thread) 399 396 ); 400 397 ··· 436 433 __entry->events = events; 437 434 ), 438 435 439 - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x", 440 - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, 436 + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x", 437 + __entry->ctx, __entry->req, __entry->user_data, 438 + io_uring_get_opcode(__entry->opcode), 441 439 __entry->mask, __entry->events) 442 440 ); 443 441 ··· 474 470 __entry->mask = mask; 475 471 ), 476 472 477 - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x", 478 - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, 473 + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x", 474 + __entry->ctx, __entry->req, __entry->user_data, 475 + io_uring_get_opcode(__entry->opcode), 479 476 __entry->mask) 480 477 ); 481 478 ··· 511 506 __field( u16, personality ) 512 507 __field( u32, file_index ) 513 508 __field( u64, pad1 ) 514 - __field( u64, pad2 ) 509 + __field( u64, addr3 ) 515 510 __field( int, error ) 516 511 ), 517 512 ··· 530 525 __entry->personality = sqe->personality; 531 526 __entry->file_index = sqe->file_index; 532 527 __entry->pad1 = sqe->__pad2[0]; 533 - __entry->pad2 = sqe->__pad2[1]; 528 + __entry->addr3 = sqe->addr3; 534 529 __entry->error = error; 535 530 ), 536 531 537 532 TP_printk("ring %p, req %p, user_data 0x%llx, " 538 - "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, " 533 + "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, " 539 534 "len=%u, rw_flags=0x%x, buf_index=%d, " 540 - "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", 535 + "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, " 536 + "error=%d", 541 537 __entry->ctx, __entry->req, __entry->user_data, 542 - __entry->opcode, __entry->flags, __entry->ioprio, 538 + io_uring_get_opcode(__entry->opcode), 539 + __entry->flags, __entry->ioprio, 543 540 (unsigned long long)__entry->off, 544 541 (unsigned long long) __entry->addr, __entry->len, 545 542 __entry->op_flags, 546 543 __entry->buf_index, __entry->personality, __entry->file_index, 547 544 (unsigned long long) __entry->pad1, 548 - (unsigned long long) __entry->pad2, __entry->error) 545 + (unsigned long long) __entry->addr3, __entry->error) 549 546 ); 550 547 551 548
+9 -2
include/uapi/linux/io_uring.h
··· 45 45 __u32 rename_flags; 46 46 __u32 unlink_flags; 47 47 __u32 hardlink_flags; 48 + __u32 xattr_flags; 48 49 }; 49 50 __u64 user_data; /* data to be passed back at completion time */ 50 51 /* pack this to avoid bogus arm OABI complaints */ ··· 61 60 __s32 splice_fd_in; 62 61 __u32 file_index; 63 62 }; 64 - __u64 __pad2[2]; 63 + __u64 addr3; 64 + __u64 __pad2[1]; 65 65 }; 66 66 67 67 enum { ··· 119 117 */ 120 118 #define IORING_SETUP_TASKRUN_FLAG (1U << 9) 121 119 122 - enum { 120 + enum io_uring_op { 123 121 IORING_OP_NOP, 124 122 IORING_OP_READV, 125 123 IORING_OP_WRITEV, ··· 161 159 IORING_OP_SYMLINKAT, 162 160 IORING_OP_LINKAT, 163 161 IORING_OP_MSG_RING, 162 + IORING_OP_FSETXATTR, 163 + IORING_OP_SETXATTR, 164 + IORING_OP_FGETXATTR, 165 + IORING_OP_GETXATTR, 166 + IORING_OP_SOCKET, 164 167 165 168 /* this goes last, obviously */ 166 169 IORING_OP_LAST,
+43 -11
net/socket.c
··· 504 504 struct socket *sock_from_file(struct file *file) 505 505 { 506 506 if (file->f_op == &socket_file_ops) 507 - return file->private_data; /* set in sock_map_fd */ 507 + return file->private_data; /* set in sock_alloc_file */ 508 508 509 509 return NULL; 510 510 } ··· 1538 1538 } 1539 1539 EXPORT_SYMBOL(sock_create_kern); 1540 1540 1541 - int __sys_socket(int family, int type, int protocol) 1541 + static struct socket *__sys_socket_create(int family, int type, int protocol) 1542 1542 { 1543 - int retval; 1544 1543 struct socket *sock; 1545 - int flags; 1544 + int retval; 1546 1545 1547 1546 /* Check the SOCK_* constants for consistency. */ 1548 1547 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); ··· 1549 1550 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1550 1551 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1551 1552 1552 - flags = type & ~SOCK_TYPE_MASK; 1553 - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1554 - return -EINVAL; 1553 + if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1554 + return ERR_PTR(-EINVAL); 1555 1555 type &= SOCK_TYPE_MASK; 1556 - 1557 - if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1558 - flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1559 1556 1560 1557 retval = sock_create(family, type, protocol, &sock); 1561 1558 if (retval < 0) 1562 - return retval; 1559 + return ERR_PTR(retval); 1560 + 1561 + return sock; 1562 + } 1563 + 1564 + struct file *__sys_socket_file(int family, int type, int protocol) 1565 + { 1566 + struct socket *sock; 1567 + struct file *file; 1568 + int flags; 1569 + 1570 + sock = __sys_socket_create(family, type, protocol); 1571 + if (IS_ERR(sock)) 1572 + return ERR_CAST(sock); 1573 + 1574 + flags = type & ~SOCK_TYPE_MASK; 1575 + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1576 + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1577 + 1578 + file = sock_alloc_file(sock, flags, NULL); 1579 + if (IS_ERR(file)) 1580 + sock_release(sock); 1581 + 1582 + return file; 1583 + } 1584 + 1585 + int __sys_socket(int family, int type, int protocol) 1586 + { 1587 + struct socket *sock; 1588 + int flags; 1589 + 1590 + sock = __sys_socket_create(family, type, protocol); 1591 + if (IS_ERR(sock)) 1592 + return PTR_ERR(sock); 1593 + 1594 + flags = type & ~SOCK_TYPE_MASK; 1595 + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1596 + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1563 1597 1564 1598 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1565 1599 }