Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

io_uring/bpf_filter: pass in expected filter payload size

It's quite possible that the payloads attached to certain opcodes,
like IORING_OP_OPENAT/OPENAT2 or IORING_OP_SOCKET, can change over
time. For example, on the openat/openat2 side, the
struct open_how argument is extensible, and could be extended in the
future to allow further arguments to be passed in.

Allow registration of a cBPF filter to pass in the payload (pdu) size
as seen by userspace. If that filter is for an opcode that takes extra
payload data, allow it if the application's payload size expectation is
the same as the kernel's. If that is the case, the kernel supports
filtering on the payload that the application expects. If the size
differs, the behavior depends on the IO_URING_BPF_FILTER_SZ_STRICT flag:

1) If IO_URING_BPF_FILTER_SZ_STRICT is set and the size expectation
differs, fail the attempt to load the filter.

2) If IO_URING_BPF_FILTER_SZ_STRICT isn't set, allow the filter if
the userspace pdu size is smaller than what the kernel offers.

3) Regardless of whether IO_URING_BPF_FILTER_SZ_STRICT is set, fail
loading the filter if the userspace pdu size is bigger than what the
kernel supports.

An attempt to load a filter that fails due to sizing will error with -EMSGSIZE.
For that error, the registration struct will have filter->pdu_size
populated with the pdu size that the kernel uses.

Reported-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

+58 -19
+7 -1
include/uapi/linux/io_uring/bpf_filter.h
··· 35 35 * If set, any currently unset opcode will have a deny filter attached 36 36 */ 37 37 IO_URING_BPF_FILTER_DENY_REST = 1, 38 + /* 39 + * If set, if kernel and application don't agree on pdu_size for 40 + * the given opcode, fail the registration of the filter. 41 + */ 42 + IO_URING_BPF_FILTER_SZ_STRICT = 2, 38 43 }; 39 44 40 45 struct io_uring_bpf_filter { 41 46 __u32 opcode; /* io_uring opcode to filter */ 42 47 __u32 flags; 43 48 __u32 filter_len; /* number of BPF instructions */ 44 - __u32 resv; 49 + __u8 pdu_size; /* expected pdu size for opcode */ 50 + __u8 resv[3]; 45 51 __u64 filter_ptr; /* pointer to BPF filter */ 46 52 __u64 resv2[5]; 47 53 };
+51 -18
io_uring/bpf_filter.c
··· 308 308 return ERR_PTR(-EBUSY); 309 309 } 310 310 311 - #define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST 311 + #define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \ 312 + IO_URING_BPF_FILTER_SZ_STRICT) 313 + 314 + static int io_bpf_filter_import(struct io_uring_bpf *reg, 315 + struct io_uring_bpf __user *arg) 316 + { 317 + const struct io_issue_def *def; 318 + int ret; 319 + 320 + if (copy_from_user(reg, arg, sizeof(*reg))) 321 + return -EFAULT; 322 + if (reg->cmd_type != IO_URING_BPF_CMD_FILTER) 323 + return -EINVAL; 324 + if (reg->cmd_flags || reg->resv) 325 + return -EINVAL; 326 + 327 + if (reg->filter.opcode >= IORING_OP_LAST) 328 + return -EINVAL; 329 + if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS) 330 + return -EINVAL; 331 + if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv))) 332 + return -EINVAL; 333 + if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2))) 334 + return -EINVAL; 335 + if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS) 336 + return -EINVAL; 337 + 338 + /* Verify filter size */ 339 + def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)]; 340 + 341 + /* same size, always ok */ 342 + ret = 0; 343 + if (reg->filter.pdu_size == def->filter_pdu_size) 344 + ; 345 + /* size differs, fail in strict mode */ 346 + else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT) 347 + ret = -EMSGSIZE; 348 + /* userspace filter is bigger, always disallow */ 349 + else if (reg->filter.pdu_size > def->filter_pdu_size) 350 + ret = -EMSGSIZE; 351 + 352 + /* copy back kernel filter size */ 353 + reg->filter.pdu_size = def->filter_pdu_size; 354 + if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter))) 355 + return -EFAULT; 356 + 357 + return ret; 358 + } 312 359 313 360 int io_register_bpf_filter(struct io_restriction *res, 314 361 struct io_uring_bpf __user *arg) ··· 367 320 struct sock_fprog fprog; 368 321 int ret; 369 322 370 - if (copy_from_user(&reg, 
arg, sizeof(reg))) 371 - return -EFAULT; 372 - if (reg.cmd_type != IO_URING_BPF_CMD_FILTER) 373 - return -EINVAL; 374 - if (reg.cmd_flags || reg.resv) 375 - return -EINVAL; 376 - 377 - if (reg.filter.opcode >= IORING_OP_LAST) 378 - return -EINVAL; 379 - if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS) 380 - return -EINVAL; 381 - if (reg.filter.resv) 382 - return -EINVAL; 383 - if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2))) 384 - return -EINVAL; 385 - if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS) 386 - return -EINVAL; 323 + ret = io_bpf_filter_import(&reg, arg); 324 + if (ret) 325 + return ret; 387 326 388 327 fprog.len = reg.filter.filter_len; 389 328 fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);