Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Allow struct_ops prog to return referenced kptr

Allow a struct_ops program to return a referenced kptr if the struct_ops
operator's return type is a struct pointer. To make sure the returned
pointer continues to be valid in the kernel, several constraints are
required:

1) The type of the pointer must match the return type
2) The pointer originally comes from the kernel (not locally allocated)
3) The pointer is in its unmodified form

Implementation-wise, a referenced kptr first needs to be allowed to _leak_
in check_reference_leak() if it is in the return register. Then, in
check_return_code(), constraints 1-3 are checked. During struct_ops
registration, a check is also added to warn about and reject operators
that return a non-struct pointer.

In addition, since the first user, Qdisc_ops::dequeue, allows a NULL
pointer to be returned when there is no skb to be dequeued, a scalar
value equal to zero (i.e., a NULL pointer) is also allowed to be returned.

In the future, when there is a struct_ops user that always expects a valid
pointer to be returned from an operator, we may extend tagging to the
return value. The verifier could then allow a NULL pointer to be returned
only if the return value is tagged with MAY_BE_NULL.

Signed-off-by: Amery Hung <amery.hung@bytedance.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20250217190640.1748177-5-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Amery Hung and committed by
Alexei Starovoitov
8d9f547f 6991ec6b

+43 -5
+11 -1
kernel/bpf/bpf_struct_ops.c
··· 390 390 st_ops_desc->value_type = btf_type_by_id(btf, value_id); 391 391 392 392 for_each_member(i, t, member) { 393 - const struct btf_type *func_proto; 393 + const struct btf_type *func_proto, *ret_type; 394 394 void **stub_func_addr; 395 395 u32 moff; 396 396 ··· 426 426 */ 427 427 if (!func_proto || bpf_struct_ops_supported(st_ops, moff)) 428 428 continue; 429 + 430 + if (func_proto->type) { 431 + ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL); 432 + if (ret_type && !__btf_type_is_struct(ret_type)) { 433 + pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n", 434 + mname, st_ops->name); 435 + err = -EOPNOTSUPP; 436 + goto errout; 437 + } 438 + } 429 439 430 440 if (btf_distill_func_proto(log, btf, 431 441 func_proto, mname,
+32 -4
kernel/bpf/verifier.c
··· 10758 10758 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) 10759 10759 { 10760 10760 struct bpf_verifier_state *state = env->cur_state; 10761 + enum bpf_prog_type type = resolve_prog_type(env->prog); 10762 + struct bpf_reg_state *reg = reg_state(env, BPF_REG_0); 10761 10763 bool refs_lingering = false; 10762 10764 int i; 10763 10765 ··· 10768 10766 10769 10767 for (i = 0; i < state->acquired_refs; i++) { 10770 10768 if (state->refs[i].type != REF_TYPE_PTR) 10769 + continue; 10770 + /* Allow struct_ops programs to return a referenced kptr back to 10771 + * kernel. Type checks are performed later in check_return_code. 10772 + */ 10773 + if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit && 10774 + reg->ref_obj_id == state->refs[i].id) 10771 10775 continue; 10772 10776 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 10773 10777 state->refs[i].id, state->refs[i].insn_idx); ··· 16426 16418 const char *exit_ctx = "At program exit"; 16427 16419 struct tnum enforce_attach_type_range = tnum_unknown; 16428 16420 const struct bpf_prog *prog = env->prog; 16429 - struct bpf_reg_state *reg; 16421 + struct bpf_reg_state *reg = reg_state(env, regno); 16430 16422 struct bpf_retval_range range = retval_range(0, 1); 16431 16423 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 16432 16424 int err; 16433 16425 struct bpf_func_state *frame = env->cur_state->frame[0]; 16434 16426 const bool is_subprog = frame->subprogno; 16435 16427 bool return_32bit = false; 16428 + const struct btf_type *reg_type, *ret_type = NULL; 16436 16429 16437 16430 /* LSM and struct_ops func-ptr's return type could be "void" */ 16438 16431 if (!is_subprog || frame->in_exception_callback_fn) { ··· 16442 16433 if (prog->expected_attach_type == BPF_LSM_CGROUP) 16443 16434 /* See below, can be 0 or 0-1 depending on hook. 
*/ 16444 16435 break; 16445 - fallthrough; 16436 + if (!prog->aux->attach_func_proto->type) 16437 + return 0; 16438 + break; 16446 16439 case BPF_PROG_TYPE_STRUCT_OPS: 16447 16440 if (!prog->aux->attach_func_proto->type) 16448 16441 return 0; 16442 + 16443 + if (frame->in_exception_callback_fn) 16444 + break; 16445 + 16446 + /* Allow a struct_ops program to return a referenced kptr if it 16447 + * matches the operator's return type and is in its unmodified 16448 + * form. A scalar zero (i.e., a null pointer) is also allowed. 16449 + */ 16450 + reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL; 16451 + ret_type = btf_type_resolve_ptr(prog->aux->attach_btf, 16452 + prog->aux->attach_func_proto->type, 16453 + NULL); 16454 + if (ret_type && ret_type == reg_type && reg->ref_obj_id) 16455 + return __check_ptr_off_reg(env, reg, regno, false); 16449 16456 break; 16450 16457 default: 16451 16458 break; ··· 16482 16457 verbose(env, "R%d leaks addr as return value\n", regno); 16483 16458 return -EACCES; 16484 16459 } 16485 - 16486 - reg = cur_regs(env) + regno; 16487 16460 16488 16461 if (frame->in_async_callback_fn) { 16489 16462 /* enforce return zero from async callbacks like timer */ ··· 16580 16557 16581 16558 case BPF_PROG_TYPE_NETFILTER: 16582 16559 range = retval_range(NF_DROP, NF_ACCEPT); 16560 + break; 16561 + case BPF_PROG_TYPE_STRUCT_OPS: 16562 + if (!ret_type) 16563 + return 0; 16564 + range = retval_range(0, 0); 16583 16565 break; 16584 16566 case BPF_PROG_TYPE_EXT: 16585 16567 /* freplace program can return anything as its return value