Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: verifier: Support eliding map lookup nullness

This commit allows progs to elide a null check on statically known map
lookup keys. In other words, if the verifier can statically prove that
the lookup will be in-bounds, allow the prog to drop the null check.

This is useful for two reasons:

1. Large numbers of nullness checks (especially when they cannot fail)
unnecessarily push the prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
2. It forms a tighter contract between programmer and verifier.

For (1), bpftrace is starting to make heavier use of percpu scratch
maps. As a result, for user scripts with a large number of unrolled loops,
we are starting to hit jump complexity verification errors. These
percpu lookups cannot fail anyway, as we only use static key values.
Eliding nullness probably results in less work for verifier as well.

For (2), percpu scratch maps are often used as a larger stack, as the
current stack is limited to 512 bytes. In these situations, it is
desirable for the programmer to express: "this lookup should never fail,
and if it does, it means I messed up the code". By omitting the null
check, the programmer can "ask" the verifier to double check the logic.

Tests also have to be updated in sync with these changes, as the
verifier is more efficient with this change. Notably, iters.c tests had
to be changed to use a map type that still requires null checks, as it's
exercising verifier tracking logic w.r.t iterators.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/68f3ea96ff3809a87e502a11a4bd30177fc5823e.1736886479.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Daniel Xu and committed by
Alexei Starovoitov
d2102f2f 37cce22d

+99 -13
+89 -3
kernel/bpf/verifier.c
··· 287 287 u32 ret_btf_id; 288 288 u32 subprogno; 289 289 struct btf_field *kptr_field; 290 + s64 const_map_key; 290 291 }; 291 292 292 293 struct bpf_kfunc_call_arg_meta { ··· 9149 9148 return 0; 9150 9149 } 9151 9150 9151 + /* Returns constant key value if possible, else negative error */ 9152 + static s64 get_constant_map_key(struct bpf_verifier_env *env, 9153 + struct bpf_reg_state *key, 9154 + u32 key_size) 9155 + { 9156 + struct bpf_func_state *state = func(env, key); 9157 + struct bpf_reg_state *reg; 9158 + int slot, spi, off; 9159 + int spill_size = 0; 9160 + int zero_size = 0; 9161 + int stack_off; 9162 + int i, err; 9163 + u8 *stype; 9164 + 9165 + if (!env->bpf_capable) 9166 + return -EOPNOTSUPP; 9167 + if (key->type != PTR_TO_STACK) 9168 + return -EOPNOTSUPP; 9169 + if (!tnum_is_const(key->var_off)) 9170 + return -EOPNOTSUPP; 9171 + 9172 + stack_off = key->off + key->var_off.value; 9173 + slot = -stack_off - 1; 9174 + spi = slot / BPF_REG_SIZE; 9175 + off = slot % BPF_REG_SIZE; 9176 + stype = state->stack[spi].slot_type; 9177 + 9178 + /* First handle precisely tracked STACK_ZERO */ 9179 + for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) 9180 + zero_size++; 9181 + if (zero_size >= key_size) 9182 + return 0; 9183 + 9184 + /* Check that stack contains a scalar spill of expected size */ 9185 + if (!is_spilled_scalar_reg(&state->stack[spi])) 9186 + return -EOPNOTSUPP; 9187 + for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--) 9188 + spill_size++; 9189 + if (spill_size != key_size) 9190 + return -EOPNOTSUPP; 9191 + 9192 + reg = &state->stack[spi].spilled_ptr; 9193 + if (!tnum_is_const(reg->var_off)) 9194 + /* Stack value not statically known */ 9195 + return -EOPNOTSUPP; 9196 + 9197 + /* We are relying on a constant value. So mark as precise 9198 + * to prevent pruning on it. 
9199 + */ 9200 + bt_set_frame_slot(&env->bt, key->frameno, spi); 9201 + err = mark_chain_precision_batch(env); 9202 + if (err < 0) 9203 + return err; 9204 + 9205 + return reg->var_off.value; 9206 + } 9207 + 9152 9208 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, 9153 9209 struct bpf_call_arg_meta *meta, 9154 9210 const struct bpf_func_proto *fn, ··· 9216 9158 enum bpf_arg_type arg_type = fn->arg_type[arg]; 9217 9159 enum bpf_reg_type type = reg->type; 9218 9160 u32 *arg_btf_id = NULL; 9161 + u32 key_size; 9219 9162 int err = 0; 9220 9163 9221 9164 if (arg_type == ARG_DONTCARE) ··· 9350 9291 verbose(env, "invalid map_ptr to access map->key\n"); 9351 9292 return -EACCES; 9352 9293 } 9353 - err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, 9354 - BPF_READ, false, NULL); 9294 + key_size = meta->map_ptr->key_size; 9295 + err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); 9296 + if (err) 9297 + return err; 9298 + meta->const_map_key = get_constant_map_key(env, reg, key_size); 9299 + if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP) 9300 + return meta->const_map_key; 9355 9301 break; 9356 9302 case ARG_PTR_TO_MAP_VALUE: 9357 9303 if (type_may_be_null(arg_type) && register_is_null(reg)) ··· 10880 10816 state->callback_subprogno == subprogno); 10881 10817 } 10882 10818 10819 + /* Returns whether or not the given map type can potentially elide 10820 + * lookup return value nullness check. This is possible if the key 10821 + * is statically known. 
10822 + */ 10823 + static bool can_elide_value_nullness(enum bpf_map_type type) 10824 + { 10825 + switch (type) { 10826 + case BPF_MAP_TYPE_ARRAY: 10827 + case BPF_MAP_TYPE_PERCPU_ARRAY: 10828 + return true; 10829 + default: 10830 + return false; 10831 + } 10832 + } 10833 + 10883 10834 static int get_helper_proto(struct bpf_verifier_env *env, int func_id, 10884 10835 const struct bpf_func_proto **ptr) 10885 10836 { ··· 11261 11182 "kernel subsystem misconfigured verifier\n"); 11262 11183 return -EINVAL; 11263 11184 } 11185 + 11186 + if (func_id == BPF_FUNC_map_lookup_elem && 11187 + can_elide_value_nullness(meta.map_ptr->map_type) && 11188 + meta.const_map_key >= 0 && 11189 + meta.const_map_key < meta.map_ptr->max_entries) 11190 + ret_flag &= ~PTR_MAYBE_NULL; 11191 + 11264 11192 regs[BPF_REG_0].map_ptr = meta.map_ptr; 11265 11193 regs[BPF_REG_0].map_uid = meta.map_uid; 11266 11194 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; 11267 - if (!type_may_be_null(ret_type) && 11195 + if (!type_may_be_null(ret_flag) && 11268 11196 btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) { 11269 11197 regs[BPF_REG_0].id = ++env->id_gen; 11270 11198 }
+7 -7
tools/testing/selftests/bpf/progs/iters.c
··· 524 524 } 525 525 526 526 struct { 527 - __uint(type, BPF_MAP_TYPE_ARRAY); 527 + __uint(type, BPF_MAP_TYPE_HASH); 528 528 __type(key, int); 529 529 __type(value, int); 530 530 __uint(max_entries, 1000); 531 - } arr_map SEC(".maps"); 531 + } hash_map SEC(".maps"); 532 532 533 533 SEC("?raw_tp") 534 534 __failure __msg("invalid mem access 'scalar'") ··· 539 539 540 540 MY_PID_GUARD(); 541 541 542 - map_val = bpf_map_lookup_elem(&arr_map, &key); 542 + map_val = bpf_map_lookup_elem(&hash_map, &key); 543 543 if (!map_val) 544 544 return 0; 545 545 ··· 561 561 562 562 MY_PID_GUARD(); 563 563 564 - map_val = bpf_map_lookup_elem(&arr_map, &key); 564 + map_val = bpf_map_lookup_elem(&hash_map, &key); 565 565 if (!map_val) 566 566 return 0; 567 567 568 568 bpf_repeat(1000000) { 569 - map_val = bpf_map_lookup_elem(&arr_map, &key); 569 + map_val = bpf_map_lookup_elem(&hash_map, &key); 570 570 } 571 571 572 572 *map_val = 123; ··· 585 585 MY_PID_GUARD(); 586 586 587 587 bpf_repeat(1000000) { 588 - map_val = bpf_map_lookup_elem(&arr_map, &key); 588 + map_val = bpf_map_lookup_elem(&hash_map, &key); 589 589 found = true; 590 590 } 591 591 ··· 606 606 MY_PID_GUARD(); 607 607 608 608 bpf_repeat(1000000) { 609 - map_val = bpf_map_lookup_elem(&arr_map, &key); 609 + map_val = bpf_map_lookup_elem(&hash_map, &key); 610 610 if (map_val) { 611 611 found = true; 612 612 break;
+1 -1
tools/testing/selftests/bpf/progs/map_kptr_fail.c
··· 345 345 } 346 346 347 347 SEC("?tc") 348 - __failure __msg("Unreleased reference id=5 alloc_insn=") 348 + __failure __msg("Unreleased reference id=4 alloc_insn=") 349 349 int kptr_xchg_ref_state(struct __sk_buff *ctx) 350 350 { 351 351 struct prog_test_ref_kfunc *p;
+1 -1
tools/testing/selftests/bpf/progs/verifier_map_in_map.c
··· 47 47 48 48 SEC("xdp") 49 49 __description("map in map state pruning") 50 - __success __msg("processed 26 insns") 50 + __success __msg("processed 15 insns") 51 51 __log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ) 52 52 __naked void map_in_map_state_pruning(void) 53 53 {
+1 -1
tools/testing/selftests/bpf/verifier/map_kptr.c
··· 373 373 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 374 374 .fixup_map_kptr = { 1 }, 375 375 .result = REJECT, 376 - .errstr = "Unreleased reference id=5 alloc_insn=20", 376 + .errstr = "Unreleased reference id=4 alloc_insn=20", 377 377 .fixup_kfunc_btf_id = { 378 378 { "bpf_kfunc_call_test_acquire", 15 }, 379 379 }