Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'bpf-allow-bpf_for_each_map_elem-helper-with-different-input-maps'

Philo Lu says:

====================
bpf: allow bpf_for_each_map_elem() helper with different input maps

Currently, taking different maps within a single bpf_for_each_map_elem
call is not allowed. For example, the following code cannot pass the
verifier (it fails with the error "tail_call abusing map_ptr"):
```
static void test_by_pid(int pid)
{
if (pid <= 100)
bpf_for_each_map_elem(&map1, map_elem_cb, NULL, 0);
else
bpf_for_each_map_elem(&map2, map_elem_cb, NULL, 0);
}
```

This is because during verification of bpf_for_each_map_elem,
bpf_insn_aux_data->map_ptr_state is expected to hold a map_ptr (instead
of a poison state), which is then needed by
set_map_elem_callback_state. However, as there are two different map
ptr inputs, map_ptr_state is marked as BPF_MAP_PTR_POISON, and thus the
second map_ptr would be lost. BPF_MAP_PTR_POISON is also needed by
bpf_for_each_map_elem to skip the retpoline optimization in
do_misc_fixups(). Therefore, both the map_ptr_state and the map_ptr are
needed for bpf_for_each_map_elem.

This patchset solves this by transforming bpf_insn_aux_data->map_ptr_state
into a new struct, storing the poison/unpriv state and the map pointer
together without additional memory overhead. Then bpf_for_each_map_elem
works well with different input maps. It also makes the map_ptr_state
logic clearer.

A test case is added to the selftests; it would fail to load without
this patchset.

Changelogs
-> v1:
- PATCH 1/3:
- make the commit log clearer
- change poison and unpriv to bool in struct bpf_map_ptr_state, also the
return value in bpf_map_ptr_poisoned() and bpf_map_ptr_unpriv()
- PATCH 2/3:
- change the comments in set_map_elem_callback_state()
- PATCH 3/3:
- remove the "skipping the last element" logic during map updating
- change if() to ASSERT_OK()

Please review, thanks.
====================

Link: https://lore.kernel.org/r/20240405025536.18113-1-lulie@linux.alibaba.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+136 -26
+8 -1
include/linux/bpf_verifier.h
··· 502 502 u32 callback_subprogno; /* valid when fit_for_inline is true */ 503 503 }; 504 504 505 + /* pointer and state for maps */ 506 + struct bpf_map_ptr_state { 507 + struct bpf_map *map_ptr; 508 + bool poison; 509 + bool unpriv; 510 + }; 511 + 505 512 /* Possible states for alu_state member. */ 506 513 #define BPF_ALU_SANITIZE_SRC (1U << 0) 507 514 #define BPF_ALU_SANITIZE_DST (1U << 1) ··· 521 514 struct bpf_insn_aux_data { 522 515 union { 523 516 enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ 524 - unsigned long map_ptr_state; /* pointer/poison value for maps */ 517 + struct bpf_map_ptr_state map_ptr_state; 525 518 s32 call_imm; /* saved imm field of call insn */ 526 519 u32 alu_limit; /* limit for add/sub register with pointer */ 527 520 struct {
+17 -25
kernel/bpf/verifier.c
··· 190 190 #define BPF_MAP_KEY_POISON (1ULL << 63) 191 191 #define BPF_MAP_KEY_SEEN (1ULL << 62) 192 192 193 - #define BPF_MAP_PTR_UNPRIV 1UL 194 - #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ 195 - POISON_POINTER_DELTA)) 196 - #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) 197 - 198 193 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512 199 194 200 195 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); ··· 204 209 205 210 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) 206 211 { 207 - return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; 212 + return aux->map_ptr_state.poison; 208 213 } 209 214 210 215 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) 211 216 { 212 - return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV; 217 + return aux->map_ptr_state.unpriv; 213 218 } 214 219 215 220 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, 216 - const struct bpf_map *map, bool unpriv) 221 + struct bpf_map *map, 222 + bool unpriv, bool poison) 217 223 { 218 - BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); 219 224 unpriv |= bpf_map_ptr_unpriv(aux); 220 - aux->map_ptr_state = (unsigned long)map | 221 - (unpriv ? 
BPF_MAP_PTR_UNPRIV : 0UL); 225 + aux->map_ptr_state.unpriv = unpriv; 226 + aux->map_ptr_state.poison = poison; 227 + aux->map_ptr_state.map_ptr = map; 222 228 } 223 229 224 230 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux) ··· 9651 9655 struct bpf_map *map; 9652 9656 int err; 9653 9657 9654 - if (bpf_map_ptr_poisoned(insn_aux)) { 9655 - verbose(env, "tail_call abusing map_ptr\n"); 9656 - return -EINVAL; 9657 - } 9658 - 9659 - map = BPF_MAP_PTR(insn_aux->map_ptr_state); 9658 + /* valid map_ptr and poison value does not matter */ 9659 + map = insn_aux->map_ptr_state.map_ptr; 9660 9660 if (!map->ops->map_set_for_each_callback_args || 9661 9661 !map->ops->map_for_each_callback) { 9662 9662 verbose(env, "callback function not allowed for map\n"); ··· 10011 10019 return -EACCES; 10012 10020 } 10013 10021 10014 - if (!BPF_MAP_PTR(aux->map_ptr_state)) 10022 + if (!aux->map_ptr_state.map_ptr) 10015 10023 bpf_map_ptr_store(aux, meta->map_ptr, 10016 - !meta->map_ptr->bypass_spec_v1); 10017 - else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) 10018 - bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, 10019 - !meta->map_ptr->bypass_spec_v1); 10024 + !meta->map_ptr->bypass_spec_v1, false); 10025 + else if (aux->map_ptr_state.map_ptr != meta->map_ptr) 10026 + bpf_map_ptr_store(aux, meta->map_ptr, 10027 + !meta->map_ptr->bypass_spec_v1, true); 10020 10028 return 0; 10021 10029 } 10022 10030 ··· 19832 19840 !bpf_map_ptr_unpriv(aux)) { 19833 19841 struct bpf_jit_poke_descriptor desc = { 19834 19842 .reason = BPF_POKE_REASON_TAIL_CALL, 19835 - .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), 19843 + .tail_call.map = aux->map_ptr_state.map_ptr, 19836 19844 .tail_call.key = bpf_map_key_immediate(aux), 19837 19845 .insn_idx = i + delta, 19838 19846 }; ··· 19861 19869 return -EINVAL; 19862 19870 } 19863 19871 19864 - map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 19872 + map_ptr = aux->map_ptr_state.map_ptr; 19865 19873 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 
19866 19874 map_ptr->max_entries, 2); 19867 19875 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, ··· 19969 19977 if (bpf_map_ptr_poisoned(aux)) 19970 19978 goto patch_call_imm; 19971 19979 19972 - map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 19980 + map_ptr = aux->map_ptr_state.map_ptr; 19973 19981 ops = map_ptr->ops; 19974 19982 if (insn->imm == BPF_FUNC_map_lookup_elem && 19975 19983 ops->map_gen_lookup) {
+62
tools/testing/selftests/bpf/prog_tests/for_each.c
··· 5 5 #include "for_each_hash_map_elem.skel.h" 6 6 #include "for_each_array_map_elem.skel.h" 7 7 #include "for_each_map_elem_write_key.skel.h" 8 + #include "for_each_multi_maps.skel.h" 8 9 9 10 static unsigned int duration; 10 11 ··· 144 143 for_each_map_elem_write_key__destroy(skel); 145 144 } 146 145 146 + static void test_multi_maps(void) 147 + { 148 + struct for_each_multi_maps *skel; 149 + __u64 val, array_total, hash_total; 150 + __u32 key, max_entries; 151 + int i, err; 152 + 153 + LIBBPF_OPTS(bpf_test_run_opts, topts, 154 + .data_in = &pkt_v4, 155 + .data_size_in = sizeof(pkt_v4), 156 + .repeat = 1, 157 + ); 158 + 159 + skel = for_each_multi_maps__open_and_load(); 160 + if (!ASSERT_OK_PTR(skel, "for_each_multi_maps__open_and_load")) 161 + return; 162 + 163 + array_total = 0; 164 + max_entries = bpf_map__max_entries(skel->maps.arraymap); 165 + for (i = 0; i < max_entries; i++) { 166 + key = i; 167 + val = i + 1; 168 + array_total += val; 169 + err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key), 170 + &val, sizeof(val), BPF_ANY); 171 + if (!ASSERT_OK(err, "array_map_update")) 172 + goto out; 173 + } 174 + 175 + hash_total = 0; 176 + max_entries = bpf_map__max_entries(skel->maps.hashmap); 177 + for (i = 0; i < max_entries; i++) { 178 + key = i + 100; 179 + val = i + 1; 180 + hash_total += val; 181 + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), 182 + &val, sizeof(val), BPF_ANY); 183 + if (!ASSERT_OK(err, "hash_map_update")) 184 + goto out; 185 + } 186 + 187 + skel->bss->data_output = 0; 188 + skel->bss->use_array = 1; 189 + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); 190 + ASSERT_OK(err, "bpf_prog_test_run_opts"); 191 + ASSERT_OK(topts.retval, "retval"); 192 + ASSERT_EQ(skel->bss->data_output, array_total, "array output"); 193 + 194 + skel->bss->data_output = 0; 195 + skel->bss->use_array = 0; 196 + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); 
197 + ASSERT_OK(err, "bpf_prog_test_run_opts"); 198 + ASSERT_OK(topts.retval, "retval"); 199 + ASSERT_EQ(skel->bss->data_output, hash_total, "hash output"); 200 + 201 + out: 202 + for_each_multi_maps__destroy(skel); 203 + } 204 + 147 205 void test_for_each(void) 148 206 { 149 207 if (test__start_subtest("hash_map")) ··· 211 151 test_array_map(); 212 152 if (test__start_subtest("write_map_key")) 213 153 test_write_map_key(); 154 + if (test__start_subtest("multi_maps")) 155 + test_multi_maps(); 214 156 }
+49
tools/testing/selftests/bpf/progs/for_each_multi_maps.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "vmlinux.h" 3 + #include <bpf/bpf_helpers.h> 4 + 5 + char _license[] SEC("license") = "GPL"; 6 + 7 + struct { 8 + __uint(type, BPF_MAP_TYPE_ARRAY); 9 + __uint(max_entries, 3); 10 + __type(key, __u32); 11 + __type(value, __u64); 12 + } arraymap SEC(".maps"); 13 + 14 + struct { 15 + __uint(type, BPF_MAP_TYPE_HASH); 16 + __uint(max_entries, 5); 17 + __type(key, __u32); 18 + __type(value, __u64); 19 + } hashmap SEC(".maps"); 20 + 21 + struct callback_ctx { 22 + int output; 23 + }; 24 + 25 + u32 data_output = 0; 26 + int use_array = 0; 27 + 28 + static __u64 29 + check_map_elem(struct bpf_map *map, __u32 *key, __u64 *val, 30 + struct callback_ctx *data) 31 + { 32 + data->output += *val; 33 + return 0; 34 + } 35 + 36 + SEC("tc") 37 + int test_pkt_access(struct __sk_buff *skb) 38 + { 39 + struct callback_ctx data; 40 + 41 + data.output = 0; 42 + if (use_array) 43 + bpf_for_each_map_elem(&arraymap, check_map_elem, &data, 0); 44 + else 45 + bpf_for_each_map_elem(&hashmap, check_map_elem, &data, 0); 46 + data_output = data.output; 47 + 48 + return 0; 49 + }