Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Zero-fill re-used per-cpu map element

Zero-fill element values for all cpus other than the current one, just
as happens when prealloc is not used. This is the only way the bpf
program can ensure known initial values for all cpus ('onallcpus'
cannot be set when coming from the bpf program).

The scenario is: bpf program inserts some elements in a per-cpu
map, then deletes some (or userspace does). When later adding
new elements using bpf_map_update_elem(), the bpf program can
only set the value of the new elements for the current cpu.
When prealloc is enabled, previously deleted elements are re-used.
Without the fix, values for other cpus remain whatever they were
when the re-used entry was previously freed.

A selftest is added to validate correct operation in the above
scenario as well as in the case of LRU per-cpu map element re-use.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: David Verbeiren <david.verbeiren@tessares.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201104112332.15191-1-david.verbeiren@tessares.net

authored by

David Verbeiren and committed by
Alexei Starovoitov
d3bec013 7c0afcad

+275 -2
+28 -2
kernel/bpf/hashtab.c
··· 821 821 } 822 822 } 823 823 824 + /* pcpu_init_value - set the initial per-cpu values of an element: when the map is preallocated and onallcpus is false, copy @value (value_size rounded up to 8 bytes) into the current cpu's slot and zero-fill the slot of every other possible cpu; otherwise hand off to pcpu_copy_value(). */ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 825 + void *value, bool onallcpus) 826 + { 827 + /* When using prealloc and not setting the initial value on all cpus, 828 + * zero-fill element values for other cpus (just as what happens when 829 + * not using prealloc). Otherwise, bpf program has no way to ensure 830 + * known initial values for cpus other than current one 831 + * (onallcpus=false always when coming from bpf prog). 832 + */ 833 + if (htab_is_prealloc(htab) && !onallcpus) { 834 + u32 size = round_up(htab->map.value_size, 8); 835 + int current_cpu = raw_smp_processor_id(); 836 + int cpu; 837 + 838 + for_each_possible_cpu(cpu) { 839 + if (cpu == current_cpu) 840 + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, 841 + size); 842 + else 843 + memset(per_cpu_ptr(pptr, cpu), 0, size); 844 + } 845 + } else { 846 + pcpu_copy_value(htab, pptr, value, onallcpus); 847 + } 848 + } 849 + 824 850 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 825 851 { 826 852 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && ··· 917 891 } 918 892 } 919 893 920 - pcpu_copy_value(htab, pptr, value, onallcpus); 894 + pcpu_init_value(htab, pptr, value, onallcpus); 921 895 922 896 if (!prealloc) 923 897 htab_elem_set_ptr(l_new, key_size, pptr); ··· 1209 1183 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1210 1184 value, onallcpus); 1211 1185 } else { 1212 - pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), 1186 + pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1213 1187 value, onallcpus); 1214 1188 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1215 1189 l_new = NULL;
+214
tools/testing/selftests/bpf/prog_tests/map_init.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ 3 + 4 + #include <test_progs.h> 5 + #include "test_map_init.skel.h" 6 + 7 + #define TEST_VALUE 0x1234 8 + #define FILL_VALUE 0xdeadbeef 9 + 10 + static int nr_cpus; 11 + static int duration; 12 + 13 + typedef unsigned long long map_key_t; 14 + typedef unsigned long long map_value_t; 15 + typedef struct { 16 + map_value_t v; /* padding */ 17 + } __bpf_percpu_val_align pcpu_map_value_t; 18 + 19 + 20 + static int map_populate(int map_fd, int num) 21 + { 22 + pcpu_map_value_t value[nr_cpus]; 23 + int i, err; 24 + map_key_t key; 25 + 26 + for (i = 0; i < nr_cpus; i++) 27 + bpf_percpu(value, i) = FILL_VALUE; 28 + 29 + for (key = 1; key <= num; key++) { 30 + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); 31 + if (!ASSERT_OK(err, "bpf_map_update_elem")) 32 + return -1; 33 + } 34 + 35 + return 0; 36 + } 37 + 38 + static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, 39 + int *map_fd, int populate) 40 + { 41 + struct test_map_init *skel; 42 + int err; 43 + 44 + skel = test_map_init__open(); 45 + if (!ASSERT_OK_PTR(skel, "skel_open")) 46 + return NULL; 47 + 48 + err = bpf_map__set_type(skel->maps.hashmap1, map_type); 49 + if (!ASSERT_OK(err, "bpf_map__set_type")) 50 + goto error; 51 + 52 + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); 53 + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) 54 + goto error; 55 + 56 + err = test_map_init__load(skel); 57 + if (!ASSERT_OK(err, "skel_load")) 58 + goto error; 59 + 60 + *map_fd = bpf_map__fd(skel->maps.hashmap1); 61 + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) 62 + goto error; 63 + 64 + err = map_populate(*map_fd, populate); 65 + if (!ASSERT_OK(err, "map_populate")) 66 + goto error_map; 67 + 68 + return skel; 69 + 70 + error_map: 71 + close(*map_fd); 72 + error: 73 + test_map_init__destroy(skel); 74 + return NULL; 75 + } 76 + 77 + /* executes bpf program that 
updates map with key, value */ 78 + static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, 79 + map_value_t value) 80 + { 81 + struct test_map_init__bss *bss; 82 + 83 + bss = skel->bss; 84 + 85 + bss->inKey = key; 86 + bss->inValue = value; 87 + bss->inPid = getpid(); 88 + 89 + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) 90 + return -1; 91 + 92 + /* Let tracepoint trigger */ 93 + syscall(__NR_getpgid); 94 + 95 + test_map_init__detach(skel); 96 + 97 + return 0; 98 + } 99 + 100 + static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) 101 + { 102 + int i, nzCnt = 0; 103 + map_value_t val; 104 + 105 + for (i = 0; i < nr_cpus; i++) { 106 + val = bpf_percpu(value, i); 107 + if (val) { 108 + if (CHECK(val != expected, "map value", 109 + "unexpected for cpu %d: 0x%llx\n", i, val)) 110 + return -1; 111 + nzCnt++; 112 + } 113 + } 114 + 115 + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", 116 + nzCnt)) 117 + return -1; 118 + 119 + return 0; 120 + } 121 + 122 + /* Add key=1 elem with values set for all CPUs 123 + * Delete elem key=1 124 + * Run bpf prog that inserts new key=1 elem with value=0x1234 125 + * (bpf prog can only set value for current CPU) 126 + * Lookup Key=1 and check value is as expected for all CPUs: 127 + * value set by bpf prog for one CPU, 0 for all others 128 + */ 129 + static void test_pcpu_map_init(void) 130 + { 131 + pcpu_map_value_t value[nr_cpus]; 132 + struct test_map_init *skel; 133 + int map_fd, err; 134 + map_key_t key; 135 + 136 + /* max 1 elem in map so insertion is forced to reuse freed entry */ 137 + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); 138 + if (!ASSERT_OK_PTR(skel, "prog_setup")) 139 + return; 140 + 141 + /* delete element so the entry can be re-used*/ 142 + key = 1; 143 + err = bpf_map_delete_elem(map_fd, &key); 144 + if (!ASSERT_OK(err, "bpf_map_delete_elem")) 145 + goto cleanup; 146 + 147 + /* run bpf prog that inserts new elem, re-using 
the slot just freed */ 148 + err = prog_run_insert_elem(skel, key, TEST_VALUE); 149 + if (!ASSERT_OK(err, "prog_run_insert_elem")) 150 + goto cleanup; 151 + 152 + /* check that key=1 was re-created by bpf prog */ 153 + err = bpf_map_lookup_elem(map_fd, &key, value); 154 + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) 155 + goto cleanup; 156 + 157 + /* and has expected values */ 158 + check_values_one_cpu(value, TEST_VALUE); 159 + 160 + cleanup: 161 + test_map_init__destroy(skel); 162 + } 163 + 164 + /* Add key=1 and key=2 elems with values set for all CPUs 165 + * Run bpf prog that inserts new key=3 elem 166 + * (only for current cpu; other cpus should have initial value = 0) 167 + * Lookup Key=1 and check value is as expected for all CPUs 168 + */ 169 + static void test_pcpu_lru_map_init(void) 170 + { 171 + pcpu_map_value_t value[nr_cpus]; 172 + struct test_map_init *skel; 173 + int map_fd, err; 174 + map_key_t key; 175 + 176 + /* Set up LRU map with 2 elements, values filled for all CPUs. 
177 + * With these 2 elements, the LRU map is full 178 + */ 179 + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); 180 + if (!ASSERT_OK_PTR(skel, "prog_setup")) 181 + return; 182 + 183 + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ 184 + key = 3; 185 + err = prog_run_insert_elem(skel, key, TEST_VALUE); 186 + if (!ASSERT_OK(err, "prog_run_insert_elem")) 187 + goto cleanup; 188 + 189 + /* check that key=3 replaced one of earlier elements */ 190 + err = bpf_map_lookup_elem(map_fd, &key, value); 191 + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) 192 + goto cleanup; 193 + 194 + /* and has expected values */ 195 + check_values_one_cpu(value, TEST_VALUE); 196 + 197 + cleanup: 198 + test_map_init__destroy(skel); 199 + } 200 + 201 + void test_map_init(void) 202 + { 203 + nr_cpus = bpf_num_possible_cpus(); 204 + if (nr_cpus <= 1) { 205 + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); 206 + test__skip(); 207 + return; 208 + } 209 + 210 + if (test__start_subtest("pcpu_map_init")) 211 + test_pcpu_map_init(); 212 + if (test__start_subtest("pcpu_lru_map_init")) 213 + test_pcpu_lru_map_init(); 214 + }
+33
tools/testing/selftests/bpf/progs/test_map_init.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ 3 + 4 + #include "vmlinux.h" 5 + #include <bpf/bpf_helpers.h> 6 + 7 + __u64 inKey = 0; 8 + __u64 inValue = 0; 9 + __u32 inPid = 0; 10 + 11 + struct { 12 + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 13 + __uint(max_entries, 2); 14 + __type(key, __u64); 15 + __type(value, __u64); 16 + } hashmap1 SEC(".maps"); 17 + 18 + 19 + SEC("tp/syscalls/sys_enter_getpgid") 20 + int sysenter_getpgid(const void *ctx) 21 + { 22 + /* Just do it for once, when called from our own test prog. This 23 + * ensures the map value is only updated for a single CPU. 24 + */ 25 + int cur_pid = bpf_get_current_pid_tgid() >> 32; 26 + 27 + if (cur_pid == inPid) 28 + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); 29 + 30 + return 0; 31 + } 32 + 33 + char _license[] SEC("license") = "GPL";