Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf/benchs: Add benchmark tests for bloom filter throughput + false positive

This patch adds benchmark tests for the throughput (for lookups + updates)
and the false positive rate of bloom filter lookups, as well as some
minor refactoring of the bash script for running the benchmarks.

These benchmarks show that as the number of hash functions increases,
the throughput and the false positive rate of the bloom filter decrease.
From the benchmark data, the approximate average false-positive rates
are roughly as follows:

1 hash function = ~30%
2 hash functions = ~15%
3 hash functions = ~5%
4 hash functions = ~2.5%
5 hash functions = ~1%
6 hash functions = ~0.5%
7 hash functions = ~0.35%
8 hash functions = ~0.15%
9 hash functions = ~0.1%
10 hash functions = ~0%

For reference data, the benchmarks run on one thread on a machine
with one NUMA node for 1 to 5 hash functions for 8-byte and 64-byte
values are as follows:

1 hash function:
50k entries
8-byte value
Lookups - 51.1 M/s operations
Updates - 33.6 M/s operations
False positive rate: 24.15%
64-byte value
Lookups - 15.7 M/s operations
Updates - 15.1 M/s operations
False positive rate: 24.2%
100k entries
8-byte value
Lookups - 51.0 M/s operations
Updates - 33.4 M/s operations
False positive rate: 24.04%
64-byte value
Lookups - 15.6 M/s operations
Updates - 14.6 M/s operations
False positive rate: 24.06%
500k entries
8-byte value
Lookups - 50.5 M/s operations
Updates - 33.1 M/s operations
False positive rate: 27.45%
64-byte value
Lookups - 15.6 M/s operations
Updates - 14.2 M/s operations
False positive rate: 27.42%
1 mil entries
8-byte value
Lookups - 49.7 M/s operations
Updates - 32.9 M/s operations
False positive rate: 27.45%
64-byte value
Lookups - 15.4 M/s operations
Updates - 13.7 M/s operations
False positive rate: 27.58%
2.5 mil entries
8-byte value
Lookups - 47.2 M/s operations
Updates - 31.8 M/s operations
False positive rate: 30.94%
64-byte value
Lookups - 15.3 M/s operations
Updates - 13.2 M/s operations
False positive rate: 30.95%
5 mil entries
8-byte value
Lookups - 41.1 M/s operations
Updates - 28.1 M/s operations
False positive rate: 31.01%
64-byte value
Lookups - 13.3 M/s operations
Updates - 11.4 M/s operations
False positive rate: 30.98%

2 hash functions:
50k entries
8-byte value
Lookups - 34.1 M/s operations
Updates - 20.1 M/s operations
False positive rate: 9.13%
64-byte value
Lookups - 8.4 M/s operations
Updates - 7.9 M/s operations
False positive rate: 9.21%
100k entries
8-byte value
Lookups - 33.7 M/s operations
Updates - 18.9 M/s operations
False positive rate: 9.13%
64-byte value
Lookups - 8.4 M/s operations
Updates - 7.7 M/s operations
False positive rate: 9.19%
500k entries
8-byte value
Lookups - 32.7 M/s operations
Updates - 18.1 M/s operations
False positive rate: 12.61%
64-byte value
Lookups - 8.4 M/s operations
Updates - 7.5 M/s operations
False positive rate: 12.61%
1 mil entries
8-byte value
Lookups - 30.6 M/s operations
Updates - 18.9 M/s operations
False positive rate: 12.54%
64-byte value
Lookups - 8.0 M/s operations
Updates - 7.0 M/s operations
False positive rate: 12.52%
2.5 mil entries
8-byte value
Lookups - 25.3 M/s operations
Updates - 16.7 M/s operations
False positive rate: 16.77%
64-byte value
Lookups - 7.9 M/s operations
Updates - 6.5 M/s operations
False positive rate: 16.88%
5 mil entries
8-byte value
Lookups - 20.8 M/s operations
Updates - 14.7 M/s operations
False positive rate: 16.78%
64-byte value
Lookups - 7.0 M/s operations
Updates - 6.0 M/s operations
False positive rate: 16.78%

3 hash functions:
50k entries
8-byte value
Lookups - 25.1 M/s operations
Updates - 14.6 M/s operations
False positive rate: 7.65%
64-byte value
Lookups - 5.8 M/s operations
Updates - 5.5 M/s operations
False positive rate: 7.58%
100k entries
8-byte value
Lookups - 24.7 M/s operations
Updates - 14.1 M/s operations
False positive rate: 7.71%
64-byte value
Lookups - 5.8 M/s operations
Updates - 5.3 M/s operations
False positive rate: 7.62%
500k entries
8-byte value
Lookups - 22.9 M/s operations
Updates - 13.9 M/s operations
False positive rate: 2.62%
64-byte value
Lookups - 5.6 M/s operations
Updates - 4.8 M/s operations
False positive rate: 2.7%
1 mil entries
8-byte value
Lookups - 19.8 M/s operations
Updates - 12.6 M/s operations
False positive rate: 2.60%
64-byte value
Lookups - 5.3 M/s operations
Updates - 4.4 M/s operations
False positive rate: 2.69%
2.5 mil entries
8-byte value
Lookups - 16.2 M/s operations
Updates - 10.7 M/s operations
False positive rate: 4.49%
64-byte value
Lookups - 4.9 M/s operations
Updates - 4.1 M/s operations
False positive rate: 4.41%
5 mil entries
8-byte value
Lookups - 18.8 M/s operations
Updates - 9.2 M/s operations
False positive rate: 4.45%
64-byte value
Lookups - 5.2 M/s operations
Updates - 3.9 M/s operations
False positive rate: 4.54%

4 hash functions:
50k entries
8-byte value
Lookups - 19.7 M/s operations
Updates - 11.1 M/s operations
False positive rate: 1.01%
64-byte value
Lookups - 4.4 M/s operations
Updates - 4.0 M/s operations
False positive rate: 1.00%
100k entries
8-byte value
Lookups - 19.5 M/s operations
Updates - 10.9 M/s operations
False positive rate: 1.00%
64-byte value
Lookups - 4.3 M/s operations
Updates - 3.9 M/s operations
False positive rate: 0.97%
500k entries
8-byte value
Lookups - 18.2 M/s operations
Updates - 10.6 M/s operations
False positive rate: 2.05%
64-byte value
Lookups - 4.3 M/s operations
Updates - 3.7 M/s operations
False positive rate: 2.05%
1 mil entries
8-byte value
Lookups - 15.5 M/s operations
Updates - 9.6 M/s operations
False positive rate: 1.99%
64-byte value
Lookups - 4.0 M/s operations
Updates - 3.4 M/s operations
False positive rate: 1.99%
2.5 mil entries
8-byte value
Lookups - 13.8 M/s operations
Updates - 7.7 M/s operations
False positive rate: 3.91%
64-byte value
Lookups - 3.7 M/s operations
Updates - 3.6 M/s operations
False positive rate: 3.78%
5 mil entries
8-byte value
Lookups - 13.0 M/s operations
Updates - 6.9 M/s operations
False positive rate: 3.93%
64-byte value
Lookups - 3.5 M/s operations
Updates - 3.7 M/s operations
False positive rate: 3.39%

5 hash functions:
50k entries
8-byte value
Lookups - 16.4 M/s operations
Updates - 9.1 M/s operations
False positive rate: 0.78%
64-byte value
Lookups - 3.5 M/s operations
Updates - 3.2 M/s operations
False positive rate: 0.77%
100k entries
8-byte value
Lookups - 16.3 M/s operations
Updates - 9.0 M/s operations
False positive rate: 0.79%
64-byte value
Lookups - 3.5 M/s operations
Updates - 3.2 M/s operations
False positive rate: 0.78%
500k entries
8-byte value
Lookups - 15.1 M/s operations
Updates - 8.8 M/s operations
False positive rate: 1.82%
64-byte value
Lookups - 3.4 M/s operations
Updates - 3.0 M/s operations
False positive rate: 1.78%
1 mil entries
8-byte value
Lookups - 13.2 M/s operations
Updates - 7.8 M/s operations
False positive rate: 1.81%
64-byte value
Lookups - 3.2 M/s operations
Updates - 2.8 M/s operations
False positive rate: 1.80%
2.5 mil entries
8-byte value
Lookups - 10.5 M/s operations
Updates - 5.9 M/s operations
False positive rate: 0.29%
64-byte value
Lookups - 3.2 M/s operations
Updates - 2.4 M/s operations
False positive rate: 0.28%
5 mil entries
8-byte value
Lookups - 9.6 M/s operations
Updates - 5.7 M/s operations
False positive rate: 0.30%
64-byte value
Lookups - 3.2 M/s operations
Updates - 2.7 M/s operations
False positive rate: 0.30%

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211027234504.30744-5-joannekoong@fb.com

authored by

Joanne Koong and committed by
Alexei Starovoitov
57fd1c63 ed9109ad

+695 -30
+4 -2
tools/testing/selftests/bpf/Makefile
··· 524 524 # Benchmark runner 525 525 $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) 526 526 $(call msg,CC,,$@) 527 - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ 527 + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ 528 528 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h 529 529 $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h 530 530 $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ 531 531 $(OUTPUT)/perfbuf_bench.skel.h 532 + $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h 532 533 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) 533 534 $(OUTPUT)/bench: LDLIBS += -lm 534 535 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ 535 536 $(OUTPUT)/bench_count.o \ 536 537 $(OUTPUT)/bench_rename.o \ 537 538 $(OUTPUT)/bench_trigger.o \ 538 - $(OUTPUT)/bench_ringbufs.o 539 + $(OUTPUT)/bench_ringbufs.o \ 540 + $(OUTPUT)/bench_bloom_filter_map.o 539 541 $(call msg,BINARY,,$@) 540 542 $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) 541 543
+37
tools/testing/selftests/bpf/bench.c
··· 51 51 fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); 52 52 } 53 53 54 + void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) 55 + { 56 + long total = res->false_hits + res->hits + res->drops; 57 + 58 + printf("Iter %3d (%7.3lfus): ", 59 + iter, (delta_ns - 1000000000) / 1000.0); 60 + 61 + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", 62 + res->false_hits, total, ((float)res->false_hits / total) * 100); 63 + } 64 + 65 + void false_hits_report_final(struct bench_res res[], int res_cnt) 66 + { 67 + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; 68 + int i; 69 + 70 + for (i = 0; i < res_cnt; i++) { 71 + total_hits += res[i].hits; 72 + total_false_hits += res[i].false_hits; 73 + total_drops += res[i].drops; 74 + } 75 + total_ops = total_hits + total_false_hits + total_drops; 76 + 77 + printf("Summary: %ld false hits of %ld total operations. ", 78 + total_false_hits, total_ops); 79 + printf("Percentage = %2.2f %%\n", 80 + ((float)total_false_hits / total_ops) * 100); 81 + } 82 + 54 83 void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) 55 84 { 56 85 double hits_per_sec, drops_per_sec; ··· 161 132 }; 162 133 163 134 extern struct argp bench_ringbufs_argp; 135 + extern struct argp bench_bloom_map_argp; 164 136 165 137 static const struct argp_child bench_parsers[] = { 166 138 { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, 139 + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, 167 140 {}, 168 141 }; 169 142 ··· 354 323 extern const struct bench bench_rb_custom; 355 324 extern const struct bench bench_pb_libbpf; 356 325 extern const struct bench bench_pb_custom; 326 + extern const struct bench bench_bloom_lookup; 327 + extern const struct bench bench_bloom_update; 328 + extern const struct bench bench_bloom_false_positive; 357 329 358 330 static const struct bench *benchs[] = { 359 331 &bench_count_global, ··· 378 344 
&bench_rb_custom, 379 345 &bench_pb_libbpf, 380 346 &bench_pb_custom, 347 + &bench_bloom_lookup, 348 + &bench_bloom_update, 349 + &bench_bloom_false_positive, 381 350 }; 382 351 383 352 static void setup_benchmark()
+3
tools/testing/selftests/bpf/bench.h
··· 33 33 struct bench_res { 34 34 long hits; 35 35 long drops; 36 + long false_hits; 36 37 }; 37 38 38 39 struct bench { ··· 57 56 void setup_libbpf(); 58 57 void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); 59 58 void hits_drops_report_final(struct bench_res res[], int res_cnt); 59 + void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); 60 + void false_hits_report_final(struct bench_res res[], int res_cnt); 60 61 61 62 static inline __u64 get_time_ns() { 62 63 struct timespec t;
+420
tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + 4 + #include <argp.h> 5 + #include <linux/log2.h> 6 + #include <pthread.h> 7 + #include "bench.h" 8 + #include "bloom_filter_bench.skel.h" 9 + #include "bpf_util.h" 10 + 11 + static struct ctx { 12 + bool use_array_map; 13 + bool use_hashmap; 14 + bool hashmap_use_bloom; 15 + bool count_false_hits; 16 + 17 + struct bloom_filter_bench *skel; 18 + 19 + int bloom_fd; 20 + int hashmap_fd; 21 + int array_map_fd; 22 + 23 + pthread_mutex_t map_done_mtx; 24 + pthread_cond_t map_done_cv; 25 + bool map_done; 26 + bool map_prepare_err; 27 + 28 + __u32 next_map_idx; 29 + } ctx = { 30 + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, 31 + .map_done_cv = PTHREAD_COND_INITIALIZER, 32 + }; 33 + 34 + struct stat { 35 + __u32 stats[3]; 36 + }; 37 + 38 + static struct { 39 + __u32 nr_entries; 40 + __u8 nr_hash_funcs; 41 + __u8 value_size; 42 + } args = { 43 + .nr_entries = 1000, 44 + .nr_hash_funcs = 3, 45 + .value_size = 8, 46 + }; 47 + 48 + enum { 49 + ARG_NR_ENTRIES = 3000, 50 + ARG_NR_HASH_FUNCS = 3001, 51 + ARG_VALUE_SIZE = 3002, 52 + }; 53 + 54 + static const struct argp_option opts[] = { 55 + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, 56 + "Set number of expected unique entries in the bloom filter"}, 57 + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, 58 + "Set number of hash functions in the bloom filter"}, 59 + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, 60 + "Set value size (in bytes) of bloom filter entries"}, 61 + {}, 62 + }; 63 + 64 + static error_t parse_arg(int key, char *arg, struct argp_state *state) 65 + { 66 + switch (key) { 67 + case ARG_NR_ENTRIES: 68 + args.nr_entries = strtol(arg, NULL, 10); 69 + if (args.nr_entries == 0) { 70 + fprintf(stderr, "Invalid nr_entries count."); 71 + argp_usage(state); 72 + } 73 + break; 74 + case ARG_NR_HASH_FUNCS: 75 + args.nr_hash_funcs = strtol(arg, NULL, 10); 76 + if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { 77 + 
fprintf(stderr, 78 + "The bloom filter must use 1 to 15 hash functions."); 79 + argp_usage(state); 80 + } 81 + break; 82 + case ARG_VALUE_SIZE: 83 + args.value_size = strtol(arg, NULL, 10); 84 + if (args.value_size < 2 || args.value_size > 256) { 85 + fprintf(stderr, 86 + "Invalid value size. Must be between 2 and 256 bytes"); 87 + argp_usage(state); 88 + } 89 + break; 90 + default: 91 + return ARGP_ERR_UNKNOWN; 92 + } 93 + 94 + return 0; 95 + } 96 + 97 + /* exported into benchmark runner */ 98 + const struct argp bench_bloom_map_argp = { 99 + .options = opts, 100 + .parser = parse_arg, 101 + }; 102 + 103 + static void validate(void) 104 + { 105 + if (env.consumer_cnt != 1) { 106 + fprintf(stderr, 107 + "The bloom filter benchmarks do not support multi-consumer use\n"); 108 + exit(1); 109 + } 110 + } 111 + 112 + static inline void trigger_bpf_program(void) 113 + { 114 + syscall(__NR_getpgid); 115 + } 116 + 117 + static void *producer(void *input) 118 + { 119 + while (true) 120 + trigger_bpf_program(); 121 + 122 + return NULL; 123 + } 124 + 125 + static void *map_prepare_thread(void *arg) 126 + { 127 + __u32 val_size, i; 128 + void *val = NULL; 129 + int err; 130 + 131 + val_size = args.value_size; 132 + val = malloc(val_size); 133 + if (!val) { 134 + ctx.map_prepare_err = true; 135 + goto done; 136 + } 137 + 138 + while (true) { 139 + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); 140 + if (i > args.nr_entries) 141 + break; 142 + 143 + again: 144 + /* Populate hashmap, bloom filter map, and array map with the same 145 + * random values 146 + */ 147 + err = syscall(__NR_getrandom, val, val_size, 0); 148 + if (err != val_size) { 149 + ctx.map_prepare_err = true; 150 + fprintf(stderr, "failed to get random value: %d\n", -errno); 151 + break; 152 + } 153 + 154 + if (ctx.use_hashmap) { 155 + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); 156 + if (err) { 157 + if (err != -EEXIST) { 158 + ctx.map_prepare_err = true; 159 + 
fprintf(stderr, "failed to add elem to hashmap: %d\n", 160 + -errno); 161 + break; 162 + } 163 + goto again; 164 + } 165 + } 166 + 167 + i--; 168 + 169 + if (ctx.use_array_map) { 170 + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); 171 + if (err) { 172 + ctx.map_prepare_err = true; 173 + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); 174 + break; 175 + } 176 + } 177 + 178 + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) 179 + continue; 180 + 181 + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); 182 + if (err) { 183 + ctx.map_prepare_err = true; 184 + fprintf(stderr, 185 + "failed to add elem to bloom filter map: %d\n", -errno); 186 + break; 187 + } 188 + } 189 + done: 190 + pthread_mutex_lock(&ctx.map_done_mtx); 191 + ctx.map_done = true; 192 + pthread_cond_signal(&ctx.map_done_cv); 193 + pthread_mutex_unlock(&ctx.map_done_mtx); 194 + 195 + if (val) 196 + free(val); 197 + 198 + return NULL; 199 + } 200 + 201 + static void populate_maps(void) 202 + { 203 + unsigned int nr_cpus = bpf_num_possible_cpus(); 204 + pthread_t map_thread; 205 + int i, err, nr_rand_bytes; 206 + 207 + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); 208 + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); 209 + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); 210 + 211 + for (i = 0; i < nr_cpus; i++) { 212 + err = pthread_create(&map_thread, NULL, map_prepare_thread, 213 + NULL); 214 + if (err) { 215 + fprintf(stderr, "failed to create pthread: %d\n", -errno); 216 + exit(1); 217 + } 218 + } 219 + 220 + pthread_mutex_lock(&ctx.map_done_mtx); 221 + while (!ctx.map_done) 222 + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); 223 + pthread_mutex_unlock(&ctx.map_done_mtx); 224 + 225 + if (ctx.map_prepare_err) 226 + exit(1); 227 + 228 + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, 229 + ctx.skel->rodata->nr_rand_bytes, 0); 230 + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { 231 + fprintf(stderr, "failed to 
get random bytes\n"); 232 + exit(1); 233 + } 234 + } 235 + 236 + static void check_args(void) 237 + { 238 + if (args.value_size < 8) { 239 + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); 240 + 241 + if (args.nr_entries > nr_unique_entries) { 242 + fprintf(stderr, 243 + "Not enough unique values for the nr_entries requested\n"); 244 + exit(1); 245 + } 246 + } 247 + } 248 + 249 + static struct bloom_filter_bench *setup_skeleton(void) 250 + { 251 + struct bloom_filter_bench *skel; 252 + 253 + check_args(); 254 + 255 + setup_libbpf(); 256 + 257 + skel = bloom_filter_bench__open(); 258 + if (!skel) { 259 + fprintf(stderr, "failed to open skeleton\n"); 260 + exit(1); 261 + } 262 + 263 + skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; 264 + skel->rodata->count_false_hits = ctx.count_false_hits; 265 + 266 + /* Resize number of entries */ 267 + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); 268 + 269 + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); 270 + 271 + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); 272 + 273 + /* Set value size */ 274 + bpf_map__set_value_size(skel->maps.array_map, args.value_size); 275 + 276 + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); 277 + 278 + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); 279 + 280 + /* For the hashmap, we use the value as the key as well */ 281 + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); 282 + 283 + skel->bss->value_size = args.value_size; 284 + 285 + /* Set number of hash functions */ 286 + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); 287 + 288 + if (bloom_filter_bench__load(skel)) { 289 + fprintf(stderr, "failed to load skeleton\n"); 290 + exit(1); 291 + } 292 + 293 + return skel; 294 + } 295 + 296 + static void bloom_lookup_setup(void) 297 + { 298 + struct bpf_link *link; 299 + 300 + ctx.use_array_map = true; 301 + 302 + ctx.skel = setup_skeleton(); 303 + 304 + populate_maps(); 
305 + 306 + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); 307 + if (!link) { 308 + fprintf(stderr, "failed to attach program!\n"); 309 + exit(1); 310 + } 311 + } 312 + 313 + static void bloom_update_setup(void) 314 + { 315 + struct bpf_link *link; 316 + 317 + ctx.use_array_map = true; 318 + 319 + ctx.skel = setup_skeleton(); 320 + 321 + populate_maps(); 322 + 323 + link = bpf_program__attach(ctx.skel->progs.bloom_update); 324 + if (!link) { 325 + fprintf(stderr, "failed to attach program!\n"); 326 + exit(1); 327 + } 328 + } 329 + 330 + static void false_positive_setup(void) 331 + { 332 + struct bpf_link *link; 333 + 334 + ctx.use_hashmap = true; 335 + ctx.hashmap_use_bloom = true; 336 + ctx.count_false_hits = true; 337 + 338 + ctx.skel = setup_skeleton(); 339 + 340 + populate_maps(); 341 + 342 + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); 343 + if (!link) { 344 + fprintf(stderr, "failed to attach program!\n"); 345 + exit(1); 346 + } 347 + } 348 + 349 + static void measure(struct bench_res *res) 350 + { 351 + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; 352 + static unsigned long last_hits, last_drops, last_false_hits; 353 + unsigned int nr_cpus = bpf_num_possible_cpus(); 354 + int hit_key, drop_key, false_hit_key; 355 + int i; 356 + 357 + hit_key = ctx.skel->rodata->hit_key; 358 + drop_key = ctx.skel->rodata->drop_key; 359 + false_hit_key = ctx.skel->rodata->false_hit_key; 360 + 361 + if (ctx.skel->bss->error != 0) { 362 + fprintf(stderr, "error (%d) when searching the bloom filter\n", 363 + ctx.skel->bss->error); 364 + exit(1); 365 + } 366 + 367 + for (i = 0; i < nr_cpus; i++) { 368 + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; 369 + 370 + total_hits += s->stats[hit_key]; 371 + total_drops += s->stats[drop_key]; 372 + total_false_hits += s->stats[false_hit_key]; 373 + } 374 + 375 + res->hits = total_hits - last_hits; 376 + res->drops = total_drops - last_drops; 377 + res->false_hits = 
total_false_hits - last_false_hits; 378 + 379 + last_hits = total_hits; 380 + last_drops = total_drops; 381 + last_false_hits = total_false_hits; 382 + } 383 + 384 + static void *consumer(void *input) 385 + { 386 + return NULL; 387 + } 388 + 389 + const struct bench bench_bloom_lookup = { 390 + .name = "bloom-lookup", 391 + .validate = validate, 392 + .setup = bloom_lookup_setup, 393 + .producer_thread = producer, 394 + .consumer_thread = consumer, 395 + .measure = measure, 396 + .report_progress = hits_drops_report_progress, 397 + .report_final = hits_drops_report_final, 398 + }; 399 + 400 + const struct bench bench_bloom_update = { 401 + .name = "bloom-update", 402 + .validate = validate, 403 + .setup = bloom_update_setup, 404 + .producer_thread = producer, 405 + .consumer_thread = consumer, 406 + .measure = measure, 407 + .report_progress = hits_drops_report_progress, 408 + .report_final = hits_drops_report_final, 409 + }; 410 + 411 + const struct bench bench_bloom_false_positive = { 412 + .name = "bloom-false-positive", 413 + .validate = validate, 414 + .setup = false_positive_setup, 415 + .producer_thread = producer, 416 + .consumer_thread = consumer, 417 + .measure = measure, 418 + .report_progress = false_hits_report_progress, 419 + .report_final = false_hits_report_final, 420 + };
+28
tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + source ./benchs/run_common.sh 5 + 6 + set -eufo pipefail 7 + 8 + header "Bloom filter map" 9 + for v in 2 4 8 16 40; do 10 + for t in 1 4 8 12 16; do 11 + for h in {1..10}; do 12 + subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" 13 + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do 14 + printf "%'d entries -\n" $e 15 + printf "\t" 16 + summarize "Lookups, total operations: " \ 17 + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" 18 + printf "\t" 19 + summarize "Updates, total operations: " \ 20 + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" 21 + printf "\t" 22 + summarize_percentage "False positive rate: " \ 23 + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" 24 + done 25 + printf "\n" 26 + done 27 + done 28 + done
+2 -28
tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
··· 1 1 #!/bin/bash 2 2 3 + source ./benchs/run_common.sh 4 + 3 5 set -eufo pipefail 4 - 5 - RUN_BENCH="sudo ./bench -w3 -d10 -a" 6 - 7 - function hits() 8 - { 9 - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" 10 - } 11 - 12 - function drops() 13 - { 14 - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" 15 - } 16 - 17 - function header() 18 - { 19 - local len=${#1} 20 - 21 - printf "\n%s\n" "$1" 22 - for i in $(seq 1 $len); do printf '='; done 23 - printf '\n' 24 - } 25 - 26 - function summarize() 27 - { 28 - bench="$1" 29 - summary=$(echo $2 | tail -n1) 30 - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" 31 - } 32 6 33 7 header "Single-producer, parallel producer" 34 8 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+48
tools/testing/selftests/bpf/benchs/run_common.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + RUN_BENCH="sudo ./bench -w3 -d10 -a" 5 + 6 + function header() 7 + { 8 + local len=${#1} 9 + 10 + printf "\n%s\n" "$1" 11 + for i in $(seq 1 $len); do printf '='; done 12 + printf '\n' 13 + } 14 + 15 + function subtitle() 16 + { 17 + local len=${#1} 18 + printf "\t%s\n" "$1" 19 + } 20 + 21 + function hits() 22 + { 23 + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" 24 + } 25 + 26 + function drops() 27 + { 28 + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" 29 + } 30 + 31 + function percentage() 32 + { 33 + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" 34 + } 35 + 36 + function summarize() 37 + { 38 + bench="$1" 39 + summary=$(echo $2 | tail -n1) 40 + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" 41 + } 42 + 43 + function summarize_percentage() 44 + { 45 + bench="$1" 46 + summary=$(echo $2 | tail -n1) 47 + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" 48 + }
+153
tools/testing/selftests/bpf/progs/bloom_filter_bench.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + 4 + #include <errno.h> 5 + #include <linux/bpf.h> 6 + #include <stdbool.h> 7 + #include <bpf/bpf_helpers.h> 8 + 9 + char _license[] SEC("license") = "GPL"; 10 + 11 + struct bpf_map; 12 + 13 + __u8 rand_vals[2500000]; 14 + const __u32 nr_rand_bytes = 2500000; 15 + 16 + struct { 17 + __uint(type, BPF_MAP_TYPE_ARRAY); 18 + __uint(key_size, sizeof(__u32)); 19 + /* max entries and value_size will be set programmatically. 20 + * They are configurable from the userspace bench program. 21 + */ 22 + } array_map SEC(".maps"); 23 + 24 + struct { 25 + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); 26 + /* max entries, value_size, and # of hash functions will be set 27 + * programmatically. They are configurable from the userspace 28 + * bench program. 29 + */ 30 + __uint(map_extra, 3); 31 + } bloom_map SEC(".maps"); 32 + 33 + struct { 34 + __uint(type, BPF_MAP_TYPE_HASH); 35 + /* max entries, key_size, and value_size, will be set 36 + * programmatically. They are configurable from the userspace 37 + * bench program. 
38 + */ 39 + } hashmap SEC(".maps"); 40 + 41 + struct callback_ctx { 42 + struct bpf_map *map; 43 + bool update; 44 + }; 45 + 46 + /* Tracks the number of hits, drops, and false hits */ 47 + struct { 48 + __u32 stats[3]; 49 + } __attribute__((__aligned__(256))) percpu_stats[256]; 50 + 51 + const __u32 hit_key = 0; 52 + const __u32 drop_key = 1; 53 + const __u32 false_hit_key = 2; 54 + 55 + __u8 value_size; 56 + 57 + const volatile bool hashmap_use_bloom; 58 + const volatile bool count_false_hits; 59 + 60 + int error = 0; 61 + 62 + static __always_inline void log_result(__u32 key) 63 + { 64 + __u32 cpu = bpf_get_smp_processor_id(); 65 + 66 + percpu_stats[cpu & 255].stats[key]++; 67 + } 68 + 69 + static __u64 70 + bloom_callback(struct bpf_map *map, __u32 *key, void *val, 71 + struct callback_ctx *data) 72 + { 73 + int err; 74 + 75 + if (data->update) 76 + err = bpf_map_push_elem(data->map, val, 0); 77 + else 78 + err = bpf_map_peek_elem(data->map, val); 79 + 80 + if (err) { 81 + error |= 1; 82 + return 1; /* stop the iteration */ 83 + } 84 + 85 + log_result(hit_key); 86 + 87 + return 0; 88 + } 89 + 90 + SEC("fentry/__x64_sys_getpgid") 91 + int bloom_lookup(void *ctx) 92 + { 93 + struct callback_ctx data; 94 + 95 + data.map = (struct bpf_map *)&bloom_map; 96 + data.update = false; 97 + 98 + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); 99 + 100 + return 0; 101 + } 102 + 103 + SEC("fentry/__x64_sys_getpgid") 104 + int bloom_update(void *ctx) 105 + { 106 + struct callback_ctx data; 107 + 108 + data.map = (struct bpf_map *)&bloom_map; 109 + data.update = true; 110 + 111 + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); 112 + 113 + return 0; 114 + } 115 + 116 + SEC("fentry/__x64_sys_getpgid") 117 + int bloom_hashmap_lookup(void *ctx) 118 + { 119 + __u64 *result; 120 + int i, err; 121 + 122 + __u32 index = bpf_get_prandom_u32(); 123 + __u32 bitmask = (1ULL << 21) - 1; 124 + 125 + for (i = 0; i < 1024; i++, index += value_size) { 126 + index = 
index & bitmask; 127 + 128 + if (hashmap_use_bloom) { 129 + err = bpf_map_peek_elem(&bloom_map, 130 + rand_vals + index); 131 + if (err) { 132 + if (err != -ENOENT) { 133 + error |= 2; 134 + return 0; 135 + } 136 + log_result(hit_key); 137 + continue; 138 + } 139 + } 140 + 141 + result = bpf_map_lookup_elem(&hashmap, 142 + rand_vals + index); 143 + if (result) { 144 + log_result(hit_key); 145 + } else { 146 + if (hashmap_use_bloom && count_false_hits) 147 + log_result(false_hit_key); 148 + log_result(drop_key); 149 + } 150 + } 151 + 152 + return 0; 153 + }