bpf/benchs: Add benchmarks for comparing hashmap lookups w/ vs. w/out bloom filter

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

This patch adds benchmark tests for comparing the performance of hashmap
lookups without the bloom filter vs. hashmap lookups with the bloom filter.

Checking the bloom filter first for whether the element exists should
overall enable a higher throughput for hashmap lookups, since if the
element does not exist in the bloom filter, we can avoid a costly lookup in
the hashmap.

On average, using 5 hash functions in the bloom filter tended to perform
the best across the widest range of different entry sizes. The benchmark
results using 5 hash functions (running on 8 threads on a machine with one
NUMA node, and taking the average of 3 runs) were roughly as follows:

value_size = 4 bytes -
10k entries: 30% faster
50k entries: 40% faster
100k entries: 40% faster
500k entries: 70% faster
1 million entries: 90% faster
5 million entries: 140% faster

value_size = 8 bytes -
10k entries: 30% faster
50k entries: 40% faster
100k entries: 50% faster
500k entries: 80% faster
1 million entries: 100% faster
5 million entries: 150% faster

value_size = 16 bytes -
10k entries: 20% faster
50k entries: 30% faster
100k entries: 35% faster
500k entries: 65% faster
1 million entries: 85% faster
5 million entries: 110% faster

value_size = 40 bytes -
10k entries: 5% faster
50k entries: 15% faster
100k entries: 20% faster
500k entries: 65% faster
1 million entries: 75% faster
5 million entries: 120% faster

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211027234504.30744-6-joannekoong@fb.com

authored by

Joanne Koong and committed by

Alexei Starovoitov 4 years ago f44bc543 57fd1c63

+104 -5

4 changed files

expand all

tools

testing

selftests

bpf

bench.c

benchs

bench_bloom_filter_map.c

run_bench_bloom_filter_map.sh

run_common.sh

+18 -5

tools/testing/selftests/bpf/bench.c

··· 92 92 printf("Iter %3d (%7.3lfus): ", 93 93 iter, (delta_ns - 1000000000) / 1000.0); 94 94 95 - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", 96 - hits_per_sec, hits_per_prod, drops_per_sec); 95 + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", 96 + hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); 97 97 } 98 98 99 99 void hits_drops_report_final(struct bench_res res[], int res_cnt) 100 100 { 101 101 int i; 102 - double hits_mean = 0.0, drops_mean = 0.0; 103 - double hits_stddev = 0.0, drops_stddev = 0.0; 102 + double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; 103 + double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; 104 + double total_ops; 104 105 105 106 for (i = 0; i < res_cnt; i++) { 106 107 hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); 107 108 drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); 108 109 } 110 + total_ops_mean = hits_mean + drops_mean; 109 111 110 112 if (res_cnt > 1) { 111 113 for (i = 0; i < res_cnt; i++) { ··· 117 115 drops_stddev += (drops_mean - res[i].drops / 1000000.0) * 118 116 (drops_mean - res[i].drops / 1000000.0) / 119 117 (res_cnt - 1.0); 118 + total_ops = res[i].hits + res[i].drops; 119 + total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * 120 + (total_ops_mean - total_ops / 1000000.0) / 121 + (res_cnt - 1.0); 120 122 } 121 123 hits_stddev = sqrt(hits_stddev); 122 124 drops_stddev = sqrt(drops_stddev); 125 + total_ops_stddev = sqrt(total_ops_stddev); 123 126 } 124 127 printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", 125 128 hits_mean, hits_stddev, hits_mean / env.producer_cnt); 126 - printf("drops %8.3lf \u00B1 %5.3lfM/s\n", 129 + printf("drops %8.3lf \u00B1 %5.3lfM/s, ", 127 130 drops_mean, drops_stddev); 131 + printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", 132 + total_ops_mean, total_ops_stddev); 128 133 } 129 134 130 135 const char *argp_program_version = "benchmark"; 
··· 366 357 extern const struct bench bench_bloom_lookup; 367 358 extern const struct bench bench_bloom_update; 368 359 extern const struct bench bench_bloom_false_positive; 360 + extern const struct bench bench_hashmap_without_bloom; 361 + extern const struct bench bench_hashmap_with_bloom; 369 362 370 363 static const struct bench *benchs[] = { 371 364 &bench_count_global, ··· 392 381 &bench_bloom_lookup, 393 382 &bench_bloom_update, 394 383 &bench_bloom_false_positive, 384 + &bench_hashmap_without_bloom, 385 + &bench_hashmap_with_bloom, 395 386 }; 396 387 397 388 static void setup_benchmark()

+57

tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c

··· 346 346 } 347 347 } 348 348 349 + static void hashmap_with_bloom_setup(void) 350 + { 351 + struct bpf_link *link; 352 + 353 + ctx.use_hashmap = true; 354 + ctx.hashmap_use_bloom = true; 355 + 356 + ctx.skel = setup_skeleton(); 357 + 358 + populate_maps(); 359 + 360 + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); 361 + if (!link) { 362 + fprintf(stderr, "failed to attach program!\n"); 363 + exit(1); 364 + } 365 + } 366 + 367 + static void hashmap_no_bloom_setup(void) 368 + { 369 + struct bpf_link *link; 370 + 371 + ctx.use_hashmap = true; 372 + 373 + ctx.skel = setup_skeleton(); 374 + 375 + populate_maps(); 376 + 377 + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); 378 + if (!link) { 379 + fprintf(stderr, "failed to attach program!\n"); 380 + exit(1); 381 + } 382 + } 383 + 349 384 static void measure(struct bench_res *res) 350 385 { 351 386 unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; ··· 452 417 .measure = measure, 453 418 .report_progress = false_hits_report_progress, 454 419 .report_final = false_hits_report_final, 420 + }; 421 + 422 + const struct bench bench_hashmap_without_bloom = { 423 + .name = "hashmap-without-bloom", 424 + .validate = validate, 425 + .setup = hashmap_no_bloom_setup, 426 + .producer_thread = producer, 427 + .consumer_thread = consumer, 428 + .measure = measure, 429 + .report_progress = hits_drops_report_progress, 430 + .report_final = hits_drops_report_final, 431 + }; 432 + 433 + const struct bench bench_hashmap_with_bloom = { 434 + .name = "hashmap-with-bloom", 435 + .validate = validate, 436 + .setup = hashmap_with_bloom_setup, 437 + .producer_thread = producer, 438 + .consumer_thread = consumer, 439 + .measure = measure, 440 + .report_progress = hits_drops_report_progress, 441 + .report_final = hits_drops_report_final, 455 442 };

+17

tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh

··· 26 26 done 27 27 done 28 28 done 29 + 30 + header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)" 31 + for v in 2 4 8 16 40; do 32 + for h in {1..10}; do 33 + subtitle "value_size: $v, # hashes: $h" 34 + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do 35 + printf "%'d entries -\n" $e 36 + printf "\t" 37 + summarize_total "Hashmap without bloom filter: " \ 38 + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" 39 + printf "\t" 40 + summarize_total "Hashmap with bloom filter: " \ 41 + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" 42 + done 43 + printf "\n" 44 + done 45 + done

+12

tools/testing/selftests/bpf/benchs/run_common.sh

··· 33 33 echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" 34 34 } 35 35 36 + function total() 37 + { 38 + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" 39 + } 40 + 36 41 function summarize() 37 42 { 38 43 bench="$1" ··· 50 45 bench="$1" 51 46 summary=$(echo $2 | tail -n1) 52 47 printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" 48 + } 49 + 50 + function summarize_total() 51 + { 52 + bench="$1" 53 + summary=$(echo $2 | tail -n1) 54 + printf "%-20s %s\n" "$bench" "$(total $summary)" 53 55 }