Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: Add benchmark for bpf_strncmp() helper

Add a benchmark to compare the performance of a home-made strncmp()
in a bpf program with that of the bpf_strncmp() helper. In summary, the
performance win of bpf_strncmp() under x86-64 is greater than 18% when the
compared string length is greater than 64, and is 179% when the length is
4095. Under arm64 the performance win is even bigger: 33% when the length
is greater than 64 and 600% when the length is 4095.

The details are as follows:

no-helper-X: uses the home-made strncmp() to compare X-sized strings
helper-Y: uses bpf_strncmp() to compare Y-sized strings

Under x86-64:

no-helper-1 3.504 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-1 3.347 ± 0.001M/s (drops 0.000 ± 0.000M/s)

no-helper-8 3.357 ± 0.001M/s (drops 0.000 ± 0.000M/s)
helper-8 3.307 ± 0.001M/s (drops 0.000 ± 0.000M/s)

no-helper-32 3.064 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-32 3.253 ± 0.001M/s (drops 0.000 ± 0.000M/s)

no-helper-64 2.563 ± 0.001M/s (drops 0.000 ± 0.000M/s)
helper-64 3.040 ± 0.001M/s (drops 0.000 ± 0.000M/s)

no-helper-128 1.975 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-128 2.641 ± 0.000M/s (drops 0.000 ± 0.000M/s)

no-helper-512 0.759 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-512 1.574 ± 0.000M/s (drops 0.000 ± 0.000M/s)

no-helper-2048 0.329 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-2048 0.602 ± 0.000M/s (drops 0.000 ± 0.000M/s)

no-helper-4095 0.117 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-4095 0.327 ± 0.000M/s (drops 0.000 ± 0.000M/s)

Under arm64:

no-helper-1 2.806 ± 0.004M/s (drops 0.000 ± 0.000M/s)
helper-1 2.819 ± 0.002M/s (drops 0.000 ± 0.000M/s)

no-helper-8 2.797 ± 0.109M/s (drops 0.000 ± 0.000M/s)
helper-8 2.786 ± 0.025M/s (drops 0.000 ± 0.000M/s)

no-helper-32 2.399 ± 0.011M/s (drops 0.000 ± 0.000M/s)
helper-32 2.703 ± 0.002M/s (drops 0.000 ± 0.000M/s)

no-helper-64 2.020 ± 0.015M/s (drops 0.000 ± 0.000M/s)
helper-64 2.702 ± 0.073M/s (drops 0.000 ± 0.000M/s)

no-helper-128 1.604 ± 0.001M/s (drops 0.000 ± 0.000M/s)
helper-128 2.516 ± 0.002M/s (drops 0.000 ± 0.000M/s)

no-helper-512 0.699 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-512 2.106 ± 0.003M/s (drops 0.000 ± 0.000M/s)

no-helper-2048 0.215 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-2048 1.223 ± 0.003M/s (drops 0.000 ± 0.000M/s)

no-helper-4095 0.112 ± 0.000M/s (drops 0.000 ± 0.000M/s)
helper-4095 0.796 ± 0.000M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211210141652.877186-4-houtao1@huawei.com

authored by

Hou Tao and committed by
Alexei Starovoitov
9c42652f 9a93bf3f

+232 -1
+3 -1
tools/testing/selftests/bpf/Makefile
··· 537 537 $(OUTPUT)/perfbuf_bench.skel.h 538 538 $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h 539 539 $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h 540 + $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h 540 541 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) 541 542 $(OUTPUT)/bench: LDLIBS += -lm 542 543 $(OUTPUT)/bench: $(OUTPUT)/bench.o \ ··· 548 547 $(OUTPUT)/bench_trigger.o \ 549 548 $(OUTPUT)/bench_ringbufs.o \ 550 549 $(OUTPUT)/bench_bloom_filter_map.o \ 551 - $(OUTPUT)/bench_bpf_loop.o 550 + $(OUTPUT)/bench_bpf_loop.o \ 551 + $(OUTPUT)/bench_strncmp.o 552 552 $(call msg,BINARY,,$@) 553 553 $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ 554 554
+6
tools/testing/selftests/bpf/bench.c
··· 205 205 extern struct argp bench_ringbufs_argp; 206 206 extern struct argp bench_bloom_map_argp; 207 207 extern struct argp bench_bpf_loop_argp; 208 + extern struct argp bench_strncmp_argp; 208 209 209 210 static const struct argp_child bench_parsers[] = { 210 211 { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, 211 212 { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, 212 213 { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, 214 + { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, 213 215 {}, 214 216 }; 215 217 ··· 411 409 extern const struct bench bench_hashmap_without_bloom; 412 410 extern const struct bench bench_hashmap_with_bloom; 413 411 extern const struct bench bench_bpf_loop; 412 + extern const struct bench bench_strncmp_no_helper; 413 + extern const struct bench bench_strncmp_helper; 414 414 415 415 static const struct bench *benchs[] = { 416 416 &bench_count_global, ··· 445 441 &bench_hashmap_without_bloom, 446 442 &bench_hashmap_with_bloom, 447 443 &bench_bpf_loop, 444 + &bench_strncmp_no_helper, 445 + &bench_strncmp_helper, 448 446 }; 449 447 450 448 static void setup_benchmark()
+161
tools/testing/selftests/bpf/benchs/bench_strncmp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ 3 + #include <argp.h> 4 + #include "bench.h" 5 + #include "strncmp_bench.skel.h" 6 + 7 + static struct strncmp_ctx { 8 + struct strncmp_bench *skel; 9 + } ctx; 10 + 11 + static struct strncmp_args { 12 + u32 cmp_str_len; 13 + } args = { 14 + .cmp_str_len = 32, 15 + }; 16 + 17 + enum { 18 + ARG_CMP_STR_LEN = 5000, 19 + }; 20 + 21 + static const struct argp_option opts[] = { 22 + { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0, 23 + "Set the length of compared string" }, 24 + {}, 25 + }; 26 + 27 + static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state) 28 + { 29 + switch (key) { 30 + case ARG_CMP_STR_LEN: 31 + args.cmp_str_len = strtoul(arg, NULL, 10); 32 + if (!args.cmp_str_len || 33 + args.cmp_str_len >= sizeof(ctx.skel->bss->str)) { 34 + fprintf(stderr, "Invalid cmp str len (limit %zu)\n", 35 + sizeof(ctx.skel->bss->str)); 36 + argp_usage(state); 37 + } 38 + break; 39 + default: 40 + return ARGP_ERR_UNKNOWN; 41 + } 42 + 43 + return 0; 44 + } 45 + 46 + const struct argp bench_strncmp_argp = { 47 + .options = opts, 48 + .parser = strncmp_parse_arg, 49 + }; 50 + 51 + static void strncmp_validate(void) 52 + { 53 + if (env.consumer_cnt != 1) { 54 + fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n"); 55 + exit(1); 56 + } 57 + } 58 + 59 + static void strncmp_setup(void) 60 + { 61 + int err; 62 + char *target; 63 + size_t i, sz; 64 + 65 + sz = sizeof(ctx.skel->rodata->target); 66 + if (!sz || sz < sizeof(ctx.skel->bss->str)) { 67 + fprintf(stderr, "invalid string size (target %zu, src %zu)\n", 68 + sz, sizeof(ctx.skel->bss->str)); 69 + exit(1); 70 + } 71 + 72 + setup_libbpf(); 73 + 74 + ctx.skel = strncmp_bench__open(); 75 + if (!ctx.skel) { 76 + fprintf(stderr, "failed to open skeleton\n"); 77 + exit(1); 78 + } 79 + 80 + srandom(time(NULL)); 81 + target = ctx.skel->rodata->target; 82 + for (i = 0; i < sz - 1; i++) 83 + 
target[i] = '1' + random() % 9; 84 + target[sz - 1] = '\0'; 85 + 86 + ctx.skel->rodata->cmp_str_len = args.cmp_str_len; 87 + 88 + memcpy(ctx.skel->bss->str, target, args.cmp_str_len); 89 + ctx.skel->bss->str[args.cmp_str_len] = '\0'; 90 + /* Make bss->str < rodata->target */ 91 + ctx.skel->bss->str[args.cmp_str_len - 1] -= 1; 92 + 93 + err = strncmp_bench__load(ctx.skel); 94 + if (err) { 95 + fprintf(stderr, "failed to load skeleton\n"); 96 + strncmp_bench__destroy(ctx.skel); 97 + exit(1); 98 + } 99 + } 100 + 101 + static void strncmp_attach_prog(struct bpf_program *prog) 102 + { 103 + struct bpf_link *link; 104 + 105 + link = bpf_program__attach(prog); 106 + if (!link) { 107 + fprintf(stderr, "failed to attach program!\n"); 108 + exit(1); 109 + } 110 + } 111 + 112 + static void strncmp_no_helper_setup(void) 113 + { 114 + strncmp_setup(); 115 + strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper); 116 + } 117 + 118 + static void strncmp_helper_setup(void) 119 + { 120 + strncmp_setup(); 121 + strncmp_attach_prog(ctx.skel->progs.strncmp_helper); 122 + } 123 + 124 + static void *strncmp_producer(void *ctx) 125 + { 126 + while (true) 127 + (void)syscall(__NR_getpgid); 128 + return NULL; 129 + } 130 + 131 + static void *strncmp_consumer(void *ctx) 132 + { 133 + return NULL; 134 + } 135 + 136 + static void strncmp_measure(struct bench_res *res) 137 + { 138 + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); 139 + } 140 + 141 + const struct bench bench_strncmp_no_helper = { 142 + .name = "strncmp-no-helper", 143 + .validate = strncmp_validate, 144 + .setup = strncmp_no_helper_setup, 145 + .producer_thread = strncmp_producer, 146 + .consumer_thread = strncmp_consumer, 147 + .measure = strncmp_measure, 148 + .report_progress = hits_drops_report_progress, 149 + .report_final = hits_drops_report_final, 150 + }; 151 + 152 + const struct bench bench_strncmp_helper = { 153 + .name = "strncmp-helper", 154 + .validate = strncmp_validate, 155 + .setup = strncmp_helper_setup, 156 
+ .producer_thread = strncmp_producer, 157 + .consumer_thread = strncmp_consumer, 158 + .measure = strncmp_measure, 159 + .report_progress = hits_drops_report_progress, 160 + .report_final = hits_drops_report_final, 161 + };
+12
tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Run the strncmp-no-helper and strncmp-helper benchmarks for a fixed set of
# compared-string lengths and hand each run's output to summarize.
# NOTE(review): summarize and RUN_BENCH are presumably provided by
# run_common.sh - confirm.

source ./benchs/run_common.sh

set -eufo pipefail

for str_len in 1 8 64 512 2048 4095; do
	for variant in no-helper helper; do
		# RUN_BENCH stays unquoted so it can expand to a command plus arguments.
		summarize "${variant}-${str_len}" \
			"$($RUN_BENCH --cmp-str-len="${str_len}" "strncmp-${variant}")"
	done
done
+50
tools/testing/selftests/bpf/progs/strncmp_bench.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ 3 + #include <linux/types.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + 8 + #define STRNCMP_STR_SZ 4096 9 + 10 + /* Will be updated by benchmark before program loading */ 11 + const volatile unsigned int cmp_str_len = 1; 12 + const char target[STRNCMP_STR_SZ]; 13 + 14 + long hits = 0; 15 + char str[STRNCMP_STR_SZ]; 16 + 17 + char _license[] SEC("license") = "GPL"; 18 + 19 + static __always_inline int local_strncmp(const char *s1, unsigned int sz, 20 + const char *s2) 21 + { 22 + int ret = 0; 23 + unsigned int i; 24 + 25 + for (i = 0; i < sz; i++) { 26 + /* E.g. 0xff > 0x31 */ 27 + ret = (unsigned char)s1[i] - (unsigned char)s2[i]; 28 + if (ret || !s1[i]) 29 + break; 30 + } 31 + 32 + return ret; 33 + } 34 + 35 + SEC("tp/syscalls/sys_enter_getpgid") 36 + int strncmp_no_helper(void *ctx) 37 + { 38 + if (local_strncmp(str, cmp_str_len + 1, target) < 0) 39 + __sync_add_and_fetch(&hits, 1); 40 + return 0; 41 + } 42 + 43 + SEC("tp/syscalls/sys_enter_getpgid") 44 + int strncmp_helper(void *ctx) 45 + { 46 + if (bpf_strncmp(str, cmp_str_len + 1, target) < 0) 47 + __sync_add_and_fetch(&hits, 1); 48 + return 0; 49 + } 50 +