Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: Measure bpf_loop verifier performance

This patch adds bpf_loop variants of the pyperf and strobemeta tests and
measures how verifier performance changes when the traditional for loop
is replaced with the bpf_loop helper.

The results are as follows:

~strobemeta~

Baseline
verification time 6808200 usec
stack depth 496
processed 554252 insns (limit 1000000) max_states_per_insn 16
total_states 15878 peak_states 13489 mark_read 3110
#192 verif_scale_strobemeta:OK (unrolled loop)

Using bpf_loop
verification time 31589 usec
stack depth 96+400
processed 1513 insns (limit 1000000) max_states_per_insn 2
total_states 106 peak_states 106 mark_read 60
#193 verif_scale_strobemeta_bpf_loop:OK

~pyperf600~

Baseline
verification time 29702486 usec
stack depth 368
processed 626838 insns (limit 1000000) max_states_per_insn 7
total_states 30368 peak_states 30279 mark_read 748
#182 verif_scale_pyperf600:OK (unrolled loop)

Using bpf_loop
verification time 148488 usec
stack depth 320+40
processed 10518 insns (limit 1000000) max_states_per_insn 10
total_states 705 peak_states 517 mark_read 38
#183 verif_scale_pyperf600_bpf_loop:OK

Using the bpf_loop helper led to approximately a 99% decrease in both the
verification time and the number of instructions processed by the verifier.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com

Authored by Joanne Koong and committed by Alexei Starovoitov
f6e659b7 4e5070b6

+169 -4
+12
tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
··· 115 115 scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); 116 116 } 117 117 118 + void test_verif_scale_pyperf600_bpf_loop(void) 119 + { 120 + /* use the bpf_loop helper*/ 121 + scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); 122 + } 123 + 118 124 void test_verif_scale_pyperf600_nounroll() 119 125 { 120 126 /* no unroll at all. ··· 169 163 * ~350k processed_insns 170 164 */ 171 165 scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); 166 + } 167 + 168 + void test_verif_scale_strobemeta_bpf_loop(void) 169 + { 170 + /* use the bpf_loop helper*/ 171 + scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); 172 172 } 173 173 174 174 void test_verif_scale_strobemeta_nounroll1()
+70 -1
tools/testing/selftests/bpf/progs/pyperf.h
··· 159 159 __uint(value_size, sizeof(long long) * 127); 160 160 } stackmap SEC(".maps"); 161 161 162 + #ifdef USE_BPF_LOOP 163 + struct process_frame_ctx { 164 + int cur_cpu; 165 + int32_t *symbol_counter; 166 + void *frame_ptr; 167 + FrameData *frame; 168 + PidData *pidData; 169 + Symbol *sym; 170 + Event *event; 171 + bool done; 172 + }; 173 + 174 + #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) 175 + 176 + static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) 177 + { 178 + int zero = 0; 179 + void *frame_ptr = ctx->frame_ptr; 180 + PidData *pidData = ctx->pidData; 181 + FrameData *frame = ctx->frame; 182 + int32_t *symbol_counter = ctx->symbol_counter; 183 + int cur_cpu = ctx->cur_cpu; 184 + Event *event = ctx->event; 185 + Symbol *sym = ctx->sym; 186 + 187 + if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { 188 + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; 189 + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); 190 + 191 + if (!symbol_id) { 192 + bpf_map_update_elem(&symbolmap, sym, &zero, 0); 193 + symbol_id = bpf_map_lookup_elem(&symbolmap, sym); 194 + if (!symbol_id) { 195 + ctx->done = true; 196 + return 1; 197 + } 198 + } 199 + if (*symbol_id == new_symbol_id) 200 + (*symbol_counter)++; 201 + 202 + barrier_var(i); 203 + if (i >= STACK_MAX_LEN) 204 + return 1; 205 + 206 + event->stack[i] = *symbol_id; 207 + 208 + event->stack_len = i + 1; 209 + frame_ptr = frame->f_back; 210 + } 211 + return 0; 212 + } 213 + #endif /* USE_BPF_LOOP */ 214 + 162 215 #ifdef GLOBAL_FUNC 163 216 __noinline 164 217 #elif defined(SUBPROGS) ··· 281 228 int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); 282 229 if (symbol_counter == NULL) 283 230 return 0; 231 + #ifdef USE_BPF_LOOP 232 + struct process_frame_ctx ctx = { 233 + .cur_cpu = cur_cpu, 234 + .symbol_counter = symbol_counter, 235 + .frame_ptr = frame_ptr, 236 + .frame = &frame, 237 + .pidData = pidData, 238 + .sym = &sym, 239 + .event 
= event, 240 + }; 241 + 242 + bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); 243 + if (ctx.done) 244 + return 0; 245 + #else 284 246 #ifdef NO_UNROLL 285 247 #pragma clang loop unroll(disable) 286 248 #else 287 249 #pragma clang loop unroll(full) 288 - #endif 250 + #endif /* NO_UNROLL */ 289 251 /* Unwind python stack */ 290 252 for (int i = 0; i < STACK_MAX_LEN; ++i) { 291 253 if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { ··· 319 251 frame_ptr = frame.f_back; 320 252 } 321 253 } 254 + #endif /* USE_BPF_LOOP */ 322 255 event->stack_complete = frame_ptr == NULL; 323 256 } else { 324 257 event->stack_complete = 1;
+6
tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + 4 + #define STACK_MAX_LEN 600 5 + #define USE_BPF_LOOP 6 + #include "pyperf.h"
+72 -3
tools/testing/selftests/bpf/progs/strobemeta.h
··· 445 445 return payload; 446 446 } 447 447 448 + #ifdef USE_BPF_LOOP 449 + enum read_type { 450 + READ_INT_VAR, 451 + READ_MAP_VAR, 452 + READ_STR_VAR, 453 + }; 454 + 455 + struct read_var_ctx { 456 + struct strobemeta_payload *data; 457 + void *tls_base; 458 + struct strobemeta_cfg *cfg; 459 + void *payload; 460 + /* value gets mutated */ 461 + struct strobe_value_generic *value; 462 + enum read_type type; 463 + }; 464 + 465 + static int read_var_callback(__u32 index, struct read_var_ctx *ctx) 466 + { 467 + switch (ctx->type) { 468 + case READ_INT_VAR: 469 + if (index >= STROBE_MAX_INTS) 470 + return 1; 471 + read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data); 472 + break; 473 + case READ_MAP_VAR: 474 + if (index >= STROBE_MAX_MAPS) 475 + return 1; 476 + ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, 477 + ctx->value, ctx->data, ctx->payload); 478 + break; 479 + case READ_STR_VAR: 480 + if (index >= STROBE_MAX_STRS) 481 + return 1; 482 + ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, 483 + ctx->value, ctx->data, ctx->payload); 484 + break; 485 + } 486 + return 0; 487 + } 488 + #endif /* USE_BPF_LOOP */ 489 + 448 490 /* 449 491 * read_strobe_meta returns NULL, if no metadata was read; otherwise returns 450 492 * pointer to *right after* payload ends ··· 517 475 */ 518 476 tls_base = (void *)task; 519 477 478 + #ifdef USE_BPF_LOOP 479 + struct read_var_ctx ctx = { 480 + .cfg = cfg, 481 + .tls_base = tls_base, 482 + .value = &value, 483 + .data = data, 484 + .payload = payload, 485 + }; 486 + int err; 487 + 488 + ctx.type = READ_INT_VAR; 489 + err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0); 490 + if (err != STROBE_MAX_INTS) 491 + return NULL; 492 + 493 + ctx.type = READ_STR_VAR; 494 + err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0); 495 + if (err != STROBE_MAX_STRS) 496 + return NULL; 497 + 498 + ctx.type = READ_MAP_VAR; 499 + err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); 500 + if 
(err != STROBE_MAX_MAPS) 501 + return NULL; 502 + #else 520 503 #ifdef NO_UNROLL 521 504 #pragma clang loop unroll(disable) 522 505 #else 523 506 #pragma unroll 524 - #endif 507 + #endif /* NO_UNROLL */ 525 508 for (int i = 0; i < STROBE_MAX_INTS; ++i) { 526 509 read_int_var(cfg, i, tls_base, &value, data); 527 510 } ··· 554 487 #pragma clang loop unroll(disable) 555 488 #else 556 489 #pragma unroll 557 - #endif 490 + #endif /* NO_UNROLL */ 558 491 for (int i = 0; i < STROBE_MAX_STRS; ++i) { 559 492 payload += read_str_var(cfg, i, tls_base, &value, data, payload); 560 493 } ··· 562 495 #pragma clang loop unroll(disable) 563 496 #else 564 497 #pragma unroll 565 - #endif 498 + #endif /* NO_UNROLL */ 566 499 for (int i = 0; i < STROBE_MAX_MAPS; ++i) { 567 500 payload = read_map_var(cfg, i, tls_base, &value, data, payload); 568 501 } 502 + #endif /* USE_BPF_LOOP */ 503 + 569 504 /* 570 505 * return pointer right after end of payload, so it's possible to 571 506 * calculate exact amount of useful data that needs to be sent
+9
tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + /* Copyright (c) 2021 Facebook */ 3 + 4 + #define STROBE_MAX_INTS 2 5 + #define STROBE_MAX_STRS 25 6 + #define STROBE_MAX_MAPS 100 7 + #define STROBE_MAX_MAP_ENTRIES 20 8 + #define USE_BPF_LOOP 9 + #include "strobemeta.h"