Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf trace: Remove unused bpf map 'syscalls'

augmented_raw_syscalls.c defines the bpf map 'syscalls', which is
initialized by the perf tool in user space to indicate which system calls
are enabled for tracing; on the flip side, the eBPF program relies on the
map to filter out the trace events which are not enabled.

The map also includes a field 'string_args_len[6]' which represents the
string length if the corresponding argument is a string type.

Now the map 'syscalls' is not used: the bpf program doesn't use it as a
filter anymore, as this has been replaced by the function bpf_tail_call()
and the PROG_ARRAY syscalls map. We also don't need to explicitly set the
string length anymore, since bpf_probe_read_str() is smart enough to copy
the string and return the string length.

Therefore, it's safe to remove the bpf map 'syscalls'.

To consolidate the code, this patch removes the definition of map
'syscalls' from augmented_raw_syscalls.c and drops code for using
the map in the perf trace.

Note, since the function trace__set_ev_qualifier_bpf_filter() is removed,
the call to trace__init_syscall_bpf_progs() from it is also removed. We
don't need to worry about this because trace__init_syscall_bpf_progs() is
still invoked from trace__init_syscalls_bpf_prog_array_maps() to
initialize the system call's bpf program callback.

After:

# perf trace -e examples/bpf/augmented_raw_syscalls.c,open* --max-events 10 perf stat --quiet sleep 0.001
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libdw.so.1", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind.so.8", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind-aarch64.so.8", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libcrypto.so.3", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libslang.so.2", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libperl.so.5.34", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3

# perf trace -e examples/bpf/augmented_raw_syscalls.c --max-events 10 perf stat --quiet sleep 0.001
... [continued]: execve()) = 0
brk(NULL) = 0xaaaab1d28000
faccessat(-100, "/etc/ld.so.preload", 4) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
close(3</usr/lib/aarch64-linux-gnu/libcrypto.so.3>) = 0
openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3</usr/lib/aarch64-linux-gnu/libcrypto.so.3>, 0xfffff33f70d0, 832) = 832
munmap(0xffffb5519000, 28672) = 0
munmap(0xffffb55b7000, 32880) = 0
mprotect(0xffffb55a6000, 61440, PROT_NONE) = 0

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221121075237.127706-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Leo Yan and committed by
Arnaldo Carvalho de Melo
8daf87f5 9bc427a0

-118
-101
tools/perf/builtin-trace.c
··· 122 122 struct syscalltbl *sctbl; 123 123 struct { 124 124 struct syscall *table; 125 - struct bpf_map *map; 126 125 struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY 127 126 struct bpf_map *sys_enter, 128 127 *sys_exit; ··· 1221 1222 const char *name; 1222 1223 struct syscall_fmt *fmt; 1223 1224 struct syscall_arg_fmt *arg_fmt; 1224 - }; 1225 - 1226 - /* 1227 - * Must match what is in the BPF program: 1228 - * 1229 - * tools/perf/examples/bpf/augmented_raw_syscalls.c 1230 - */ 1231 - struct bpf_map_syscall_entry { 1232 - bool enabled; 1233 - u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; 1234 1225 }; 1235 1226 1236 1227 /* ··· 3248 3259 3249 3260 static void trace__set_bpf_map_syscalls(struct trace *trace) 3250 3261 { 3251 - trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); 3252 3262 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); 3253 3263 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); 3254 3264 } ··· 3325 3337 { 3326 3338 struct syscall *sc = trace__syscall_info(trace, NULL, id); 3327 3339 return sc ? 
bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog); 3328 - } 3329 - 3330 - static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) 3331 - { 3332 - struct syscall *sc = trace__syscall_info(trace, NULL, id); 3333 - int arg = 0; 3334 - 3335 - if (sc == NULL) 3336 - goto out; 3337 - 3338 - for (; arg < sc->nr_args; ++arg) { 3339 - entry->string_args_len[arg] = 0; 3340 - if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { 3341 - /* Should be set like strace -s strsize */ 3342 - entry->string_args_len[arg] = PATH_MAX; 3343 - } 3344 - } 3345 - out: 3346 - for (; arg < 6; ++arg) 3347 - entry->string_args_len[arg] = 0; 3348 - } 3349 - static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) 3350 - { 3351 - int fd = bpf_map__fd(trace->syscalls.map); 3352 - struct bpf_map_syscall_entry value = { 3353 - .enabled = !trace->not_ev_qualifier, 3354 - }; 3355 - int err = 0; 3356 - size_t i; 3357 - 3358 - for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { 3359 - int key = trace->ev_qualifier_ids.entries[i]; 3360 - 3361 - if (value.enabled) { 3362 - trace__init_bpf_map_syscall_args(trace, key, &value); 3363 - trace__init_syscall_bpf_progs(trace, key); 3364 - } 3365 - 3366 - err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); 3367 - if (err) 3368 - break; 3369 - } 3370 - 3371 - return err; 3372 - } 3373 - 3374 - static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled) 3375 - { 3376 - int fd = bpf_map__fd(trace->syscalls.map); 3377 - struct bpf_map_syscall_entry value = { 3378 - .enabled = enabled, 3379 - }; 3380 - int err = 0, key; 3381 - 3382 - for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { 3383 - if (enabled) 3384 - trace__init_bpf_map_syscall_args(trace, key, &value); 3385 - 3386 - err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); 3387 - if (err) 3388 - break; 3389 - } 3390 - 3391 - return err; 3392 - } 3393 - 3394 - static int 
trace__init_syscalls_bpf_map(struct trace *trace) 3395 - { 3396 - bool enabled = true; 3397 - 3398 - if (trace->ev_qualifier_ids.nr) 3399 - enabled = trace->not_ev_qualifier; 3400 - 3401 - return __trace__init_syscalls_bpf_map(trace, enabled); 3402 3340 } 3403 3341 3404 3342 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) ··· 3541 3627 { 3542 3628 } 3543 3629 3544 - static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) 3545 - { 3546 - return 0; 3547 - } 3548 - 3549 - static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused) 3550 - { 3551 - return 0; 3552 - } 3553 - 3554 3630 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused, 3555 3631 const char *name __maybe_unused) 3556 3632 { ··· 3574 3670 3575 3671 static int trace__set_ev_qualifier_filter(struct trace *trace) 3576 3672 { 3577 - if (trace->syscalls.map) 3578 - return trace__set_ev_qualifier_bpf_filter(trace); 3579 3673 if (trace->syscalls.events.sys_enter) 3580 3674 return trace__set_ev_qualifier_tp_filter(trace); 3581 3675 return 0; ··· 3946 4044 err = trace__set_filter_pids(trace); 3947 4045 if (err < 0) 3948 4046 goto out_error_mem; 3949 - 3950 - if (trace->syscalls.map) 3951 - trace__init_syscalls_bpf_map(trace); 3952 4047 3953 4048 if (trace->syscalls.prog_array.sys_enter) 3954 4049 trace__init_syscalls_bpf_prog_array_maps(trace);
-17
tools/perf/examples/bpf/augmented_raw_syscalls.c
··· 38 38 } __augmented_syscalls__ SEC(".maps"); 39 39 40 40 /* 41 - * string_args_len: one per syscall arg, 0 means not a string or don't copy it, 42 - * PATH_MAX for copying everything, any other value to limit 43 - * it a la 'strace -s strsize'. 44 - */ 45 - struct syscall { 46 - bool enabled; 47 - __u16 string_args_len[6]; 48 - }; 49 - 50 - struct syscalls { 51 - __uint(type, BPF_MAP_TYPE_ARRAY); 52 - __type(key, int); 53 - __type(value, struct syscall); 54 - __uint(max_entries, 512); 55 - } syscalls SEC(".maps"); 56 - 57 - /* 58 41 * What to augment at entry? 59 42 * 60 43 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel