Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf trace: Add trace__bpf_sys_enter_beauty_map() to prepare for fetching data in BPF

Set up beauty_map and load it into BPF, using the following format
(argument numbers are 1-based, array indices are 0-based):

If argument No.3 of syscall number 114 is a struct of size 32 bytes:
beauty_map[114][2] = 32;

If argument No.3 of syscall number 114 is a string:
beauty_map[114][2] = 1;

If argument No.3 of syscall number 114 is a buffer whose size is given
by argument No.4: beauty_map[114][2] = -4; /* -1 ~ -6, we'll read this
buffer size in BPF */

Committer notes:

Moved syscall_arg_fmt__cache_btf_struct() from an ifdef
HAVE_LIBBPF_SUPPORT block to closer to where it is used, which is
ifdef'ed on HAVE_BPF_SKEL; otherwise the build breaks when building with
BUILD_BPF_SKEL=0, as detected using 'make -C tools/perf build-test'.

Also add 'struct beauty_map_enter' to tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
as we're using it in this patch, otherwise we get this while trying to
build at this point in the original patch series:

builtin-trace.c: In function ‘trace__init_syscalls_bpf_prog_array_maps’:
builtin-trace.c:3725:58: error: ‘struct <anonymous>’ has no member named ‘beauty_map_enter’
3725 | int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter);
|

We also have to take into account syscall_arg_fmt.from_user when telling
the kernel what to copy in the sys_enter generic collector: we don't
want to collect bogus data from buffers whose contents will only be
available to us at sys_exit time, i.e. after the kernel has filled them,
so leave those for when we have such a sys_exit based collector.

Committer testing:

Not wired up yet, so all continues to work, using the existing BPF
collector and userspace beautifiers that are augmentation aware:

root@number:~# rm -f 987654 ; touch 123456 ; perf trace -e rename* mv 123456 987654
0.000 ( 0.031 ms): mv/20888 renameat2(olddfd: CWD, oldname: "123456", newdfd: CWD, newname: "987654", flags: NOREPLACE) = 0
root@number:~# perf trace -e connect,sendto ping -c 1 www.google.com
0.000 ( 0.014 ms): ping/20892 connect(fd: 5, uservaddr: { .family: LOCAL, path: /run/systemd/resolve/io.systemd.Resolve }, addrlen: 42) = 0
0.040 ( 0.003 ms): ping/20892 sendto(fd: 5, buff: 0x560b4ff17980, len: 97, flags: DONTWAIT|NOSIGNAL) = 97
0.480 ( 0.017 ms): ping/20892 sendto(fd: 5, buff: 0x7ffd82d07150, len: 20, addr: { .family: NETLINK }, addr_len: 0xc) = 20
0.526 ( 0.014 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET6, port: 0, addr: 2800:3f0:4004:810::2004 }, addrlen: 28) = 0
0.542 ( 0.002 ms): ping/20892 connect(fd: 5, uservaddr: { .family: UNSPEC }, addrlen: 16) = 0
0.544 ( 0.004 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET, port: 0, addr: 142.251.135.100 }, addrlen: 16) = 0
0.559 ( 0.002 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET, port: 1025, addr: 142.251.135.100 }, addrlen: 16PING www.google.com (142.251.135.100) 56(84) bytes of data.
) = 0
0.589 ( 0.058 ms): ping/20892 sendto(fd: 3, buff: 0x560b4ff11ac0, len: 64, addr: { .family: INET, port: 0, addr: 142.251.135.100 }, addr_len: 0x10) = 64
45.250 ( 0.029 ms): ping/20892 connect(fd: 5, uservaddr: { .family: LOCAL, path: /run/systemd/resolve/io.systemd.Resolve }, addrlen: 42) = 0
45.344 ( 0.012 ms): ping/20892 sendto(fd: 5, buff: 0x560b4ff19340, len: 111, flags: DONTWAIT|NOSIGNAL) = 111
64 bytes from rio09s08-in-f4.1e100.net (142.251.135.100): icmp_seq=1 ttl=49 time=44.4 ms

--- www.google.com ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 44.361/44.361/44.361/0.000 ms
root@number:~#

Signed-off-by: Howard Chu <howardchu95@gmail.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240815013626.935097-4-howardchu95@gmail.com
Link: https://lore.kernel.org/r/20240824163322.60796-3-howardchu95@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Howard Chu and committed by
Arnaldo Carvalho de Melo
7f403067 d92f490c

+117
+110
tools/perf/builtin-trace.c
··· 120 120 bool show_zero; 121 121 #ifdef HAVE_LIBBPF_SUPPORT 122 122 const struct btf_type *type; 123 + int type_id; /* used in btf_dump */ 123 124 #endif 124 125 }; 125 126 ··· 3463 3462 } 3464 3463 3465 3464 #ifdef HAVE_BPF_SKEL 3465 + static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type) 3466 + { 3467 + int id; 3468 + 3469 + if (arg_fmt->type != NULL) 3470 + return -1; 3471 + 3472 + id = btf__find_by_name(btf, type); 3473 + if (id < 0) 3474 + return -1; 3475 + 3476 + arg_fmt->type = btf__type_by_id(btf, id); 3477 + arg_fmt->type_id = id; 3478 + 3479 + return 0; 3480 + } 3481 + 3466 3482 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) 3467 3483 { 3468 3484 struct bpf_program *pos, *prog = NULL; ··· 3553 3535 { 3554 3536 struct syscall *sc = trace__syscall_info(trace, NULL, id); 3555 3537 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); 3538 + } 3539 + 3540 + static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) 3541 + { 3542 + struct tep_format_field *field; 3543 + struct syscall *sc = trace__syscall_info(trace, NULL, key); 3544 + const struct btf_type *bt; 3545 + char *struct_offset, *tmp, name[32]; 3546 + bool can_augment = false; 3547 + int i, cnt; 3548 + 3549 + if (sc == NULL) 3550 + return -1; 3551 + 3552 + trace__load_vmlinux_btf(trace); 3553 + if (trace->btf == NULL) 3554 + return -1; 3555 + 3556 + for (i = 0, field = sc->args; field; ++i, field = field->next) { 3557 + struct_offset = strstr(field->type, "struct "); 3558 + 3559 + // XXX We're only collecting pointer payloads _from_ user space 3560 + if (!sc->arg_fmt[i].from_user) 3561 + continue; 3562 + 3563 + if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct */ 3564 + struct_offset += 7; 3565 + 3566 + /* for 'struct foo *', we only want 'foo' */ 3567 + for (tmp = 
struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) { 3568 + } 3569 + 3570 + strncpy(name, struct_offset, cnt); 3571 + name[cnt] = '\0'; 3572 + 3573 + /* cache struct's btf_type and type_id */ 3574 + if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name)) 3575 + continue; 3576 + 3577 + bt = sc->arg_fmt[i].type; 3578 + beauty_array[i] = bt->size; 3579 + can_augment = true; 3580 + } else if (field->flags & TEP_FIELD_IS_POINTER && /* string */ 3581 + strcmp(field->type, "const char *") == 0 && 3582 + (strstr(field->name, "name") || 3583 + strstr(field->name, "path") || 3584 + strstr(field->name, "file") || 3585 + strstr(field->name, "root") || 3586 + strstr(field->name, "key") || 3587 + strstr(field->name, "special") || 3588 + strstr(field->name, "type") || 3589 + strstr(field->name, "description"))) { 3590 + beauty_array[i] = 1; 3591 + can_augment = true; 3592 + } else if (field->flags & TEP_FIELD_IS_POINTER && /* buffer */ 3593 + strstr(field->type, "char *") && 3594 + (strstr(field->name, "buf") || 3595 + strstr(field->name, "val") || 3596 + strstr(field->name, "msg"))) { 3597 + int j; 3598 + struct tep_format_field *field_tmp; 3599 + 3600 + /* find the size of the buffer that appears in pairs with buf */ 3601 + for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) { 3602 + if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */ 3603 + (strstr(field_tmp->name, "count") || 3604 + strstr(field_tmp->name, "siz") || /* size, bufsiz */ 3605 + (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) { 3606 + /* filename's got 'len' in it, we don't want that */ 3607 + beauty_array[i] = -(j + 1); 3608 + can_augment = true; 3609 + break; 3610 + } 3611 + } 3612 + } 3613 + } 3614 + 3615 + if (can_augment) 3616 + return 0; 3617 + 3618 + return -1; 3556 3619 } 3557 3620 3558 3621 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) ··· 3740 
3641 { 3741 3642 int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); 3742 3643 int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); 3644 + int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter); 3743 3645 int err = 0; 3646 + unsigned int beauty_array[6]; 3744 3647 3745 3648 for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { 3746 3649 int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i); ··· 3759 3658 break; 3760 3659 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key); 3761 3660 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); 3661 + if (err) 3662 + break; 3663 + 3664 + /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */ 3665 + memset(beauty_array, 0, sizeof(beauty_array)); 3666 + err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array); 3667 + if (err) 3668 + continue; 3669 + err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY); 3762 3670 if (err) 3763 3671 break; 3764 3672 }
+7
tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
··· 124 124 __uint(max_entries, 1); 125 125 } augmented_args_tmp SEC(".maps"); 126 126 127 + /* Hash map keyed by syscall id; each value has six __u32 slots, one per syscall argument, filled from userspace by trace__init_syscalls_bpf_prog_array_maps() in builtin-trace.c to tell BPF how many bytes to collect for that argument. */ struct beauty_map_enter { 128 + __uint(type, BPF_MAP_TYPE_HASH); 129 + __type(key, int); 130 + __type(value, __u32[6]); 131 + __uint(max_entries, 512); 132 + } beauty_map_enter SEC(".maps"); 133 + 127 134 static inline struct augmented_args_payload *augmented_args_payload(void) 128 135 { 129 136 int key = 0;