Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf record offcpu: Constify control data for BPF

The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.

Committer testing:

root@x1:~# perf record --off-cpu
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.807 MB perf.data (5645 samples) ]

root@x1:~# perf evlist
cpu_atom/cycles/P
cpu_core/cycles/P
offcpu-time
dummy:u
root@x1:~# perf evlist -v
cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0xa00000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x400000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
offcpu-time: type: 1 (software), size: 136, config: 0xa (PERF_COUNT_SW_BPF_OUTPUT), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
dummy:u: type: 1 (software), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
root@x1:~# perf trace -e bpf --max-events 5 perf record --off-cpu
0.000 ( 0.015 ms): :2949124/2949124 bpf(cmd: 36, uattr: 0x7ffefc6dbe30, size: 8) = -1 EOPNOTSUPP (Operation not supported)
0.031 ( 0.115 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbb60, size: 148) = 14
0.159 ( 0.037 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbc20, size: 148) = 14
23.868 ( 0.144 ms): perf/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbad0, size: 148) = 14
24.027 ( 0.014 ms): perf/2949124 bpf(uattr: 0x7ffefc6dbc80, size: 80) = 14
root@x1:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
8b3b1bb3 4afdc00c

+13 -12
+8 -8
tools/perf/util/bpf_off_cpu.c
··· 73 73 struct evlist *evlist = arg; 74 74 75 75 /* update task filter for the given workload */ 76 - if (!skel->bss->has_cpu && !skel->bss->has_task && 76 + if (skel->rodata->has_task && skel->rodata->uses_tgid && 77 77 perf_thread_map__pid(evlist->core.threads, 0) != -1) { 78 78 int fd; 79 79 u32 pid; 80 80 u8 val = 1; 81 81 82 - skel->bss->has_task = 1; 83 - skel->bss->uses_tgid = 1; 84 82 fd = bpf_map__fd(skel->maps.task_filter); 85 83 pid = perf_thread_map__pid(evlist->core.threads, 0); 86 84 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); ··· 146 148 if (target->cpu_list) { 147 149 ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); 148 150 bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); 151 + skel->rodata->has_cpu = 1; 149 152 } 150 153 151 154 if (target->pid) { ··· 172 173 ntasks = MAX_PROC; 173 174 174 175 bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 176 + skel->rodata->has_task = 1; 177 + skel->rodata->uses_tgid = 1; 175 178 } else if (target__has_task(target)) { 176 179 ntasks = perf_thread_map__nr(evlist->core.threads); 177 180 bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 181 + skel->rodata->has_task = 1; 178 182 } else if (target__none(target)) { 179 183 bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC); 184 + skel->rodata->has_task = 1; 185 + skel->rodata->uses_tgid = 1; 180 186 } 181 187 182 188 if (evlist__first(evlist)->cgrp) { ··· 190 186 191 187 if (!cgroup_is_v2("perf_event")) 192 188 skel->rodata->uses_cgroup_v1 = true; 189 + skel->rodata->has_cgroup = 1; 193 190 } 194 191 195 192 if (opts->record_cgroup) { ··· 213 208 u32 cpu; 214 209 u8 val = 1; 215 210 216 - skel->bss->has_cpu = 1; 217 211 fd = bpf_map__fd(skel->maps.cpu_filter); 218 212 219 213 for (i = 0; i < ncpus; i++) { ··· 224 220 if (target->pid) { 225 221 u8 val = 1; 226 222 227 - skel->bss->has_task = 1; 228 - skel->bss->uses_tgid = 1; 229 223 fd = bpf_map__fd(skel->maps.task_filter); 230 224 231 225 strlist__for_each_entry(pos, 
pid_slist) { ··· 242 240 u32 pid; 243 241 u8 val = 1; 244 242 245 - skel->bss->has_task = 1; 246 243 fd = bpf_map__fd(skel->maps.task_filter); 247 244 248 245 for (i = 0; i < ntasks; i++) { ··· 254 253 struct evsel *evsel; 255 254 u8 val = 1; 256 255 257 - skel->bss->has_cgroup = 1; 258 256 fd = bpf_map__fd(skel->maps.cgroup_filter); 259 257 260 258 evlist__for_each_entry(evlist, evsel) {
+5 -4
tools/perf/util/bpf_skel/off_cpu.bpf.c
··· 85 85 } __attribute__((preserve_access_index)); 86 86 87 87 int enabled = 0; 88 - int has_cpu = 0; 89 - int has_task = 0; 90 - int has_cgroup = 0; 91 - int uses_tgid = 0; 88 + 89 + const volatile int has_cpu = 0; 90 + const volatile int has_task = 0; 91 + const volatile int has_cgroup = 0; 92 + const volatile int uses_tgid = 0; 92 93 93 94 const volatile bool has_prev_state = false; 94 95 const volatile bool needs_cgroup = false;