Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf stat: Enable counting events for BPF programs

Introduce 'perf stat -b' option, which counts events for BPF programs, like:

[root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000
1.487903822 115,200 ref-cycles
1.487903822 86,012 cycles
2.489147029 80,560 ref-cycles
2.489147029 73,784 cycles
3.490341825 60,720 ref-cycles
3.490341825 37,797 cycles
4.491540887 37,120 ref-cycles
4.491540887 31,963 cycles

The example above counts 'cycles' and 'ref-cycles' of BPF program of id
254. This is similar to bpftool-prog-profile command, but more
flexible.

'perf stat -b' creates per-cpu perf_event and loads fentry/fexit BPF
programs (monitor-progs) to the target BPF program (target-prog). The
monitor-progs read perf_event before and after the target-prog, and
aggregate the difference in a BPF map. Then the user space reads data
from these maps.

A new 'struct bpf_counter' is introduced to provide a common interface
that uses BPF programs/maps to count perf events.

Committer notes:

Removed all but bpf_counter.h includes from evsel.h, not needed at all.

Also BPF map lookups for PERCPU_ARRAYs need to have, as their value, a receive
buffer passed to the kernel with libbpf_num_possible_cpus() entries, not
evsel__nr_cpus(evsel), as the former uses
/sys/devices/system/cpu/possible while the latter uses
/sys/devices/system/cpu/online, which may be less than the 'possible'
number, making the bpf map lookup overwrite memory and cause hard to
debug memory corruption.

We need to continue using evsel__nr_cpus(evsel) when accessing the
perf_counts array though, so as not to overwrite another area of memory :-)

Signed-off-by: Song Liu <songliubraving@fb.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/lkml/20210120163031.GU12699@kernel.org/
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Link: http://lore.kernel.org/lkml/20201229214214.3413833-4-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Song Liu and committed by
Arnaldo Carvalho de Melo
fa853c4b fbcdaa19

+645 -18
+18
tools/perf/Documentation/perf-stat.txt
··· 75 75 --tid=<tid>:: 76 76 stat events on existing thread id (comma separated list) 77 77 78 + -b:: 79 + --bpf-prog:: 80 + stat events on existing bpf program id (comma separated list), 81 + requiring root rights. bpftool-prog could be used to find program 82 + id all bpf programs in the system. For example: 83 + 84 + # bpftool prog | head -n 1 85 + 17247: tracepoint name sys_enter tag 192d548b9d754067 gpl 86 + 87 + # perf stat -e cycles,instructions --bpf-prog 17247 --timeout 1000 88 + 89 + Performance counter stats for 'BPF program(s) 17247': 90 + 91 + 85,967 cycles 92 + 28,982 instructions # 0.34 insn per cycle 93 + 94 + 1.102235068 seconds time elapsed 95 + 78 96 ifdef::HAVE_LIBPFM[] 79 97 --pfm-events events:: 80 98 Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+1 -1
tools/perf/Makefile.perf
··· 1015 1015 1016 1016 SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) 1017 1017 SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) 1018 - SKELETONS := 1018 + SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h 1019 1019 1020 1020 ifdef BUILD_BPF_SKEL 1021 1021 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
+68 -14
tools/perf/builtin-stat.c
··· 67 67 #include "util/top.h" 68 68 #include "util/affinity.h" 69 69 #include "util/pfm.h" 70 + #include "util/bpf_counter.h" 70 71 #include "asm/bug.h" 71 72 72 73 #include <linux/time64.h> ··· 410 409 return 0; 411 410 } 412 411 412 + static int read_bpf_map_counters(void) 413 + { 414 + struct evsel *counter; 415 + int err; 416 + 417 + evlist__for_each_entry(evsel_list, counter) { 418 + err = bpf_counter__read(counter); 419 + if (err) 420 + return err; 421 + } 422 + return 0; 423 + } 424 + 413 425 static void read_counters(struct timespec *rs) 414 426 { 415 427 struct evsel *counter; 428 + int err; 416 429 417 - if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0)) 418 - return; 430 + if (!stat_config.stop_read_counter) { 431 + if (target__has_bpf(&target)) 432 + err = read_bpf_map_counters(); 433 + else 434 + err = read_affinity_counters(rs); 435 + if (err < 0) 436 + return; 437 + } 419 438 420 439 evlist__for_each_entry(evsel_list, counter) { 421 440 if (counter->err) ··· 517 496 return false; 518 497 } 519 498 520 - static void enable_counters(void) 499 + static int enable_counters(void) 521 500 { 501 + struct evsel *evsel; 502 + int err; 503 + 504 + if (target__has_bpf(&target)) { 505 + evlist__for_each_entry(evsel_list, evsel) { 506 + err = bpf_counter__enable(evsel); 507 + if (err) 508 + return err; 509 + } 510 + } 511 + 522 512 if (stat_config.initial_delay < 0) { 523 513 pr_info(EVLIST_DISABLED_MSG); 524 - return; 514 + return 0; 525 515 } 526 516 527 517 if (stat_config.initial_delay > 0) { ··· 550 518 if (stat_config.initial_delay > 0) 551 519 pr_info(EVLIST_ENABLED_MSG); 552 520 } 521 + return 0; 553 522 } 554 523 555 524 static void disable_counters(void) ··· 753 720 const bool forks = (argc > 0); 754 721 bool is_pipe = STAT_RECORD ? 
perf_stat.data.is_pipe : false; 755 722 struct affinity affinity; 756 - int i, cpu; 723 + int i, cpu, err; 757 724 bool second_pass = false; 758 725 759 726 if (forks) { ··· 769 736 770 737 if (affinity__setup(&affinity) < 0) 771 738 return -1; 739 + 740 + if (target__has_bpf(&target)) { 741 + evlist__for_each_entry(evsel_list, counter) { 742 + if (bpf_counter__load(counter, &target)) 743 + return -1; 744 + } 745 + } 772 746 773 747 evlist__for_each_cpu (evsel_list, i, cpu) { 774 748 affinity__set(&affinity, cpu); ··· 890 850 } 891 851 892 852 if (STAT_RECORD) { 893 - int err, fd = perf_data__fd(&perf_stat.data); 853 + int fd = perf_data__fd(&perf_stat.data); 894 854 895 855 if (is_pipe) { 896 856 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); ··· 916 876 917 877 if (forks) { 918 878 evlist__start_workload(evsel_list); 919 - enable_counters(); 879 + err = enable_counters(); 880 + if (err) 881 + return -1; 920 882 921 883 if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) 922 884 status = dispatch_events(forks, timeout, interval, &times); ··· 937 895 if (WIFSIGNALED(status)) 938 896 psignal(WTERMSIG(status), argv[0]); 939 897 } else { 940 - enable_counters(); 898 + err = enable_counters(); 899 + if (err) 900 + return -1; 941 901 status = dispatch_events(forks, timeout, interval, &times); 942 902 } 943 903 ··· 1129 1085 "stat events on existing process id"), 1130 1086 OPT_STRING('t', "tid", &target.tid, "tid", 1131 1087 "stat events on existing thread id"), 1088 + #ifdef HAVE_BPF_SKEL 1089 + OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", 1090 + "stat events on existing bpf program id"), 1091 + #endif 1132 1092 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1133 1093 "system-wide collection from all CPUs"), 1134 1094 OPT_BOOLEAN('g', "group", &group, ··· 2112 2064 "perf stat [<options>] [<command>]", 2113 2065 NULL 2114 2066 }; 2115 - int status = -EINVAL, run_idx; 2067 + int status = -EINVAL, run_idx, err; 2116 2068 
const char *mode; 2117 2069 FILE *output = stderr; 2118 2070 unsigned int interval, timeout; 2119 2071 const char * const stat_subcommands[] = { "record", "report" }; 2072 + char errbuf[BUFSIZ]; 2120 2073 2121 2074 setlocale(LC_ALL, ""); 2122 2075 ··· 2228 2179 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2229 2180 stat_config.big_num = false; 2230 2181 2182 + err = target__validate(&target); 2183 + if (err) { 2184 + target__strerror(&target, err, errbuf, BUFSIZ); 2185 + pr_warning("%s\n", errbuf); 2186 + } 2187 + 2231 2188 setup_system_wide(argc); 2232 2189 2233 2190 /* ··· 2306 2251 goto out; 2307 2252 } 2308 2253 } 2309 - 2310 - target__validate(&target); 2311 2254 2312 2255 if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) 2313 2256 target.per_thread = true; ··· 2437 2384 * tools remain -acme 2438 2385 */ 2439 2386 int fd = perf_data__fd(&perf_stat.data); 2440 - int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2441 - process_synthesized_event, 2442 - &perf_stat.session->machines.host); 2387 + 2388 + err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2389 + process_synthesized_event, 2390 + &perf_stat.session->machines.host); 2443 2391 if (err) { 2444 2392 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2445 2393 "older tools may produce warnings about this file\n.");
+1
tools/perf/util/Build
··· 135 135 136 136 perf-$(CONFIG_LIBBPF) += bpf-loader.o 137 137 perf-$(CONFIG_LIBBPF) += bpf_map.o 138 + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o 138 139 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o 139 140 perf-$(CONFIG_LIBELF) += symbol-elf.o 140 141 perf-$(CONFIG_LIBELF) += probe-file.o
+314
tools/perf/util/bpf_counter.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* Copyright (c) 2019 Facebook */ 4 + 5 + #include <assert.h> 6 + #include <limits.h> 7 + #include <unistd.h> 8 + #include <sys/time.h> 9 + #include <sys/resource.h> 10 + #include <linux/err.h> 11 + #include <linux/zalloc.h> 12 + #include <bpf/bpf.h> 13 + #include <bpf/btf.h> 14 + #include <bpf/libbpf.h> 15 + 16 + #include "bpf_counter.h" 17 + #include "counts.h" 18 + #include "debug.h" 19 + #include "evsel.h" 20 + #include "target.h" 21 + 22 + #include "bpf_skel/bpf_prog_profiler.skel.h" 23 + 24 + static inline void *u64_to_ptr(__u64 ptr) 25 + { 26 + return (void *)(unsigned long)ptr; 27 + } 28 + 29 + static void set_max_rlimit(void) 30 + { 31 + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; 32 + 33 + setrlimit(RLIMIT_MEMLOCK, &rinf); 34 + } 35 + 36 + static struct bpf_counter *bpf_counter_alloc(void) 37 + { 38 + struct bpf_counter *counter; 39 + 40 + counter = zalloc(sizeof(*counter)); 41 + if (counter) 42 + INIT_LIST_HEAD(&counter->list); 43 + return counter; 44 + } 45 + 46 + static int bpf_program_profiler__destroy(struct evsel *evsel) 47 + { 48 + struct bpf_counter *counter, *tmp; 49 + 50 + list_for_each_entry_safe(counter, tmp, 51 + &evsel->bpf_counter_list, list) { 52 + list_del_init(&counter->list); 53 + bpf_prog_profiler_bpf__destroy(counter->skel); 54 + free(counter); 55 + } 56 + assert(list_empty(&evsel->bpf_counter_list)); 57 + 58 + return 0; 59 + } 60 + 61 + static char *bpf_target_prog_name(int tgt_fd) 62 + { 63 + struct bpf_prog_info_linear *info_linear; 64 + struct bpf_func_info *func_info; 65 + const struct btf_type *t; 66 + char *name = NULL; 67 + struct btf *btf; 68 + 69 + info_linear = bpf_program__get_prog_info_linear( 70 + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); 71 + if (IS_ERR_OR_NULL(info_linear)) { 72 + pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); 73 + return NULL; 74 + } 75 + 76 + if (info_linear->info.btf_id == 0 || 77 + 
btf__get_from_id(info_linear->info.btf_id, &btf)) { 78 + pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); 79 + goto out; 80 + } 81 + 82 + func_info = u64_to_ptr(info_linear->info.func_info); 83 + t = btf__type_by_id(btf, func_info[0].type_id); 84 + if (!t) { 85 + pr_debug("btf %d doesn't have type %d\n", 86 + info_linear->info.btf_id, func_info[0].type_id); 87 + goto out; 88 + } 89 + name = strdup(btf__name_by_offset(btf, t->name_off)); 90 + out: 91 + free(info_linear); 92 + return name; 93 + } 94 + 95 + static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) 96 + { 97 + struct bpf_prog_profiler_bpf *skel; 98 + struct bpf_counter *counter; 99 + struct bpf_program *prog; 100 + char *prog_name; 101 + int prog_fd; 102 + int err; 103 + 104 + prog_fd = bpf_prog_get_fd_by_id(prog_id); 105 + if (prog_fd < 0) { 106 + pr_err("Failed to open fd for bpf prog %u\n", prog_id); 107 + return -1; 108 + } 109 + counter = bpf_counter_alloc(); 110 + if (!counter) { 111 + close(prog_fd); 112 + return -1; 113 + } 114 + 115 + skel = bpf_prog_profiler_bpf__open(); 116 + if (!skel) { 117 + pr_err("Failed to open bpf skeleton\n"); 118 + goto err_out; 119 + } 120 + 121 + skel->rodata->num_cpu = evsel__nr_cpus(evsel); 122 + 123 + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); 124 + bpf_map__resize(skel->maps.fentry_readings, 1); 125 + bpf_map__resize(skel->maps.accum_readings, 1); 126 + 127 + prog_name = bpf_target_prog_name(prog_fd); 128 + if (!prog_name) { 129 + pr_err("Failed to get program name for bpf prog %u. 
Does it have BTF?\n", prog_id); 130 + goto err_out; 131 + } 132 + 133 + bpf_object__for_each_program(prog, skel->obj) { 134 + err = bpf_program__set_attach_target(prog, prog_fd, prog_name); 135 + if (err) { 136 + pr_err("bpf_program__set_attach_target failed.\n" 137 + "Does bpf prog %u have BTF?\n", prog_id); 138 + goto err_out; 139 + } 140 + } 141 + set_max_rlimit(); 142 + err = bpf_prog_profiler_bpf__load(skel); 143 + if (err) { 144 + pr_err("bpf_prog_profiler_bpf__load failed\n"); 145 + goto err_out; 146 + } 147 + 148 + assert(skel != NULL); 149 + counter->skel = skel; 150 + list_add(&counter->list, &evsel->bpf_counter_list); 151 + close(prog_fd); 152 + return 0; 153 + err_out: 154 + bpf_prog_profiler_bpf__destroy(skel); 155 + free(counter); 156 + close(prog_fd); 157 + return -1; 158 + } 159 + 160 + static int bpf_program_profiler__load(struct evsel *evsel, struct target *target) 161 + { 162 + char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p; 163 + u32 prog_id; 164 + int ret; 165 + 166 + bpf_str_ = bpf_str = strdup(target->bpf_str); 167 + if (!bpf_str) 168 + return -1; 169 + 170 + while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) { 171 + prog_id = strtoul(tok, &p, 10); 172 + if (prog_id == 0 || prog_id == UINT_MAX || 173 + (*p != '\0' && *p != ',')) { 174 + pr_err("Failed to parse bpf prog ids %s\n", 175 + target->bpf_str); 176 + return -1; 177 + } 178 + 179 + ret = bpf_program_profiler_load_one(evsel, prog_id); 180 + if (ret) { 181 + bpf_program_profiler__destroy(evsel); 182 + free(bpf_str_); 183 + return -1; 184 + } 185 + bpf_str = NULL; 186 + } 187 + free(bpf_str_); 188 + return 0; 189 + } 190 + 191 + static int bpf_program_profiler__enable(struct evsel *evsel) 192 + { 193 + struct bpf_counter *counter; 194 + int ret; 195 + 196 + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { 197 + assert(counter->skel != NULL); 198 + ret = bpf_prog_profiler_bpf__attach(counter->skel); 199 + if (ret) { 200 + bpf_program_profiler__destroy(evsel); 201 
+ return ret; 202 + } 203 + } 204 + return 0; 205 + } 206 + 207 + static int bpf_program_profiler__read(struct evsel *evsel) 208 + { 209 + // perf_cpu_map uses /sys/devices/system/cpu/online 210 + int num_cpu = evsel__nr_cpus(evsel); 211 + // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible 212 + // Sometimes possible > online, like on a Ryzen 3900X that has 24 213 + // threads but its possible showed 0-31 -acme 214 + int num_cpu_bpf = libbpf_num_possible_cpus(); 215 + struct bpf_perf_event_value values[num_cpu_bpf]; 216 + struct bpf_counter *counter; 217 + int reading_map_fd; 218 + __u32 key = 0; 219 + int err, cpu; 220 + 221 + if (list_empty(&evsel->bpf_counter_list)) 222 + return -EAGAIN; 223 + 224 + for (cpu = 0; cpu < num_cpu; cpu++) { 225 + perf_counts(evsel->counts, cpu, 0)->val = 0; 226 + perf_counts(evsel->counts, cpu, 0)->ena = 0; 227 + perf_counts(evsel->counts, cpu, 0)->run = 0; 228 + } 229 + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { 230 + struct bpf_prog_profiler_bpf *skel = counter->skel; 231 + 232 + assert(skel != NULL); 233 + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); 234 + 235 + err = bpf_map_lookup_elem(reading_map_fd, &key, values); 236 + if (err) { 237 + pr_err("failed to read value\n"); 238 + return err; 239 + } 240 + 241 + for (cpu = 0; cpu < num_cpu; cpu++) { 242 + perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter; 243 + perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled; 244 + perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running; 245 + } 246 + } 247 + return 0; 248 + } 249 + 250 + static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, 251 + int fd) 252 + { 253 + struct bpf_prog_profiler_bpf *skel; 254 + struct bpf_counter *counter; 255 + int ret; 256 + 257 + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { 258 + skel = counter->skel; 259 + assert(skel != NULL); 260 + 261 + ret = 
bpf_map_update_elem(bpf_map__fd(skel->maps.events), 262 + &cpu, &fd, BPF_ANY); 263 + if (ret) 264 + return ret; 265 + } 266 + return 0; 267 + } 268 + 269 + struct bpf_counter_ops bpf_program_profiler_ops = { 270 + .load = bpf_program_profiler__load, 271 + .enable = bpf_program_profiler__enable, 272 + .read = bpf_program_profiler__read, 273 + .destroy = bpf_program_profiler__destroy, 274 + .install_pe = bpf_program_profiler__install_pe, 275 + }; 276 + 277 + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) 278 + { 279 + if (list_empty(&evsel->bpf_counter_list)) 280 + return 0; 281 + return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); 282 + } 283 + 284 + int bpf_counter__load(struct evsel *evsel, struct target *target) 285 + { 286 + if (target__has_bpf(target)) 287 + evsel->bpf_counter_ops = &bpf_program_profiler_ops; 288 + 289 + if (evsel->bpf_counter_ops) 290 + return evsel->bpf_counter_ops->load(evsel, target); 291 + return 0; 292 + } 293 + 294 + int bpf_counter__enable(struct evsel *evsel) 295 + { 296 + if (list_empty(&evsel->bpf_counter_list)) 297 + return 0; 298 + return evsel->bpf_counter_ops->enable(evsel); 299 + } 300 + 301 + int bpf_counter__read(struct evsel *evsel) 302 + { 303 + if (list_empty(&evsel->bpf_counter_list)) 304 + return -EAGAIN; 305 + return evsel->bpf_counter_ops->read(evsel); 306 + } 307 + 308 + void bpf_counter__destroy(struct evsel *evsel) 309 + { 310 + if (list_empty(&evsel->bpf_counter_list)) 311 + return; 312 + evsel->bpf_counter_ops->destroy(evsel); 313 + evsel->bpf_counter_ops = NULL; 314 + }
+72
tools/perf/util/bpf_counter.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __PERF_BPF_COUNTER_H 3 + #define __PERF_BPF_COUNTER_H 1 4 + 5 + #include <linux/list.h> 6 + 7 + struct evsel; 8 + struct target; 9 + struct bpf_counter; 10 + 11 + typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); 12 + typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, 13 + struct target *target); 14 + typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, 15 + int cpu, 16 + int fd); 17 + 18 + struct bpf_counter_ops { 19 + bpf_counter_evsel_target_op load; 20 + bpf_counter_evsel_op enable; 21 + bpf_counter_evsel_op read; 22 + bpf_counter_evsel_op destroy; 23 + bpf_counter_evsel_install_pe_op install_pe; 24 + }; 25 + 26 + struct bpf_counter { 27 + void *skel; 28 + struct list_head list; 29 + }; 30 + 31 + #ifdef HAVE_BPF_SKEL 32 + 33 + int bpf_counter__load(struct evsel *evsel, struct target *target); 34 + int bpf_counter__enable(struct evsel *evsel); 35 + int bpf_counter__read(struct evsel *evsel); 36 + void bpf_counter__destroy(struct evsel *evsel); 37 + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); 38 + 39 + #else /* HAVE_BPF_SKEL */ 40 + 41 + #include<linux/err.h> 42 + 43 + static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, 44 + struct target *target __maybe_unused) 45 + { 46 + return 0; 47 + } 48 + 49 + static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused) 50 + { 51 + return 0; 52 + } 53 + 54 + static inline int bpf_counter__read(struct evsel *evsel __maybe_unused) 55 + { 56 + return -EAGAIN; 57 + } 58 + 59 + static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused) 60 + { 61 + } 62 + 63 + static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, 64 + int cpu __maybe_unused, 65 + int fd __maybe_unused) 66 + { 67 + return 0; 68 + } 69 + 70 + #endif /* HAVE_BPF_SKEL */ 71 + 72 + #endif /* __PERF_BPF_COUNTER_H */
+93
tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2020 Facebook 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + 7 + /* map of perf event fds, num_cpu * num_metric entries */ 8 + struct { 9 + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 10 + __uint(key_size, sizeof(__u32)); 11 + __uint(value_size, sizeof(int)); 12 + } events SEC(".maps"); 13 + 14 + /* readings at fentry */ 15 + struct { 16 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 17 + __uint(key_size, sizeof(__u32)); 18 + __uint(value_size, sizeof(struct bpf_perf_event_value)); 19 + __uint(max_entries, 1); 20 + } fentry_readings SEC(".maps"); 21 + 22 + /* accumulated readings */ 23 + struct { 24 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 25 + __uint(key_size, sizeof(__u32)); 26 + __uint(value_size, sizeof(struct bpf_perf_event_value)); 27 + __uint(max_entries, 1); 28 + } accum_readings SEC(".maps"); 29 + 30 + const volatile __u32 num_cpu = 1; 31 + 32 + SEC("fentry/XXX") 33 + int BPF_PROG(fentry_XXX) 34 + { 35 + __u32 key = bpf_get_smp_processor_id(); 36 + struct bpf_perf_event_value *ptr; 37 + __u32 zero = 0; 38 + long err; 39 + 40 + /* look up before reading, to reduce error */ 41 + ptr = bpf_map_lookup_elem(&fentry_readings, &zero); 42 + if (!ptr) 43 + return 0; 44 + 45 + err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr)); 46 + if (err) 47 + return 0; 48 + 49 + return 0; 50 + } 51 + 52 + static inline void 53 + fexit_update_maps(struct bpf_perf_event_value *after) 54 + { 55 + struct bpf_perf_event_value *before, diff, *accum; 56 + __u32 zero = 0; 57 + 58 + before = bpf_map_lookup_elem(&fentry_readings, &zero); 59 + /* only account samples with a valid fentry_reading */ 60 + if (before && before->counter) { 61 + struct bpf_perf_event_value *accum; 62 + 63 + diff.counter = after->counter - before->counter; 64 + diff.enabled = after->enabled - before->enabled; 65 + diff.running = after->running - before->running; 66 
+ 67 + accum = bpf_map_lookup_elem(&accum_readings, &zero); 68 + if (accum) { 69 + accum->counter += diff.counter; 70 + accum->enabled += diff.enabled; 71 + accum->running += diff.running; 72 + } 73 + } 74 + } 75 + 76 + SEC("fexit/XXX") 77 + int BPF_PROG(fexit_XXX) 78 + { 79 + struct bpf_perf_event_value reading; 80 + __u32 cpu = bpf_get_smp_processor_id(); 81 + __u32 one = 1, zero = 0; 82 + int err; 83 + 84 + /* read all events before updating the maps, to reduce error */ 85 + err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading)); 86 + if (err) 87 + return 0; 88 + 89 + fexit_update_maps(&reading); 90 + return 0; 91 + } 92 + 93 + char LICENSE[] SEC("license") = "Dual BSD/GPL";
+5
tools/perf/util/evsel.c
··· 25 25 #include <stdlib.h> 26 26 #include <perf/evsel.h> 27 27 #include "asm/bug.h" 28 + #include "bpf_counter.h" 28 29 #include "callchain.h" 29 30 #include "cgroup.h" 30 31 #include "counts.h" ··· 248 247 evsel->bpf_obj = NULL; 249 248 evsel->bpf_fd = -1; 250 249 INIT_LIST_HEAD(&evsel->config_terms); 250 + INIT_LIST_HEAD(&evsel->bpf_counter_list); 251 251 perf_evsel__object.init(evsel); 252 252 evsel->sample_size = __evsel__sample_size(attr->sample_type); 253 253 evsel__calc_id_pos(evsel); ··· 1370 1368 { 1371 1369 assert(list_empty(&evsel->core.node)); 1372 1370 assert(evsel->evlist == NULL); 1371 + bpf_counter__destroy(evsel); 1373 1372 evsel__free_counts(evsel); 1374 1373 perf_evsel__free_fd(&evsel->core); 1375 1374 perf_evsel__free_id(&evsel->core); ··· 1785 1782 group_fd, flags); 1786 1783 1787 1784 FD(evsel, cpu, thread) = fd; 1785 + 1786 + bpf_counter__install_pe(evsel, cpu, fd); 1788 1787 1789 1788 if (unlikely(test_attr__enabled)) { 1790 1789 test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+5
tools/perf/util/evsel.h
··· 17 17 struct perf_counts; 18 18 struct perf_stat_evsel; 19 19 union perf_event; 20 + struct bpf_counter_ops; 21 + struct target; 20 22 21 23 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); 22 24 ··· 129 127 * See also evsel__has_callchain(). 130 128 */ 131 129 __u64 synth_sample_type; 130 + struct list_head bpf_counter_list; 131 + struct bpf_counter_ops *bpf_counter_ops; 132 132 }; 133 133 134 134 struct perf_missing_features { ··· 428 424 struct perf_env *evsel__env(struct evsel *evsel); 429 425 430 426 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); 427 + 431 428 #endif /* __PERF_EVSEL_H */
+21
tools/perf/util/python.c
··· 80 80 } 81 81 82 82 /* 83 + * XXX: All these evsel destructors need some better mechanism, like a linked 84 + * list of destructors registered when the relevant code indeed is used instead 85 + * of having more and more calls in perf_evsel__delete(). -- acme 86 + * 87 + * For now, add some more: 88 + * 89 + * Not to drag the BPF bandwagon... 90 + */ 91 + void bpf_counter__destroy(struct evsel *evsel); 92 + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); 93 + 94 + void bpf_counter__destroy(struct evsel *evsel __maybe_unused) 95 + { 96 + } 97 + 98 + int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) 99 + { 100 + return 0; 101 + } 102 + 103 + /* 83 104 * Support debug printing even though util/debug.c is not linked. That means 84 105 * implementing 'verbose' and 'eprintf'. 85 106 */
+3 -1
tools/perf/util/stat-display.c
··· 1045 1045 if (!config->csv_output) { 1046 1046 fprintf(output, "\n"); 1047 1047 fprintf(output, " Performance counter stats for "); 1048 - if (_target->system_wide) 1048 + if (_target->bpf_str) 1049 + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); 1050 + else if (_target->system_wide) 1049 1051 fprintf(output, "\'system wide"); 1050 1052 else if (_target->cpu_list) 1051 1053 fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+1 -1
tools/perf/util/stat.c
··· 527 527 if (leader->core.nr_members > 1) 528 528 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 529 529 530 - attr->inherit = !config->no_inherit; 530 + attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); 531 531 532 532 /* 533 533 * Some events get initialized with sample_(period/type) set,
+33 -1
tools/perf/util/target.c
··· 56 56 ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; 57 57 } 58 58 59 + /* BPF and CPU are mutually exclusive */ 60 + if (target->bpf_str && target->cpu_list) { 61 + target->cpu_list = NULL; 62 + if (ret == TARGET_ERRNO__SUCCESS) 63 + ret = TARGET_ERRNO__BPF_OVERRIDE_CPU; 64 + } 65 + 66 + /* BPF and PID/TID are mutually exclusive */ 67 + if (target->bpf_str && target->tid) { 68 + target->tid = NULL; 69 + if (ret == TARGET_ERRNO__SUCCESS) 70 + ret = TARGET_ERRNO__BPF_OVERRIDE_PID; 71 + } 72 + 73 + /* BPF and UID are mutually exclusive */ 74 + if (target->bpf_str && target->uid_str) { 75 + target->uid_str = NULL; 76 + if (ret == TARGET_ERRNO__SUCCESS) 77 + ret = TARGET_ERRNO__BPF_OVERRIDE_UID; 78 + } 79 + 80 + /* BPF and THREADS are mutually exclusive */ 81 + if (target->bpf_str && target->per_thread) { 82 + target->per_thread = false; 83 + if (ret == TARGET_ERRNO__SUCCESS) 84 + ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD; 85 + } 86 + 59 87 /* THREAD and SYSTEM/CPU are mutually exclusive */ 60 88 if (target->per_thread && (target->system_wide || target->cpu_list)) { 61 89 target->per_thread = false; ··· 137 109 "PID/TID switch overriding SYSTEM", 138 110 "UID switch overriding SYSTEM", 139 111 "SYSTEM/CPU switch overriding PER-THREAD", 112 + "BPF switch overriding CPU", 113 + "BPF switch overriding PID/TID", 114 + "BPF switch overriding UID", 115 + "BPF switch overriding THREAD", 140 116 "Invalid User: %s", 141 117 "Problems obtaining information for user %s", 142 118 }; ··· 166 134 167 135 switch (errnum) { 168 136 case TARGET_ERRNO__PID_OVERRIDE_CPU ... 169 - TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: 137 + TARGET_ERRNO__BPF_OVERRIDE_THREAD: 170 138 snprintf(buf, buflen, "%s", msg); 171 139 break; 172 140
+10
tools/perf/util/target.h
··· 10 10 const char *tid; 11 11 const char *cpu_list; 12 12 const char *uid_str; 13 + const char *bpf_str; 13 14 uid_t uid; 14 15 bool system_wide; 15 16 bool uses_mmap; ··· 37 36 TARGET_ERRNO__PID_OVERRIDE_SYSTEM, 38 37 TARGET_ERRNO__UID_OVERRIDE_SYSTEM, 39 38 TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, 39 + TARGET_ERRNO__BPF_OVERRIDE_CPU, 40 + TARGET_ERRNO__BPF_OVERRIDE_PID, 41 + TARGET_ERRNO__BPF_OVERRIDE_UID, 42 + TARGET_ERRNO__BPF_OVERRIDE_THREAD, 40 43 41 44 /* for target__parse_uid() */ 42 45 TARGET_ERRNO__INVALID_UID, ··· 62 57 static inline bool target__has_cpu(struct target *target) 63 58 { 64 59 return target->system_wide || target->cpu_list; 60 + } 61 + 62 + static inline bool target__has_bpf(struct target *target) 63 + { 64 + return target->bpf_str; 65 65 } 66 66 67 67 static inline bool target__none(struct target *target)