perf kwork: Implement BPF trace · tjh.dev/kernel@daf07d2

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

perf kwork: Implement BPF trace

'perf record' writes perf.data, which generates extra hard-disk interrupts,
and the amount of data to be collected keeps growing over time.

Tracing with eBPF processes the data in the kernel, which solves both of
the preceding problems.

Add a -b/--use-bpf option to 'perf kwork latency' and 'perf kwork report'
to support tracing kwork events using eBPF:

1. Create bpf prog and attach to tracepoints,
2. Start tracing after command is entered,
3. After the user hits <Ctrl+C>, stop tracing and report,
4. Support CPU and name filtering.

This commit implements the framework code and
does not add specific event support.

Test cases:

# perf kwork rep -h

Usage: perf kwork report [<options>]

-b, --use-bpf Use BPF to measure kwork runtime
-C, --cpu <cpu> list of cpus to profile
-i, --input <file> input file name
-n, --name <name> event name to profile
-s, --sort <key[,key2...]>
sort by key(s): runtime, max, count
-S, --with-summary Show summary with statistics
--time <str> Time span for analysis (start,stop)

# perf kwork lat -h

Usage: perf kwork latency [<options>]

-b, --use-bpf Use BPF to measure kwork latency
-C, --cpu <cpu> list of cpus to profile
-i, --input <file> input file name
-n, --name <name> event name to profile
-s, --sort <key[,key2...]>
sort by key(s): avg, max, count
--time <str> Time span for analysis (start,stop)

# perf kwork lat -b
Unsupported bpf trace class irq

# perf kwork rep -b
Unsupported bpf trace class irq

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Clarke <pc@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220709015033.38326-15-yangjihong1@huawei.com
[ Simplify work_findnew() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Yang Jihong and committed by

Arnaldo Carvalho de Melo 3 years ago daf07d22 bcc8b3e8

+467 -6

7 changed files

expand all

tools

perf

Documentation

perf-kwork.txt

Makefile.perf

builtin-kwork.c

util

Build

bpf_kwork.c

bpf_skel

kwork_trace.bpf.c

kwork.h

+10

tools/perf/Documentation/perf-kwork.txt

··· 26 26 Example usage: 27 27 perf kwork record -- sleep 1 28 28 perf kwork report 29 + perf kwork report -b 29 30 perf kwork latency 31 + perf kwork latency -b 30 32 perf kwork timehist 31 33 32 34 By default it shows the individual work events such as irq, workqeueu, ··· 75 73 OPTIONS for 'perf kwork report' 76 74 ---------------------------- 77 75 76 + -b:: 77 + --use-bpf:: 78 + Use BPF to measure kwork runtime 79 + 78 80 -C:: 79 81 --cpu:: 80 82 Only show events for the given CPU(s) (comma separated list). ··· 108 102 109 103 OPTIONS for 'perf kwork latency' 110 104 ---------------------------- 105 + 106 + -b:: 107 + --use-bpf:: 108 + Use BPF to measure kwork latency 111 109 112 110 -C:: 113 111 --cpu::

tools/perf/Makefile.perf

··· 1029 1029 SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h 1030 1030 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h 1031 1031 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h 1032 + SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h 1032 1033 1033 1034 $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): 1034 1035 $(Q)$(MKDIR) -p $@

+68 -6

tools/perf/builtin-kwork.c

··· 329 329 struct kwork_work *key, 330 330 struct list_head *sort_list) 331 331 { 332 - struct kwork_work *work = NULL; 332 + struct kwork_work *work = work_search(root, key, sort_list); 333 333 334 - work = work_search(root, key, sort_list); 335 334 if (work != NULL) 336 335 return work; 337 336 338 337 work = work_new(key); 339 - if (work == NULL) 340 - return NULL; 338 + if (work) 339 + work_insert(root, work, sort_list); 341 340 342 - work_insert(root, work, sort_list); 343 341 return work; 344 342 } 345 343 ··· 1427 1429 } 1428 1430 } 1429 1431 1432 + struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork, 1433 + struct kwork_class *class, 1434 + struct kwork_work *key) 1435 + { 1436 + struct kwork_work *work = NULL; 1437 + 1438 + work = work_new(key); 1439 + if (work == NULL) 1440 + return NULL; 1441 + 1442 + work_insert(&class->work_root, work, &kwork->cmp_id); 1443 + return work; 1444 + } 1445 + 1446 + static void sig_handler(int sig) 1447 + { 1448 + /* 1449 + * Simply capture termination signal so that 1450 + * the program can continue after pause returns 1451 + */ 1452 + pr_debug("Captuer signal %d\n", sig); 1453 + } 1454 + 1455 + static int perf_kwork__report_bpf(struct perf_kwork *kwork) 1456 + { 1457 + int ret; 1458 + 1459 + signal(SIGINT, sig_handler); 1460 + signal(SIGTERM, sig_handler); 1461 + 1462 + ret = perf_kwork__trace_prepare_bpf(kwork); 1463 + if (ret) 1464 + return -1; 1465 + 1466 + printf("Starting trace, Hit <Ctrl+C> to stop and report\n"); 1467 + 1468 + perf_kwork__trace_start(); 1469 + 1470 + /* 1471 + * a simple pause, wait here for stop signal 1472 + */ 1473 + pause(); 1474 + 1475 + perf_kwork__trace_finish(); 1476 + 1477 + perf_kwork__report_read_bpf(kwork); 1478 + 1479 + perf_kwork__report_cleanup_bpf(); 1480 + 1481 + return 0; 1482 + } 1483 + 1430 1484 static int perf_kwork__report(struct perf_kwork *kwork) 1431 1485 { 1432 1486 int ret; 1433 1487 struct rb_node *next; 1434 1488 struct kwork_work *work; 1435 1489 1436 - 
ret = perf_kwork__read_events(kwork); 1490 + if (kwork->use_bpf) 1491 + ret = perf_kwork__report_bpf(kwork); 1492 + else 1493 + ret = perf_kwork__read_events(kwork); 1494 + 1437 1495 if (ret != 0) 1438 1496 return -1; 1439 1497 ··· 1722 1668 "input file name"), 1723 1669 OPT_BOOLEAN('S', "with-summary", &kwork.summary, 1724 1670 "Show summary with statistics"), 1671 + #ifdef HAVE_BPF_SKEL 1672 + OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf, 1673 + "Use BPF to measure kwork runtime"), 1674 + #endif 1725 1675 OPT_PARENT(kwork_options) 1726 1676 }; 1727 1677 const struct option latency_options[] = { ··· 1739 1681 "Time span for analysis (start,stop)"), 1740 1682 OPT_STRING('i', "input", &input_name, "file", 1741 1683 "input file name"), 1684 + #ifdef HAVE_BPF_SKEL 1685 + OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf, 1686 + "Use BPF to measure kwork latency"), 1687 + #endif 1742 1688 OPT_PARENT(kwork_options) 1743 1689 }; 1744 1690 const struct option timehist_options[] = {

tools/perf/util/Build

··· 148 148 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o 149 149 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o 150 150 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o 151 + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o 151 152 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o 152 153 perf-$(CONFIG_LIBELF) += symbol-elf.o 153 154 perf-$(CONFIG_LIBELF) += probe-file.o

+278

tools/perf/util/bpf_kwork.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * bpf_kwork.c 4 + * 5 + * Copyright (c) 2022 Huawei Inc, Yang Jihong <yangjihong1@huawei.com> 6 + */ 7 + 8 + #include <time.h> 9 + #include <fcntl.h> 10 + #include <stdio.h> 11 + #include <unistd.h> 12 + 13 + #include <linux/time64.h> 14 + 15 + #include "util/debug.h" 16 + #include "util/kwork.h" 17 + 18 + #include <bpf/bpf.h> 19 + 20 + #include "util/bpf_skel/kwork_trace.skel.h" 21 + 22 + /* 23 + * This should be in sync with "util/kwork_trace.bpf.c" 24 + */ 25 + #define MAX_KWORKNAME 128 26 + 27 + struct work_key { 28 + u32 type; 29 + u32 cpu; 30 + u64 id; 31 + }; 32 + 33 + struct report_data { 34 + u64 nr; 35 + u64 total_time; 36 + u64 max_time; 37 + u64 max_time_start; 38 + u64 max_time_end; 39 + }; 40 + 41 + struct kwork_class_bpf { 42 + struct kwork_class *class; 43 + 44 + void (*load_prepare)(struct perf_kwork *kwork); 45 + int (*get_work_name)(struct work_key *key, char **ret_name); 46 + }; 47 + 48 + static struct kwork_trace_bpf *skel; 49 + 50 + static struct timespec ts_start; 51 + static struct timespec ts_end; 52 + 53 + void perf_kwork__trace_start(void) 54 + { 55 + clock_gettime(CLOCK_MONOTONIC, &ts_start); 56 + skel->bss->enabled = 1; 57 + } 58 + 59 + void perf_kwork__trace_finish(void) 60 + { 61 + clock_gettime(CLOCK_MONOTONIC, &ts_end); 62 + skel->bss->enabled = 0; 63 + } 64 + 65 + static struct kwork_class_bpf * 66 + kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = { 67 + [KWORK_CLASS_IRQ] = NULL, 68 + [KWORK_CLASS_SOFTIRQ] = NULL, 69 + [KWORK_CLASS_WORKQUEUE] = NULL, 70 + }; 71 + 72 + static bool valid_kwork_class_type(enum kwork_class_type type) 73 + { 74 + return type >= 0 && type < KWORK_CLASS_MAX ? 
true : false; 75 + } 76 + 77 + static int setup_filters(struct perf_kwork *kwork) 78 + { 79 + u8 val = 1; 80 + int i, nr_cpus, key, fd; 81 + struct perf_cpu_map *map; 82 + 83 + if (kwork->cpu_list != NULL) { 84 + fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter); 85 + if (fd < 0) { 86 + pr_debug("Invalid cpu filter fd\n"); 87 + return -1; 88 + } 89 + 90 + map = perf_cpu_map__new(kwork->cpu_list); 91 + if (map == NULL) { 92 + pr_debug("Invalid cpu_list\n"); 93 + return -1; 94 + } 95 + 96 + nr_cpus = libbpf_num_possible_cpus(); 97 + for (i = 0; i < perf_cpu_map__nr(map); i++) { 98 + struct perf_cpu cpu = perf_cpu_map__cpu(map, i); 99 + 100 + if (cpu.cpu >= nr_cpus) { 101 + perf_cpu_map__put(map); 102 + pr_err("Requested cpu %d too large\n", cpu.cpu); 103 + return -1; 104 + } 105 + bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY); 106 + } 107 + perf_cpu_map__put(map); 108 + 109 + skel->bss->has_cpu_filter = 1; 110 + } 111 + 112 + if (kwork->profile_name != NULL) { 113 + if (strlen(kwork->profile_name) >= MAX_KWORKNAME) { 114 + pr_err("Requested name filter %s too large, limit to %d\n", 115 + kwork->profile_name, MAX_KWORKNAME - 1); 116 + return -1; 117 + } 118 + 119 + fd = bpf_map__fd(skel->maps.perf_kwork_name_filter); 120 + if (fd < 0) { 121 + pr_debug("Invalid name filter fd\n"); 122 + return -1; 123 + } 124 + 125 + key = 0; 126 + bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY); 127 + 128 + skel->bss->has_name_filter = 1; 129 + } 130 + 131 + return 0; 132 + } 133 + 134 + int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork) 135 + { 136 + struct bpf_program *prog; 137 + struct kwork_class *class; 138 + struct kwork_class_bpf *class_bpf; 139 + enum kwork_class_type type; 140 + 141 + skel = kwork_trace_bpf__open(); 142 + if (!skel) { 143 + pr_debug("Failed to open kwork trace skeleton\n"); 144 + return -1; 145 + } 146 + 147 + /* 148 + * set all progs to non-autoload, 149 + * then set corresponding progs according to config 150 + */ 151 + 
bpf_object__for_each_program(prog, skel->obj) 152 + bpf_program__set_autoload(prog, false); 153 + 154 + list_for_each_entry(class, &kwork->class_list, list) { 155 + type = class->type; 156 + if (!valid_kwork_class_type(type) || 157 + (kwork_class_bpf_supported_list[type] == NULL)) { 158 + pr_err("Unsupported bpf trace class %s\n", class->name); 159 + goto out; 160 + } 161 + 162 + class_bpf = kwork_class_bpf_supported_list[type]; 163 + class_bpf->class = class; 164 + 165 + if (class_bpf->load_prepare != NULL) 166 + class_bpf->load_prepare(kwork); 167 + } 168 + 169 + if (kwork_trace_bpf__load(skel)) { 170 + pr_debug("Failed to load kwork trace skeleton\n"); 171 + goto out; 172 + } 173 + 174 + if (setup_filters(kwork)) 175 + goto out; 176 + 177 + if (kwork_trace_bpf__attach(skel)) { 178 + pr_debug("Failed to attach kwork trace skeleton\n"); 179 + goto out; 180 + } 181 + 182 + return 0; 183 + 184 + out: 185 + kwork_trace_bpf__destroy(skel); 186 + return -1; 187 + } 188 + 189 + static int add_work(struct perf_kwork *kwork, 190 + struct work_key *key, 191 + struct report_data *data) 192 + { 193 + struct kwork_work *work; 194 + struct kwork_class_bpf *bpf_trace; 195 + struct kwork_work tmp = { 196 + .id = key->id, 197 + .name = NULL, 198 + .cpu = key->cpu, 199 + }; 200 + enum kwork_class_type type = key->type; 201 + 202 + if (!valid_kwork_class_type(type)) { 203 + pr_debug("Invalid class type %d to add work\n", type); 204 + return -1; 205 + } 206 + 207 + bpf_trace = kwork_class_bpf_supported_list[type]; 208 + tmp.class = bpf_trace->class; 209 + 210 + if ((bpf_trace->get_work_name != NULL) && 211 + (bpf_trace->get_work_name(key, &tmp.name))) 212 + return -1; 213 + 214 + work = perf_kwork_add_work(kwork, tmp.class, &tmp); 215 + if (work == NULL) 216 + return -1; 217 + 218 + if (kwork->report == KWORK_REPORT_RUNTIME) { 219 + work->nr_atoms = data->nr; 220 + work->total_runtime = data->total_time; 221 + work->max_runtime = data->max_time; 222 + work->max_runtime_start = 
data->max_time_start; 223 + work->max_runtime_end = data->max_time_end; 224 + } else if (kwork->report == KWORK_REPORT_LATENCY) { 225 + work->nr_atoms = data->nr; 226 + work->total_latency = data->total_time; 227 + work->max_latency = data->max_time; 228 + work->max_latency_start = data->max_time_start; 229 + work->max_latency_end = data->max_time_end; 230 + } else { 231 + pr_debug("Invalid bpf report type %d\n", kwork->report); 232 + return -1; 233 + } 234 + 235 + kwork->timestart = (u64)ts_start.tv_sec * NSEC_PER_SEC + ts_start.tv_nsec; 236 + kwork->timeend = (u64)ts_end.tv_sec * NSEC_PER_SEC + ts_end.tv_nsec; 237 + 238 + return 0; 239 + } 240 + 241 + int perf_kwork__report_read_bpf(struct perf_kwork *kwork) 242 + { 243 + struct report_data data; 244 + struct work_key key = { 245 + .type = 0, 246 + .cpu = 0, 247 + .id = 0, 248 + }; 249 + struct work_key prev = { 250 + .type = 0, 251 + .cpu = 0, 252 + .id = 0, 253 + }; 254 + int fd = bpf_map__fd(skel->maps.perf_kwork_report); 255 + 256 + if (fd < 0) { 257 + pr_debug("Invalid report fd\n"); 258 + return -1; 259 + } 260 + 261 + while (!bpf_map_get_next_key(fd, &prev, &key)) { 262 + if ((bpf_map_lookup_elem(fd, &key, &data)) != 0) { 263 + pr_debug("Failed to lookup report elem\n"); 264 + return -1; 265 + } 266 + 267 + if ((data.nr != 0) && (add_work(kwork, &key, &data) != 0)) 268 + return -1; 269 + 270 + prev = key; 271 + } 272 + return 0; 273 + } 274 + 275 + void perf_kwork__report_cleanup_bpf(void) 276 + { 277 + kwork_trace_bpf__destroy(skel); 278 + }

+74

tools/perf/util/bpf_skel/kwork_trace.bpf.c

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022, Huawei

/*
 * BPF side of 'perf kwork' tracing. This file only declares the shared
 * types, maps and control globals; it contains no BPF programs.
 */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Capacity of the per-work hash maps below. */
#define KWORK_COUNT 100
/* Size in bytes of a work name value; user space defines the same limit. */
#define MAX_KWORKNAME 128

/*
 * This should be in sync with "util/kwork.h"
 */
enum kwork_class_type {
	KWORK_CLASS_IRQ,
	KWORK_CLASS_SOFTIRQ,
	KWORK_CLASS_WORKQUEUE,
	KWORK_CLASS_MAX,
};

/* Key identifying one work item; user space (bpf_kwork.c) mirrors this layout. */
struct work_key {
	__u32 type;
	__u32 cpu;
	__u64 id;
};

/* Per-work statistics; user space (bpf_kwork.c) mirrors this layout. */
struct report_data {
	__u64 nr;
	__u64 total_time;
	__u64 max_time;
	__u64 max_time_start;
	__u64 max_time_end;
};

/* work_key -> name buffer of MAX_KWORKNAME bytes. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct work_key));
	__uint(value_size, MAX_KWORKNAME);
	__uint(max_entries, KWORK_COUNT);
} perf_kwork_names SEC(".maps");

/* work_key -> one __u64 value (presumably a timestamp; not used in this file). */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct work_key));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, KWORK_COUNT);
} perf_kwork_time SEC(".maps");

/* work_key -> struct report_data; walked by user space to build the report. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct work_key));
	__uint(value_size, sizeof(struct report_data));
	__uint(max_entries, KWORK_COUNT);
} perf_kwork_report SEC(".maps");

/*
 * CPU number -> __u8 flag, filled by user space for each requested CPU.
 * NOTE(review): with max_entries of 1 this hash map can hold a single CPU
 * unless the loader resizes it before load -- confirm.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} perf_kwork_cpu_filter SEC(".maps");

/* Single slot (index 0) holding the name filter written by user space. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, MAX_KWORKNAME);
	__uint(max_entries, 1);
} perf_kwork_name_filter SEC(".maps");

/* Control flags written by user space through the skeleton's bss section. */
int enabled = 0;
int has_cpu_filter = 0;
int has_name_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

+35

tools/perf/util/kwork.h

··· 203 203 const char *sort_order; 204 204 bool show_callchain; 205 205 unsigned int max_stack; 206 + bool use_bpf; 206 207 207 208 /* 208 209 * statistics ··· 219 218 u64 all_count; 220 219 u64 nr_skipped_events[KWORK_TRACE_MAX + 1]; 221 220 }; 221 + 222 + struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork, 223 + struct kwork_class *class, 224 + struct kwork_work *key); 225 + 226 + #ifdef HAVE_BPF_SKEL 227 + 228 + int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork); 229 + int perf_kwork__report_read_bpf(struct perf_kwork *kwork); 230 + void perf_kwork__report_cleanup_bpf(void); 231 + 232 + void perf_kwork__trace_start(void); 233 + void perf_kwork__trace_finish(void); 234 + 235 + #else /* !HAVE_BPF_SKEL */ 236 + 237 + static inline int 238 + perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused) 239 + { 240 + return -1; 241 + } 242 + 243 + static inline int 244 + perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused) 245 + { 246 + return -1; 247 + } 248 + 249 + static inline void perf_kwork__report_cleanup_bpf(void) {} 250 + 251 + static inline void perf_kwork__trace_start(void) {} 252 + static inline void perf_kwork__trace_finish(void) {} 253 + 254 + #endif /* HAVE_BPF_SKEL */ 222 255 223 256 #endif /* PERF_UTIL_KWORK_H */