
perf kwork: Add workqueue trace BPF support

Implement BPF tracing support for the workqueue kwork class, so that 'perf kwork lat -b' and 'perf kwork rep -b' can collect workqueue latency and runtime statistics.

Test cases:

# perf kwork -k workqueue lat -b
Starting trace, Hit <Ctrl+C> to stop and report
^C
 Kwork Name                           | Cpu  | Avg delay     | Count   | Max delay     | Max delay start   | Max delay end     |
--------------------------------------------------------------------------------------------------------------------------------
 (w)addrconf_verify_work              | 0002 |      5.856 ms |       1 |      5.856 ms |   111994.634313 s |   111994.640169 s |
 (w)vmstat_update                     | 0001 |      1.247 ms |       1 |      1.247 ms |   111996.462651 s |   111996.463899 s |
 (w)neigh_periodic_work               | 0001 |      1.183 ms |       1 |      1.183 ms |   111996.462789 s |   111996.463973 s |
 (w)neigh_managed_work                | 0001 |      0.989 ms |       2 |      1.635 ms |   111996.462820 s |   111996.464455 s |
 (w)wb_workfn                         | 0000 |      0.667 ms |       1 |      0.667 ms |   111996.384273 s |   111996.384940 s |
 (w)bpf_prog_free_deferred            | 0001 |      0.495 ms |       1 |      0.495 ms |   111986.314201 s |   111986.314696 s |
 (w)mix_interrupt_randomness          | 0002 |      0.421 ms |       6 |      0.749 ms |   111995.927750 s |   111995.928499 s |
 (w)vmstat_shepherd                   | 0000 |      0.374 ms |       2 |      0.385 ms |   111991.265242 s |   111991.265627 s |
 (w)e1000_watchdog                    | 0002 |      0.356 ms |       5 |      0.390 ms |   111994.528380 s |   111994.528770 s |
 (w)vmstat_update                     | 0000 |      0.231 ms |       2 |      0.365 ms |   111996.384407 s |   111996.384772 s |
 (w)flush_to_ldisc                    | 0006 |      0.165 ms |       1 |      0.165 ms |   111995.930606 s |   111995.930771 s |
 (w)flush_to_ldisc                    | 0000 |      0.094 ms |       2 |      0.095 ms |   111996.460453 s |   111996.460548 s |
--------------------------------------------------------------------------------------------------------------------------------

# perf kwork -k workqueue rep -b
Starting trace, Hit <Ctrl+C> to stop and report
^C
 Kwork Name                           | Cpu  | Total Runtime | Count   | Max runtime   | Max runtime start | Max runtime end   |
--------------------------------------------------------------------------------------------------------------------------------
 (w)e1000_watchdog                    | 0002 |      0.627 ms |       2 |      0.324 ms |   112002.720665 s |   112002.720989 s |
 (w)flush_to_ldisc                    | 0007 |      0.598 ms |       2 |      0.534 ms |   112000.875226 s |   112000.875761 s |
 (w)wq_barrier_func                   | 0007 |      0.492 ms |       1 |      0.492 ms |   112000.876981 s |   112000.877473 s |
 (w)flush_to_ldisc                    | 0007 |      0.281 ms |       1 |      0.281 ms |   112005.826882 s |   112005.827163 s |
 (w)mix_interrupt_randomness          | 0002 |      0.229 ms |       3 |      0.102 ms |   112005.825671 s |   112005.825774 s |
 (w)vmstat_shepherd                   | 0000 |      0.202 ms |       1 |      0.202 ms |   112001.504511 s |   112001.504713 s |
 (w)bpf_prog_free_deferred            | 0001 |      0.181 ms |       1 |      0.181 ms |   112000.883251 s |   112000.883432 s |
 (w)wb_workfn                         | 0007 |      0.130 ms |       1 |      0.130 ms |   112001.505195 s |   112001.505325 s |
 (w)vmstat_update                     | 0000 |      0.053 ms |       1 |      0.053 ms |   112001.504763 s |   112001.504815 s |
--------------------------------------------------------------------------------------------------------------------------------

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Clarke <pc@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220709015033.38326-18-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

2 files changed, 101 insertions(+), 1 deletion(-)

tools/perf/util/bpf_kwork.c (+17, -1):
···
         .get_work_name = get_work_name_from_map,
 };

+static void workqueue_load_prepare(struct perf_kwork *kwork)
+{
+        if (kwork->report == KWORK_REPORT_RUNTIME) {
+                bpf_program__set_autoload(skel->progs.report_workqueue_execute_start, true);
+                bpf_program__set_autoload(skel->progs.report_workqueue_execute_end, true);
+        } else if (kwork->report == KWORK_REPORT_LATENCY) {
+                bpf_program__set_autoload(skel->progs.latency_workqueue_activate_work, true);
+                bpf_program__set_autoload(skel->progs.latency_workqueue_execute_start, true);
+        }
+}
+
+static struct kwork_class_bpf kwork_workqueue_bpf = {
+        .load_prepare  = workqueue_load_prepare,
+        .get_work_name = get_work_name_from_map,
+};
+
 static struct kwork_class_bpf *
 kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = {
         [KWORK_CLASS_IRQ]       = &kwork_irq_bpf,
         [KWORK_CLASS_SOFTIRQ]   = &kwork_softirq_bpf,
-        [KWORK_CLASS_WORKQUEUE] = NULL,
+        [KWORK_CLASS_WORKQUEUE] = &kwork_workqueue_bpf,
 };

 static bool valid_kwork_class_type(enum kwork_class_type type)
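
The bpf_kwork.c hunk above follows the per-class dispatch pattern already used for irq and softirq: each class that supports BPF tracing provides a kwork_class_bpf entry whose load_prepare() callback marks the BPF programs needed for the requested report type for autoloading, and a NULL slot in kwork_class_bpf_supported_list means the class cannot be traced with -b. As a rough stand-alone illustration of that pattern (not the perf code itself: the trimmed enums, the one-entry table, and the printf() calls standing in for bpf_program__set_autoload() are simplifications):

#include <stdio.h>

enum kwork_report_type {
        KWORK_REPORT_RUNTIME,
        KWORK_REPORT_LATENCY,
};

enum kwork_class_type {
        KWORK_CLASS_IRQ,
        KWORK_CLASS_SOFTIRQ,
        KWORK_CLASS_WORKQUEUE,
        KWORK_CLASS_MAX,
};

struct perf_kwork {
        enum kwork_report_type report;
};

struct kwork_class_bpf {
        void (*load_prepare)(struct perf_kwork *kwork);
};

/* mirrors workqueue_load_prepare(); printf() stands in for bpf_program__set_autoload() */
static void workqueue_load_prepare(struct perf_kwork *kwork)
{
        if (kwork->report == KWORK_REPORT_RUNTIME)
                printf("autoload: report_workqueue_execute_start, report_workqueue_execute_end\n");
        else if (kwork->report == KWORK_REPORT_LATENCY)
                printf("autoload: latency_workqueue_activate_work, latency_workqueue_execute_start\n");
}

static struct kwork_class_bpf kwork_workqueue_bpf = {
        .load_prepare = workqueue_load_prepare,
};

/* a NULL slot means "no BPF support for this class"; the patch fills the workqueue slot */
static struct kwork_class_bpf *class_bpf_list[KWORK_CLASS_MAX] = {
        [KWORK_CLASS_WORKQUEUE] = &kwork_workqueue_bpf,
};

int main(void)
{
        struct perf_kwork kwork = { .report = KWORK_REPORT_LATENCY };
        struct kwork_class_bpf *class_bpf = class_bpf_list[KWORK_CLASS_WORKQUEUE];

        /* only classes with a non-NULL entry can be traced in BPF mode */
        if (class_bpf && class_bpf->load_prepare)
                class_bpf->load_prepare(&kwork);

        return 0;
}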

tools/perf/util/bpf_skel/kwork_trace.bpf.c (+84):
···
         bpf_map_update_elem(map, key, name, BPF_ANY);
 }

+static __always_inline int update_timestart(void *map, struct work_key *key)
+{
+        if (!trace_event_match(key, NULL))
+                return 0;
+
+        do_update_timestart(map, key);
+        return 0;
+}
+
 static __always_inline int update_timestart_and_name(void *time_map,
                                                      void *names_map,
                                                      struct work_key *key,
···
                 return 0;

         do_update_timeend(report_map, time_map, key);
+
+        return 0;
+}
+
+static __always_inline int update_timeend_and_name(void *report_map,
+                                                   void *time_map,
+                                                   void *names_map,
+                                                   struct work_key *key,
+                                                   char *name)
+{
+        if (!trace_event_match(key, name))
+                return 0;
+
+        do_update_timeend(report_map, time_map, key);
+        do_update_name(names_map, key, name);

         return 0;
 }
···
         };

         return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int report_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+        struct work_key key = {
+                .type = KWORK_CLASS_WORKQUEUE,
+                .cpu  = bpf_get_smp_processor_id(),
+                .id   = (__u64)ctx->work,
+        };
+
+        return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_end")
+int report_workqueue_execute_end(struct trace_event_raw_workqueue_execute_end *ctx)
+{
+        char name[MAX_KWORKNAME];
+        struct work_key key = {
+                .type = KWORK_CLASS_WORKQUEUE,
+                .cpu  = bpf_get_smp_processor_id(),
+                .id   = (__u64)ctx->work,
+        };
+        unsigned long long func_addr = (unsigned long long)ctx->function;
+
+        __builtin_memset(name, 0, sizeof(name));
+        bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+        return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+                                       &perf_kwork_names, &key, name);
+}
+
+SEC("tracepoint/workqueue/workqueue_activate_work")
+int latency_workqueue_activate_work(struct trace_event_raw_workqueue_activate_work *ctx)
+{
+        struct work_key key = {
+                .type = KWORK_CLASS_WORKQUEUE,
+                .cpu  = bpf_get_smp_processor_id(),
+                .id   = (__u64)ctx->work,
+        };
+
+        return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int latency_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+        char name[MAX_KWORKNAME];
+        struct work_key key = {
+                .type = KWORK_CLASS_WORKQUEUE,
+                .cpu  = bpf_get_smp_processor_id(),
+                .id   = (__u64)ctx->work,
+        };
+        unsigned long long func_addr = (unsigned long long)ctx->function;
+
+        __builtin_memset(name, 0, sizeof(name));
+        bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+        return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+                                       &perf_kwork_names, &key, name);
 }

 char LICENSE[] SEC("license") = "Dual BSD/GPL";
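
On the BPF side, each report mode pairs two workqueue tracepoints around a per-work timestamp: latency mode stamps the perf_kwork_time map at workqueue_activate_work and folds the delta into perf_kwork_report at workqueue_execute_start, while runtime mode does the same between workqueue_execute_start and workqueue_execute_end. The map key is {class type, cpu, work struct address}, and the work name is rendered from the work function address with bpf_snprintf("%ps"). The following user-space sketch models only the timestamp-pairing step; a single global slot, invented timestamps, and a trimmed report struct stand in for the real BPF hash maps and tracepoint arguments:

#include <stdio.h>
#include <stdint.h>

struct work_key {
        uint32_t type;          /* kwork class (workqueue) */
        uint32_t cpu;
        uint64_t id;            /* kernel address of the work struct */
};

struct report_data {
        uint64_t nr;            /* feeds the Count column */
        uint64_t total_ns;      /* feeds the Avg/Total column */
        uint64_t max_ns;        /* feeds the Max delay/runtime column */
};

static uint64_t time_slot;              /* stands in for the perf_kwork_time map */
static struct report_data report_slot;  /* stands in for the perf_kwork_report map */

/* latency mode: workqueue_activate_work; runtime mode: workqueue_execute_start */
static void model_update_timestart(uint64_t now_ns)
{
        time_slot = now_ns;
}

/* latency mode: workqueue_execute_start; runtime mode: workqueue_execute_end */
static void model_update_timeend(uint64_t now_ns)
{
        uint64_t delta = now_ns - time_slot;

        report_slot.nr++;
        report_slot.total_ns += delta;
        if (delta > report_slot.max_ns)
                report_slot.max_ns = delta;
}

int main(void)
{
        /* a real map entry is keyed by this triple; unused in the single-slot model */
        struct work_key key = { .type = 2, .cpu = 2, .id = 0xffff888012345678ULL };

        (void)key;

        model_update_timestart(1000000);        /* work queued (activate_work)        */
        model_update_timeend(6856000);          /* work started executing 5.856 ms later */

        printf("count=%llu avg=%.3f ms max=%.3f ms\n",
               (unsigned long long)report_slot.nr,
               (double)report_slot.total_ns / 1e6 / (double)report_slot.nr,
               (double)report_slot.max_ns / 1e6);
        return 0;
}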