Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf lock contention: Support lock type filtering for BPF

Likewise, add a type_filter BPF hash map and check it when the user gives a
lock type filter.

$ sudo ./perf lock con -ab -Y rwlock -- ./perf bench sched messaging
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 10 groups == 400 processes run

Total time: 0.203 [sec]
 contended   total wait     max wait     avg wait         type   caller

        15    156.19 us     19.45 us     10.41 us     rwlock:W   do_exit+0x36d
         1     11.12 us     11.12 us     11.12 us     rwlock:R   do_wait+0x8b
         1      5.09 us      5.09 us      5.09 us     rwlock:W   release_task+0x6e

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221219201732.460111-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
529772c4 b4a7eff9

+35 -3
+1
tools/perf/builtin-lock.c
··· 1689 1689 .map_nr_entries = bpf_map_entries, 1690 1690 .max_stack = max_stack_depth, 1691 1691 .stack_skip = stack_skip, 1692 + .filters = &filters, 1692 1693 }; 1693 1694 1694 1695 session = perf_session__new(use_bpf ? NULL : &data, &eops);
+14 -1
tools/perf/util/bpf_lock_contention.c
··· 20 20 int lock_contention_prepare(struct lock_contention *con) 21 21 { 22 22 int i, fd; 23 - int ncpus = 1, ntasks = 1; 23 + int ncpus = 1, ntasks = 1, ntypes = 1; 24 24 struct evlist *evlist = con->evlist; 25 25 struct target *target = con->target; 26 26 ··· 46 46 ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); 47 47 if (target__has_task(target)) 48 48 ntasks = perf_thread_map__nr(evlist->core.threads); 49 + if (con->filters->nr_types) 50 + ntypes = con->filters->nr_types; 49 51 50 52 bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); 51 53 bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 54 + bpf_map__set_max_entries(skel->maps.type_filter, ntypes); 52 55 53 56 if (lock_contention_bpf__load(skel) < 0) { 54 57 pr_err("Failed to load lock-contention BPF skeleton\n"); ··· 91 88 skel->bss->has_task = 1; 92 89 fd = bpf_map__fd(skel->maps.task_filter); 93 90 bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 91 + } 92 + 93 + if (con->filters->nr_types) { 94 + u8 val = 1; 95 + 96 + skel->bss->has_type = 1; 97 + fd = bpf_map__fd(skel->maps.type_filter); 98 + 99 + for (i = 0; i < con->filters->nr_types; i++) 100 + bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY); 94 101 } 95 102 96 103 /* these don't work well if in the rodata section */
+19 -2
tools/perf/util/bpf_skel/lock_contention.bpf.c
··· 62 62 __uint(max_entries, 1); 63 63 } task_filter SEC(".maps"); 64 64 65 + struct { 66 + __uint(type, BPF_MAP_TYPE_HASH); 67 + __uint(key_size, sizeof(__u32)); 68 + __uint(value_size, sizeof(__u8)); 69 + __uint(max_entries, 1); 70 + } type_filter SEC(".maps"); 71 + 65 72 /* control flags */ 66 73 int enabled; 67 74 int has_cpu; 68 75 int has_task; 76 + int has_type; 69 77 int stack_skip; 70 78 71 79 /* determine the key of lock stat */ ··· 82 74 /* error stat */ 83 75 int lost; 84 76 85 - static inline int can_record(void) 77 + static inline int can_record(u64 *ctx) 86 78 { 87 79 if (has_cpu) { 88 80 __u32 cpu = bpf_get_smp_processor_id(); ··· 98 90 __u32 pid = bpf_get_current_pid_tgid(); 99 91 100 92 ok = bpf_map_lookup_elem(&task_filter, &pid); 93 + if (!ok) 94 + return 0; 95 + } 96 + 97 + if (has_type) { 98 + __u8 *ok; 99 + __u32 flags = (__u32)ctx[1]; 100 + 101 + ok = bpf_map_lookup_elem(&type_filter, &flags); 101 102 if (!ok) 102 103 return 0; 103 104 } ··· 133 116 __u32 pid; 134 117 struct tstamp_data *pelem; 135 118 136 - if (!enabled || !can_record()) 119 + if (!enabled || !can_record(ctx)) 137 120 return 0; 138 121 139 122 pid = bpf_get_current_pid_tgid();
+1
tools/perf/util/lock-contention.h
··· 118 118 struct target *target; 119 119 struct machine *machine; 120 120 struct hlist_head *result; 121 + struct lock_filter *filters; 121 122 unsigned long map_nr_entries; 122 123 int lost; 123 124 int max_stack;