Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf: Add branch stack counters

Currently, the additional information of a branch entry is stored in a
u64 space. As more and more information is added, the space is running
out. For example, the number of occurrences of events will be added
for each branch.

Two places were suggested to append the counters.
https://lore.kernel.org/lkml/20230802215814.GH231007@hirez.programming.kicks-ass.net/
One place is right after the flags of each branch entry. This changes
the existing struct perf_branch_entry, so later arch-specific
implementations have to be really careful to consistently pick
the right struct.
The other place is right after the entire struct perf_branch_stack.
The disadvantage is that a pointer to the extra space has to be
recorded, so the common interface perf_sample_save_brstack() has to be
updated.

The latter is much more straightforward, and should be easy to understand
and maintain. It is the approach implemented in this patch.

Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate
that the occurrences of events are recorded in the branch info.

The "u64 counters" may store the occurrences of several events. The
information regarding the number of events/counters and the width of
each counter should be exposed via sysfs as a reference for the perf
tool. Define the branch_counter_nr and branch_counter_width ABI here.
The support will be implemented later in the Intel-specific patch.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231025201626.3000228-1-kan.liang@linux.intel.com

authored by

Kan Liang and committed by
Peter Zijlstra
571d91dc 744940f1

+46 -7
+6
Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
··· 16 16 Example output in powerpc: 17 17 grep . /sys/bus/event_source/devices/cpu/caps/* 18 18 /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9 19 + 20 + The "branch_counter_nr" in the supported platform exposes the 21 + maximum number of counters which can be shown in the u64 counters 22 + of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width" 23 + exposes the width of each counter. Both of them can be used by 24 + the perf tool to parse the logged counters in each branch.
+1 -1
arch/powerpc/perf/core-book3s.c
··· 2313 2313 struct cpu_hw_events *cpuhw; 2314 2314 cpuhw = this_cpu_ptr(&cpu_hw_events); 2315 2315 power_pmu_bhrb_read(event, cpuhw); 2316 - perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack); 2316 + perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL); 2317 2317 } 2318 2318 2319 2319 if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+1 -1
arch/x86/events/amd/core.c
··· 940 940 continue; 941 941 942 942 if (has_branch_stack(event)) 943 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 943 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 944 944 945 945 if (perf_event_overflow(event, &data, regs)) 946 946 x86_pmu_stop(event, 0);
+1 -1
arch/x86/events/core.c
··· 1702 1702 perf_sample_data_init(&data, 0, event->hw.last_period); 1703 1703 1704 1704 if (has_branch_stack(event)) 1705 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 1705 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 1706 1706 1707 1707 if (perf_event_overflow(event, &data, regs)) 1708 1708 x86_pmu_stop(event, 0);
+1 -1
arch/x86/events/intel/core.c
··· 3047 3047 perf_sample_data_init(&data, 0, event->hw.last_period); 3048 3048 3049 3049 if (has_branch_stack(event)) 3050 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 3050 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 3051 3051 3052 3052 if (perf_event_overflow(event, &data, regs)) 3053 3053 x86_pmu_stop(event, 0);
+2 -2
arch/x86/events/intel/ds.c
··· 1755 1755 setup_pebs_time(event, data, pebs->tsc); 1756 1756 1757 1757 if (has_branch_stack(event)) 1758 - perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 1758 + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); 1759 1759 } 1760 1760 1761 1761 static void adaptive_pebs_save_regs(struct pt_regs *regs, ··· 1912 1912 1913 1913 if (has_branch_stack(event)) { 1914 1914 intel_pmu_store_pebs_lbrs(lbr); 1915 - perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 1915 + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); 1916 1916 } 1917 1917 } 1918 1918
+16 -1
include/linux/perf_event.h
··· 1139 1139 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; 1140 1140 } 1141 1141 1142 + static inline bool branch_sample_counters(const struct perf_event *event) 1143 + { 1144 + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; 1145 + } 1142 1146 1143 1147 struct perf_sample_data { 1144 1148 /* ··· 1177 1173 struct perf_callchain_entry *callchain; 1178 1174 struct perf_raw_record *raw; 1179 1175 struct perf_branch_stack *br_stack; 1176 + u64 *br_stack_cntr; 1180 1177 union perf_sample_weight weight; 1181 1178 union perf_mem_data_src data_src; 1182 1179 u64 txn; ··· 1255 1250 1256 1251 static inline void perf_sample_save_brstack(struct perf_sample_data *data, 1257 1252 struct perf_event *event, 1258 - struct perf_branch_stack *brs) 1253 + struct perf_branch_stack *brs, 1254 + u64 *brs_cntr) 1259 1255 { 1260 1256 int size = sizeof(u64); /* nr */ 1261 1257 ··· 1264 1258 size += sizeof(u64); 1265 1259 size += brs->nr * sizeof(struct perf_branch_entry); 1266 1260 1261 + /* 1262 + * The extension space for counters is appended after the 1263 + * struct perf_branch_stack. It is used to store the occurrences 1264 + * of events of each branch. 1265 + */ 1266 + if (brs_cntr) 1267 + size += brs->nr * sizeof(u64); 1268 + 1267 1269 data->br_stack = brs; 1270 + data->br_stack_cntr = brs_cntr; 1268 1271 data->dyn_size += size; 1269 1272 data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; 1270 1273 }
+10
include/uapi/linux/perf_event.h
··· 204 204 205 205 PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ 206 206 207 + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ 208 + 207 209 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 208 210 }; 209 211 ··· 236 234 PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 237 235 238 236 PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, 237 + 238 + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, 239 239 240 240 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 241 241 }; ··· 986 982 * { u64 nr; 987 983 * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 988 984 * { u64 from, to, flags } lbr[nr]; 985 + * # 986 + * # The format of the counters is decided by the 987 + * # "branch_counter_nr" and "branch_counter_width", 988 + * # which are defined in the ABI. 989 + * # 990 + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS 989 991 * } && PERF_SAMPLE_BRANCH_STACK 990 992 * 991 993 * { u64 abi; # enum perf_sample_regs_abi
+8
kernel/events/core.c
··· 7341 7341 if (branch_sample_hw_index(event)) 7342 7342 perf_output_put(handle, data->br_stack->hw_idx); 7343 7343 perf_output_copy(handle, data->br_stack->entries, size); 7344 + /* 7345 + * Add the extension space which is appended 7346 + * right after the struct perf_branch_stack. 7347 + */ 7348 + if (data->br_stack_cntr) { 7349 + size = data->br_stack->nr * sizeof(u64); 7350 + perf_output_copy(handle, data->br_stack_cntr, size); 7351 + } 7344 7352 } else { 7345 7353 /* 7346 7354 * we always store at least the value of nr