Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Add branch counter knob

Add a new branch filter, "counter", for the branch counter option. It is
used to mark the events which should be logged in the branch. If it is
applied with the -j option, the counters of all the events should be
logged in the branch. If a legacy kernel doesn't support the new
branch sample type, the branch counter filter is switched off.

The stored counter values in each branch are displayed right after the
regular branch stack information via perf report -D.

Usage examples:

# perf record -e "{branch-instructions,branch-misses}:S" -j any,counter

Only the first event, branch-instructions, collects the LBR. Both
branch-instructions and branch-misses are marked as logged events.
Their occurrence counts can be found in the branch stack
extension space of each branch.

# perf record -e "{cpu/branch-instructions,branch_type=any/,cpu/branch-misses,branch_type=counter/}"

Only the first event, branch-instructions, collects the LBR. Only the
branch-misses event is marked as a logged event.

Committer notes:

I noticed 'perf test "Sample parsing"' failing and reported it to the
list. Kan provided a patch that checks if the evsel has a leader and
that evsel->evlist is set; the comment in the source code explains it
further.

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tinghao Zhang <tinghao.zhang@intel.com>
Link: https://lore.kernel.org/r/20231025201626.3000228-8-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
9fbb4b02 ac9cd724

+55 -3
+4
tools/perf/Documentation/perf-record.txt
··· 445 445 4th-Gen Xeon+ server), the save branch type is unconditionally enabled 446 446 when the taken branch stack sampling is enabled. 447 447 - priv: save privilege state during sampling in case binary is not available later 448 + - counter: save occurrences of the event since the last branch entry. Currently, the 449 + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and 450 + later platforms. An error out is expected if it's used on the unsupported 451 + kernel or CPUs. 448 452 449 453 + 450 454 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+34 -1
tools/perf/util/evsel.c
··· 1832 1832 1833 1833 static void evsel__disable_missing_features(struct evsel *evsel) 1834 1834 { 1835 + if (perf_missing_features.branch_counters) 1836 + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; 1835 1837 if (perf_missing_features.read_lost) 1836 1838 evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; 1837 1839 if (perf_missing_features.weight_struct) { ··· 1887 1885 * Must probe features in the order they were added to the 1888 1886 * perf_event_attr interface. 1889 1887 */ 1890 - if (!perf_missing_features.read_lost && 1888 + if (!perf_missing_features.branch_counters && 1889 + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { 1890 + perf_missing_features.branch_counters = true; 1891 + pr_debug2("switching off branch counters support\n"); 1892 + return true; 1893 + } else if (!perf_missing_features.read_lost && 1891 1894 (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { 1892 1895 perf_missing_features.read_lost = true; 1893 1896 pr_debug2("switching off PERF_FORMAT_LOST support\n"); ··· 2325 2318 return new_val; 2326 2319 } 2327 2320 2321 + static inline bool evsel__has_branch_counters(const struct evsel *evsel) 2322 + { 2323 + struct evsel *cur, *leader = evsel__leader(evsel); 2324 + 2325 + /* The branch counters feature only supports group */ 2326 + if (!leader || !evsel->evlist) 2327 + return false; 2328 + 2329 + evlist__for_each_entry(evsel->evlist, cur) { 2330 + if ((leader == evsel__leader(cur)) && 2331 + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) 2332 + return true; 2333 + } 2334 + return false; 2335 + } 2336 + 2328 2337 int evsel__parse_sample(struct evsel *evsel, union perf_event *event, 2329 2338 struct perf_sample *data) 2330 2339 { ··· 2574 2551 2575 2552 OVERFLOW_CHECK(array, sz, max_size); 2576 2553 array = (void *)array + sz; 2554 + 2555 + if (evsel__has_branch_counters(evsel)) { 2556 + OVERFLOW_CHECK_u64(array); 2557 + 2558 + data->branch_stack_cntr = (u64 *)array; 
2559 + sz = data->branch_stack->nr * sizeof(u64); 2560 + 2561 + OVERFLOW_CHECK(array, sz, max_size); 2562 + array = (void *)array + sz; 2563 + } 2577 2564 } 2578 2565 2579 2566 if (type & PERF_SAMPLE_REGS_USER) {
+1
tools/perf/util/evsel.h
··· 191 191 bool code_page_size; 192 192 bool weight_struct; 193 193 bool read_lost; 194 + bool branch_counters; 194 195 }; 195 196 196 197 extern struct perf_missing_features perf_missing_features;
+1
tools/perf/util/parse-branch-options.c
··· 36 36 BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), 37 37 BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), 38 38 BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), 39 + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), 39 40 BRANCH_END 40 41 }; 41 42
+1
tools/perf/util/perf_event_attr_fprintf.c
··· 55 55 bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), 56 56 bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), 57 57 bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), 58 + bit_name(COUNTERS), 58 59 { .name = NULL, } 59 60 }; 60 61 #undef bit_name
+1
tools/perf/util/sample.h
··· 113 113 void *raw_data; 114 114 struct ip_callchain *callchain; 115 115 struct branch_stack *branch_stack; 116 + u64 *branch_stack_cntr; 116 117 struct regs_dump user_regs; 117 118 struct regs_dump intr_regs; 118 119 struct stack_dump user_stack;
+13 -2
tools/perf/util/session.c
··· 1150 1150 i, callchain->ips[i]); 1151 1151 } 1152 1152 1153 - static void branch_stack__printf(struct perf_sample *sample, bool callstack) 1153 + static void branch_stack__printf(struct perf_sample *sample, 1154 + struct evsel *evsel) 1154 1155 { 1155 1156 struct branch_entry *entries = perf_sample__branch_entries(sample); 1157 + bool callstack = evsel__has_branch_callstack(evsel); 1158 + u64 *branch_stack_cntr = sample->branch_stack_cntr; 1159 + struct perf_env *env = evsel__env(evsel); 1156 1160 uint64_t i; 1157 1161 1158 1162 if (!callstack) { ··· 1197 1193 printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from); 1198 1194 } 1199 1195 } 1196 + } 1197 + 1198 + if (branch_stack_cntr) { 1199 + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", 1200 + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); 1201 + for (i = 0; i < sample->branch_stack->nr; i++) 1202 + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); 1200 1203 } 1201 1204 } 1202 1205 ··· 1366 1355 callchain__printf(evsel, sample); 1367 1356 1368 1357 if (evsel__has_br_stack(evsel)) 1369 - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); 1358 + branch_stack__printf(sample, evsel); 1370 1359 1371 1360 if (sample_type & PERF_SAMPLE_REGS_USER) 1372 1361 regs_user__printf(sample, arch);