Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf script: Add branch counters

It's useful to print the branch counter information for each jump in
the brstackinsn output when it's available.

Add a new field 'brcntr' to display the branch counter information.

By default, each occurrence of a branch counter event is shown using the
event's abbreviation (e.g. 'AA' means event A occurred twice). In verbose
mode, the real event name and its occurrence count are shown instead.

$ perf script -F +brstackinsn,+brcntr

# Branch counter abbr list:
# branch-instructions:ppp = A
# branch-misses = B
# '-' No event occurs
# '+' Event occurrences may be lost due to branch counter saturated
tchain_edit 332203 3366329.405674: 53030 branch-instructions:ppp: 401781 f3+0x2c (home/sdp/test/tchain_edit)
f3+31:
0000000000401774 insn: eb 04 br_cntr: AA # PRED 5 cycles [5]
000000000040177a insn: 81 7d fc 0f 27 00 00
0000000000401781 insn: 7e e3 br_cntr: A # PRED 1 cycles [6] 2.00 IPC
0000000000401766 insn: 8b 45 fc
0000000000401769 insn: 83 e0 01
000000000040176c insn: 85 c0
000000000040176e insn: 74 06 br_cntr: A # PRED 1 cycles [7] 4.00 IPC
0000000000401776 insn: 83 45 fc 01
000000000040177a insn: 81 7d fc 0f 27 00 00
0000000000401781 insn: 7e e3 br_cntr: A # PRED 7 cycles [14] 0.43 IPC

$ perf script -F +brstackinsn,+brcntr -v

tchain_edit 332203 3366329.405674: 53030 branch-instructions:ppp: 401781 f3+0x2c (/home/sdp/os.linux.perf.test-suite/kernels/lbr_kernel/tchain_edit)
f3+31:
0000000000401774 insn: eb 04 br_cntr: branch-instructions:ppp 2 branch-misses 0 # PRED 5 cycles [5]
000000000040177a insn: 81 7d fc 0f 27 00 00
0000000000401781 insn: 7e e3 br_cntr: branch-instructions:ppp 1 branch-misses 0 # PRED 1 cycles [6] 2.00 IPC
0000000000401766 insn: 8b 45 fc
0000000000401769 insn: 83 e0 01
000000000040176c insn: 85 c0
000000000040176e insn: 74 06 br_cntr: branch-instructions:ppp 1 branch-misses 0 # PRED 1 cycles [7] 4.00 IPC
0000000000401776 insn: 83 45 fc 01
000000000040177a insn: 81 7d fc 0f 27 00 00
0000000000401781 insn: 7e e3 br_cntr: branch-instructions:ppp 1 branch-misses 0 # PRED 7 cycles [14] 0.43 IPC

Originally-by: Tinghao Zhang <tinghao.zhang@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240813160208.2493643-9-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
6f9d8d1d e6952dce

+63 -8
+1 -1
tools/perf/Documentation/perf-script.txt
··· 134 134 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, 135 135 brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, 136 136 insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, 137 - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, 137 + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, brcntr, 138 138 139 139 Field list can be prepended with the type, trace, sw or hw, 140 140 to indicate to which event type the field list applies.
+62 -7
tools/perf/builtin-script.c
··· 62 62 #include "util/record.h" 63 63 #include "util/util.h" 64 64 #include "util/cgroup.h" 65 + #include "util/annotate.h" 65 66 #include "perf.h" 66 67 67 68 #include <linux/ctype.h> ··· 139 138 PERF_OUTPUT_DSOFF = 1ULL << 41, 140 139 PERF_OUTPUT_DISASM = 1ULL << 42, 141 140 PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, 141 + PERF_OUTPUT_BRCNTR = 1ULL << 44, 142 142 }; 143 143 144 144 struct perf_script { ··· 215 213 {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, 216 214 {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, 217 215 {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, 216 + {.str = "brcntr", .field = PERF_OUTPUT_BRCNTR}, 218 217 }; 219 218 220 219 enum { ··· 523 520 "Hint: run 'perf record -b ...'\n"); 524 521 return -EINVAL; 525 522 } 523 + if (PRINT_FIELD(BRCNTR) && 524 + !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_COUNTERS)) { 525 + pr_err("Display of branch counter requested but it's not enabled\n" 526 + "Hint: run 'perf record -j any,counter ...'\n"); 527 + return -EINVAL; 528 + } 526 529 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && 527 530 evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) 528 531 return -EINVAL; ··· 797 788 unsigned long long nsecs; 798 789 int printed = 0; 799 790 char tstr[128]; 791 + 792 + /* 793 + * Print the branch counter's abbreviation list, 794 + * if the branch counter is available. 
795 + */ 796 + if (PRINT_FIELD(BRCNTR) && !verbose) { 797 + char *buf; 798 + 799 + if (!annotation_br_cntr_abbr_list(&buf, evsel, true)) { 800 + printed += fprintf(stdout, "%s", buf); 801 + free(buf); 802 + } 803 + } 800 804 801 805 if (PRINT_FIELD(MACHINE_PID) && sample->machine_pid) 802 806 printed += fprintf(fp, "VM:%5d ", sample->machine_pid); ··· 1217 1195 struct perf_insn *x, u8 *inbuf, int len, 1218 1196 int insn, FILE *fp, int *total_cycles, 1219 1197 struct perf_event_attr *attr, 1220 - struct thread *thread) 1198 + struct thread *thread, 1199 + struct evsel *evsel, 1200 + u64 br_cntr) 1221 1201 { 1222 1202 int ilen = 0; 1223 1203 int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip); ··· 1238 1214 printed += map__fprintf_srcline(al.map, al.addr, " srcline: ", fp); 1239 1215 printed += fprintf(fp, "\t"); 1240 1216 addr_location__exit(&al); 1217 + } 1218 + 1219 + if (PRINT_FIELD(BRCNTR)) { 1220 + unsigned int width = evsel__env(evsel)->br_cntr_width; 1221 + unsigned int i = 0, j, num, mask = (1L << width) - 1; 1222 + struct evsel *pos = evsel__leader(evsel); 1223 + 1224 + printed += fprintf(fp, "br_cntr: "); 1225 + evlist__for_each_entry_from(evsel->evlist, pos) { 1226 + if (!(pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) 1227 + continue; 1228 + if (evsel__leader(pos) != evsel__leader(evsel)) 1229 + break; 1230 + 1231 + num = (br_cntr >> (i++ * width)) & mask; 1232 + if (!verbose) { 1233 + for (j = 0; j < num; j++) 1234 + printed += fprintf(fp, "%s", pos->abbr_name); 1235 + } else 1236 + printed += fprintf(fp, "%s %d ", pos->name, num); 1237 + } 1238 + printed += fprintf(fp, "\t"); 1241 1239 } 1242 1240 1243 1241 printed += fprintf(fp, "#%s%s%s%s", ··· 1318 1272 } 1319 1273 1320 1274 static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, 1275 + struct evsel *evsel, 1321 1276 struct thread *thread, 1322 1277 struct perf_event_attr *attr, 1323 1278 struct machine *machine, FILE *fp) ··· 1332 1285 unsigned off; 1333 1286 
struct symbol *lastsym = NULL; 1334 1287 int total_cycles = 0; 1288 + u64 br_cntr = 0; 1335 1289 1336 1290 if (!(br && br->nr)) 1337 1291 return 0; ··· 1343 1295 x.thread = thread; 1344 1296 x.machine = machine; 1345 1297 x.cpu = sample->cpu; 1298 + 1299 + if (PRINT_FIELD(BRCNTR) && sample->branch_stack_cntr) 1300 + br_cntr = sample->branch_stack_cntr[nr - 1]; 1346 1301 1347 1302 printed += fprintf(fp, "%c", '\n'); 1348 1303 ··· 1358 1307 x.cpumode, x.cpu, &lastsym, attr, fp); 1359 1308 printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], 1360 1309 &x, buffer, len, 0, fp, &total_cycles, 1361 - attr, thread); 1310 + attr, thread, evsel, br_cntr); 1362 1311 if (PRINT_FIELD(SRCCODE)) 1363 1312 printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); 1364 1313 } ··· 1388 1337 1389 1338 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); 1390 1339 if (ip == end) { 1340 + if (PRINT_FIELD(BRCNTR) && sample->branch_stack_cntr) 1341 + br_cntr = sample->branch_stack_cntr[i]; 1391 1342 printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, 1392 - &total_cycles, attr, thread); 1343 + &total_cycles, attr, thread, evsel, br_cntr); 1393 1344 if (PRINT_FIELD(SRCCODE)) 1394 1345 printed += print_srccode(thread, x.cpumode, ip); 1395 1346 break; ··· 1600 1547 } 1601 1548 1602 1549 static int perf_sample__fprintf_insn(struct perf_sample *sample, 1550 + struct evsel *evsel, 1603 1551 struct perf_event_attr *attr, 1604 1552 struct thread *thread, 1605 1553 struct machine *machine, FILE *fp, ··· 1621 1567 printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); 1622 1568 } 1623 1569 if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) 1624 - printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); 1570 + printed += perf_sample__fprintf_brstackinsn(sample, evsel, thread, attr, machine, fp); 1625 1571 1626 1572 return printed; 1627 1573 
} ··· 1693 1639 if (print_srcline_last) 1694 1640 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); 1695 1641 1696 - printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); 1642 + printed += perf_sample__fprintf_insn(sample, evsel, attr, thread, machine, fp, al); 1697 1643 printed += fprintf(fp, "\n"); 1698 1644 if (PRINT_FIELD(SRCCODE)) { 1699 1645 int ret = map__fprintf_srccode(al->map, al->addr, stdout, ··· 2351 2297 2352 2298 if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 2353 2299 perf_sample__fprintf_bpf_output(sample, fp); 2354 - perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); 2300 + perf_sample__fprintf_insn(sample, evsel, attr, thread, machine, fp, al); 2355 2301 2356 2302 if (PRINT_FIELD(PHYS_ADDR)) 2357 2303 fprintf(fp, "%16" PRIx64, sample->phys_addr); ··· 4001 3947 "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," 4002 3948 "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," 4003 3949 "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," 4004 - "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", 3950 + "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat," 3951 + "brcntr", 4005 3952 parse_output_fields), 4006 3953 OPT_BOOLEAN('a', "all-cpus", &system_wide, 4007 3954 "system-wide collection from all CPUs"),