Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf script: Add 'brstackinsnlen' for branch stacks

When analyzing with 'perf script', it's useful to understand the
captured instruction and the next sequential instruction.

To calculate the address of the next sequential instruction, the length
of the captured instruction is required.

For example, you can’t know the next sequential instruction after an
unconditional branch unless you calculate that based on its length.

For branch stacks, 'perf script' only prints the instruction bytes with
'brstackinsn', but lacks the instruction length.

Add 'brstackinsnlen' to print the instruction length.

$ perf script -F ip,brstackinsn,brstackinsnlen --xed
7fa555be8f75
_start:
00007fa555be8090 mov %rsp, %rdi ilen: 3
00007fa555be8093 callq 0x7fa555be8ea0 ilen: 5 # PRED 102 cycles [102] 0.02 IPC
_dl_start+38:
00007fa555be8ec6 movq %rdx,0x227853(%rip) ilen: 7
00007fa555be8ecd leaq 0x227f94(%rip),%rdx ilen: 7

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Ahmad Yasin <ahmad.yasin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/1647871212-184070-1-git-send-email-kan.liang@linux.intel.com
[ Added the new field to tools/perf/Documentation/perf-script.txt ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
6f680c6a bc355822

+38 -14
+6 -2
tools/perf/Documentation/perf-script.txt
··· 129 129 Comma separated list of fields to print. Options are: 130 130 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 131 131 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, 132 - brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, 133 - metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat. 132 + brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth, 133 + phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat. 134 134 Field list can be prepended with the type, trace, sw or hw, 135 135 to indicate to which event type the field list applies. 136 136 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace ··· 240 240 When brstackinsn is specified the full assembler sequences of branch sequences for each sample 241 241 is printed. This is the full execution path leading to the sample. This is only supported when the 242 242 sample was recorded with perf record -b or -j any. 243 + 244 + Use brstackinsnlen to print the brstackinsn length. For example, you 245 + can’t know the next sequential instruction after an unconditional branch unless 246 + you calculate that based on its length. 243 247 244 248 The brstackoff field will print an offset into a specific dso/binary. 245 249
+32 -12
tools/perf/builtin-script.c
··· 124 124 PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, 125 125 PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, 126 126 PERF_OUTPUT_INS_LAT = 1ULL << 35, 127 + PERF_OUTPUT_BRSTACKINSNLEN = 1ULL << 36, 127 128 }; 128 129 129 130 struct perf_script { ··· 192 191 {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, 193 192 {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, 194 193 {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT}, 194 + {.str = "brstackinsnlen", .field = PERF_OUTPUT_BRSTACKINSNLEN}, 195 195 }; 196 196 197 197 enum { ··· 490 488 "selected. Hence, no address to lookup the source line number.\n"); 491 489 return -EINVAL; 492 490 } 493 - if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && 491 + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && 494 492 !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { 495 493 pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" 496 494 "Hint: run 'perf record -b ...'\n"); ··· 1122 1120 1123 1121 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, 1124 1122 struct perf_insn *x, u8 *inbuf, int len, 1125 - int insn, FILE *fp, int *total_cycles) 1123 + int insn, FILE *fp, int *total_cycles, 1124 + struct perf_event_attr *attr) 1126 1125 { 1127 - int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip, 1128 - dump_insn(x, ip, inbuf, len, NULL), 1126 + int ilen = 0; 1127 + int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, 1128 + dump_insn(x, ip, inbuf, len, &ilen)); 1129 + 1130 + if (PRINT_FIELD(BRSTACKINSNLEN)) 1131 + printed += fprintf(fp, "ilen: %d\t", ilen); 1132 + 1133 + printed += fprintf(fp, "#%s%s%s%s", 1129 1134 en->flags.predicted ? " PRED" : "", 1130 1135 en->flags.mispred ? " MISPRED" : "", 1131 1136 en->flags.in_tx ? 
" INTX" : "", ··· 1218 1209 printed += ip__fprintf_sym(entries[nr - 1].from, thread, 1219 1210 x.cpumode, x.cpu, &lastsym, attr, fp); 1220 1211 printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], 1221 - &x, buffer, len, 0, fp, &total_cycles); 1212 + &x, buffer, len, 0, fp, &total_cycles, 1213 + attr); 1222 1214 if (PRINT_FIELD(SRCCODE)) 1223 1215 printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); 1224 1216 } ··· 1250 1240 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); 1251 1241 if (ip == end) { 1252 1242 printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, 1253 - &total_cycles); 1243 + &total_cycles, attr); 1254 1244 if (PRINT_FIELD(SRCCODE)) 1255 1245 printed += print_srccode(thread, x.cpumode, ip); 1256 1246 break; 1257 1247 } else { 1258 1248 ilen = 0; 1259 - printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, 1249 + printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, 1260 1250 dump_insn(&x, ip, buffer + off, len - off, &ilen)); 1251 + if (PRINT_FIELD(BRSTACKINSNLEN)) 1252 + printed += fprintf(fp, "\tilen: %d", ilen); 1253 + printed += fprintf(fp, "\n"); 1261 1254 if (ilen == 0) 1262 1255 break; 1263 1256 if (PRINT_FIELD(SRCCODE)) ··· 1303 1290 machine, thread, &x.is64bit, &x.cpumode, false); 1304 1291 if (len <= 0) 1305 1292 goto out; 1306 - printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip, 1307 - dump_insn(&x, sample->ip, buffer, len, NULL)); 1293 + ilen = 0; 1294 + printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, 1295 + dump_insn(&x, sample->ip, buffer, len, &ilen)); 1296 + if (PRINT_FIELD(BRSTACKINSNLEN)) 1297 + printed += fprintf(fp, "\tilen: %d", ilen); 1298 + printed += fprintf(fp, "\n"); 1308 1299 if (PRINT_FIELD(SRCCODE)) 1309 1300 print_srccode(thread, x.cpumode, sample->ip); 1310 1301 goto out; 1311 1302 } 1312 1303 for (off = 0; off <= end - start; off += ilen) { 1313 1304 ilen = 0; 1314 - printed += fprintf(fp, "\t%016" PRIx64 
"\t%s\n", start + off, 1305 + printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, 1315 1306 dump_insn(&x, start + off, buffer + off, len - off, &ilen)); 1307 + if (PRINT_FIELD(BRSTACKINSNLEN)) 1308 + printed += fprintf(fp, "\tilen: %d", ilen); 1309 + printed += fprintf(fp, "\n"); 1316 1310 if (ilen == 0) 1317 1311 break; 1318 1312 if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) { ··· 1477 1457 for (i = 0; i < sample->insn_len; i++) 1478 1458 printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]); 1479 1459 } 1480 - if (PRINT_FIELD(BRSTACKINSN)) 1460 + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) 1481 1461 printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); 1482 1462 1483 1463 return printed; ··· 3796 3776 "Valid types: hw,sw,trace,raw,synth. " 3797 3777 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 3798 3778 "addr,symoff,srcline,period,iregs,uregs,brstack," 3799 - "brstacksym,flags,bpf-output,brstackinsn,brstackoff," 3779 + "brstacksym,flags,bpf-output,brstackinsn,brstackinsnlen,brstackoff," 3800 3780 "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," 3801 3781 "data_page_size,code_page_size,ins_lat", 3802 3782 parse_output_fields),