Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf script: Add capstone support for '-F +brstackdisasm'

Support capstone output for the '-F +brstackinsn' branch dump.

The new output is enabled with the new field 'brstackdisasm'.

This was possible before with --xed, but it is now also available to users
who don't have xed, through the built-in capstone support.

Before:

perf record -b emacs -Q --batch '()'
perf script -F +brstackinsn
...
emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237:
00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3]
00007f0ab2d17105 insn: 73 51
00007f0ab2d17107 insn: 48 89 c1
00007f0ab2d1710a insn: 48 39 ca
00007f0ab2d1710d insn: 73 96
00007f0ab2d1710f insn: 48 8d 04 11
00007f0ab2d17113 insn: 48 d1 e8
00007f0ab2d17116 insn: 49 8d 34 c1
00007f0ab2d1711a insn: 44 3a 06
00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC
00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC
00007f0ab2d17158 insn: 48 8d 50 01
00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC
00007f0ab2d170f0 insn: 48 39 ca
00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC

After (perf must be compiled with capstone):

perf script -F +brstackdisasm

...
emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237:
00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3]
00007f0ab2d17105 jae intel_check_word.constprop.0+0x128
00007f0ab2d17107 movq %rax, %rcx
00007f0ab2d1710a cmpq %rcx, %rdx
00007f0ab2d1710d jae intel_check_word.constprop.0+0x75
00007f0ab2d1710f leaq (%rcx, %rdx), %rax
00007f0ab2d17113 shrq $1, %rax
00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi
00007f0ab2d1711a cmpb (%rsi), %r8b
00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC
00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC
00007f0ab2d17158 leaq 1(%rax), %rdx
00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC
00007f0ab2d170f0 cmpq %rcx, %rdx
00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20240401210925.209671-3-ak@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Andi Kleen and committed by Arnaldo Carvalho de Melo
d8120446 38ab6013

+86 -9
+5 -2
tools/perf/Documentation/perf-script.txt
··· 132 132 Comma separated list of fields to print. Options are: 133 133 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, 134 134 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, 135 - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, 135 + brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, 136 136 insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, 137 - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. 137 + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, 138 138 139 139 Field list can be prepended with the type, trace, sw or hw, 140 140 to indicate to which event type the field list applies. ··· 256 256 Use brstackinsnlen to print the brstackinsn lenght. For example, you 257 257 can’t know the next sequential instruction after an unconditional branch unless 258 258 you calculate that based on its length. 259 + 260 + brstackdisasm acts like brstackinsn, but will print disassembled instructions if 261 + perf is built with the capstone library. 259 262 260 263 The brstackoff field will print an offset into a specific dso/binary. 261 264
+25 -7
tools/perf/builtin-script.c
··· 136 136 PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, 137 137 PERF_OUTPUT_DSOFF = 1ULL << 41, 138 138 PERF_OUTPUT_DISASM = 1ULL << 42, 139 + PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, 139 140 }; 140 141 141 142 struct perf_script { ··· 211 210 {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, 212 211 {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, 213 212 {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, 213 + {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, 214 214 }; 215 215 216 216 enum { ··· 512 510 "selected. Hence, no address to lookup the source line number.\n"); 513 511 return -EINVAL; 514 512 } 515 - if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && 513 + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) 514 + && !allow_user_set && 516 515 !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { 517 516 pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" 518 517 "Hint: run 'perf record -b ...'\n"); ··· 1165 1162 return ret; 1166 1163 } 1167 1164 1165 + static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused, 1166 + struct perf_insn *x, uint64_t ip, 1167 + u8 *inbuf, int inlen, int *lenp) 1168 + { 1169 + #ifdef HAVE_LIBCAPSTONE_SUPPORT 1170 + if (PRINT_FIELD(BRSTACKDISASM)) { 1171 + const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); 1172 + if (p) 1173 + return p; 1174 + } 1175 + #endif 1176 + return dump_insn(x, ip, inbuf, inlen, lenp); 1177 + } 1178 + 1168 1179 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, 1169 1180 struct perf_insn *x, u8 *inbuf, int len, 1170 1181 int insn, FILE *fp, int *total_cycles, ··· 1187 1170 { 1188 1171 int ilen = 0; 1189 1172 int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, 1190 - dump_insn(x, ip, inbuf, len, &ilen)); 1173 + any_dump_insn(attr, x, ip, inbuf, len, &ilen)); 1191 1174 1192 1175 if (PRINT_FIELD(BRSTACKINSNLEN)) 1193 1176 
printed += fprintf(fp, "ilen: %d\t", ilen); ··· 1279 1262 nr = max_blocks + 1; 1280 1263 1281 1264 x.thread = thread; 1265 + x.machine = machine; 1282 1266 x.cpu = sample->cpu; 1283 1267 1284 1268 printed += fprintf(fp, "%c", '\n'); ··· 1331 1313 } else { 1332 1314 ilen = 0; 1333 1315 printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, 1334 - dump_insn(&x, ip, buffer + off, len - off, &ilen)); 1316 + any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); 1335 1317 if (PRINT_FIELD(BRSTACKINSNLEN)) 1336 1318 printed += fprintf(fp, "\tilen: %d", ilen); 1337 1319 printed += fprintf(fp, "\n"); ··· 1379 1361 goto out; 1380 1362 ilen = 0; 1381 1363 printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, 1382 - dump_insn(&x, sample->ip, buffer, len, &ilen)); 1364 + any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); 1383 1365 if (PRINT_FIELD(BRSTACKINSNLEN)) 1384 1366 printed += fprintf(fp, "\tilen: %d", ilen); 1385 1367 printed += fprintf(fp, "\n"); ··· 1390 1372 for (off = 0; off <= end - start; off += ilen) { 1391 1373 ilen = 0; 1392 1374 printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, 1393 - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); 1375 + any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); 1394 1376 if (PRINT_FIELD(BRSTACKINSNLEN)) 1395 1377 printed += fprintf(fp, "\tilen: %d", ilen); 1396 1378 printed += fprintf(fp, "\n"); ··· 1552 1534 printed += fprintf(fp, "\t\t"); 1553 1535 printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); 1554 1536 } 1555 - if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) 1537 + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) 1556 1538 printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); 1557 1539 1558 1540 return printed; ··· 3958 3940 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," 3959 3941 "addr,symoff,srcline,period,iregs,uregs,brstack," 3960 3942 
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn," 3961 - "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," 3943 + "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," 3962 3944 "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," 3963 3945 "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", 3964 3946 parse_output_fields),
+1
tools/perf/util/dump-insn.h
··· 11 11 struct perf_insn { 12 12 /* Initialized by callers: */ 13 13 struct thread *thread; 14 + struct machine *machine; 14 15 u8 cpumode; 15 16 bool is64bit; 16 17 int cpu;
+52
tools/perf/util/print_insn.c
··· 12 12 #include "machine.h" 13 13 #include "thread.h" 14 14 #include "print_insn.h" 15 + #include "dump-insn.h" 15 16 #include "map.h" 16 17 #include "dso.h" 17 18 ··· 70 69 } 71 70 72 71 return 0; 72 + } 73 + 74 + static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) 75 + { 76 + struct addr_location al; 77 + bool printed = false; 78 + 79 + if (insn->detail && insn->detail->x86.op_count == 1) { 80 + cs_x86_op *op = &insn->detail->x86.operands[0]; 81 + 82 + addr_location__init(&al); 83 + if (op->type == X86_OP_IMM && 84 + thread__find_symbol(thread, x->cpumode, op->imm, &al) && 85 + al.sym && 86 + al.addr < al.sym->end) { 87 + snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, 88 + al.sym->name, al.addr - al.sym->start, op->imm); 89 + printed = true; 90 + } 91 + addr_location__exit(&al); 92 + } 93 + 94 + if (!printed) 95 + snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); 96 + } 97 + 98 + const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, 99 + u8 *inbuf, int inlen, int *lenp) 100 + { 101 + int ret; 102 + int count; 103 + cs_insn *insn; 104 + csh cs_handle; 105 + 106 + ret = capstone_init(x->machine, &cs_handle, x->is64bit); 107 + if (ret < 0) 108 + return NULL; 109 + 110 + count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); 111 + if (count > 0) { 112 + if (machine__normalized_is(x->machine, "x86")) 113 + dump_insn_x86(x->thread, &insn[0], x); 114 + else 115 + snprintf(x->out, sizeof(x->out), "%s %s", 116 + insn[0].mnemonic, insn[0].op_str); 117 + *lenp = insn->size; 118 + cs_free(insn, count); 119 + } else { 120 + return NULL; 121 + } 122 + return x->out; 73 123 } 74 124 75 125 static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
+3
tools/perf/util/print_insn.h
··· 8 8 struct perf_sample; 9 9 struct thread; 10 10 struct machine; 11 + struct perf_insn; 11 12 12 13 size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, 13 14 struct machine *machine, FILE *fp, struct addr_location *al); 14 15 size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); 16 + const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, 17 + u8 *inbuf, int inlen, int *lenp); 15 18 16 19 #endif /* PERF_PRINT_INSN_H */