Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf scripts python: Support Arm CoreSight trace data disassembly

This commit adds a Python script to parse CoreSight tracing events and
print out source lines and disassembly; it generates a readable program
execution flow that is easier for humans to inspect.

The script receives CoreSight tracing packet with below format:

+------------+------------+------------+
packet(n): | addr | ip | cpu |
+------------+------------+------------+
packet(n+1): | addr | ip | cpu |
+------------+------------+------------+

packet::addr represents the start address of the coming branch sample, and
packet::ip is the last address of the branch sample. Therefore, a code
section between branches starts from packet(n)::addr and stops at
packet(n+1)::ip. As a result, we combine the two consecutive packets to
generate the address range for instructions:

[ sample(n)::addr .. sample(n+1)::ip ]

The script supports both objdump and llvm-objdump for disassembly, selected
by specifying option '-d'. If option '-d' is not specified, the script
simply outputs source lines and symbols.

Below shows usages with llvm-objdump or objdump to output disassembly.

# perf script -s scripts/python/arm-cs-trace-disasm.py -- -d llvm-objdump-11 -k ./vmlinux
ARM CoreSight Trace Data Assembler Dump
ffff800008eb3198 <etm4_enable_hw>:
ffff800008eb3310: c0 38 00 35 cbnz w0, 0xffff800008eb3a28 <etm4_enable_hw+0x890>
ffff800008eb3314: 9f 3f 03 d5 dsb sy
ffff800008eb3318: df 3f 03 d5 isb
ffff800008eb331c: f5 5b 42 a9 ldp x21, x22, [sp, #32]
ffff800008eb3320: fb 73 45 a9 ldp x27, x28, [sp, #80]
ffff800008eb3324: e0 82 40 39 ldrb w0, [x23, #32]
ffff800008eb3328: 60 00 00 34 cbz w0, 0xffff800008eb3334 <etm4_enable_hw+0x19c>
ffff800008eb332c: e0 03 19 aa mov x0, x25
ffff800008eb3330: 8c fe ff 97 bl 0xffff800008eb2d60 <etm4_cs_lock.isra.0.part.0>
main 6728/6728 [0004] 0.000000000 etm4_enable_hw+0x198 [kernel.kallsyms]
ffff800008eb2d60 <etm4_cs_lock.isra.0.part.0>:
ffff800008eb2d60: 1f 20 03 d5 nop
ffff800008eb2d64: 1f 20 03 d5 nop
ffff800008eb2d68: 3f 23 03 d5 hint #25
ffff800008eb2d6c: 00 00 40 f9 ldr x0, [x0]
ffff800008eb2d70: 9f 3f 03 d5 dsb sy
ffff800008eb2d74: 00 c0 3e 91 add x0, x0, #4016
ffff800008eb2d78: 1f 00 00 b9 str wzr, [x0]
ffff800008eb2d7c: bf 23 03 d5 hint #29
ffff800008eb2d80: c0 03 5f d6 ret
main 6728/6728 [0004] 0.000000000 etm4_cs_lock.isra.0.part.0+0x20

# perf script -s scripts/python/arm-cs-trace-disasm.py -- -d objdump -k ./vmlinux
ARM CoreSight Trace Data Assembler Dump
ffff800008eb3310 <etm4_enable_hw+0x178>:
ffff800008eb3310: 350038c0 cbnz w0, ffff800008eb3a28 <etm4_enable_hw+0x890>
ffff800008eb3314: d5033f9f dsb sy
ffff800008eb3318: d5033fdf isb
ffff800008eb331c: a9425bf5 ldp x21, x22, [sp, #32]
ffff800008eb3320: a94573fb ldp x27, x28, [sp, #80]
ffff800008eb3324: 394082e0 ldrb w0, [x23, #32]
ffff800008eb3328: 34000060 cbz w0, ffff800008eb3334 <etm4_enable_hw+0x19c>
ffff800008eb332c: aa1903e0 mov x0, x25
ffff800008eb3330: 97fffe8c bl ffff800008eb2d60 <etm4_cs_lock.isra.0.part.0>
main 6728/6728 [0004] 0.000000000 etm4_enable_hw+0x198 [kernel.kallsyms]
ffff800008eb2d60 <etm4_cs_lock.isra.0.part.0>:
ffff800008eb2d60: d503201f nop
ffff800008eb2d64: d503201f nop
ffff800008eb2d68: d503233f paciasp
ffff800008eb2d6c: f9400000 ldr x0, [x0]
ffff800008eb2d70: d5033f9f dsb sy
ffff800008eb2d74: 913ec000 add x0, x0, #0xfb0
ffff800008eb2d78: b900001f str wzr, [x0]
ffff800008eb2d7c: d50323bf autiasp
ffff800008eb2d80: d65f03c0 ret
main 6728/6728 [0004] 0.000000000 etm4_cs_lock.isra.0.part.0+0x20

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Co-authored-by: Al Grant <al.grant@arm.com>
Co-authored-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Co-authored-by: Tor Jeremiassen <tor@ti.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Eelco Chaudron <echaudro@redhat.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Brennan <stephen.s.brennan@oracle.com>
Cc: Tanmay Jagdale <tanmay@marvell.com>
Cc: coresight@lists.linaro.org
Cc: zengshun . wu <zengshun.wu@outlook.com>
Link: https://lore.kernel.org/r/20220521130446.4163597-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

+272
+272
tools/perf/scripts/python/arm-cs-trace-disasm.py
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember 3 + # 4 + # Author: Tor Jeremiassen <tor@ti.com> 5 + # Mathieu Poirier <mathieu.poirier@linaro.org> 6 + # Leo Yan <leo.yan@linaro.org> 7 + # Al Grant <Al.Grant@arm.com> 8 + 9 + from __future__ import print_function 10 + import os 11 + from os import path 12 + import sys 13 + import re 14 + from subprocess import * 15 + from optparse import OptionParser, make_option 16 + 17 + from perf_trace_context import perf_set_itrace_options, \ 18 + perf_sample_insn, perf_sample_srccode 19 + 20 + # Below are some example commands for using this script. 21 + # 22 + # Output disassembly with objdump: 23 + # perf script -s scripts/python/arm-cs-trace-disasm.py \ 24 + # -- -d objdump -k path/to/vmlinux 25 + # Output disassembly with llvm-objdump: 26 + # perf script -s scripts/python/arm-cs-trace-disasm.py \ 27 + # -- -d llvm-objdump-11 -k path/to/vmlinux 28 + # Output only source line and symbols: 29 + # perf script -s scripts/python/arm-cs-trace-disasm.py 30 + 31 + # Command line parsing. 
32 + option_list = [ 33 + # formatting options for the bottom entry of the stack 34 + make_option("-k", "--vmlinux", dest="vmlinux_name", 35 + help="Set path to vmlinux file"), 36 + make_option("-d", "--objdump", dest="objdump_name", 37 + help="Set path to objdump executable file"), 38 + make_option("-v", "--verbose", dest="verbose", 39 + action="store_true", default=False, 40 + help="Enable debugging log") 41 + ] 42 + 43 + parser = OptionParser(option_list=option_list) 44 + (options, args) = parser.parse_args() 45 + 46 + # Initialize global dicts and regular expression 47 + disasm_cache = dict() 48 + cpu_data = dict() 49 + disasm_re = re.compile("^\s*([0-9a-fA-F]+):") 50 + disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") 51 + cache_size = 64*1024 52 + 53 + glb_source_file_name = None 54 + glb_line_number = None 55 + glb_dso = None 56 + 57 + def get_optional(perf_dict, field): 58 + if field in perf_dict: 59 + return perf_dict[field] 60 + return "[unknown]" 61 + 62 + def get_offset(perf_dict, field): 63 + if field in perf_dict: 64 + return f"+0x{perf_dict[field]:x}" 65 + return "" 66 + 67 + def get_dso_file_path(dso_name, dso_build_id): 68 + if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): 69 + if (options.vmlinux_name): 70 + return options.vmlinux_name; 71 + else: 72 + return dso_name 73 + 74 + if (dso_name == "[vdso]") : 75 + append = "/vdso" 76 + else: 77 + append = "/elf" 78 + 79 + dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}" 80 + # Replace duplicate slash chars to single slash char 81 + dso_path = dso_path.replace('//', '/', 1) 82 + return dso_path 83 + 84 + def read_disam(dso_fname, dso_start, start_addr, stop_addr): 85 + addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + dso_fname 86 + 87 + # Don't let the cache get too big, clear it when it hits max size 88 + if (len(disasm_cache) > cache_size): 89 + disasm_cache.clear(); 90 + 91 + if addr_range in disasm_cache: 92 + disasm_output = 
disasm_cache[addr_range]; 93 + else: 94 + start_addr = start_addr - dso_start; 95 + stop_addr = stop_addr - dso_start; 96 + disasm = [ options.objdump_name, "-d", "-z", 97 + f"--start-address=0x{start_addr:x}", 98 + f"--stop-address=0x{stop_addr:x}" ] 99 + disasm += [ dso_fname ] 100 + disasm_output = check_output(disasm).decode('utf-8').split('\n') 101 + disasm_cache[addr_range] = disasm_output 102 + 103 + return disasm_output 104 + 105 + def print_disam(dso_fname, dso_start, start_addr, stop_addr): 106 + for line in read_disam(dso_fname, dso_start, start_addr, stop_addr): 107 + m = disasm_func_re.search(line) 108 + if m is None: 109 + m = disasm_re.search(line) 110 + if m is None: 111 + continue 112 + print(f"\t{line}") 113 + 114 + def print_sample(sample): 115 + print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \ 116 + f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \ 117 + f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}") 118 + 119 + def trace_begin(): 120 + print('ARM CoreSight Trace Data Assembler Dump') 121 + 122 + def trace_end(): 123 + print('End') 124 + 125 + def trace_unhandled(event_name, context, event_fields_dict): 126 + print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) 127 + 128 + def common_start_str(comm, sample): 129 + sec = int(sample["time"] / 1000000000) 130 + ns = sample["time"] % 1000000000 131 + cpu = sample["cpu"] 132 + pid = sample["pid"] 133 + tid = sample["tid"] 134 + return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} " 135 + 136 + # This code is copied from intel-pt-events.py for printing source code 137 + # line and symbols. 
def print_srccode(comm, param_dict, sample, symbol, dso):
    """Print one line with the sample prefix, symbol and source location.

    The line starts with the common prefix followed by "symbol+offset", or
    by the raw ip in hex when the symbol is "[unknown]". It is followed by
    the source file, line number and source text when perf can resolve
    them, otherwise by the dso name. A source location or dso identical to
    the one printed by the previous call is not repeated.
    """
    global glb_source_file_name
    global glb_line_number
    global glb_dso

    ip = sample["ip"]
    if symbol == "[unknown]":
        start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40)
    else:
        offs = get_offset(param_dict, "symoff")
        start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40)

    # NOTE(review): perf_script_context is not defined in this file;
    # presumably it is provided by perf's Python scripting engine - confirm.
    source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context)
    if source_file_name:
        if glb_line_number == line_number and glb_source_file_name == source_file_name:
            # Same location as the previous line, don't print it again.
            src_str = ""
        else:
            # Keep the file name column 41 characters wide, truncating long
            # paths from the left.
            if len(source_file_name) > 40:
                src_file = ("..." + source_file_name[-37:]) + " "
            else:
                src_file = source_file_name.ljust(41)

            if source_line is None:
                src_str = src_file + str(line_number).rjust(4) + " <source not found>"
            else:
                src_str = src_file + str(line_number).rjust(4) + " " + source_line
        glb_dso = None
    elif dso == glb_dso:
        src_str = ""
    else:
        src_str = dso
        glb_dso = dso

    glb_line_number = line_number
    glb_source_file_name = source_file_name

    print(f"{start_str}{src_str}")


def process_event(param_dict):
    """Handle one perf event: print disassembly and/or source information.

    "instructions*" events only get a source/symbol line. For "branches*"
    events the 'addr' of the previous branch sample on the same CPU and the
    'ip' of the current one delimit the executed instruction range; that
    range is disassembled when -d/--objdump was given, and a source/symbol
    line is printed afterwards. Any other event is ignored.
    """
    sample = param_dict["sample"]
    comm = param_dict["comm"]

    name = param_dict["ev_name"]
    dso = get_optional(param_dict, "dso")
    dso_bid = get_optional(param_dict, "dso_bid")
    dso_start = get_optional(param_dict, "dso_map_start")
    dso_end = get_optional(param_dict, "dso_map_end")
    symbol = get_optional(param_dict, "symbol")

    if options.verbose:
        print(f"Event type: {name}")
        print_sample(sample)

    # If cannot find dso so cannot dump assembler, bail out
    if dso == '[unknown]':
        return

    # Validate dso start and end addresses
    if dso_start == '[unknown]' or dso_end == '[unknown]':
        print(f"Failed to find valid dso map for dso {dso}")
        return

    if name.startswith("instructions"):
        print_srccode(comm, param_dict, sample, symbol, dso)
        return

    # Don't proceed if this event is not a branch sample.
    if not name.startswith("branches"):
        return

    cpu = sample["cpu"]
    ip = sample["ip"]
    addr = sample["addr"]
    cpu_key = str(cpu) + 'addr'

    # Initialize CPU data if it's empty, and directly return back
    # if this is the first tracing event for this CPU.
    if cpu_data.get(cpu_key) is None:
        cpu_data[cpu_key] = addr
        return

    # The format for packet is:
    #
    #                 +------------+------------+------------+
    #  sample_prev:   |    addr    |    ip      |    cpu     |
    #                 +------------+------------+------------+
    #  sample_next:   |    addr    |    ip      |    cpu     |
    #                 +------------+------------+------------+
    #
    # We need to combine the two consecutive packets to get the instruction
    # range for sample_prev::cpu:
    #
    #     [ sample_prev::addr .. sample_next::ip ]
    #
    # For this purpose, sample_prev::addr is stored in cpu_data and read
    # back as 'start_addr' when the new packet arrives; sample_next::ip is
    # used to calculate 'stop_addr'. The extra 4 added to 'stop_addr' is for
    # the sake of objdump, so that the final assembler dump includes the
    # last instruction at sample_next::ip.
    start_addr = cpu_data[cpu_key]
    stop_addr = ip + 4

    # Record for previous sample packet
    cpu_data[cpu_key] = addr

    # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
    if start_addr == 0 and stop_addr == 4:
        print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
        return

    if start_addr < int(dso_start) or start_addr > int(dso_end):
        print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
        return

    if stop_addr < int(dso_start) or stop_addr > int(dso_end):
        print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
        return

    if options.objdump_name is not None:
        if start_addr >= stop_addr:
            # The two samples do not form a forward range (e.g. the trace
            # is discontinuous). objdump rejects a stop address that is not
            # after the start address, which would abort the whole script,
            # so skip the disassembly for this pair.
            print(f"Skip disassembly for non-contiguous range [ 0x{start_addr:x} .. 0x{stop_addr:x} ] for dso {dso}")
        else:
            # It doesn't need to decrease virtual memory offset for
            # disassembly for kernel dso, so in this case we set vm_start
            # to zero.
            if dso == "[kernel.kallsyms]":
                dso_vm_start = 0
            else:
                dso_vm_start = int(dso_start)

            dso_fname = get_dso_file_path(dso, dso_bid)
            if path.exists(dso_fname):
                print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
            else:
                print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")

    print_srccode(comm, param_dict, sample, symbol, dso)