Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

- Add BPF based off-CPU profiling

- Improvements for system-wide recording, especially for Intel PT

- Improve DWARF unwinding on arm64

- Support Arm CoreSight trace data disassembly in 'perf script' python

- Fix build with new libbpf versions, related to supporting older
versions of distro-released libbpf packages

- Fix event syntax error caused by ExtSel in the JSON events infra

- Use stdio interface if slang is not supported in 'perf c2c'

- Add 'perf test' checking for perf stat CSV output

- Sync the msr-index.h copy with the kernel sources

* tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (38 commits)
tools arch x86: Sync the msr-index.h copy with the kernel sources
perf scripts python: Support Arm CoreSight trace data disassembly
perf scripting python: Expose dso and map information
perf jevents: Fix event syntax error caused by ExtSel
perf tools arm64: Add support for VG register
perf unwind arm64: Decouple Libunwind register names from Perf
perf unwind: Use dynamic register set for DWARF unwind
perf tools arm64: Copy perf_regs.h from the kernel
perf unwind arm64: Use perf's copy of kernel headers
perf c2c: Use stdio interface if slang is not supported
perf test: Add a basic offcpu profiling test
perf record: Add cgroup support for off-cpu profiling
perf record: Handle argument change in sched_switch
perf record: Implement basic filtering for off-cpu
perf record: Enable off-cpu analysis with BPF
perf report: Do not extend sample type of bpf-output event
perf test: Add checking for perf stat CSV output.
perf tools: Allow system-wide events to keep their own threads
perf tools: Allow system-wide events to keep their own CPUs
libperf evsel: Add comments for booleans
...

+1594 -197
+6 -1
tools/arch/arm64/include/uapi/asm/perf_regs.h
··· 36 36 PERF_REG_ARM64_LR, 37 37 PERF_REG_ARM64_SP, 38 38 PERF_REG_ARM64_PC, 39 - PERF_REG_ARM64_MAX, 39 + 40 + /* Extended/pseudo registers */ 41 + PERF_REG_ARM64_VG = 46, // SVE Vector Granule 42 + 43 + PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1, 44 + PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1 40 45 }; 41 46 #endif /* _ASM_ARM64_PERF_REGS_H */
+19
tools/arch/x86/include/asm/msr-index.h
··· 76 76 77 77 /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ 78 78 #define MSR_IA32_CORE_CAPS 0x000000cf 79 + #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 80 + #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) 79 81 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 80 82 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) 81 83 ··· 155 153 156 154 #define MSR_IA32_POWER_CTL 0x000001fc 157 155 #define MSR_IA32_POWER_CTL_BIT_EE 19 156 + 157 + /* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ 158 + #define MSR_INTEGRITY_CAPS 0x000002d9 159 + #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 160 + #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) 158 161 159 162 #define MSR_LBR_NHM_FROM 0x00000680 160 163 #define MSR_LBR_NHM_TO 0x000006c0 ··· 319 312 320 313 /* Run Time Average Power Limiting (RAPL) Interface */ 321 314 315 + #define MSR_VR_CURRENT_CONFIG 0x00000601 322 316 #define MSR_RAPL_POWER_UNIT 0x00000606 323 317 324 318 #define MSR_PKG_POWER_LIMIT 0x00000610 ··· 510 502 #define MSR_AMD64_SEV 0xc0010131 511 503 #define MSR_AMD64_SEV_ENABLED_BIT 0 512 504 #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 505 + #define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 513 506 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) 514 507 #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) 508 + #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) 515 509 516 510 #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f 517 511 ··· 533 523 #define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) 534 524 #define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) 535 525 #define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) 526 + 527 + /* AMD Performance Counter Global Status and Control MSRs */ 528 + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 529 + #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 530 + #define 
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 536 531 537 532 /* Fam 17h MSRs */ 538 533 #define MSR_F17H_IRPERF 0xc00000e9 ··· 702 687 #define MSR_IA32_PERF_STATUS 0x00000198 703 688 #define MSR_IA32_PERF_CTL 0x00000199 704 689 #define INTEL_PERF_CTL_MASK 0xffff 690 + 691 + /* AMD Branch Sampling configuration */ 692 + #define MSR_AMD_DBG_EXTN_CFG 0xc000010f 693 + #define MSR_AMD_SAMP_BR_FROM 0xc0010300 705 694 706 695 #define MSR_IA32_MPERF 0x000000e7 707 696 #define MSR_IA32_APERF 0x000000e8
+4
tools/build/Makefile.feature
··· 99 99 clang \ 100 100 libbpf \ 101 101 libbpf-btf__load_from_kernel_by_id \ 102 + libbpf-bpf_prog_load \ 103 + libbpf-bpf_object__next_program \ 104 + libbpf-bpf_object__next_map \ 105 + libbpf-bpf_create_map \ 102 106 libpfm4 \ 103 107 libdebuginfod \ 104 108 clang-bpf-co-re
+20
tools/build/feature/Makefile
··· 58 58 test-bpf.bin \ 59 59 test-libbpf.bin \ 60 60 test-libbpf-btf__load_from_kernel_by_id.bin \ 61 + test-libbpf-bpf_prog_load.bin \ 62 + test-libbpf-bpf_map_create.bin \ 63 + test-libbpf-bpf_object__next_program.bin \ 64 + test-libbpf-bpf_object__next_map.bin \ 65 + test-libbpf-btf__raw_data.bin \ 61 66 test-get_cpuid.bin \ 62 67 test-sdt.bin \ 63 68 test-cxx.bin \ ··· 294 289 $(BUILD) -lbpf 295 290 296 291 $(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: 292 + $(BUILD) -lbpf 293 + 294 + $(OUTPUT)test-libbpf-bpf_prog_load.bin: 295 + $(BUILD) -lbpf 296 + 297 + $(OUTPUT)test-libbpf-bpf_map_create.bin: 298 + $(BUILD) -lbpf 299 + 300 + $(OUTPUT)test-libbpf-bpf_object__next_program.bin: 301 + $(BUILD) -lbpf 302 + 303 + $(OUTPUT)test-libbpf-bpf_object__next_map.bin: 304 + $(BUILD) -lbpf 305 + 306 + $(OUTPUT)test-libbpf-btf__raw_data.bin: 297 307 $(BUILD) -lbpf 298 308 299 309 $(OUTPUT)test-sdt.bin:
+8
tools/build/feature/test-libbpf-bpf_map_create.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <bpf/bpf.h> 3 + 4 + int main(void) 5 + { 6 + return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0, /* key_size */, 7 + 0 /* value_size */, 0 /* max_entries */, NULL /* opts */); 8 + }
+8
tools/build/feature/test-libbpf-bpf_object__next_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <bpf/libbpf.h> 3 + 4 + int main(void) 5 + { 6 + bpf_object__next_map(NULL /* obj */, NULL /* prev */); 7 + return 0; 8 + }
+8
tools/build/feature/test-libbpf-bpf_object__next_program.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <bpf/libbpf.h> 3 + 4 + int main(void) 5 + { 6 + bpf_object__next_program(NULL /* obj */, NULL /* prev */); 7 + return 0; 8 + }
+9
tools/build/feature/test-libbpf-bpf_prog_load.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <bpf/bpf.h> 3 + 4 + int main(void) 5 + { 6 + return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */, 7 + NULL /* license */, NULL /* insns */, 8 + 0 /* insn_cnt */, NULL /* opts */); 9 + }
+3 -2
tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - #include <bpf/libbpf.h> 2 + #include <bpf/btf.h> 3 3 4 4 int main(void) 5 5 { 6 - return btf__load_from_kernel_by_id(20151128, NULL); 6 + btf__load_from_kernel_by_id(20151128); 7 + return 0; 7 8 }
+8
tools/build/feature/test-libbpf-btf__raw_data.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <bpf/btf.h> 3 + 4 + int main(void) 5 + { 6 + btf__raw_data(NULL /* btf_ro */, NULL /* size */); 7 + return 0; 8 + }
+30 -41
tools/lib/perf/evlist.c
··· 23 23 #include <perf/cpumap.h> 24 24 #include <perf/threadmap.h> 25 25 #include <api/fd/array.h> 26 + #include "internal.h" 26 27 27 28 void perf_evlist__init(struct perf_evlist *evlist) 28 29 { ··· 40 39 * We already have cpus for evsel (via PMU sysfs) so 41 40 * keep it, if there's no target cpu list defined. 42 41 */ 43 - if (!evsel->own_cpus || evlist->has_user_cpus) { 44 - perf_cpu_map__put(evsel->cpus); 45 - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); 46 - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) { 42 + if (!evsel->own_cpus || 43 + (!evsel->system_wide && evlist->has_user_cpus) || 44 + (!evsel->system_wide && 45 + !evsel->requires_cpu && 46 + perf_cpu_map__empty(evlist->user_requested_cpus))) { 47 47 perf_cpu_map__put(evsel->cpus); 48 48 evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); 49 49 } else if (evsel->cpus != evsel->own_cpus) { ··· 52 50 evsel->cpus = perf_cpu_map__get(evsel->own_cpus); 53 51 } 54 52 55 - perf_thread_map__put(evsel->threads); 56 - evsel->threads = perf_thread_map__get(evlist->threads); 53 + if (!evsel->system_wide) { 54 + perf_thread_map__put(evsel->threads); 55 + evsel->threads = perf_thread_map__get(evlist->threads); 56 + } 57 + 57 58 evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); 58 59 } 59 60 ··· 303 298 304 299 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 305 300 { 306 - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); 301 + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); 307 302 int nr_threads = perf_thread_map__nr(evlist->threads); 308 303 int nfds = 0; 309 304 struct perf_evsel *evsel; ··· 433 428 static int 434 429 mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 435 430 int idx, struct perf_mmap_param *mp, int cpu_idx, 436 - int thread, int *_output, int *_output_overwrite) 431 + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) 437 432 { 438 - struct 
perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx); 433 + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); 439 434 struct perf_evsel *evsel; 440 435 int revent; 441 436 ··· 489 484 if (ops->mmap(map, mp, *output, evlist_cpu) < 0) 490 485 return -1; 491 486 487 + *nr_mmaps += 1; 488 + 492 489 if (!idx) 493 490 perf_evlist__set_mmap_first(evlist, map, overwrite); 494 491 } else { ··· 520 513 } 521 514 522 515 static int 523 - mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 524 - struct perf_mmap_param *mp) 525 - { 526 - int thread; 527 - int nr_threads = perf_thread_map__nr(evlist->threads); 528 - 529 - for (thread = 0; thread < nr_threads; thread++) { 530 - int output = -1; 531 - int output_overwrite = -1; 532 - 533 - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, 534 - &output, &output_overwrite)) 535 - goto out_unmap; 536 - } 537 - 538 - return 0; 539 - 540 - out_unmap: 541 - perf_evlist__munmap(evlist); 542 - return -1; 543 - } 544 - 545 - static int 546 516 mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 547 517 struct perf_mmap_param *mp) 548 518 { 549 519 int nr_threads = perf_thread_map__nr(evlist->threads); 550 - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); 520 + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); 521 + int nr_mmaps = 0; 551 522 int cpu, thread; 552 523 553 524 for (cpu = 0; cpu < nr_cpus; cpu++) { ··· 534 549 535 550 for (thread = 0; thread < nr_threads; thread++) { 536 551 if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, 537 - thread, &output, &output_overwrite)) 552 + thread, &output, &output_overwrite, &nr_mmaps)) 538 553 goto out_unmap; 539 554 } 540 555 } 556 + 557 + if (nr_mmaps != evlist->nr_mmaps) 558 + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); 541 559 542 560 return 0; 543 561 ··· 553 565 { 554 566 int nr_mmaps; 555 567 556 - nr_mmaps = 
perf_cpu_map__nr(evlist->user_requested_cpus); 557 - if (perf_cpu_map__empty(evlist->user_requested_cpus)) 558 - nr_mmaps = perf_thread_map__nr(evlist->threads); 568 + /* One for each CPU */ 569 + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); 570 + if (perf_cpu_map__empty(evlist->all_cpus)) { 571 + /* Plus one for each thread */ 572 + nr_mmaps += perf_thread_map__nr(evlist->threads); 573 + /* Minus the per-thread CPU (-1) */ 574 + nr_mmaps -= 1; 575 + } 559 576 560 577 return nr_mmaps; 561 578 } ··· 570 577 struct perf_mmap_param *mp) 571 578 { 572 579 struct perf_evsel *evsel; 573 - const struct perf_cpu_map *cpus = evlist->user_requested_cpus; 574 580 575 581 if (!ops || !ops->get || !ops->mmap) 576 582 return -EINVAL; ··· 587 595 588 596 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 589 597 return -ENOMEM; 590 - 591 - if (perf_cpu_map__empty(cpus)) 592 - return mmap_per_thread(evlist, ops, mp); 593 598 594 599 return mmap_per_cpu(evlist, ops, mp); 595 600 }
+11
tools/lib/perf/include/internal/evsel.h
··· 49 49 50 50 /* parse modifier helper */ 51 51 int nr_members; 52 + /* 53 + * system_wide is for events that need to be on every CPU, irrespective 54 + * of user requested CPUs or threads. Map propagation will set cpus to 55 + * this event's own_cpus, whereby they will contribute to evlist 56 + * all_cpus. 57 + */ 52 58 bool system_wide; 59 + /* 60 + * Some events, for example uncore events, require a CPU. 61 + * i.e. it cannot be the 'any CPU' value of -1. 62 + */ 63 + bool requires_cpu; 53 64 int idx; 54 65 }; 55 66
+10
tools/perf/Documentation/perf-record.txt
··· 758 758 If the URLs is not specified, the value of DEBUGINFOD_URLS 759 759 system environment variable is used. 760 760 761 + --off-cpu:: 762 + Enable off-cpu profiling with BPF. The BPF program will collect 763 + task scheduling information with (user) stacktrace and save them 764 + as sample data of a software event named "offcpu-time". The 765 + sample period will have the time the task slept in nanoseconds. 766 + 767 + Note that BPF can collect stack traces using frame pointer ("fp") 768 + only, as of now. So the applications built without the frame 769 + pointer might see bogus addresses. 770 + 761 771 SEE ALSO 762 772 -------- 763 773 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
+25
tools/perf/Makefile.config
··· 573 573 ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) 574 574 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID 575 575 endif 576 + $(call feature_check,libbpf-bpf_prog_load) 577 + ifeq ($(feature-libbpf-bpf_prog_load), 1) 578 + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD 579 + endif 580 + $(call feature_check,libbpf-bpf_object__next_program) 581 + ifeq ($(feature-libbpf-bpf_object__next_program), 1) 582 + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM 583 + endif 584 + $(call feature_check,libbpf-bpf_object__next_map) 585 + ifeq ($(feature-libbpf-bpf_object__next_map), 1) 586 + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP 587 + endif 588 + $(call feature_check,libbpf-btf__raw_data) 589 + ifeq ($(feature-libbpf-btf__raw_data), 1) 590 + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA 591 + endif 592 + $(call feature_check,libbpf-bpf_map_create) 593 + ifeq ($(feature-libbpf-bpf_map_create), 1) 594 + CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE 595 + endif 576 596 else 577 597 dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); 578 598 endif 579 599 else 580 600 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID 601 + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD 602 + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM 603 + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP 604 + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA 605 + CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE 581 606 endif 582 607 endif 583 608
+1
tools/perf/Makefile.perf
··· 1038 1038 SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h 1039 1039 SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h 1040 1040 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h 1041 + SKELETONS += $(SKEL_OUT)/off_cpu.skel.h 1041 1042 1042 1043 $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): 1043 1044 $(Q)$(MKDIR) -p $@
+38
tools/perf/arch/arm64/util/perf_regs.c
··· 2 2 #include <errno.h> 3 3 #include <regex.h> 4 4 #include <string.h> 5 + #include <sys/auxv.h> 5 6 #include <linux/kernel.h> 6 7 #include <linux/zalloc.h> 7 8 9 + #include "../../../perf-sys.h" 8 10 #include "../../../util/debug.h" 9 11 #include "../../../util/event.h" 10 12 #include "../../../util/perf_regs.h" 13 + 14 + #ifndef HWCAP_SVE 15 + #define HWCAP_SVE (1 << 22) 16 + #endif 11 17 12 18 const struct sample_reg sample_reg_masks[] = { 13 19 SMPL_REG(x0, PERF_REG_ARM64_X0), ··· 49 43 SMPL_REG(lr, PERF_REG_ARM64_LR), 50 44 SMPL_REG(sp, PERF_REG_ARM64_SP), 51 45 SMPL_REG(pc, PERF_REG_ARM64_PC), 46 + SMPL_REG(vg, PERF_REG_ARM64_VG), 52 47 SMPL_REG_END 53 48 }; 54 49 ··· 137 130 } 138 131 139 132 return SDT_ARG_VALID; 133 + } 134 + 135 + uint64_t arch__user_reg_mask(void) 136 + { 137 + struct perf_event_attr attr = { 138 + .type = PERF_TYPE_HARDWARE, 139 + .config = PERF_COUNT_HW_CPU_CYCLES, 140 + .sample_type = PERF_SAMPLE_REGS_USER, 141 + .disabled = 1, 142 + .exclude_kernel = 1, 143 + .sample_period = 1, 144 + .sample_regs_user = PERF_REGS_MASK 145 + }; 146 + int fd; 147 + 148 + if (getauxval(AT_HWCAP) & HWCAP_SVE) 149 + attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG); 150 + 151 + /* 152 + * Check if the pmu supports perf extended regs, before 153 + * returning the register mask to sample. 154 + */ 155 + if (attr.sample_regs_user != PERF_REGS_MASK) { 156 + event_attr_init(&attr); 157 + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); 158 + if (fd != -1) { 159 + close(fd); 160 + return attr.sample_regs_user; 161 + } 162 + } 163 + return PERF_REGS_MASK; 140 164 }
+2 -71
tools/perf/arch/arm64/util/unwind-libunwind.c
··· 10 10 11 11 int LIBUNWIND__ARCH_REG_ID(int regnum) 12 12 { 13 - switch (regnum) { 14 - case UNW_AARCH64_X0: 15 - return PERF_REG_ARM64_X0; 16 - case UNW_AARCH64_X1: 17 - return PERF_REG_ARM64_X1; 18 - case UNW_AARCH64_X2: 19 - return PERF_REG_ARM64_X2; 20 - case UNW_AARCH64_X3: 21 - return PERF_REG_ARM64_X3; 22 - case UNW_AARCH64_X4: 23 - return PERF_REG_ARM64_X4; 24 - case UNW_AARCH64_X5: 25 - return PERF_REG_ARM64_X5; 26 - case UNW_AARCH64_X6: 27 - return PERF_REG_ARM64_X6; 28 - case UNW_AARCH64_X7: 29 - return PERF_REG_ARM64_X7; 30 - case UNW_AARCH64_X8: 31 - return PERF_REG_ARM64_X8; 32 - case UNW_AARCH64_X9: 33 - return PERF_REG_ARM64_X9; 34 - case UNW_AARCH64_X10: 35 - return PERF_REG_ARM64_X10; 36 - case UNW_AARCH64_X11: 37 - return PERF_REG_ARM64_X11; 38 - case UNW_AARCH64_X12: 39 - return PERF_REG_ARM64_X12; 40 - case UNW_AARCH64_X13: 41 - return PERF_REG_ARM64_X13; 42 - case UNW_AARCH64_X14: 43 - return PERF_REG_ARM64_X14; 44 - case UNW_AARCH64_X15: 45 - return PERF_REG_ARM64_X15; 46 - case UNW_AARCH64_X16: 47 - return PERF_REG_ARM64_X16; 48 - case UNW_AARCH64_X17: 49 - return PERF_REG_ARM64_X17; 50 - case UNW_AARCH64_X18: 51 - return PERF_REG_ARM64_X18; 52 - case UNW_AARCH64_X19: 53 - return PERF_REG_ARM64_X19; 54 - case UNW_AARCH64_X20: 55 - return PERF_REG_ARM64_X20; 56 - case UNW_AARCH64_X21: 57 - return PERF_REG_ARM64_X21; 58 - case UNW_AARCH64_X22: 59 - return PERF_REG_ARM64_X22; 60 - case UNW_AARCH64_X23: 61 - return PERF_REG_ARM64_X23; 62 - case UNW_AARCH64_X24: 63 - return PERF_REG_ARM64_X24; 64 - case UNW_AARCH64_X25: 65 - return PERF_REG_ARM64_X25; 66 - case UNW_AARCH64_X26: 67 - return PERF_REG_ARM64_X26; 68 - case UNW_AARCH64_X27: 69 - return PERF_REG_ARM64_X27; 70 - case UNW_AARCH64_X28: 71 - return PERF_REG_ARM64_X28; 72 - case UNW_AARCH64_X29: 73 - return PERF_REG_ARM64_X29; 74 - case UNW_AARCH64_X30: 75 - return PERF_REG_ARM64_LR; 76 - case UNW_AARCH64_SP: 77 - return PERF_REG_ARM64_SP; 78 - case UNW_AARCH64_PC: 79 - return 
PERF_REG_ARM64_PC; 80 - default: 81 - pr_err("unwind: invalid reg id %d\n", regnum); 13 + if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX) 82 14 return -EINVAL; 83 - } 84 15 85 - return -EINVAL; 16 + return regnum; 86 17 }
+13 -18
tools/perf/arch/x86/util/intel-pt.c
··· 811 811 if (!cpu_wide && perf_can_record_cpu_wide()) { 812 812 struct evsel *switch_evsel; 813 813 814 - err = parse_events(evlist, "dummy:u", NULL); 815 - if (err) 816 - return err; 814 + switch_evsel = evlist__add_dummy_on_all_cpus(evlist); 815 + if (!switch_evsel) 816 + return -ENOMEM; 817 817 818 - switch_evsel = evlist__last(evlist); 819 - 820 - switch_evsel->core.attr.freq = 0; 821 - switch_evsel->core.attr.sample_period = 1; 822 818 switch_evsel->core.attr.context_switch = 1; 823 - 824 - switch_evsel->core.system_wide = true; 825 - switch_evsel->no_aux_samples = true; 826 819 switch_evsel->immediate = true; 827 820 828 821 evsel__set_sample_bit(switch_evsel, TID); ··· 864 871 865 872 /* Add dummy event to keep tracking */ 866 873 if (opts->full_auxtrace) { 874 + bool need_system_wide_tracking; 867 875 struct evsel *tracking_evsel; 868 876 869 - err = parse_events(evlist, "dummy:u", NULL); 870 - if (err) 871 - return err; 877 + /* 878 + * User space tasks can migrate between CPUs, so when tracing 879 + * selected CPUs, sideband for all CPUs is still needed. 880 + */ 881 + need_system_wide_tracking = evlist->core.has_user_cpus && 882 + !intel_pt_evsel->core.attr.exclude_user; 872 883 873 - tracking_evsel = evlist__last(evlist); 884 + tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); 885 + if (!tracking_evsel) 886 + return -ENOMEM; 874 887 875 888 evlist__set_tracking_event(evlist, tracking_evsel); 876 889 877 - tracking_evsel->core.attr.freq = 0; 878 - tracking_evsel->core.attr.sample_period = 1; 879 - 880 - tracking_evsel->no_aux_samples = true; 881 890 if (need_immediate) 882 891 tracking_evsel->immediate = true; 883 892
+4 -2
tools/perf/builtin-c2c.c
··· 2801 2801 "the input file to process"), 2802 2802 OPT_INCR('N', "node-info", &c2c.node_info, 2803 2803 "show extra node info in report (repeat for more info)"), 2804 - #ifdef HAVE_SLANG_SUPPORT 2805 2804 OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), 2806 - #endif 2807 2805 OPT_BOOLEAN(0, "stats", &c2c.stats_only, 2808 2806 "Display only statistic tables (implies --stdio)"), 2809 2807 OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, ··· 2830 2832 PARSE_OPT_STOP_AT_NON_OPTION); 2831 2833 if (argc) 2832 2834 usage_with_options(report_c2c_usage, options); 2835 + 2836 + #ifndef HAVE_SLANG_SUPPORT 2837 + c2c.use_stdio = true; 2838 + #endif 2833 2839 2834 2840 if (c2c.stats_only) 2835 2841 c2c.use_stdio = true;
+40 -24
tools/perf/builtin-record.c
··· 49 49 #include "util/clockid.h" 50 50 #include "util/pmu-hybrid.h" 51 51 #include "util/evlist-hybrid.h" 52 + #include "util/off_cpu.h" 52 53 #include "asm/bug.h" 53 54 #include "perf.h" 54 55 #include "cputopo.h" ··· 163 162 bool buildid_mmap; 164 163 bool timestamp_filename; 165 164 bool timestamp_boundary; 165 + bool off_cpu; 166 166 struct switch_output switch_output; 167 167 unsigned long long samples; 168 168 unsigned long output_max_size; /* = 0: unlimited */ ··· 871 869 static int record__config_text_poke(struct evlist *evlist) 872 870 { 873 871 struct evsel *evsel; 874 - int err; 875 872 876 873 /* Nothing to do if text poke is already configured */ 877 874 evlist__for_each_entry(evlist, evsel) { ··· 878 877 return 0; 879 878 } 880 879 881 - err = parse_events(evlist, "dummy:u", NULL); 882 - if (err) 883 - return err; 880 + evsel = evlist__add_dummy_on_all_cpus(evlist); 881 + if (!evsel) 882 + return -ENOMEM; 884 883 885 - evsel = evlist__last(evlist); 886 - 887 - evsel->core.attr.freq = 0; 888 - evsel->core.attr.sample_period = 1; 889 884 evsel->core.attr.text_poke = 1; 890 885 evsel->core.attr.ksymbol = 1; 891 - 892 - evsel->core.system_wide = true; 893 - evsel->no_aux_samples = true; 894 886 evsel->immediate = true; 895 - 896 - /* Text poke must be collected on all CPUs */ 897 - perf_cpu_map__put(evsel->core.own_cpus); 898 - evsel->core.own_cpus = perf_cpu_map__new(NULL); 899 - perf_cpu_map__put(evsel->core.cpus); 900 - evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); 901 - 902 887 evsel__set_sample_bit(evsel, TIME); 903 888 904 889 return 0; 890 + } 891 + 892 + static int record__config_off_cpu(struct record *rec) 893 + { 894 + return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 905 895 } 906 896 907 897 static bool record__kcore_readable(struct machine *machine) ··· 974 982 } 975 983 } 976 984 985 + static bool evlist__per_thread(struct evlist *evlist) 986 + { 987 + return 
cpu_map__is_dummy(evlist->core.user_requested_cpus); 988 + } 989 + 977 990 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 978 991 { 979 992 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 980 993 struct mmap *mmap = evlist->mmap; 981 994 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 982 - struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; 995 + struct perf_cpu_map *cpus = evlist->core.all_cpus; 996 + bool per_thread = evlist__per_thread(evlist); 983 997 984 - if (cpu_map__is_dummy(cpus)) 998 + if (per_thread) 985 999 thread_data->nr_mmaps = nr_mmaps; 986 1000 else 987 1001 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, ··· 1008 1010 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1009 1011 1010 1012 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1011 - if (cpu_map__is_dummy(cpus) || 1013 + if (per_thread || 1012 1014 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1013 1015 if (thread_data->maps) { 1014 1016 thread_data->maps[tm] = &mmap[m]; ··· 1883 1885 return err; 1884 1886 } 1885 1887 1886 - err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus, 1888 + err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 1887 1889 process_synthesized_event, NULL); 1888 1890 if (err < 0) { 1889 1891 pr_err("Couldn't synthesize cpu map.\n"); ··· 2597 2599 signr = WTERMSIG(exit_status); 2598 2600 } else 2599 2601 status = err; 2602 + 2603 + if (rec->off_cpu) 2604 + rec->bytes_written += off_cpu_write(rec->session); 2600 2605 2601 2606 record__synthesize(rec, true); 2602 2607 /* this will be recalculated during process_buildids() */ ··· 3325 3324 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3326 3325 "write collected trace data into several data files using parallel threads", 3327 3326 record__parse_threads), 3327 + OPT_BOOLEAN(0, "off-cpu", 
&record.off_cpu, "Enable off-cpu analysis"), 3328 3328 OPT_END() 3329 3329 }; 3330 3330 ··· 3685 3683 static int record__init_thread_masks(struct record *rec) 3686 3684 { 3687 3685 int ret = 0; 3688 - struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; 3686 + struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3689 3687 3690 3688 if (!record__threads_enabled(rec)) 3691 3689 return record__init_thread_default_masks(rec, cpus); 3692 3690 3693 - if (cpu_map__is_dummy(cpus)) { 3691 + if (evlist__per_thread(rec->evlist)) { 3694 3692 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3695 3693 return -EINVAL; 3696 3694 } ··· 3745 3743 set_nobuild('\0', "vmlinux", true); 3746 3744 # undef set_nobuild 3747 3745 # undef REASON 3746 + #endif 3747 + 3748 + #ifndef HAVE_BPF_SKEL 3749 + # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 3750 + set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 3751 + # undef set_nobuild 3748 3752 #endif 3749 3753 3750 3754 rec->opts.affinity = PERF_AFFINITY_SYS; ··· 3985 3977 err = record__config_text_poke(rec->evlist); 3986 3978 if (err) { 3987 3979 pr_err("record__config_text_poke failed, error %d\n", err); 3980 + goto out; 3981 + } 3982 + } 3983 + 3984 + if (rec->off_cpu) { 3985 + err = record__config_off_cpu(rec); 3986 + if (err) { 3987 + pr_err("record__config_off_cpu failed, error %d\n", err); 3988 3988 goto out; 3989 3989 } 3990 3990 }
+1 -4
tools/perf/builtin-stat.c
··· 382 382 if (!counter->supported) 383 383 return -ENOENT; 384 384 385 - if (counter->core.system_wide) 386 - nthreads = 1; 387 - 388 385 for (thread = 0; thread < nthreads; thread++) { 389 386 struct perf_counts_values *count; 390 387 ··· 2258 2261 struct evsel *counter; 2259 2262 2260 2263 evlist__for_each_entry(evsel_list, counter) { 2261 - if (!counter->core.system_wide && 2264 + if (!counter->core.requires_cpu && 2262 2265 strcmp(counter->name, "duration_time")) { 2263 2266 return; 2264 2267 }
+1 -1
tools/perf/pmu-events/jevents.c
··· 605 605 } else if (json_streq(map, field, "ExtSel")) { 606 606 char *code = NULL; 607 607 addfield(map, &code, "", "", val); 608 - eventcode |= strtoul(code, NULL, 0) << 21; 608 + eventcode |= strtoul(code, NULL, 0) << 8; 609 609 free(code); 610 610 } else if (json_streq(map, field, "EventName")) { 611 611 addfield(map, &je.name, "", "", val);
+272
tools/perf/scripts/python/arm-cs-trace-disasm.py
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember 3 + # 4 + # Author: Tor Jeremiassen <tor@ti.com> 5 + # Mathieu Poirier <mathieu.poirier@linaro.org> 6 + # Leo Yan <leo.yan@linaro.org> 7 + # Al Grant <Al.Grant@arm.com> 8 + 9 + from __future__ import print_function 10 + import os 11 + from os import path 12 + import sys 13 + import re 14 + from subprocess import * 15 + from optparse import OptionParser, make_option 16 + 17 + from perf_trace_context import perf_set_itrace_options, \ 18 + perf_sample_insn, perf_sample_srccode 19 + 20 + # Below are some example commands for using this script. 21 + # 22 + # Output disassembly with objdump: 23 + # perf script -s scripts/python/arm-cs-trace-disasm.py \ 24 + # -- -d objdump -k path/to/vmlinux 25 + # Output disassembly with llvm-objdump: 26 + # perf script -s scripts/python/arm-cs-trace-disasm.py \ 27 + # -- -d llvm-objdump-11 -k path/to/vmlinux 28 + # Output only source line and symbols: 29 + # perf script -s scripts/python/arm-cs-trace-disasm.py 30 + 31 + # Command line parsing. 
32 + option_list = [ 33 + # formatting options for the bottom entry of the stack 34 + make_option("-k", "--vmlinux", dest="vmlinux_name", 35 + help="Set path to vmlinux file"), 36 + make_option("-d", "--objdump", dest="objdump_name", 37 + help="Set path to objdump executable file"), 38 + make_option("-v", "--verbose", dest="verbose", 39 + action="store_true", default=False, 40 + help="Enable debugging log") 41 + ] 42 + 43 + parser = OptionParser(option_list=option_list) 44 + (options, args) = parser.parse_args() 45 + 46 + # Initialize global dicts and regular expression 47 + disasm_cache = dict() 48 + cpu_data = dict() 49 + disasm_re = re.compile("^\s*([0-9a-fA-F]+):") 50 + disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") 51 + cache_size = 64*1024 52 + 53 + glb_source_file_name = None 54 + glb_line_number = None 55 + glb_dso = None 56 + 57 + def get_optional(perf_dict, field): 58 + if field in perf_dict: 59 + return perf_dict[field] 60 + return "[unknown]" 61 + 62 + def get_offset(perf_dict, field): 63 + if field in perf_dict: 64 + return f"+0x{perf_dict[field]:x}" 65 + return "" 66 + 67 + def get_dso_file_path(dso_name, dso_build_id): 68 + if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): 69 + if (options.vmlinux_name): 70 + return options.vmlinux_name; 71 + else: 72 + return dso_name 73 + 74 + if (dso_name == "[vdso]") : 75 + append = "/vdso" 76 + else: 77 + append = "/elf" 78 + 79 + dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}" 80 + # Replace duplicate slash chars to single slash char 81 + dso_path = dso_path.replace('//', '/', 1) 82 + return dso_path 83 + 84 + def read_disam(dso_fname, dso_start, start_addr, stop_addr): 85 + addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + dso_fname 86 + 87 + # Don't let the cache get too big, clear it when it hits max size 88 + if (len(disasm_cache) > cache_size): 89 + disasm_cache.clear(); 90 + 91 + if addr_range in disasm_cache: 92 + disasm_output = 
disasm_cache[addr_range]; 93 + else: 94 + start_addr = start_addr - dso_start; 95 + stop_addr = stop_addr - dso_start; 96 + disasm = [ options.objdump_name, "-d", "-z", 97 + f"--start-address=0x{start_addr:x}", 98 + f"--stop-address=0x{stop_addr:x}" ] 99 + disasm += [ dso_fname ] 100 + disasm_output = check_output(disasm).decode('utf-8').split('\n') 101 + disasm_cache[addr_range] = disasm_output 102 + 103 + return disasm_output 104 + 105 + def print_disam(dso_fname, dso_start, start_addr, stop_addr): 106 + for line in read_disam(dso_fname, dso_start, start_addr, stop_addr): 107 + m = disasm_func_re.search(line) 108 + if m is None: 109 + m = disasm_re.search(line) 110 + if m is None: 111 + continue 112 + print(f"\t{line}") 113 + 114 + def print_sample(sample): 115 + print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \ 116 + f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \ 117 + f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}") 118 + 119 + def trace_begin(): 120 + print('ARM CoreSight Trace Data Assembler Dump') 121 + 122 + def trace_end(): 123 + print('End') 124 + 125 + def trace_unhandled(event_name, context, event_fields_dict): 126 + print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) 127 + 128 + def common_start_str(comm, sample): 129 + sec = int(sample["time"] / 1000000000) 130 + ns = sample["time"] % 1000000000 131 + cpu = sample["cpu"] 132 + pid = sample["pid"] 133 + tid = sample["tid"] 134 + return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} " 135 + 136 + # This code is copied from intel-pt-events.py for printing source code 137 + # line and symbols. 
138 + def print_srccode(comm, param_dict, sample, symbol, dso): 139 + ip = sample["ip"] 140 + if symbol == "[unknown]": 141 + start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40) 142 + else: 143 + offs = get_offset(param_dict, "symoff") 144 + start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40) 145 + 146 + global glb_source_file_name 147 + global glb_line_number 148 + global glb_dso 149 + 150 + source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context) 151 + if source_file_name: 152 + if glb_line_number == line_number and glb_source_file_name == source_file_name: 153 + src_str = "" 154 + else: 155 + if len(source_file_name) > 40: 156 + src_file = ("..." + source_file_name[-37:]) + " " 157 + else: 158 + src_file = source_file_name.ljust(41) 159 + 160 + if source_line is None: 161 + src_str = src_file + str(line_number).rjust(4) + " <source not found>" 162 + else: 163 + src_str = src_file + str(line_number).rjust(4) + " " + source_line 164 + glb_dso = None 165 + elif dso == glb_dso: 166 + src_str = "" 167 + else: 168 + src_str = dso 169 + glb_dso = dso 170 + 171 + glb_line_number = line_number 172 + glb_source_file_name = source_file_name 173 + 174 + print(f"{start_str}{src_str}") 175 + 176 + def process_event(param_dict): 177 + global cache_size 178 + global options 179 + 180 + sample = param_dict["sample"] 181 + comm = param_dict["comm"] 182 + 183 + name = param_dict["ev_name"] 184 + dso = get_optional(param_dict, "dso") 185 + dso_bid = get_optional(param_dict, "dso_bid") 186 + dso_start = get_optional(param_dict, "dso_map_start") 187 + dso_end = get_optional(param_dict, "dso_map_end") 188 + symbol = get_optional(param_dict, "symbol") 189 + 190 + if (options.verbose == True): 191 + print(f"Event type: {name}") 192 + print_sample(sample) 193 + 194 + # If cannot find dso so cannot dump assembler, bail out 195 + if (dso == '[unknown]'): 196 + return 197 + 198 + # Validate dso start and end addresses 
199 + if ((dso_start == '[unknown]') or (dso_end == '[unknown]')): 200 + print(f"Failed to find valid dso map for dso {dso}") 201 + return 202 + 203 + if (name[0:12] == "instructions"): 204 + print_srccode(comm, param_dict, sample, symbol, dso) 205 + return 206 + 207 + # Don't proceed if this event is not a branch sample, . 208 + if (name[0:8] != "branches"): 209 + return 210 + 211 + cpu = sample["cpu"] 212 + ip = sample["ip"] 213 + addr = sample["addr"] 214 + 215 + # Initialize CPU data if it's empty, and directly return back 216 + # if this is the first tracing event for this CPU. 217 + if (cpu_data.get(str(cpu) + 'addr') == None): 218 + cpu_data[str(cpu) + 'addr'] = addr 219 + return 220 + 221 + # The format for packet is: 222 + # 223 + # +------------+------------+------------+ 224 + # sample_prev: | addr | ip | cpu | 225 + # +------------+------------+------------+ 226 + # sample_next: | addr | ip | cpu | 227 + # +------------+------------+------------+ 228 + # 229 + # We need to combine the two continuous packets to get the instruction 230 + # range for sample_prev::cpu: 231 + # 232 + # [ sample_prev::addr .. sample_next::ip ] 233 + # 234 + # For this purose, sample_prev::addr is stored into cpu_data structure 235 + # and read back for 'start_addr' when the new packet comes, and we need 236 + # to use sample_next::ip to calculate 'stop_addr', plusing extra 4 for 237 + # 'stop_addr' is for the sake of objdump so the final assembler dump can 238 + # include last instruction for sample_next::ip. 
239 + start_addr = cpu_data[str(cpu) + 'addr'] 240 + stop_addr = ip + 4 241 + 242 + # Record for previous sample packet 243 + cpu_data[str(cpu) + 'addr'] = addr 244 + 245 + # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 246 + if (start_addr == 0 and stop_addr == 4): 247 + print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted") 248 + return 249 + 250 + if (start_addr < int(dso_start) or start_addr > int(dso_end)): 251 + print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") 252 + return 253 + 254 + if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): 255 + print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") 256 + return 257 + 258 + if (options.objdump_name != None): 259 + # It doesn't need to decrease virtual memory offset for disassembly 260 + # for kernel dso, so in this case we set vm_start to zero. 261 + if (dso == "[kernel.kallsyms]"): 262 + dso_vm_start = 0 263 + else: 264 + dso_vm_start = int(dso_start) 265 + 266 + dso_fname = get_dso_file_path(dso, dso_bid) 267 + if path.exists(dso_fname): 268 + print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) 269 + else: 270 + print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]") 271 + 272 + print_srccode(comm, param_dict, sample, symbol, dso)
+48
tools/perf/tests/shell/lib/perf_csv_output_lint.py
#!/usr/bin/python
# SPDX-License-Identifier: GPL-2.0

import argparse
import sys

# Basic sanity check of perf CSV output as specified in the man page.
# Currently just checks the number of fields per line in output.

ap = argparse.ArgumentParser()
# Each mode flag selects the expected field count checked below.
for flag in ('--no-args', '--interval', '--system-wide-no-aggr',
             '--system-wide', '--event', '--per-core', '--per-thread',
             '--per-die', '--per-node', '--per-socket'):
  ap.add_argument(flag, action='store_true')
ap.add_argument('--separator', default=',', nargs='?')
args = ap.parse_args()

Lines = sys.stdin.readlines()

def check_csv_output(exp):
  # Every non-"failed" line must contain exactly `exp` separators.
  for line in Lines:
    if 'failed' in line:
      continue
    if line.count(args.separator) != exp:
      sys.stdout.write(''.join(Lines))
      raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')

try:
  if args.no_args or args.system_wide or args.event:
    expected_items = 6
  elif args.interval or args.per_thread or args.system_wide_no_aggr:
    expected_items = 7
  elif args.per_core or args.per_socket or args.per_node or args.per_die:
    expected_items = 8
  else:
    # No mode flag given: show usage and bail out.
    ap.print_help()
    raise RuntimeError('No checking option specified')
  check_csv_output(expected_items)

except:
  sys.stdout.write('Test failed for input: ' + ''.join(Lines))
  raise
+60
tools/perf/tests/shell/record_offcpu.sh
#!/bin/sh
# perf record offcpu profiling tests
# SPDX-License-Identifier: GPL-2.0

set -e

err=0
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)

cleanup() {
  rm -f ${perfdata}
  rm -f ${perfdata}.old
  # Use upper-case signal names: lower-case names are a bashism that
  # other POSIX shells (e.g. dash as /bin/sh) may reject.
  trap - EXIT TERM INT
}

trap_cleanup() {
  cleanup
  exit 1
}
trap trap_cleanup EXIT TERM INT

test_offcpu() {
  echo "Basic off-cpu test"
  if [ `id -u` != 0 ]
  then
    echo "Basic off-cpu test [Skipped permission]"
    err=2
    return
  fi
  if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL
  then
    echo "Basic off-cpu test [Skipped missing BPF support]"
    err=2
    return
  fi
  if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null
  then
    echo "Basic off-cpu test [Failed record]"
    err=1
    return
  fi
  if ! perf evlist -i ${perfdata} | grep -q "offcpu-time"
  then
    echo "Basic off-cpu test [Failed record]"
    err=1
    return
  fi
  # egrep is deprecated; use grep -E instead.
  if ! perf report -i ${perfdata} -q --percent-limit=90 | grep -E -q sleep
  then
    echo "Basic off-cpu test [Failed missing output]"
    err=1
    return
  fi
  echo "Basic off-cpu test [Success]"
}

test_offcpu

cleanup
exit $err
+147
tools/perf/tests/shell/stat+csv_output.sh
#!/bin/bash
# perf stat CSV output linter
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Tests various perf stat CSV output commands for the
# correct number of fields and the CSV separator set to ','.

set -e

pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
# Prefer the caller-provided $PYTHON; otherwise fall back to python3/python.
if [ -z "$PYTHON" ]
then
	if which python3 > /dev/null
	then
		PYTHON=python3
	elif which python > /dev/null
	then
		PYTHON=python
	else
		echo Skipping test, python not detected please set environment variable PYTHON.
		exit 2
	fi
fi

# Return true if perf_event_paranoid is > $1 and not running as root.
function ParanoidAndNotRoot()
{
	[ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
}

check_no_args()
{
	echo -n "Checking CSV output: no args "
	perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
	echo "[Success]"
}

check_system_wide()
{
	echo -n "Checking CSV output: system wide "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
	echo "[Success]"
}

check_system_wide_no_aggr()
{
	# Print the (single, correct) header before any early return so the
	# "[Skip]"/"[Success]" suffix always follows the right label.
	echo -n "Checking CSV output: system wide no aggregation "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
	echo "[Success]"
}

check_interval()
{
	echo -n "Checking CSV output: interval "
	perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
	echo "[Success]"
}


check_event()
{
	echo -n "Checking CSV output: event "
	perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
	echo "[Success]"
}

check_per_core()
{
	echo -n "Checking CSV output: per core "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
	echo "[Success]"
}

check_per_thread()
{
	echo -n "Checking CSV output: per thread "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
	echo "[Success]"
}

check_per_die()
{
	echo -n "Checking CSV output: per die "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
	echo "[Success]"
}

check_per_node()
{
	echo -n "Checking CSV output: per node "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
	echo "[Success]"
}

check_per_socket()
{
	echo -n "Checking CSV output: per socket "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoid and not root"
		return
	fi
	perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
	echo "[Success]"
}

check_no_args
check_system_wide
check_system_wide_no_aggr
check_interval
check_event
check_per_core
check_per_thread
check_per_die
check_per_node
check_per_socket
exit 0
+71
tools/perf/tests/shell/test_intel_pt.sh
#!/bin/sh
# Miscellaneous Intel PT testing
# SPDX-License-Identifier: GPL-2.0

set -e

# Skip if no Intel PT
perf list | grep -q 'intel_pt//' || exit 2

skip_cnt=0
ok_cnt=0
err_cnt=0

tmpfile=`mktemp`
perfdatafile=`mktemp`

can_cpu_wide()
{
	# Suppress all output: redirect stdout first, then dup stderr onto it.
	# The previous order (2>&1 >/dev/null) left stderr on the terminal.
	perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true >/dev/null 2>&1 || return 2
	return 0
}

test_system_wide_side_band()
{
	# Need CPU 0 and CPU 1
	can_cpu_wide 0 || return $?
	can_cpu_wide 1 || return $?

	# Record on CPU 0 a task running on CPU 1
	perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname

	# Should get MMAP events from CPU 1 because they can be needed to decode
	mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l`

	if [ ${mmap_cnt} -gt 0 ] ; then
		return 0
	fi

	echo "Failed to record MMAP events on CPU 1 when tracing CPU 0"
	return 1
}

count_result()
{
	# 2 = skip, 0 = pass, anything else = fail
	if [ $1 -eq 2 ] ; then
		skip_cnt=`expr ${skip_cnt} \+ 1`
		return
	fi
	if [ $1 -eq 0 ] ; then
		ok_cnt=`expr ${ok_cnt} \+ 1`
		return
	fi
	err_cnt=`expr ${err_cnt} \+ 1`
}

test_system_wide_side_band

count_result $?

rm -f ${tmpfile}
rm -f ${perfdatafile}

if [ ${err_cnt} -gt 0 ] ; then
	exit 1
fi

if [ ${ok_cnt} -gt 0 ] ; then
	exit 0
fi

exit 2
+1
tools/perf/util/Build
··· 147 147 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o 148 148 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o 149 149 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o 150 + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o 150 151 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o 151 152 perf-$(CONFIG_LIBELF) += symbol-elf.o 152 153 perf-$(CONFIG_LIBELF) += probe-file.o
+11 -4
tools/perf/util/auxtrace.c
··· 125 125 mm->tid = mp->tid; 126 126 mm->cpu = mp->cpu.cpu; 127 127 128 - if (!mp->len) { 128 + if (!mp->len || !mp->mmap_needed) { 129 129 mm->base = NULL; 130 130 return 0; 131 131 } ··· 168 168 } 169 169 170 170 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, 171 - struct evlist *evlist, int idx, 172 - bool per_cpu) 171 + struct evlist *evlist, 172 + struct evsel *evsel, int idx) 173 173 { 174 + bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); 175 + 176 + mp->mmap_needed = evsel->needs_auxtrace_mmap; 177 + 178 + if (!mp->mmap_needed) 179 + return; 180 + 174 181 mp->idx = idx; 175 182 176 183 if (per_cpu) { 177 - mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx); 184 + mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); 178 185 if (evlist->core.threads) 179 186 mp->tid = perf_thread_map__pid(evlist->core.threads, 0); 180 187 else
+9 -4
tools/perf/util/auxtrace.h
··· 344 344 * @idx: index of this mmap 345 345 * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu 346 346 * mmap) otherwise %0 347 + * @mmap_needed: set to %false for non-auxtrace events. This is needed because 348 + * auxtrace mmapping is done in the same code path as non-auxtrace 349 + * mmapping but not every evsel that needs non-auxtrace mmapping 350 + * also needs auxtrace mmapping. 347 351 * @cpu: cpu number for a per-cpu mmap otherwise %-1 348 352 */ 349 353 struct auxtrace_mmap_params { ··· 357 353 int prot; 358 354 int idx; 359 355 pid_t tid; 356 + bool mmap_needed; 360 357 struct perf_cpu cpu; 361 358 }; 362 359 ··· 495 490 unsigned int auxtrace_pages, 496 491 bool auxtrace_overwrite); 497 492 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, 498 - struct evlist *evlist, int idx, 499 - bool per_cpu); 493 + struct evlist *evlist, 494 + struct evsel *evsel, int idx); 500 495 501 496 typedef int (*process_auxtrace_t)(struct perf_tool *tool, 502 497 struct mmap *map, ··· 868 863 unsigned int auxtrace_pages, 869 864 bool auxtrace_overwrite); 870 865 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, 871 - struct evlist *evlist, int idx, 872 - bool per_cpu); 866 + struct evlist *evlist, 867 + struct evsel *evsel, int idx); 873 868 874 869 #define ITRACE_HELP "" 875 870
+16 -8
tools/perf/util/bpf-event.c
··· 35 35 } 36 36 #endif 37 37 38 - int __weak bpf_prog_load(enum bpf_prog_type prog_type, 39 - const char *prog_name __maybe_unused, 40 - const char *license, 41 - const struct bpf_insn *insns, size_t insn_cnt, 42 - const struct bpf_prog_load_opts *opts) 38 + #ifndef HAVE_LIBBPF_BPF_PROG_LOAD 39 + int bpf_prog_load(enum bpf_prog_type prog_type, 40 + const char *prog_name __maybe_unused, 41 + const char *license, 42 + const struct bpf_insn *insns, size_t insn_cnt, 43 + const struct bpf_prog_load_opts *opts) 43 44 { 44 45 #pragma GCC diagnostic push 45 46 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" ··· 48 47 opts->kern_version, opts->log_buf, opts->log_size); 49 48 #pragma GCC diagnostic pop 50 49 } 50 + #endif 51 51 52 - struct bpf_program * __weak 52 + #ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM 53 + struct bpf_program * 53 54 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 54 55 { 55 56 #pragma GCC diagnostic push ··· 59 56 return bpf_program__next(prev, obj); 60 57 #pragma GCC diagnostic pop 61 58 } 59 + #endif 62 60 63 - struct bpf_map * __weak 61 + #ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP 62 + struct bpf_map * 64 63 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 65 64 { 66 65 #pragma GCC diagnostic push ··· 70 65 return bpf_map__next(prev, obj); 71 66 #pragma GCC diagnostic pop 72 67 } 68 + #endif 73 69 74 - const void * __weak 70 + #ifndef HAVE_LIBBPF_BTF__RAW_DATA 71 + const void * 75 72 btf__raw_data(const struct btf *btf_ro, __u32 *size) 76 73 { 77 74 #pragma GCC diagnostic push ··· 81 74 return btf__get_raw_data(btf_ro, size); 82 75 #pragma GCC diagnostic pop 83 76 } 77 + #endif 84 78 85 79 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) 86 80 {
+5 -1
tools/perf/util/bpf_counter.c
··· 312 312 (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); 313 313 } 314 314 315 - int __weak 315 + #ifndef HAVE_LIBBPF_BPF_MAP_CREATE 316 + LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, 317 + int value_size, int max_entries, __u32 map_flags); 318 + int 316 319 bpf_map_create(enum bpf_map_type map_type, 317 320 const char *map_name __maybe_unused, 318 321 __u32 key_size, ··· 328 325 return bpf_create_map(map_type, key_size, value_size, max_entries, 0); 329 326 #pragma GCC diagnostic pop 330 327 } 328 + #endif 331 329 332 330 static int bperf_lock_attr_map(struct target *target) 333 331 {
+338
tools/perf/util/bpf_off_cpu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "util/bpf_counter.h" 3 + #include "util/debug.h" 4 + #include "util/evsel.h" 5 + #include "util/evlist.h" 6 + #include "util/off_cpu.h" 7 + #include "util/perf-hooks.h" 8 + #include "util/record.h" 9 + #include "util/session.h" 10 + #include "util/target.h" 11 + #include "util/cpumap.h" 12 + #include "util/thread_map.h" 13 + #include "util/cgroup.h" 14 + #include <bpf/bpf.h> 15 + 16 + #include "bpf_skel/off_cpu.skel.h" 17 + 18 + #define MAX_STACKS 32 19 + /* we don't need actual timestamp, just want to put the samples at last */ 20 + #define OFF_CPU_TIMESTAMP (~0ull << 32) 21 + 22 + static struct off_cpu_bpf *skel; 23 + 24 + struct off_cpu_key { 25 + u32 pid; 26 + u32 tgid; 27 + u32 stack_id; 28 + u32 state; 29 + u64 cgroup_id; 30 + }; 31 + 32 + union off_cpu_data { 33 + struct perf_event_header hdr; 34 + u64 array[1024 / sizeof(u64)]; 35 + }; 36 + 37 + static int off_cpu_config(struct evlist *evlist) 38 + { 39 + struct evsel *evsel; 40 + struct perf_event_attr attr = { 41 + .type = PERF_TYPE_SOFTWARE, 42 + .config = PERF_COUNT_SW_BPF_OUTPUT, 43 + .size = sizeof(attr), /* to capture ABI version */ 44 + }; 45 + char *evname = strdup(OFFCPU_EVENT); 46 + 47 + if (evname == NULL) 48 + return -ENOMEM; 49 + 50 + evsel = evsel__new(&attr); 51 + if (!evsel) { 52 + free(evname); 53 + return -ENOMEM; 54 + } 55 + 56 + evsel->core.attr.freq = 1; 57 + evsel->core.attr.sample_period = 1; 58 + /* off-cpu analysis depends on stack trace */ 59 + evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; 60 + 61 + evlist__add(evlist, evsel); 62 + 63 + free(evsel->name); 64 + evsel->name = evname; 65 + 66 + return 0; 67 + } 68 + 69 + static void off_cpu_start(void *arg) 70 + { 71 + struct evlist *evlist = arg; 72 + 73 + /* update task filter for the given workload */ 74 + if (!skel->bss->has_cpu && !skel->bss->has_task && 75 + perf_thread_map__pid(evlist->core.threads, 0) != -1) { 76 + int fd; 77 + u32 pid; 78 + u8 val = 1; 79 + 80 
+ skel->bss->has_task = 1; 81 + fd = bpf_map__fd(skel->maps.task_filter); 82 + pid = perf_thread_map__pid(evlist->core.threads, 0); 83 + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 84 + } 85 + 86 + skel->bss->enabled = 1; 87 + } 88 + 89 + static void off_cpu_finish(void *arg __maybe_unused) 90 + { 91 + skel->bss->enabled = 0; 92 + off_cpu_bpf__destroy(skel); 93 + } 94 + 95 + /* v5.18 kernel added prev_state arg, so it needs to check the signature */ 96 + static void check_sched_switch_args(void) 97 + { 98 + const struct btf *btf = bpf_object__btf(skel->obj); 99 + const struct btf_type *t1, *t2, *t3; 100 + u32 type_id; 101 + 102 + type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch", 103 + BTF_KIND_TYPEDEF); 104 + if ((s32)type_id < 0) 105 + return; 106 + 107 + t1 = btf__type_by_id(btf, type_id); 108 + if (t1 == NULL) 109 + return; 110 + 111 + t2 = btf__type_by_id(btf, t1->type); 112 + if (t2 == NULL || !btf_is_ptr(t2)) 113 + return; 114 + 115 + t3 = btf__type_by_id(btf, t2->type); 116 + if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) { 117 + /* new format: pass prev_state as 4th arg */ 118 + skel->rodata->has_prev_state = true; 119 + } 120 + } 121 + 122 + int off_cpu_prepare(struct evlist *evlist, struct target *target, 123 + struct record_opts *opts) 124 + { 125 + int err, fd, i; 126 + int ncpus = 1, ntasks = 1, ncgrps = 1; 127 + 128 + if (off_cpu_config(evlist) < 0) { 129 + pr_err("Failed to config off-cpu BPF event\n"); 130 + return -1; 131 + } 132 + 133 + skel = off_cpu_bpf__open(); 134 + if (!skel) { 135 + pr_err("Failed to open off-cpu BPF skeleton\n"); 136 + return -1; 137 + } 138 + 139 + /* don't need to set cpu filter for system-wide mode */ 140 + if (target->cpu_list) { 141 + ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); 142 + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); 143 + } 144 + 145 + if (target__has_task(target)) { 146 + ntasks = perf_thread_map__nr(evlist->core.threads); 147 + 
bpf_map__set_max_entries(skel->maps.task_filter, ntasks); 148 + } 149 + 150 + if (evlist__first(evlist)->cgrp) { 151 + ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */ 152 + bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); 153 + 154 + if (!cgroup_is_v2("perf_event")) 155 + skel->rodata->uses_cgroup_v1 = true; 156 + } 157 + 158 + if (opts->record_cgroup) { 159 + skel->rodata->needs_cgroup = true; 160 + 161 + if (!cgroup_is_v2("perf_event")) 162 + skel->rodata->uses_cgroup_v1 = true; 163 + } 164 + 165 + set_max_rlimit(); 166 + check_sched_switch_args(); 167 + 168 + err = off_cpu_bpf__load(skel); 169 + if (err) { 170 + pr_err("Failed to load off-cpu skeleton\n"); 171 + goto out; 172 + } 173 + 174 + if (target->cpu_list) { 175 + u32 cpu; 176 + u8 val = 1; 177 + 178 + skel->bss->has_cpu = 1; 179 + fd = bpf_map__fd(skel->maps.cpu_filter); 180 + 181 + for (i = 0; i < ncpus; i++) { 182 + cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu; 183 + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); 184 + } 185 + } 186 + 187 + if (target__has_task(target)) { 188 + u32 pid; 189 + u8 val = 1; 190 + 191 + skel->bss->has_task = 1; 192 + fd = bpf_map__fd(skel->maps.task_filter); 193 + 194 + for (i = 0; i < ntasks; i++) { 195 + pid = perf_thread_map__pid(evlist->core.threads, i); 196 + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); 197 + } 198 + } 199 + 200 + if (evlist__first(evlist)->cgrp) { 201 + struct evsel *evsel; 202 + u8 val = 1; 203 + 204 + skel->bss->has_cgroup = 1; 205 + fd = bpf_map__fd(skel->maps.cgroup_filter); 206 + 207 + evlist__for_each_entry(evlist, evsel) { 208 + struct cgroup *cgrp = evsel->cgrp; 209 + 210 + if (cgrp == NULL) 211 + continue; 212 + 213 + if (!cgrp->id && read_cgroup_id(cgrp) < 0) { 214 + pr_err("Failed to read cgroup id of %s\n", 215 + cgrp->name); 216 + goto out; 217 + } 218 + 219 + bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY); 220 + } 221 + } 222 + 223 + err = off_cpu_bpf__attach(skel); 224 + if (err) { 225 + 
pr_err("Failed to attach off-cpu BPF skeleton\n"); 226 + goto out; 227 + } 228 + 229 + if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) || 230 + perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) { 231 + pr_err("Failed to attach off-cpu skeleton\n"); 232 + goto out; 233 + } 234 + 235 + return 0; 236 + 237 + out: 238 + off_cpu_bpf__destroy(skel); 239 + return -1; 240 + } 241 + 242 + int off_cpu_write(struct perf_session *session) 243 + { 244 + int bytes = 0, size; 245 + int fd, stack; 246 + u64 sample_type, val, sid = 0; 247 + struct evsel *evsel; 248 + struct perf_data_file *file = &session->data->file; 249 + struct off_cpu_key prev, key; 250 + union off_cpu_data data = { 251 + .hdr = { 252 + .type = PERF_RECORD_SAMPLE, 253 + .misc = PERF_RECORD_MISC_USER, 254 + }, 255 + }; 256 + u64 tstamp = OFF_CPU_TIMESTAMP; 257 + 258 + skel->bss->enabled = 0; 259 + 260 + evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT); 261 + if (evsel == NULL) { 262 + pr_err("%s evsel not found\n", OFFCPU_EVENT); 263 + return 0; 264 + } 265 + 266 + sample_type = evsel->core.attr.sample_type; 267 + 268 + if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) { 269 + if (evsel->core.id) 270 + sid = evsel->core.id[0]; 271 + } 272 + 273 + fd = bpf_map__fd(skel->maps.off_cpu); 274 + stack = bpf_map__fd(skel->maps.stacks); 275 + memset(&prev, 0, sizeof(prev)); 276 + 277 + while (!bpf_map_get_next_key(fd, &prev, &key)) { 278 + int n = 1; /* start from perf_event_header */ 279 + int ip_pos = -1; 280 + 281 + bpf_map_lookup_elem(fd, &key, &val); 282 + 283 + if (sample_type & PERF_SAMPLE_IDENTIFIER) 284 + data.array[n++] = sid; 285 + if (sample_type & PERF_SAMPLE_IP) { 286 + ip_pos = n; 287 + data.array[n++] = 0; /* will be updated */ 288 + } 289 + if (sample_type & PERF_SAMPLE_TID) 290 + data.array[n++] = (u64)key.pid << 32 | key.tgid; 291 + if (sample_type & PERF_SAMPLE_TIME) 292 + data.array[n++] = tstamp; 293 + if (sample_type & PERF_SAMPLE_ID) 294 + 
data.array[n++] = sid; 295 + if (sample_type & PERF_SAMPLE_CPU) 296 + data.array[n++] = 0; 297 + if (sample_type & PERF_SAMPLE_PERIOD) 298 + data.array[n++] = val; 299 + if (sample_type & PERF_SAMPLE_CALLCHAIN) { 300 + int len = 0; 301 + 302 + /* data.array[n] is callchain->nr (updated later) */ 303 + data.array[n + 1] = PERF_CONTEXT_USER; 304 + data.array[n + 2] = 0; 305 + 306 + bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); 307 + while (data.array[n + 2 + len]) 308 + len++; 309 + 310 + /* update length of callchain */ 311 + data.array[n] = len + 1; 312 + 313 + /* update sample ip with the first callchain entry */ 314 + if (ip_pos >= 0) 315 + data.array[ip_pos] = data.array[n + 2]; 316 + 317 + /* calculate sample callchain data array length */ 318 + n += len + 2; 319 + } 320 + if (sample_type & PERF_SAMPLE_CGROUP) 321 + data.array[n++] = key.cgroup_id; 322 + /* TODO: handle more sample types */ 323 + 324 + size = n * sizeof(u64); 325 + data.hdr.size = size; 326 + bytes += size; 327 + 328 + if (perf_data_file__write(file, &data, size) < 0) { 329 + pr_err("failed to write perf data, error: %m\n"); 330 + return bytes; 331 + } 332 + 333 + prev = key; 334 + /* increase dummy timestamp to sort later samples */ 335 + tstamp++; 336 + } 337 + return bytes; 338 + }
+229
tools/perf/util/bpf_skel/off_cpu.bpf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2022 Google 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + #include <bpf/bpf_core_read.h> 7 + 8 + /* task->flags for off-cpu analysis */ 9 + #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 10 + 11 + /* task->state for off-cpu analysis */ 12 + #define TASK_INTERRUPTIBLE 0x0001 13 + #define TASK_UNINTERRUPTIBLE 0x0002 14 + 15 + #define MAX_STACKS 32 16 + #define MAX_ENTRIES 102400 17 + 18 + struct tstamp_data { 19 + __u32 stack_id; 20 + __u32 state; 21 + __u64 timestamp; 22 + }; 23 + 24 + struct offcpu_key { 25 + __u32 pid; 26 + __u32 tgid; 27 + __u32 stack_id; 28 + __u32 state; 29 + __u64 cgroup_id; 30 + }; 31 + 32 + struct { 33 + __uint(type, BPF_MAP_TYPE_STACK_TRACE); 34 + __uint(key_size, sizeof(__u32)); 35 + __uint(value_size, MAX_STACKS * sizeof(__u64)); 36 + __uint(max_entries, MAX_ENTRIES); 37 + } stacks SEC(".maps"); 38 + 39 + struct { 40 + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 41 + __uint(map_flags, BPF_F_NO_PREALLOC); 42 + __type(key, int); 43 + __type(value, struct tstamp_data); 44 + } tstamp SEC(".maps"); 45 + 46 + struct { 47 + __uint(type, BPF_MAP_TYPE_HASH); 48 + __uint(key_size, sizeof(struct offcpu_key)); 49 + __uint(value_size, sizeof(__u64)); 50 + __uint(max_entries, MAX_ENTRIES); 51 + } off_cpu SEC(".maps"); 52 + 53 + struct { 54 + __uint(type, BPF_MAP_TYPE_HASH); 55 + __uint(key_size, sizeof(__u32)); 56 + __uint(value_size, sizeof(__u8)); 57 + __uint(max_entries, 1); 58 + } cpu_filter SEC(".maps"); 59 + 60 + struct { 61 + __uint(type, BPF_MAP_TYPE_HASH); 62 + __uint(key_size, sizeof(__u32)); 63 + __uint(value_size, sizeof(__u8)); 64 + __uint(max_entries, 1); 65 + } task_filter SEC(".maps"); 66 + 67 + struct { 68 + __uint(type, BPF_MAP_TYPE_HASH); 69 + __uint(key_size, sizeof(__u64)); 70 + __uint(value_size, sizeof(__u8)); 71 + __uint(max_entries, 1); 72 + } cgroup_filter SEC(".maps"); 73 + 74 + /* 
old kernel task_struct definition */ 75 + struct task_struct___old { 76 + long state; 77 + } __attribute__((preserve_access_index)); 78 + 79 + int enabled = 0; 80 + int has_cpu = 0; 81 + int has_task = 0; 82 + int has_cgroup = 0; 83 + 84 + const volatile bool has_prev_state = false; 85 + const volatile bool needs_cgroup = false; 86 + const volatile bool uses_cgroup_v1 = false; 87 + 88 + /* 89 + * Old kernel used to call it task_struct->state and now it's '__state'. 90 + * Use BPF CO-RE "ignored suffix rule" to deal with it like below: 91 + * 92 + * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes 93 + */ 94 + static inline int get_task_state(struct task_struct *t) 95 + { 96 + if (bpf_core_field_exists(t->__state)) 97 + return BPF_CORE_READ(t, __state); 98 + 99 + /* recast pointer to capture task_struct___old type for compiler */ 100 + struct task_struct___old *t_old = (void *)t; 101 + 102 + /* now use old "state" name of the field */ 103 + return BPF_CORE_READ(t_old, state); 104 + } 105 + 106 + static inline __u64 get_cgroup_id(struct task_struct *t) 107 + { 108 + struct cgroup *cgrp; 109 + 110 + if (uses_cgroup_v1) 111 + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); 112 + else 113 + cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); 114 + 115 + return BPF_CORE_READ(cgrp, kn, id); 116 + } 117 + 118 + static inline int can_record(struct task_struct *t, int state) 119 + { 120 + /* kernel threads don't have user stack */ 121 + if (t->flags & PF_KTHREAD) 122 + return 0; 123 + 124 + if (state != TASK_INTERRUPTIBLE && 125 + state != TASK_UNINTERRUPTIBLE) 126 + return 0; 127 + 128 + if (has_cpu) { 129 + __u32 cpu = bpf_get_smp_processor_id(); 130 + __u8 *ok; 131 + 132 + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); 133 + if (!ok) 134 + return 0; 135 + } 136 + 137 + if (has_task) { 138 + __u8 *ok; 139 + __u32 pid = t->pid; 140 + 141 + ok = bpf_map_lookup_elem(&task_filter, &pid); 142 + if (!ok) 143 + return 
0; 144 + } 145 + 146 + if (has_cgroup) { 147 + __u8 *ok; 148 + __u64 cgrp_id = get_cgroup_id(t); 149 + 150 + ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id); 151 + if (!ok) 152 + return 0; 153 + } 154 + 155 + return 1; 156 + } 157 + 158 + static int off_cpu_stat(u64 *ctx, struct task_struct *prev, 159 + struct task_struct *next, int state) 160 + { 161 + __u64 ts; 162 + __u32 stack_id; 163 + struct tstamp_data *pelem; 164 + 165 + ts = bpf_ktime_get_ns(); 166 + 167 + if (!can_record(prev, state)) 168 + goto next; 169 + 170 + stack_id = bpf_get_stackid(ctx, &stacks, 171 + BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK); 172 + 173 + pelem = bpf_task_storage_get(&tstamp, prev, NULL, 174 + BPF_LOCAL_STORAGE_GET_F_CREATE); 175 + if (!pelem) 176 + goto next; 177 + 178 + pelem->timestamp = ts; 179 + pelem->state = state; 180 + pelem->stack_id = stack_id; 181 + 182 + next: 183 + pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); 184 + 185 + if (pelem && pelem->timestamp) { 186 + struct offcpu_key key = { 187 + .pid = next->pid, 188 + .tgid = next->tgid, 189 + .stack_id = pelem->stack_id, 190 + .state = pelem->state, 191 + .cgroup_id = needs_cgroup ? 
get_cgroup_id(next) : 0, 192 + }; 193 + __u64 delta = ts - pelem->timestamp; 194 + __u64 *total; 195 + 196 + total = bpf_map_lookup_elem(&off_cpu, &key); 197 + if (total) 198 + *total += delta; 199 + else 200 + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); 201 + 202 + /* prevent to reuse the timestamp later */ 203 + pelem->timestamp = 0; 204 + } 205 + 206 + return 0; 207 + } 208 + 209 + SEC("tp_btf/sched_switch") 210 + int on_switch(u64 *ctx) 211 + { 212 + struct task_struct *prev, *next; 213 + int prev_state; 214 + 215 + if (!enabled) 216 + return 0; 217 + 218 + prev = (struct task_struct *)ctx[1]; 219 + next = (struct task_struct *)ctx[2]; 220 + 221 + if (has_prev_state) 222 + prev_state = (int)ctx[3]; 223 + else 224 + prev_state = get_task_state(prev); 225 + 226 + return off_cpu_stat(ctx, prev, next, prev_state); 227 + } 228 + 229 + char LICENSE[] SEC("license") = "Dual BSD/GPL";
+56 -5
tools/perf/util/evlist.c
··· 242 242 return 0; 243 243 } 244 244 245 - int evlist__add_dummy(struct evlist *evlist) 245 + static struct evsel *evlist__dummy_event(struct evlist *evlist) 246 246 { 247 247 struct perf_event_attr attr = { 248 248 .type = PERF_TYPE_SOFTWARE, 249 249 .config = PERF_COUNT_SW_DUMMY, 250 250 .size = sizeof(attr), /* to capture ABI version */ 251 251 }; 252 - struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); 252 + 253 + return evsel__new_idx(&attr, evlist->core.nr_entries); 254 + } 255 + 256 + int evlist__add_dummy(struct evlist *evlist) 257 + { 258 + struct evsel *evsel = evlist__dummy_event(evlist); 253 259 254 260 if (evsel == NULL) 255 261 return -ENOMEM; 256 262 257 263 evlist__add(evlist, evsel); 258 264 return 0; 265 + } 266 + 267 + static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) 268 + { 269 + evsel->core.system_wide = true; 270 + 271 + /* 272 + * All CPUs. 273 + * 274 + * Note perf_event_open() does not accept CPUs that are not online, so 275 + * in fact this CPU list will include only all online CPUs. 
276 + */ 277 + perf_cpu_map__put(evsel->core.own_cpus); 278 + evsel->core.own_cpus = perf_cpu_map__new(NULL); 279 + perf_cpu_map__put(evsel->core.cpus); 280 + evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); 281 + 282 + /* No threads */ 283 + perf_thread_map__put(evsel->core.threads); 284 + evsel->core.threads = perf_thread_map__new_dummy(); 285 + 286 + evlist__add(evlist, evsel); 287 + } 288 + 289 + struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) 290 + { 291 + struct evsel *evsel = evlist__dummy_event(evlist); 292 + 293 + if (!evsel) 294 + return NULL; 295 + 296 + evsel->core.attr.exclude_kernel = 1; 297 + evsel->core.attr.exclude_guest = 1; 298 + evsel->core.attr.exclude_hv = 1; 299 + evsel->core.attr.freq = 0; 300 + evsel->core.attr.sample_period = 1; 301 + evsel->no_aux_samples = true; 302 + evsel->name = strdup("dummy:u"); 303 + 304 + if (system_wide) 305 + evlist__add_on_all_cpus(evlist, evsel); 306 + else 307 + evlist__add(evlist, evsel); 308 + 309 + return evsel; 259 310 } 260 311 261 312 static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) ··· 798 747 799 748 static void 800 749 perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, 801 - struct perf_evsel *_evsel __maybe_unused, 750 + struct perf_evsel *_evsel, 802 751 struct perf_mmap_param *_mp, 803 752 int idx) 804 753 { 805 754 struct evlist *evlist = container_of(_evlist, struct evlist, core); 806 755 struct mmap_params *mp = container_of(_mp, struct mmap_params, core); 807 - bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus); 756 + struct evsel *evsel = container_of(_evsel, struct evsel, core); 808 757 809 - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); 758 + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx); 810 759 } 811 760 812 761 static struct perf_mmap*
+5
tools/perf/util/evlist.h
··· 114 114 struct evsel *arch_evlist__leader(struct list_head *list); 115 115 116 116 int evlist__add_dummy(struct evlist *evlist); 117 + struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); 118 + static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) 119 + { 120 + return evlist__add_aux_dummy(evlist, true); 121 + } 117 122 118 123 int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, 119 124 evsel__sb_cb_t cb, void *data);
+4 -3
tools/perf/util/evsel.c
··· 296 296 return NULL; 297 297 evsel__init(evsel, attr, idx); 298 298 299 - if (evsel__is_bpf_output(evsel)) { 300 - evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | 299 + if (evsel__is_bpf_output(evsel) && !attr->sample_type) { 300 + evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | 301 301 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), 302 302 evsel->core.attr.sample_period = 1; 303 303 } ··· 409 409 evsel->core.threads = perf_thread_map__get(orig->core.threads); 410 410 evsel->core.nr_members = orig->core.nr_members; 411 411 evsel->core.system_wide = orig->core.system_wide; 412 + evsel->core.requires_cpu = orig->core.requires_cpu; 412 413 413 414 if (orig->name) { 414 415 evsel->name = strdup(orig->name); ··· 897 896 "specifying a subset with --user-regs may render DWARF unwinding unreliable, " 898 897 "so the minimal registers set (IP, SP) is explicitly forced.\n"); 899 898 } else { 900 - attr->sample_regs_user |= PERF_REGS_MASK; 899 + attr->sample_regs_user |= arch__user_reg_mask(); 901 900 } 902 901 attr->sample_stack_user = param->dump_size; 903 902 attr->exclude_callchain_user = 1;
+1 -1
tools/perf/util/libunwind/arm64.c
··· 24 24 #include "unwind.h" 25 25 #include "libunwind-aarch64.h" 26 26 #define perf_event_arm_regs perf_event_arm64_regs 27 - #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> 27 + #include <../../../arch/arm64/include/uapi/asm/perf_regs.h> 28 28 #undef perf_event_arm_regs 29 29 #include "../../arch/arm64/util/unwind-libunwind.c" 30 30
+2 -2
tools/perf/util/mmap.c
··· 62 62 63 63 void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, 64 64 struct evlist *evlist __maybe_unused, 65 - int idx __maybe_unused, 66 - bool per_cpu __maybe_unused) 65 + struct evsel *evsel __maybe_unused, 66 + int idx __maybe_unused) 67 67 { 68 68 } 69 69
+29
tools/perf/util/off_cpu.h
··· 1 + #ifndef PERF_UTIL_OFF_CPU_H 2 + #define PERF_UTIL_OFF_CPU_H 3 + 4 + struct evlist; 5 + struct target; 6 + struct perf_session; 7 + struct record_opts; 8 + 9 + #define OFFCPU_EVENT "offcpu-time" 10 + 11 + #ifdef HAVE_BPF_SKEL 12 + int off_cpu_prepare(struct evlist *evlist, struct target *target, 13 + struct record_opts *opts); 14 + int off_cpu_write(struct perf_session *session); 15 + #else 16 + static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, 17 + struct target *target __maybe_unused, 18 + struct record_opts *opts __maybe_unused) 19 + { 20 + return -1; 21 + } 22 + 23 + static inline int off_cpu_write(struct perf_session *session __maybe_unused) 24 + { 25 + return -1; 26 + } 27 + #endif 28 + 29 + #endif /* PERF_UTIL_OFF_CPU_H */
+1 -1
tools/perf/util/parse-events.c
··· 365 365 (*idx)++; 366 366 evsel->core.cpus = cpus; 367 367 evsel->core.own_cpus = perf_cpu_map__get(cpus); 368 - evsel->core.system_wide = pmu ? pmu->is_uncore : false; 368 + evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; 369 369 evsel->auto_merge_stats = auto_merge_stats; 370 370 371 371 if (name)
+2
tools/perf/util/perf_regs.c
··· 103 103 return "lr"; 104 104 case PERF_REG_ARM64_PC: 105 105 return "pc"; 106 + case PERF_REG_ARM64_VG: 107 + return "vg"; 106 108 default: 107 109 return NULL; 108 110 }
+1
tools/perf/util/python-ext-sources
··· 38 38 util/affinity.c 39 39 util/rwsem.c 40 40 util/hashmap.c 41 + util/perf_regs.c 41 42 util/pmu-hybrid.c 42 43 util/fncache.c
+17 -4
tools/perf/util/scripting-engines/trace-event-python.c
··· 755 755 } 756 756 757 757 static void set_sym_in_dict(PyObject *dict, struct addr_location *al, 758 - const char *dso_field, const char *sym_field, 759 - const char *symoff_field) 758 + const char *dso_field, const char *dso_bid_field, 759 + const char *dso_map_start, const char *dso_map_end, 760 + const char *sym_field, const char *symoff_field) 760 761 { 762 + char sbuild_id[SBUILD_ID_SIZE]; 763 + 761 764 if (al->map) { 762 765 pydict_set_item_string_decref(dict, dso_field, 763 766 _PyUnicode_FromString(al->map->dso->name)); 767 + build_id__sprintf(&al->map->dso->bid, sbuild_id); 768 + pydict_set_item_string_decref(dict, dso_bid_field, 769 + _PyUnicode_FromString(sbuild_id)); 770 + pydict_set_item_string_decref(dict, dso_map_start, 771 + PyLong_FromUnsignedLong(al->map->start)); 772 + pydict_set_item_string_decref(dict, dso_map_end, 773 + PyLong_FromUnsignedLong(al->map->end)); 764 774 } 765 775 if (al->sym) { 766 776 pydict_set_item_string_decref(dict, sym_field, ··· 850 840 (const char *)sample->raw_data, sample->raw_size)); 851 841 pydict_set_item_string_decref(dict, "comm", 852 842 _PyUnicode_FromString(thread__comm_str(al->thread))); 853 - set_sym_in_dict(dict, al, "dso", "symbol", "symoff"); 843 + set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", 844 + "symbol", "symoff"); 854 845 855 846 pydict_set_item_string_decref(dict, "callchain", callchain); 856 847 ··· 867 856 if (addr_al) { 868 857 pydict_set_item_string_decref(dict_sample, "addr_correlates_sym", 869 858 PyBool_FromLong(1)); 870 - set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff"); 859 + set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", 860 + "addr_dso_map_start", "addr_dso_map_end", 861 + "addr_symbol", "addr_symoff"); 871 862 } 872 863 873 864 if (sample->flags)