Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools updates from Arnaldo Carvalho de Melo:
"Add Namhyung Kim as tools/perf/ co-maintainer, we're taking turns
processing patches, switching roles from perf-tools to perf-tools-next
at each Linux release.

Data profiling:

- Associate samples that identify loads and stores with data
structures. This uses events available on Intel, AMD and others and
DWARF info:

# To get memory access samples in kernel for 1 second (on Intel)
$ perf mem record -a -K --ldlat=4 -- sleep 1

# Similar for the AMD (but it requires 6.3+ kernel for BPF filters)
$ perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000' -- sleep 1

Then, amongst several modes of post processing, one can do things like:

$ perf report -s type,typeoff --hierarchy --group --stdio
...
#
# Samples: 10K of events 'cpu/mem-loads,ldlat=4/P, cpu/mem-stores/P, dummy:u'
# Event count (approx.): 602758064
#
# Overhead Data Type / Data Type Offset
# ........................... ............................
#
26.09% 3.28% 0.00% long unsigned int
26.09% 3.28% 0.00% long unsigned int +0 (no field)
18.48% 0.73% 0.00% struct page
10.83% 0.02% 0.00% struct page +8 (lru.next)
3.90% 0.28% 0.00% struct page +0 (flags)
3.45% 0.06% 0.00% struct page +24 (mapping)
0.25% 0.28% 0.00% struct page +48 (_mapcount.counter)
0.02% 0.06% 0.00% struct page +32 (index)
0.02% 0.00% 0.00% struct page +52 (_refcount.counter)
0.02% 0.01% 0.00% struct page +56 (memcg_data)
0.00% 0.01% 0.00% struct page +16 (lru.prev)
15.37% 17.54% 0.00% (stack operation)
15.37% 17.54% 0.00% (stack operation) +0 (no field)
11.71% 50.27% 0.00% (unknown)
11.71% 50.27% 0.00% (unknown) +0 (no field)

$ perf annotate --data-type
...
Annotate type: 'struct cfs_rq' in [kernel.kallsyms] (13 samples):
============================================================================
samples offset size field
13 0 640 struct cfs_rq {
2 0 16 struct load_weight load {
2 0 8 unsigned long weight;
0 8 4 u32 inv_weight;
};
0 16 8 unsigned long runnable_weight;
0 24 4 unsigned int nr_running;
1 28 4 unsigned int h_nr_running;
...

$ perf annotate --data-type=page --group
Annotate type: 'struct page' in [kernel.kallsyms] (480 samples):
event[0] = cpu/mem-loads,ldlat=4/P
event[1] = cpu/mem-stores/P
event[2] = dummy:u
===================================================================================
samples offset size field
447 33 0 0 64 struct page {
108 8 0 0 8 long unsigned int flags;
319 13 0 8 40 union {
319 13 0 8 40 struct {
236 2 0 8 16 union {
236 2 0 8 16 struct list_head lru {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct {
236 1 0 8 8 void* __filler;
0 1 0 16 4 unsigned int mlock_count;
};
236 2 0 8 16 struct list_head buddy_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct list_head pcp_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
};
82 4 0 24 8 struct address_space* mapping;
1 7 0 32 8 union {
1 7 0 32 8 long unsigned int index;
1 7 0 32 8 long unsigned int share;
};
0 0 0 40 8 long unsigned int private;
};

This uses the existing annotate code, calling objdump to do the
disassembly, with improvements to avoid having this take too long,
but longer term a switch to a disassembler library, possibly
reusing code in the kernel will be pursued.

This is the initial implementation, please use it and report
impressions and bugs. Make sure the kernel-debuginfo packages match
the running kernel. The 'perf report' phase for non short perf.data
files may take a while.

There is a great article about it on LWN:

https://lwn.net/Articles/955709/ - "Data-type profiling for perf"

One last test I did while writing this text, on a AMD Ryzen 5950X,
using a distro kernel, while doing a simple 'find /' on an
otherwise idle system resulted in:

# uname -r
6.6.9-100.fc38.x86_64
# perf -vv | grep BPF_
bpf: [ on ] # HAVE_LIBBPF_SUPPORT
bpf_skeletons: [ on ] # HAVE_BPF_SKEL
# rpm -qa | grep kernel-debuginfo
kernel-debuginfo-common-x86_64-6.6.9-100.fc38.x86_64
kernel-debuginfo-6.6.9-100.fc38.x86_64
#
# perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000'
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.199 MB perf.data (2913 samples) ]
#
# ls -la perf.data
-rw-------. 1 root root 2346486 Jan 9 18:36 perf.data
# perf evlist
ibs_op//
dummy:u
# perf evlist -v
ibs_op//: type: 11, size: 136, config: 0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|ADDR|CPU|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
#
# perf report -s type,typeoff --hierarchy --group --stdio
# Total Lost Samples: 0
#
# Samples: 2K of events 'ibs_op//, dummy:u'
# Event count (approx.): 1904553038
#
# Overhead Data Type / Data Type Offset
# ................... ............................
#
73.70% 0.00% (unknown)
73.70% 0.00% (unknown) +0 (no field)
3.01% 0.00% long unsigned int
3.00% 0.00% long unsigned int +0 (no field)
0.01% 0.00% long unsigned int +2 (no field)
2.73% 0.00% struct task_struct
1.71% 0.00% struct task_struct +52 (on_cpu)
0.38% 0.00% struct task_struct +2104 (rcu_read_unlock_special.b.blocked)
0.23% 0.00% struct task_struct +2100 (rcu_read_lock_nesting)
0.14% 0.00% struct task_struct +2384 ()
0.06% 0.00% struct task_struct +3096 (signal)
0.05% 0.00% struct task_struct +3616 (cgroups)
0.05% 0.00% struct task_struct +2344 (active_mm)
0.02% 0.00% struct task_struct +46 (flags)
0.02% 0.00% struct task_struct +2096 (migration_disabled)
0.01% 0.00% struct task_struct +24 (__state)
0.01% 0.00% struct task_struct +3956 (mm_cid_active)
0.01% 0.00% struct task_struct +1048 (cpus_ptr)
0.01% 0.00% struct task_struct +184 (se.group_node.next)
0.01% 0.00% struct task_struct +20 (thread_info.cpu)
0.00% 0.00% struct task_struct +104 (on_rq)
0.00% 0.00% struct task_struct +2456 (pid)
1.36% 0.00% struct module
0.59% 0.00% struct module +952 (kallsyms)
0.42% 0.00% struct module +0 (state)
0.23% 0.00% struct module +8 (list.next)
0.12% 0.00% struct module +216 (syms)
0.95% 0.00% struct inode
0.41% 0.00% struct inode +40 (i_sb)
0.22% 0.00% struct inode +0 (i_mode)
0.06% 0.00% struct inode +76 (i_rdev)
0.06% 0.00% struct inode +56 (i_security)
<SNIP>

perf top/report:

- Don't ignore job control, allowing control+Z + bg to work.

- Add s390 raw data interpretation for PAI (Processor Activity
Instrumentation) counters.

perf archive:

- Add new option '--all' to pack perf.data with DSOs.

- Add new option '--unpack' to expand tarballs.

Initialization speedups:

- Lazily initialize zstd streams to save memory when not using it.

- Lazily allocate/size mmap event copy.

- Lazy load kernel symbols in 'perf record'.

- Be lazier in allocating lost samples buffer in 'perf record'.

- Don't synthesize BPF events when disabled via the command line
(perf record --no-bpf-event).

Assorted improvements:

- Show note on AMD systems that the :p, :pp, :ppp and :P are all the
same, as IBS (Instruction Based Sampling) is used and it is
inherently precise, not having levels of precision like in Intel
systems.

- When 'cycles' isn't available, fall back to the "task-clock" event
when not system wide, not to 'cpu-clock'.

- Add --debug-file option to redirect debug output, e.g.:

$ perf --debug-file /tmp/perf.log record -v true

- Shrink 'struct map' to under one cacheline by avoiding function
pointers for selecting if addresses are identity or DSO relative,
and using just a byte for some boolean struct members.

- Resolve the arch specific strerrno just once to use in
perf_env__arch_strerrno().

- Reduce memory for recording PERF_RECORD_LOST_SAMPLES event.

Assorted fixes:

- Fix the default 'perf top' usage on Intel hybrid systems, now it
starts with a browser showing the number of samples for Efficiency
(cpu_atom/cycles/P) and Performance (cpu_core/cycles/P). This
behaviour is similar on ARM64, with its respective set of
big.LITTLE processors.

- Fix segfault on build_mem_topology() error path.

- Fix 'perf mem' error on hybrid related to availability of mem event
in a PMU.

- Fix missing reference count gets (map, maps) in the db-export code.

- Avoid recursively taking env->bpf_progs.lock in the 'perf_env'
code.

- Use the newly introduced maps__for_each_map() to add missing
locking around iteration of 'struct map' entries.

- Parse NOTE segments until the build id is found, don't stop on the
first one, ELF files may have several such NOTE segments.

- Remove 'egrep' usage, it's deprecated, use 'grep -E' instead.

- Warn first about missing libelf, not libbpf, that depends on
libelf.

- Use alternative to 'find ... -printf' as this isn't supported in
busybox.

- Address python 3.6 DeprecationWarning for string escapes.

- Fix memory leak in uniq() in libsubcmd.

- Fix man page formatting for 'perf lock'

- Fix some spelling mistakes.

perf tests:

- Fail shell tests that need some symbol in perf itself if it is
stripped. These tests check if a symbol is resolved, if some hot
function is indeed detected by profiling, etc.

- The 'perf test sigtrap' test is currently failing on PREEMPT_RT,
skip it if sleeping spinlocks are detected (using BTF) and point to
the mailing list discussion about it. This test is also being
skipped on several architectures (powerpc, s390x, arm and aarch64)
due to other pending issues with instruction breakpoints.

- Adjust test case perf record offcpu profiling tests for s390.

- Fix 'Setup struct perf_event_attr' fails on s390 on z/VM guest,
addressing issues caused by the fallback from cycles to task-clock
done in this release.

- Fix mask for VG register in the user-regs test.

- Use shellcheck on 'perf test' shell scripts automatically to make
sure changes don't introduce things it flags as problematic.

- Add option to change objdump binary and allow it to be set via
'perf config'.

- Add basic 'perf script', 'perf list --json" and 'perf diff' tests.

- Basic branch counter support.

- Make DSO tests a suite rather than individual.

- Remove atomics from test_loop to avoid test failures.

- Fix call chain match on powerpc for the record+probe_libc_inet_pton
test.

- Improve Intel hybrid tests.

Vendor event files (JSON):

powerpc:

- Update datasource event name to fix duplicate events on IBM's
Power10.

- Add PVN for HX-C2000 CPU with Power8 Architecture.

Intel:

- Alderlake/rocketlake metric fixes.

- Update emeraldrapids events to v1.02.

- Update icelakex events to v1.23.

- Update sapphirerapids events to v1.17.

- Add skx, clx, icx and spr upi bandwidth metric.

AMD:

- Add Zen 4 memory controller events.

RISC-V:

- Add StarFive Dubhe-80 and Dubhe-90 JSON files.
https://www.starfivetech.com/en/site/cpu-u

- Add T-HEAD C9xx JSON file.
https://github.com/riscv-software-src/opensbi/blob/master/docs/platform/thead-c9xx.md

ARM64:

- Remove UTF-8 characters from cmn.json, that were causing build
failure in some distros.

- Add core PMU events and metrics for Ampere One X.

- Rename Ampere One's BPU_FLUSH_MEM_FAULT to GPC_FLUSH_MEM_FAULT

libperf:

- Rename several perf_cpu_map constructor names to clarify what they
really do.

- Ditto for some other methods, coping with some issues in their
semantics, like perf_cpu_map__empty() ->
perf_cpu_map__has_any_cpu_or_is_empty().

- Document perf_cpu_map__nr()'s behavior

perf stat:

- Exit if parse groups fails.

- Combine the -A/--no-aggr and --no-merge options.

- Fix help message for --metric-no-threshold option.

Hardware tracing:

ARM64 CoreSight:

- Bump minimum OpenCSD version to ensure a bugfix is present.

- Add 'T' itrace option for timestamp trace

- Set start vm addr of executable file to 0 and don't ignore first
sample on the arm-cs-trace-disasm.py 'perf script'"

* tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (179 commits)
MAINTAINERS: Add Namhyung as tools/perf/ co-maintainer
perf test: test case 'Setup struct perf_event_attr' fails on s390 on z/vm
perf db-export: Fix missing reference count get in call_path_from_sample()
perf tests: Add perf script test
libsubcmd: Fix memory leak in uniq()
perf TUI: Don't ignore job control
perf vendor events intel: Update sapphirerapids events to v1.17
perf vendor events intel: Update icelakex events to v1.23
perf vendor events intel: Update emeraldrapids events to v1.02
perf vendor events intel: Alderlake/rocketlake metric fixes
perf x86 test: Add hybrid test for conflicting legacy/sysfs event
perf x86 test: Update hybrid expectations
perf vendor events amd: Add Zen 4 memory controller events
perf stat: Fix hard coded LL miss units
perf record: Reduce memory for recording PERF_RECORD_LOST_SAMPLES event
perf env: Avoid recursively taking env->bpf_progs.lock
perf annotate: Add --insn-stat option for debugging
perf annotate: Add --type-stat option for debugging
perf annotate: Support event group display
perf annotate: Add --data-type option
...

+7851 -2149
+1 -1
MAINTAINERS
··· 17140 17140 M: Peter Zijlstra <peterz@infradead.org> 17141 17141 M: Ingo Molnar <mingo@redhat.com> 17142 17142 M: Arnaldo Carvalho de Melo <acme@kernel.org> 17143 + M: Namhyung Kim <namhyung@kernel.org> 17143 17144 R: Mark Rutland <mark.rutland@arm.com> 17144 17145 R: Alexander Shishkin <alexander.shishkin@linux.intel.com> 17145 17146 R: Jiri Olsa <jolsa@kernel.org> 17146 - R: Namhyung Kim <namhyung@kernel.org> 17147 17147 R: Ian Rogers <irogers@google.com> 17148 17148 R: Adrian Hunter <adrian.hunter@intel.com> 17149 17149 L: linux-perf-users@vger.kernel.org
+1
tools/build/Makefile.feature
··· 32 32 backtrace \ 33 33 dwarf \ 34 34 dwarf_getlocations \ 35 + dwarf_getcfi \ 35 36 eventfd \ 36 37 fortify-source \ 37 38 get_current_dir_name \
+4
tools/build/feature/Makefile
··· 7 7 test-bionic.bin \ 8 8 test-dwarf.bin \ 9 9 test-dwarf_getlocations.bin \ 10 + test-dwarf_getcfi.bin \ 10 11 test-eventfd.bin \ 11 12 test-fortify-source.bin \ 12 13 test-get_current_dir_name.bin \ ··· 153 152 $(BUILD) $(DWARFLIBS) 154 153 155 154 $(OUTPUT)test-dwarf_getlocations.bin: 155 + $(BUILD) $(DWARFLIBS) 156 + 157 + $(OUTPUT)test-dwarf_getcfi.bin: 156 158 $(BUILD) $(DWARFLIBS) 157 159 158 160 $(OUTPUT)test-libelf-getphdrnum.bin:
+9
tools/build/feature/test-dwarf_getcfi.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdio.h> 3 + #include <elfutils/libdw.h> 4 + 5 + int main(void) 6 + { 7 + Dwarf *dwarf = NULL; 8 + return dwarf_getcfi(dwarf) == NULL; 9 + }
+2 -2
tools/build/feature/test-libopencsd.c
··· 4 4 /* 5 5 * Check OpenCSD library version is sufficient to provide required features 6 6 */ 7 - #define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) 7 + #define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) 8 8 #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) 9 - #error "OpenCSD >= 1.1.1 is required" 9 + #error "OpenCSD >= 1.2.1 is required" 10 10 #endif 11 11 12 12 int main(void)
+13
tools/include/uapi/linux/perf_event.h
··· 204 204 205 205 PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ 206 206 207 + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ 208 + 207 209 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 208 210 }; 209 211 ··· 236 234 PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 237 235 238 236 PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, 237 + 238 + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, 239 239 240 240 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 241 241 }; ··· 986 982 * { u64 nr; 987 983 * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 988 984 * { u64 from, to, flags } lbr[nr]; 985 + * # 986 + * # The format of the counters is decided by the 987 + * # "branch_counter_nr" and "branch_counter_width", 988 + * # which are defined in the ABI. 989 + * # 990 + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS 989 991 * } && PERF_SAMPLE_BRANCH_STACK 990 992 * 991 993 * { u64 abi; # enum perf_sample_regs_abi ··· 1436 1426 priv:3, /* privilege level */ 1437 1427 reserved:31; 1438 1428 }; 1429 + 1430 + /* Size of used info bits in struct perf_branch_entry */ 1431 + #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 1439 1432 1440 1433 union perf_sample_weight { 1441 1434 __u64 full;
+29 -51
tools/lib/api/fs/fs.c
··· 16 16 #include <sys/mount.h> 17 17 18 18 #include "fs.h" 19 + #include "../io.h" 19 20 #include "debug-internal.h" 20 21 21 22 #define _STR(x) #x ··· 345 344 return filename__read_ull_base(filename, value, 0); 346 345 } 347 346 348 - #define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ 349 - 350 347 int filename__read_str(const char *filename, char **buf, size_t *sizep) 351 348 { 352 - size_t size = 0, alloc_size = 0; 353 - void *bf = NULL, *nbf; 354 - int fd, n, err = 0; 355 - char sbuf[STRERR_BUFSIZE]; 349 + struct io io; 350 + char bf[128]; 351 + int err; 356 352 357 - fd = open(filename, O_RDONLY); 358 - if (fd < 0) 353 + io.fd = open(filename, O_RDONLY); 354 + if (io.fd < 0) 359 355 return -errno; 360 - 361 - do { 362 - if (size == alloc_size) { 363 - alloc_size += BUFSIZ; 364 - nbf = realloc(bf, alloc_size); 365 - if (!nbf) { 366 - err = -ENOMEM; 367 - break; 368 - } 369 - 370 - bf = nbf; 371 - } 372 - 373 - n = read(fd, bf + size, alloc_size - size); 374 - if (n < 0) { 375 - if (size) { 376 - pr_warn("read failed %d: %s\n", errno, 377 - strerror_r(errno, sbuf, sizeof(sbuf))); 378 - err = 0; 379 - } else 380 - err = -errno; 381 - 382 - break; 383 - } 384 - 385 - size += n; 386 - } while (n > 0); 387 - 388 - if (!err) { 389 - *sizep = size; 390 - *buf = bf; 356 + io__init(&io, io.fd, bf, sizeof(bf)); 357 + *buf = NULL; 358 + err = io__getdelim(&io, buf, sizep, /*delim=*/-1); 359 + if (err < 0) { 360 + free(*buf); 361 + *buf = NULL; 391 362 } else 392 - free(bf); 393 - 394 - close(fd); 363 + err = 0; 364 + close(io.fd); 395 365 return err; 396 366 } 397 367 ··· 447 475 448 476 int sysfs__read_bool(const char *entry, bool *value) 449 477 { 450 - char *buf; 451 - size_t size; 452 - int ret; 478 + struct io io; 479 + char bf[16]; 480 + int ret = 0; 481 + char path[PATH_MAX]; 482 + const char *sysfs = sysfs__mountpoint(); 453 483 454 - ret = sysfs__read_str(entry, &buf, &size); 455 - if (ret < 0) 456 - return ret; 484 + if (!sysfs) 485 + return 
-1; 457 486 458 - switch (buf[0]) { 487 + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); 488 + io.fd = open(path, O_RDONLY); 489 + if (io.fd < 0) 490 + return -errno; 491 + 492 + io__init(&io, io.fd, bf, sizeof(bf)); 493 + switch (io__get_char(&io)) { 459 494 case '1': 460 495 case 'y': 461 496 case 'Y': ··· 476 497 default: 477 498 ret = -1; 478 499 } 479 - 480 - free(buf); 500 + close(io.fd); 481 501 482 502 return ret; 483 503 }
+9 -3
tools/lib/api/io.h
··· 12 12 #include <stdlib.h> 13 13 #include <string.h> 14 14 #include <unistd.h> 15 + #include <linux/types.h> 15 16 16 17 struct io { 17 18 /* File descriptor being read/ */ ··· 141 140 } 142 141 } 143 142 144 - /* Read up to and including the first newline following the pattern of getline. */ 145 - static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) 143 + /* Read up to and including the first delim. */ 144 + static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim) 146 145 { 147 146 char buf[128]; 148 147 int buf_pos = 0; ··· 152 151 153 152 /* TODO: reuse previously allocated memory. */ 154 153 free(*line_out); 155 - while (ch != '\n') { 154 + while (ch != delim) { 156 155 ch = io__get_char(io); 157 156 158 157 if (ch < 0) ··· 183 182 free(line); 184 183 *line_out = NULL; 185 184 return -ENOMEM; 185 + } 186 + 187 + static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) 188 + { 189 + return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n'); 186 190 } 187 191 188 192 #endif /* __API_IO__ */
+1 -1
tools/lib/perf/Documentation/examples/sampling.c
··· 39 39 40 40 libperf_init(libperf_print); 41 41 42 - cpus = perf_cpu_map__new(NULL); 42 + cpus = perf_cpu_map__new_online_cpus(); 43 43 if (!cpus) { 44 44 fprintf(stderr, "failed to create cpus\n"); 45 45 return -1;
+1 -1
tools/lib/perf/Documentation/libperf-sampling.txt
··· 97 97 98 98 [source,c] 99 99 -- 100 - 42 cpus = perf_cpu_map__new(NULL); 100 + 42 cpus = perf_cpu_map__new_online_cpus(); 101 101 43 if (!cpus) { 102 102 44 fprintf(stderr, "failed to create cpus\n"); 103 103 45 return -1;
+2 -2
tools/lib/perf/Documentation/libperf.txt
··· 37 37 38 38 struct perf_cpu_map; 39 39 40 - struct perf_cpu_map *perf_cpu_map__dummy_new(void); 40 + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); 41 41 struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); 42 42 struct perf_cpu_map *perf_cpu_map__read(FILE *file); 43 43 struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); ··· 46 46 void perf_cpu_map__put(struct perf_cpu_map *map); 47 47 int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); 48 48 int perf_cpu_map__nr(const struct perf_cpu_map *cpus); 49 - bool perf_cpu_map__empty(const struct perf_cpu_map *map); 49 + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); 50 50 int perf_cpu_map__max(struct perf_cpu_map *map); 51 51 bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); 52 52
+38 -27
tools/lib/perf/cpumap.c
··· 9 9 #include <unistd.h> 10 10 #include <ctype.h> 11 11 #include <limits.h> 12 + #include "internal.h" 12 13 13 14 void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) 14 15 { ··· 28 27 return result; 29 28 } 30 29 31 - struct perf_cpu_map *perf_cpu_map__dummy_new(void) 30 + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) 32 31 { 33 32 struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); 34 33 ··· 67 66 } 68 67 } 69 68 70 - static struct perf_cpu_map *cpu_map__default_new(void) 69 + static struct perf_cpu_map *cpu_map__new_sysconf(void) 71 70 { 72 71 struct perf_cpu_map *cpus; 73 - int nr_cpus; 72 + int nr_cpus, nr_cpus_conf; 74 73 75 74 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 76 75 if (nr_cpus < 0) 77 76 return NULL; 77 + 78 + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); 79 + if (nr_cpus != nr_cpus_conf) { 80 + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", 81 + nr_cpus, nr_cpus_conf, nr_cpus); 82 + } 78 83 79 84 cpus = perf_cpu_map__alloc(nr_cpus); 80 85 if (cpus != NULL) { ··· 93 86 return cpus; 94 87 } 95 88 96 - struct perf_cpu_map *perf_cpu_map__default_new(void) 89 + static struct perf_cpu_map *cpu_map__new_sysfs_online(void) 97 90 { 98 - return cpu_map__default_new(); 91 + struct perf_cpu_map *cpus = NULL; 92 + FILE *onlnf; 93 + 94 + onlnf = fopen("/sys/devices/system/cpu/online", "r"); 95 + if (onlnf) { 96 + cpus = perf_cpu_map__read(onlnf); 97 + fclose(onlnf); 98 + } 99 + return cpus; 100 + } 101 + 102 + struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) 103 + { 104 + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); 105 + 106 + if (cpus) 107 + return cpus; 108 + 109 + return cpu_map__new_sysconf(); 99 110 } 100 111 101 112 ··· 205 180 206 181 if (nr_cpus > 0) 207 182 cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); 208 - else 209 - cpus = cpu_map__default_new(); 210 183 out_free_tmp: 211 184 free(tmp_cpus); 212 - return cpus; 213 - } 214 - 215 - 
static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) 216 - { 217 - struct perf_cpu_map *cpus = NULL; 218 - FILE *onlnf; 219 - 220 - onlnf = fopen("/sys/devices/system/cpu/online", "r"); 221 - if (!onlnf) 222 - return cpu_map__default_new(); 223 - 224 - cpus = perf_cpu_map__read(onlnf); 225 - fclose(onlnf); 226 185 return cpus; 227 186 } 228 187 ··· 220 211 int max_entries = 0; 221 212 222 213 if (!cpu_list) 223 - return cpu_map__read_all_cpu_map(); 214 + return perf_cpu_map__new_online_cpus(); 224 215 225 216 /* 226 217 * must handle the case of empty cpumap to cover ··· 277 268 278 269 if (nr_cpus > 0) 279 270 cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); 280 - else if (*cpu_list != '\0') 281 - cpus = cpu_map__default_new(); 282 - else 283 - cpus = perf_cpu_map__dummy_new(); 271 + else if (*cpu_list != '\0') { 272 + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", 273 + cpu_list); 274 + cpus = perf_cpu_map__new_online_cpus(); 275 + } else 276 + cpus = perf_cpu_map__new_any_cpu(); 284 277 invalid: 285 278 free(tmp_cpus); 286 279 out: ··· 311 300 return cpus ? __perf_cpu_map__nr(cpus) : 1; 312 301 } 313 302 314 - bool perf_cpu_map__empty(const struct perf_cpu_map *map) 303 + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) 315 304 { 316 305 return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; 317 306 }
+3 -3
tools/lib/perf/evlist.c
··· 39 39 if (evsel->system_wide) { 40 40 /* System wide: set the cpu map of the evsel to all online CPUs. */ 41 41 perf_cpu_map__put(evsel->cpus); 42 - evsel->cpus = perf_cpu_map__new(NULL); 42 + evsel->cpus = perf_cpu_map__new_online_cpus(); 43 43 } else if (evlist->has_user_cpus && evsel->is_pmu_core) { 44 44 /* 45 45 * User requested CPUs on a core PMU, ensure the requested CPUs ··· 619 619 620 620 /* One for each CPU */ 621 621 nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); 622 - if (perf_cpu_map__empty(evlist->all_cpus)) { 622 + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { 623 623 /* Plus one for each thread */ 624 624 nr_mmaps += perf_thread_map__nr(evlist->threads); 625 625 /* Minus the per-thread CPU (-1) */ ··· 653 653 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 654 654 return -ENOMEM; 655 655 656 - if (perf_cpu_map__empty(cpus)) 656 + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 657 657 return mmap_per_thread(evlist, ops, mp); 658 658 659 659 return mmap_per_cpu(evlist, ops, mp);
+1 -1
tools/lib/perf/evsel.c
··· 120 120 static struct perf_cpu_map *empty_cpu_map; 121 121 122 122 if (empty_cpu_map == NULL) { 123 - empty_cpu_map = perf_cpu_map__dummy_new(); 123 + empty_cpu_map = perf_cpu_map__new_any_cpu(); 124 124 if (empty_cpu_map == NULL) 125 125 return -ENOMEM; 126 126 }
+2 -1
tools/lib/perf/include/internal/mmap.h
··· 33 33 bool overwrite; 34 34 u64 flush; 35 35 libperf_unmap_cb_t unmap_cb; 36 - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); 36 + void *event_copy; 37 + size_t event_copy_sz; 37 38 struct perf_mmap *next; 38 39 }; 39 40
+35 -5
tools/lib/perf/include/perf/cpumap.h
··· 19 19 struct perf_cpu_map; 20 20 21 21 /** 22 - * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. 22 + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. 23 23 */ 24 - LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); 25 - LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); 24 + LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); 25 + /** 26 + * perf_cpu_map__new_online_cpus - a map read from 27 + * /sys/devices/system/cpu/online if 28 + * available. If reading wasn't possible a map 29 + * is created using the online processors 30 + * assuming the first 'n' processors are all 31 + * online. 32 + */ 33 + LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); 34 + /** 35 + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no 36 + * cpu_list argument is provided then 37 + * perf_cpu_map__new_online_cpus is returned. 38 + */ 26 39 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); 27 40 LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); 28 41 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); ··· 44 31 LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, 45 32 struct perf_cpu_map *other); 46 33 LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); 34 + /** 35 + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index 36 + * is invalid. 37 + */ 47 38 LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); 39 + /** 40 + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a 41 + * cpu of -1 for an invalid index, this makes an empty map 42 + * look like it contains the "any CPU"/dummy value. Otherwise 43 + * the result is the number CPUs in the map plus one if the 44 + * "any CPU"/dummy value is present. 
45 + */ 48 46 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); 49 47 /** 50 - * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. 48 + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. 51 49 */ 52 - LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); 50 + LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); 53 51 LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); 54 52 LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); 55 53 LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, ··· 74 50 for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ 75 51 (idx) < perf_cpu_map__nr(cpus); \ 76 52 (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) 53 + 54 + #define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ 55 + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ 56 + (idx) < perf_cpu_map__nr(cpus); \ 57 + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ 58 + if ((_cpu).cpu != -1) 77 59 78 60 #define perf_cpu_map__for_each_idx(idx, cpus) \ 79 61 for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
+3 -3
tools/lib/perf/libperf.map
··· 1 1 LIBPERF_0.0.1 { 2 2 global: 3 3 libperf_init; 4 - perf_cpu_map__dummy_new; 5 - perf_cpu_map__default_new; 4 + perf_cpu_map__new_any_cpu; 5 + perf_cpu_map__new_online_cpus; 6 6 perf_cpu_map__get; 7 7 perf_cpu_map__put; 8 8 perf_cpu_map__new; 9 9 perf_cpu_map__read; 10 10 perf_cpu_map__nr; 11 11 perf_cpu_map__cpu; 12 - perf_cpu_map__empty; 12 + perf_cpu_map__has_any_cpu_or_is_empty; 13 13 perf_cpu_map__max; 14 14 perf_cpu_map__has; 15 15 perf_thread_map__new_array;
+17 -3
tools/lib/perf/mmap.c
··· 19 19 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, 20 20 bool overwrite, libperf_unmap_cb_t unmap_cb) 21 21 { 22 + /* Assume fields were zero initialized. */ 22 23 map->fd = -1; 23 24 map->overwrite = overwrite; 24 25 map->unmap_cb = unmap_cb; ··· 52 51 53 52 void perf_mmap__munmap(struct perf_mmap *map) 54 53 { 55 - if (map && map->base != NULL) { 54 + if (!map) 55 + return; 56 + 57 + zfree(&map->event_copy); 58 + map->event_copy_sz = 0; 59 + if (map->base) { 56 60 munmap(map->base, perf_mmap__mmap_len(map)); 57 61 map->base = NULL; 58 62 map->fd = -1; 59 63 refcount_set(&map->refcnt, 0); 60 64 } 61 - if (map && map->unmap_cb) 65 + if (map->unmap_cb) 62 66 map->unmap_cb(map); 63 67 } 64 68 ··· 229 223 */ 230 224 if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { 231 225 unsigned int offset = *startp; 232 - unsigned int len = min(sizeof(*event), size), cpy; 226 + unsigned int len = size, cpy; 233 227 void *dst = map->event_copy; 228 + 229 + if (size > map->event_copy_sz) { 230 + dst = realloc(map->event_copy, size); 231 + if (!dst) 232 + return NULL; 233 + map->event_copy = dst; 234 + map->event_copy_sz = size; 235 + } 234 236 235 237 do { 236 238 cpy = min(map->mask + 1 - (offset & map->mask), len);
+2 -2
tools/lib/perf/tests/test-cpumap.c
··· 21 21 22 22 libperf_init(libperf_print); 23 23 24 - cpus = perf_cpu_map__dummy_new(); 24 + cpus = perf_cpu_map__new_any_cpu(); 25 25 if (!cpus) 26 26 return -1; 27 27 ··· 29 29 perf_cpu_map__put(cpus); 30 30 perf_cpu_map__put(cpus); 31 31 32 - cpus = perf_cpu_map__default_new(); 32 + cpus = perf_cpu_map__new_online_cpus(); 33 33 if (!cpus) 34 34 return -1; 35 35
+3 -3
tools/lib/perf/tests/test-evlist.c
··· 46 46 }; 47 47 int err, idx; 48 48 49 - cpus = perf_cpu_map__new(NULL); 49 + cpus = perf_cpu_map__new_online_cpus(); 50 50 __T("failed to create cpus", cpus); 51 51 52 52 evlist = perf_evlist__new(); ··· 261 261 threads = perf_thread_map__new_dummy(); 262 262 __T("failed to create threads", threads); 263 263 264 - cpus = perf_cpu_map__dummy_new(); 264 + cpus = perf_cpu_map__new_any_cpu(); 265 265 __T("failed to create cpus", cpus); 266 266 267 267 perf_thread_map__set_pid(threads, 0, pid); ··· 350 350 351 351 attr.config = id; 352 352 353 - cpus = perf_cpu_map__new(NULL); 353 + cpus = perf_cpu_map__new_online_cpus(); 354 354 __T("failed to create cpus", cpus); 355 355 356 356 evlist = perf_evlist__new();
+1 -1
tools/lib/perf/tests/test-evsel.c
··· 27 27 }; 28 28 int err, idx; 29 29 30 - cpus = perf_cpu_map__new(NULL); 30 + cpus = perf_cpu_map__new_online_cpus(); 31 31 __T("failed to create cpus", cpus); 32 32 33 33 evsel = perf_evsel__new(&attr);
+14 -4
tools/lib/subcmd/help.c
··· 52 52 if (!cmds->cnt) 53 53 return; 54 54 55 - for (i = j = 1; i < cmds->cnt; i++) 56 - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) 57 - cmds->names[j++] = cmds->names[i]; 58 - 55 + for (i = 1; i < cmds->cnt; i++) { 56 + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) 57 + zfree(&cmds->names[i - 1]); 58 + } 59 + for (i = 0, j = 0; i < cmds->cnt; i++) { 60 + if (cmds->names[i]) { 61 + if (i == j) 62 + j++; 63 + else 64 + cmds->names[j++] = cmds->names[i]; 65 + } 66 + } 59 67 cmds->cnt = j; 68 + while (j < i) 69 + cmds->names[j++] = NULL; 60 70 } 61 71 62 72 void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+4
tools/perf/.gitignore
··· 39 39 pmu-events/pmu-events.c 40 40 pmu-events/jevents 41 41 pmu-events/metric_test.log 42 + tests/shell/*.shellcheck_log 43 + tests/shell/coresight/*.shellcheck_log 44 + tests/shell/lib/*.shellcheck_log 42 45 feature/ 43 46 libapi/ 44 47 libbpf/ ··· 52 49 libtraceevent_plugins/ 53 50 fixdep 54 51 Documentation/doc.dep 52 + python_ext_build/
+1
tools/perf/Documentation/itrace.txt
··· 25 25 q quicker (less detailed) decoding 26 26 A approximate IPC 27 27 Z prefer to ignore timestamps (so-called "timeless" decoding) 28 + T use the timestamp trace as kernel time 28 29 29 30 The default is all events i.e. the same as --itrace=iybxwpe, 30 31 except for perf script where it is --itrace=ce
+11
tools/perf/Documentation/perf-annotate.txt
··· 155 155 stdio or stdio2 (Default: 0). Note that this is about selection of 156 156 functions to display, not about lines within the function. 157 157 158 + --data-type[=TYPE_NAME]:: 159 + Display data type annotation instead of code. It infers data type of 160 + samples (if they are memory accessing instructions) using DWARF debug 161 + information. It can take an optional argument of data type name. In 162 + that case it'd show annotation for the type only, otherwise it'd show 163 + all data types it finds. 164 + 165 + --type-stat:: 166 + Show stats for the data type annotation. 167 + 168 + 158 169 SEE ALSO 159 170 -------- 160 171 linkperf:perf-record[1], linkperf:perf-report[1]
+2 -2
tools/perf/Documentation/perf-config.txt
··· 251 251 addr2line binary to use for file names and line numbers. 252 252 253 253 annotate.objdump:: 254 - objdump binary to use for disassembly and annotations. 254 + objdump binary to use for disassembly and annotations, 255 + including in the 'perf test' command. 255 256 256 257 annotate.disassembler_style:: 257 258 Use this to change the default disassembler style to some other value ··· 722 721 723 722 Defines new record session for daemon. The value is record's 724 723 command line without the 'record' keyword. 725 - 726 724 727 725 SEE ALSO 728 726 --------
+7 -5
tools/perf/Documentation/perf-list.txt
··· 81 81 which supports up to precise-level 2, and precise level 3 for 82 82 some special cases 83 83 84 - On AMD systems it is implemented using IBS (up to precise-level 2). 85 - The precise modifier works with event types 0x76 (cpu-cycles, CPU 86 - clocks not halted) and 0xC1 (micro-ops retired). Both events map to 87 - IBS execution sampling (IBS op) with the IBS Op Counter Control bit 88 - (IbsOpCntCtl) set respectively (see the 84 + On AMD systems it is implemented using IBS OP (up to precise-level 2). 85 + Unlike Intel PEBS which provides levels of precision, AMD core pmu is 86 + inherently non-precise and IBS is inherently precise. (i.e. ibs_op//, 87 + ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier 88 + works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 89 + (micro-ops retired). Both events map to IBS execution sampling (IBS op) 90 + with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the 89 91 Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) 90 92 section of the [AMD Processor Programming Reference (PPR)] relevant to the 91 93 family, model and stepping of the processor being used).
+1 -1
tools/perf/Documentation/perf-lock.txt
··· 119 119 120 120 121 121 CONTENTION OPTIONS 122 - -------------- 122 + ------------------ 123 123 124 124 -k:: 125 125 --key=<value>::
+4
tools/perf/Documentation/perf-record.txt
··· 445 445 4th-Gen Xeon+ server), the save branch type is unconditionally enabled 446 446 when the taken branch stack sampling is enabled. 447 447 - priv: save privilege state during sampling in case binary is not available later 448 + - counter: save occurrences of the event since the last branch entry. Currently, the 449 + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and 450 + later platforms. An error out is expected if it's used on the unsupported 451 + kernel or CPUs. 448 452 449 453 + 450 454 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+3
tools/perf/Documentation/perf-report.txt
··· 118 118 - retire_lat: On X86, this reports pipeline stall of this instruction compared 119 119 to the previous instruction in cycles. And currently supported only on X86 120 120 - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate 121 + - type: Data type of sample memory access. 122 + - typeoff: Offset in the data type of sample memory access. 123 + - symoff: Offset in the symbol. 121 124 122 125 By default, comm, dso and symbol keys are used. 123 126 (i.e. --sort comm,dso,symbol)
+28 -24
tools/perf/Documentation/perf-stat.txt
··· 422 422 423 423 -A:: 424 424 --no-aggr:: 425 - Do not aggregate counts across all monitored CPUs. 425 + --no-merge:: 426 + Do not aggregate/merge counts across monitored CPUs or PMUs. 427 + 428 + When multiple events are created from a single event specification, 429 + stat will, by default, aggregate the event counts and show the result 430 + in a single row. This option disables that behavior and shows the 431 + individual events and counts. 432 + 433 + Multiple events are created from a single event specification when: 434 + 435 + 1. PID monitoring isn't requested and the system has more than one 436 + CPU. For example, a system with 8 SMT threads will have one event 437 + opened on each thread and aggregation is performed across them. 438 + 439 + 2. Prefix or glob wildcard matching is used for the PMU name. For 440 + example, multiple memory controller PMUs may exist typically with a 441 + suffix of _0, _1, etc. By default the event counts will all be 442 + combined if the PMU is specified without the suffix such as 443 + uncore_imc rather than uncore_imc_0. 444 + 445 + 3. Aliases, which are listed immediately after the Kernel PMU events 446 + by perf list, are used. 447 + 448 + --hybrid-merge:: 449 + Merge core event counts from all core PMUs. In hybrid or big.LITTLE 450 + systems by default each core PMU will report its count 451 + separately. This option forces core PMU counts to be combined to give 452 + a behavior closer to having a single CPU type in the system. 426 453 427 454 --topdown:: 428 455 Print top-down metrics supported by the CPU. This allows to determine ··· 501 474 'perf stat -M tma_frontend_bound_group...'. 502 475 503 476 Error out if the input is higher than the supported max level. 504 - 505 - --no-merge:: 506 - Do not merge results from same PMUs. 507 - 508 - When multiple events are created from a single event specification, 509 - stat will, by default, aggregate the event counts and show the result 510 - in a single row. 
This option disables that behavior and shows 511 - the individual events and counts. 512 - 513 - Multiple events are created from a single event specification when: 514 - 1. Prefix or glob matching is used for the PMU name. 515 - 2. Aliases, which are listed immediately after the Kernel PMU events 516 - by perf list, are used. 517 - 518 - --hybrid-merge:: 519 - Merge the hybrid event counts from all PMUs. 520 - 521 - For hybrid events, by default, the stat aggregates and reports the event 522 - counts per PMU. But sometimes, it's also useful to aggregate event counts 523 - from all PMUs. This option enables that behavior and reports the counts 524 - without PMUs. 525 - 526 - For non-hybrid events, it should be no effect. 527 477 528 478 --smi-cost:: 529 479 Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+3
tools/perf/Documentation/perf.txt
··· 64 64 perf-event-open - Print perf_event_open() arguments and 65 65 return value 66 66 67 + --debug-file:: 68 + Write debug output to a specified file. 69 + 67 70 DESCRIPTION 68 71 ----------- 69 72 Performance counters for Linux are a new kernel-based subsystem
+9 -4
tools/perf/Makefile.config
··· 476 476 else 477 477 CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT 478 478 endif # dwarf_getlocations 479 + ifneq ($(feature-dwarf_getcfi), 1) 480 + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142); 481 + else 482 + CFLAGS += -DHAVE_DWARF_CFI_SUPPORT 483 + endif # dwarf_getcfi 479 484 endif # Dwarf support 480 485 endif # libelf support 481 486 endif # NO_LIBELF ··· 685 680 endif 686 681 687 682 ifeq ($(BUILD_BPF_SKEL),1) 688 - ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) 689 - dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) 690 - BUILD_BPF_SKEL := 0 691 - else ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) 683 + ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) 692 684 dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool) 693 685 BUILD_BPF_SKEL := 0 694 686 else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),) 695 687 dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool) 688 + BUILD_BPF_SKEL := 0 689 + else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) 690 + dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) 696 691 BUILD_BPF_SKEL := 0 697 692 else ifeq ($(call get-executable,$(CLANG)),) 698 693 dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing)
+11 -2
tools/perf/Makefile.perf
··· 134 134 # x86 instruction decoder - new instructions test 135 135 # 136 136 # Define GEN_VMLINUX_H to generate vmlinux.h from the BTF. 137 + # 138 + # Define NO_SHELLCHECK if you do not want to run shellcheck during build 137 139 138 140 # As per kernel Makefile, avoid funny character set dependencies 139 141 unexport LC_ALL ··· 229 227 force_fixdep := $(config) 230 228 endif 231 229 230 + # Runs shellcheck on perf test shell scripts 231 + ifeq ($(NO_SHELLCHECK),1) 232 + SHELLCHECK := 233 + else 234 + SHELLCHECK := $(shell which shellcheck 2> /dev/null) 235 + endif 236 + 232 237 export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK 233 - export HOSTCC HOSTLD HOSTAR HOSTCFLAGS 238 + export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK 234 239 235 240 include $(srctree)/tools/build/Makefile.include 236 241 ··· 1161 1152 1162 1153 clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean 1163 1154 $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) 1164 - $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete 1155 + $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -o -name '*.shellcheck_log' -delete 1165 1156 $(Q)$(RM) $(OUTPUT).config-detected 1166 1157 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so 1167 1158 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
+8 -8
tools/perf/arch/arm/util/cs-etm.c
··· 199 199 { 200 200 int i, err = -EINVAL; 201 201 struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; 202 - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); 202 + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 203 203 204 204 /* Set option of each CPU we have */ 205 205 for (i = 0; i < cpu__max_cpu().cpu; i++) { ··· 211 211 * program can run on any CPUs in this case, thus don't skip 212 212 * validation. 213 213 */ 214 - if (!perf_cpu_map__empty(event_cpus) && 214 + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && 215 215 !perf_cpu_map__has(event_cpus, cpu)) 216 216 continue; 217 217 ··· 435 435 * Also the case of per-cpu mmaps, need the contextID in order to be notified 436 436 * when a context switch happened. 437 437 */ 438 - if (!perf_cpu_map__empty(cpus)) { 438 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 439 439 evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, 440 440 "timestamp", 1); 441 441 evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, ··· 461 461 evsel->core.attr.sample_period = 1; 462 462 463 463 /* In per-cpu case, always need the time of mmap events etc */ 464 - if (!perf_cpu_map__empty(cpus)) 464 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 465 465 evsel__set_sample_bit(evsel, TIME); 466 466 467 467 err = cs_etm_validate_config(itr, cs_etm_evsel); ··· 536 536 int i; 537 537 int etmv3 = 0, etmv4 = 0, ete = 0; 538 538 struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; 539 - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); 539 + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 540 540 541 541 /* cpu map is not empty, we have specific CPUs to work with */ 542 - if (!perf_cpu_map__empty(event_cpus)) { 542 + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { 543 543 for (i = 0; i < cpu__max_cpu().cpu; i++) { 544 544 struct perf_cpu cpu = { .cpu = i, }; 545 545 ··· 802 802 u64 nr_cpu, type; 803 803 struct 
perf_cpu_map *cpu_map; 804 804 struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus; 805 - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); 805 + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 806 806 struct cs_etm_recording *ptr = 807 807 container_of(itr, struct cs_etm_recording, itr); 808 808 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; ··· 814 814 return -EINVAL; 815 815 816 816 /* If the cpu_map is empty all online CPUs are involved */ 817 - if (perf_cpu_map__empty(event_cpus)) { 817 + if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { 818 818 cpu_map = online_cpus; 819 819 } else { 820 820 /* Make sure all specified CPUs are online */
+2 -2
tools/perf/arch/arm64/util/arm-spe.c
··· 232 232 * In the case of per-cpu mmaps, sample CPU for AUX event; 233 233 * also enable the timestamp tracing for samples correlation. 234 234 */ 235 - if (!perf_cpu_map__empty(cpus)) { 235 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 236 236 evsel__set_sample_bit(arm_spe_evsel, CPU); 237 237 evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, 238 238 "ts_enable", 1); ··· 265 265 tracking_evsel->core.attr.sample_period = 1; 266 266 267 267 /* In per-cpu case, always need the time of mmap events etc */ 268 - if (!perf_cpu_map__empty(cpus)) { 268 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 269 269 evsel__set_sample_bit(tracking_evsel, TIME); 270 270 evsel__set_sample_bit(tracking_evsel, CPU); 271 271
+1 -1
tools/perf/arch/arm64/util/header.c
··· 57 57 58 58 int get_cpuid(char *buf, size_t sz) 59 59 { 60 - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); 60 + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); 61 61 int ret; 62 62 63 63 if (!cpus)
+3 -3
tools/perf/arch/loongarch/annotate/instructions.c
··· 61 61 const char *c = strchr(ops->raw, '#'); 62 62 u64 start, end; 63 63 64 - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); 65 - ops->raw_func_start = strchr(ops->raw, '<'); 64 + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 65 + ops->jump.raw_func_start = strchr(ops->raw, '<'); 66 66 67 - if (ops->raw_func_start && c > ops->raw_func_start) 67 + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 68 68 c = NULL; 69 69 70 70 if (c++ != NULL)
+30 -7
tools/perf/arch/x86/tests/hybrid.c
··· 47 47 evsel = evsel__next(evsel); 48 48 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 49 49 TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); 50 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 50 + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); 51 51 TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 52 52 return TEST_OK; 53 53 } ··· 102 102 evsel = evsel__next(evsel); 103 103 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 104 104 TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); 105 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 105 + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); 106 106 TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 107 107 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 108 108 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); ··· 163 163 return TEST_OK; 164 164 } 165 165 166 + static int test__hybrid_hw_group_event_2(struct evlist *evlist) 167 + { 168 + struct evsel *evsel, *leader; 169 + 170 + evsel = leader = evlist__first(evlist); 171 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 172 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 173 + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); 174 + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 175 + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 176 + 177 + evsel = evsel__next(evsel); 178 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 179 + TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == 0x3c); 180 + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 181 + 
return TEST_OK; 182 + } 183 + 166 184 struct evlist_test { 167 185 const char *name; 168 186 bool (*valid)(void); ··· 189 171 190 172 static const struct evlist_test test__hybrid_events[] = { 191 173 { 192 - .name = "cpu_core/cpu-cycles/", 174 + .name = "cpu_core/cycles/", 193 175 .check = test__hybrid_hw_event_with_pmu, 194 176 /* 0 */ 195 177 }, 196 178 { 197 - .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", 179 + .name = "{cpu_core/cycles/,cpu_core/branches/}", 198 180 .check = test__hybrid_hw_group_event, 199 181 /* 1 */ 200 182 }, 201 183 { 202 - .name = "{cpu-clock,cpu_core/cpu-cycles/}", 184 + .name = "{cpu-clock,cpu_core/cycles/}", 203 185 .check = test__hybrid_sw_hw_group_event, 204 186 /* 2 */ 205 187 }, 206 188 { 207 - .name = "{cpu_core/cpu-cycles/,cpu-clock}", 189 + .name = "{cpu_core/cycles/,cpu-clock}", 208 190 .check = test__hybrid_hw_sw_group_event, 209 191 /* 3 */ 210 192 }, 211 193 { 212 - .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", 194 + .name = "{cpu_core/cycles/k,cpu_core/branches/u}", 213 195 .check = test__hybrid_group_modifier1, 214 196 /* 4 */ 215 197 }, ··· 232 214 .name = "cpu_core/LLC-loads/", 233 215 .check = test__hybrid_cache_event, 234 216 /* 8 */ 217 + }, 218 + { 219 + .name = "{cpu_core/cycles/,cpu_core/cpu-cycles/}", 220 + .check = test__hybrid_hw_group_event_2, 221 + /* 9 */ 235 222 }, 236 223 }; 237 224
+38
tools/perf/arch/x86/util/dwarf-regs.c
··· 113 113 return roff->offset; 114 114 return -EINVAL; 115 115 } 116 + 117 + struct dwarf_regs_idx { 118 + const char *name; 119 + int idx; 120 + }; 121 + 122 + static const struct dwarf_regs_idx x86_regidx_table[] = { 123 + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, 124 + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, 125 + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, 126 + { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, 127 + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, 128 + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, 129 + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, 130 + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, 131 + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, 132 + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, 133 + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, 134 + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, 135 + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, 136 + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, 137 + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, 138 + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, 139 + { "rip", DWARF_REG_PC }, 140 + }; 141 + 142 + int get_arch_regnum(const char *name) 143 + { 144 + unsigned int i; 145 + 146 + if (*name != '%') 147 + return -EINVAL; 148 + 149 + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) 150 + if (!strcmp(x86_regidx_table[i].name, name + 1)) 151 + return x86_regidx_table[i].idx; 152 + return -ENOENT; 153 + }
+61 -48
tools/perf/arch/x86/util/event.c
··· 14 14 15 15 #if defined(__x86_64__) 16 16 17 + struct perf_event__synthesize_extra_kmaps_cb_args { 18 + struct perf_tool *tool; 19 + perf_event__handler_t process; 20 + struct machine *machine; 21 + union perf_event *event; 22 + }; 23 + 24 + static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data) 25 + { 26 + struct perf_event__synthesize_extra_kmaps_cb_args *args = data; 27 + union perf_event *event = args->event; 28 + struct kmap *kmap; 29 + size_t size; 30 + 31 + if (!__map__is_extra_kernel_map(map)) 32 + return 0; 33 + 34 + kmap = map__kmap(map); 35 + 36 + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + 37 + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + 38 + args->machine->id_hdr_size; 39 + 40 + memset(event, 0, size); 41 + 42 + event->mmap.header.type = PERF_RECORD_MMAP; 43 + 44 + /* 45 + * kernel uses 0 for user space maps, see kernel/perf_event.c 46 + * __perf_event_mmap 47 + */ 48 + if (machine__is_host(args->machine)) 49 + event->header.misc = PERF_RECORD_MISC_KERNEL; 50 + else 51 + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 52 + 53 + event->mmap.header.size = size; 54 + 55 + event->mmap.start = map__start(map); 56 + event->mmap.len = map__size(map); 57 + event->mmap.pgoff = map__pgoff(map); 58 + event->mmap.pid = args->machine->pid; 59 + 60 + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); 61 + 62 + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) 63 + return -1; 64 + 65 + return 0; 66 + } 67 + 17 68 int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, 18 69 perf_event__handler_t process, 19 70 struct machine *machine) 20 71 { 21 - int rc = 0; 22 - struct map_rb_node *pos; 72 + int rc; 23 73 struct maps *kmaps = machine__kernel_maps(machine); 24 - union perf_event *event = zalloc(sizeof(event->mmap) + 25 - machine->id_hdr_size); 74 + struct perf_event__synthesize_extra_kmaps_cb_args args = { 75 + .tool = tool, 76 + .process = process, 77 + .machine = 
machine, 78 + .event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size), 79 + }; 26 80 27 - if (!event) { 81 + if (!args.event) { 28 82 pr_debug("Not enough memory synthesizing mmap event " 29 83 "for extra kernel maps\n"); 30 84 return -1; 31 85 } 32 86 33 - maps__for_each_entry(kmaps, pos) { 34 - struct kmap *kmap; 35 - size_t size; 36 - struct map *map = pos->map; 87 + rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args); 37 88 38 - if (!__map__is_extra_kernel_map(map)) 39 - continue; 40 - 41 - kmap = map__kmap(map); 42 - 43 - size = sizeof(event->mmap) - sizeof(event->mmap.filename) + 44 - PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + 45 - machine->id_hdr_size; 46 - 47 - memset(event, 0, size); 48 - 49 - event->mmap.header.type = PERF_RECORD_MMAP; 50 - 51 - /* 52 - * kernel uses 0 for user space maps, see kernel/perf_event.c 53 - * __perf_event_mmap 54 - */ 55 - if (machine__is_host(machine)) 56 - event->header.misc = PERF_RECORD_MISC_KERNEL; 57 - else 58 - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 59 - 60 - event->mmap.header.size = size; 61 - 62 - event->mmap.start = map__start(map); 63 - event->mmap.len = map__size(map); 64 - event->mmap.pgoff = map__pgoff(map); 65 - event->mmap.pid = machine->pid; 66 - 67 - strlcpy(event->mmap.filename, kmap->name, PATH_MAX); 68 - 69 - if (perf_tool__process_synth_event(tool, event, machine, 70 - process) != 0) { 71 - rc = -1; 72 - break; 73 - } 74 - } 75 - 76 - free(event); 89 + free(args.event); 77 90 return rc; 78 91 } 79 92
+2 -2
tools/perf/arch/x86/util/intel-bts.c
··· 143 143 if (!opts->full_auxtrace) 144 144 return 0; 145 145 146 - if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) { 146 + if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 147 147 pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); 148 148 return -EINVAL; 149 149 } ··· 224 224 * In the case of per-cpu mmaps, we need the CPU on the 225 225 * AUX event. 226 226 */ 227 - if (!perf_cpu_map__empty(cpus)) 227 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 228 228 evsel__set_sample_bit(intel_bts_evsel, CPU); 229 229 } 230 230
+5 -5
tools/perf/arch/x86/util/intel-pt.c
··· 369 369 ui__warning("Intel Processor Trace: TSC not available\n"); 370 370 } 371 371 372 - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); 372 + per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus); 373 373 374 374 auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; 375 375 auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; ··· 774 774 * Per-cpu recording needs sched_switch events to distinguish different 775 775 * threads. 776 776 */ 777 - if (have_timing_info && !perf_cpu_map__empty(cpus) && 777 + if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && 778 778 !record_opts__no_switch_events(opts)) { 779 779 if (perf_can_record_switch_events()) { 780 780 bool cpu_wide = !target__none(&opts->target) && ··· 832 832 * In the case of per-cpu mmaps, we need the CPU on the 833 833 * AUX event. 834 834 */ 835 - if (!perf_cpu_map__empty(cpus)) 835 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 836 836 evsel__set_sample_bit(intel_pt_evsel, CPU); 837 837 } 838 838 ··· 858 858 tracking_evsel->immediate = true; 859 859 860 860 /* In per-cpu case, always need the time of mmap events etc */ 861 - if (!perf_cpu_map__empty(cpus)) { 861 + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 862 862 evsel__set_sample_bit(tracking_evsel, TIME); 863 863 /* And the CPU for switch events */ 864 864 evsel__set_sample_bit(tracking_evsel, CPU); ··· 870 870 * Warn the user when we do not have enough information to decode i.e. 871 871 * per-cpu with no sched_switch (except workload-only). 872 872 */ 873 - if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && 873 + if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && 874 874 !target__none(&opts->target) && 875 875 !intel_pt_evsel->core.attr.exclude_user) 876 876 ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
+1 -1
tools/perf/bench/epoll-ctl.c
··· 330 330 act.sa_sigaction = toggle_done; 331 331 sigaction(SIGINT, &act, NULL); 332 332 333 - cpu = perf_cpu_map__new(NULL); 333 + cpu = perf_cpu_map__new_online_cpus(); 334 334 if (!cpu) 335 335 goto errmem; 336 336
+1 -1
tools/perf/bench/epoll-wait.c
··· 444 444 act.sa_sigaction = toggle_done; 445 445 sigaction(SIGINT, &act, NULL); 446 446 447 - cpu = perf_cpu_map__new(NULL); 447 + cpu = perf_cpu_map__new_online_cpus(); 448 448 if (!cpu) 449 449 goto errmem; 450 450
+1 -1
tools/perf/bench/futex-hash.c
··· 138 138 exit(EXIT_FAILURE); 139 139 } 140 140 141 - cpu = perf_cpu_map__new(NULL); 141 + cpu = perf_cpu_map__new_online_cpus(); 142 142 if (!cpu) 143 143 goto errmem; 144 144
+1 -1
tools/perf/bench/futex-lock-pi.c
··· 172 172 if (argc) 173 173 goto err; 174 174 175 - cpu = perf_cpu_map__new(NULL); 175 + cpu = perf_cpu_map__new_online_cpus(); 176 176 if (!cpu) 177 177 err(EXIT_FAILURE, "calloc"); 178 178
+1 -1
tools/perf/bench/futex-requeue.c
··· 174 174 if (argc) 175 175 goto err; 176 176 177 - cpu = perf_cpu_map__new(NULL); 177 + cpu = perf_cpu_map__new_online_cpus(); 178 178 if (!cpu) 179 179 err(EXIT_FAILURE, "cpu_map__new"); 180 180
+1 -1
tools/perf/bench/futex-wake-parallel.c
··· 264 264 err(EXIT_FAILURE, "mlockall"); 265 265 } 266 266 267 - cpu = perf_cpu_map__new(NULL); 267 + cpu = perf_cpu_map__new_online_cpus(); 268 268 if (!cpu) 269 269 err(EXIT_FAILURE, "calloc"); 270 270
+1 -1
tools/perf/bench/futex-wake.c
··· 149 149 exit(EXIT_FAILURE); 150 150 } 151 151 152 - cpu = perf_cpu_map__new(NULL); 152 + cpu = perf_cpu_map__new_online_cpus(); 153 153 if (!cpu) 154 154 err(EXIT_FAILURE, "calloc"); 155 155
+1 -1
tools/perf/bench/sched-seccomp-notify.c
··· 32 32 static const struct option options[] = { 33 33 OPT_U64('l', "loop", &loops, "Specify number of loops"), 34 34 OPT_BOOLEAN('s', "sync-mode", &sync_mode, 35 - "Enable the synchronious mode for seccomp notifications"), 35 + "Enable the synchronous mode for seccomp notifications"), 36 36 OPT_END() 37 37 }; 38 38
+272 -28
tools/perf/builtin-annotate.c
··· 20 20 #include "util/evlist.h" 21 21 #include "util/evsel.h" 22 22 #include "util/annotate.h" 23 + #include "util/annotate-data.h" 23 24 #include "util/event.h" 24 25 #include <subcmd/parse-options.h> 25 26 #include "util/parse-events.h" ··· 46 45 struct perf_annotate { 47 46 struct perf_tool tool; 48 47 struct perf_session *session; 49 - struct annotation_options opts; 50 48 #ifdef HAVE_SLANG_SUPPORT 51 49 bool use_tui; 52 50 #endif ··· 56 56 bool skip_missing; 57 57 bool has_br_stack; 58 58 bool group_set; 59 + bool data_type; 60 + bool type_stat; 61 + bool insn_stat; 59 62 float min_percent; 60 63 const char *sym_hist_filter; 61 64 const char *cpu_list; 65 + const char *target_data_type; 62 66 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 63 67 }; 64 68 ··· 98 94 struct annotation *notes = sym ? symbol__annotation(sym) : NULL; 99 95 struct block_range_iter iter; 100 96 struct block_range *entry; 97 + struct annotated_branch *branch; 101 98 102 99 /* 103 100 * Sanity; NULL isn't executable and the CPU cannot execute backwards ··· 109 104 iter = block_range__create(start->addr, end->addr); 110 105 if (!block_range_iter__valid(&iter)) 111 106 return; 107 + 108 + branch = annotation__get_branch(notes); 112 109 113 110 /* 114 111 * First block in range is a branch target. 
··· 125 118 entry->coverage++; 126 119 entry->sym = sym; 127 120 128 - if (notes) 129 - notes->max_coverage = max(notes->max_coverage, entry->coverage); 121 + if (branch) 122 + branch->max_coverage = max(branch->max_coverage, entry->coverage); 130 123 131 124 } while (block_range_iter__next(&iter)); 132 125 ··· 322 315 struct perf_annotate *ann) 323 316 { 324 317 if (!ann->use_stdio2) 325 - return symbol__tty_annotate(&he->ms, evsel, &ann->opts); 318 + return symbol__tty_annotate(&he->ms, evsel); 326 319 327 - return symbol__tty_annotate2(&he->ms, evsel, &ann->opts); 320 + return symbol__tty_annotate2(&he->ms, evsel); 321 + } 322 + 323 + static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel) 324 + { 325 + struct dso *dso = map__dso(he->ms.map); 326 + int nr_members = 1; 327 + int nr_samples = he->stat.nr_events; 328 + 329 + if (evsel__is_group_event(evsel)) { 330 + struct hist_entry *pair; 331 + 332 + list_for_each_entry(pair, &he->pairs.head, pairs.node) 333 + nr_samples += pair->stat.nr_events; 334 + } 335 + 336 + printf("Annotate type: '%s' in %s (%d samples):\n", 337 + he->mem_type->self.type_name, dso->name, nr_samples); 338 + 339 + if (evsel__is_group_event(evsel)) { 340 + struct evsel *pos; 341 + int i = 0; 342 + 343 + for_each_group_evsel(pos, evsel) 344 + printf(" event[%d] = %s\n", i++, pos->name); 345 + 346 + nr_members = evsel->core.nr_members; 347 + } 348 + 349 + printf("============================================================================\n"); 350 + printf("%*s %10s %10s %s\n", 11 * nr_members, "samples", "offset", "size", "field"); 351 + } 352 + 353 + static void print_annotated_data_type(struct annotated_data_type *mem_type, 354 + struct annotated_member *member, 355 + struct evsel *evsel, int indent) 356 + { 357 + struct annotated_member *child; 358 + struct type_hist *h = mem_type->histograms[evsel->core.idx]; 359 + int i, nr_events = 1, samples = 0; 360 + 361 + for (i = 0; i < member->size; i++) 362 + samples 
+= h->addr[member->offset + i].nr_samples; 363 + printf(" %10d", samples); 364 + 365 + if (evsel__is_group_event(evsel)) { 366 + struct evsel *pos; 367 + 368 + for_each_group_member(pos, evsel) { 369 + h = mem_type->histograms[pos->core.idx]; 370 + 371 + samples = 0; 372 + for (i = 0; i < member->size; i++) 373 + samples += h->addr[member->offset + i].nr_samples; 374 + printf(" %10d", samples); 375 + } 376 + nr_events = evsel->core.nr_members; 377 + } 378 + 379 + printf(" %10d %10d %*s%s\t%s", 380 + member->offset, member->size, indent, "", member->type_name, 381 + member->var_name ?: ""); 382 + 383 + if (!list_empty(&member->children)) 384 + printf(" {\n"); 385 + 386 + list_for_each_entry(child, &member->children, node) 387 + print_annotated_data_type(mem_type, child, evsel, indent + 4); 388 + 389 + if (!list_empty(&member->children)) 390 + printf("%*s}", 11 * nr_events + 24 + indent, ""); 391 + printf(";\n"); 392 + } 393 + 394 + static void print_annotate_data_stat(struct annotated_data_stat *s) 395 + { 396 + #define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld) 397 + 398 + int bad = s->no_sym + 399 + s->no_insn + 400 + s->no_insn_ops + 401 + s->no_mem_ops + 402 + s->no_reg + 403 + s->no_dbginfo + 404 + s->no_cuinfo + 405 + s->no_var + 406 + s->no_typeinfo + 407 + s->invalid_size + 408 + s->bad_offset; 409 + int ok = s->total - bad; 410 + 411 + printf("Annotate data type stats:\n"); 412 + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n", 413 + s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1)); 414 + printf("-----------------------------------------------------------\n"); 415 + PRINT_STAT(no_sym); 416 + PRINT_STAT(no_insn); 417 + PRINT_STAT(no_insn_ops); 418 + PRINT_STAT(no_mem_ops); 419 + PRINT_STAT(no_reg); 420 + PRINT_STAT(no_dbginfo); 421 + PRINT_STAT(no_cuinfo); 422 + PRINT_STAT(no_var); 423 + PRINT_STAT(no_typeinfo); 424 + PRINT_STAT(invalid_size); 425 + PRINT_STAT(bad_offset); 426 + printf("\n"); 427 + 428 
+ #undef PRINT_STAT 429 + } 430 + 431 + static void print_annotate_item_stat(struct list_head *head, const char *title) 432 + { 433 + struct annotated_item_stat *istat, *pos, *iter; 434 + int total_good, total_bad, total; 435 + int sum1, sum2; 436 + LIST_HEAD(tmp); 437 + 438 + /* sort the list by count */ 439 + list_splice_init(head, &tmp); 440 + total_good = total_bad = 0; 441 + 442 + list_for_each_entry_safe(istat, pos, &tmp, list) { 443 + total_good += istat->good; 444 + total_bad += istat->bad; 445 + sum1 = istat->good + istat->bad; 446 + 447 + list_for_each_entry(iter, head, list) { 448 + sum2 = iter->good + iter->bad; 449 + if (sum1 > sum2) 450 + break; 451 + } 452 + list_move_tail(&istat->list, &iter->list); 453 + } 454 + total = total_good + total_bad; 455 + 456 + printf("Annotate %s stats\n", title); 457 + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n\n", total, 458 + total_good, 100.0 * total_good / (total ?: 1), 459 + total_bad, 100.0 * total_bad / (total ?: 1)); 460 + printf(" %-10s: %5s %5s\n", "Name", "Good", "Bad"); 461 + printf("-----------------------------------------------------------\n"); 462 + list_for_each_entry(istat, head, list) 463 + printf(" %-10s: %5d %5d\n", istat->name, istat->good, istat->bad); 464 + printf("\n"); 328 465 } 329 466 330 467 static void hists__find_annotations(struct hists *hists, ··· 477 326 { 478 327 struct rb_node *nd = rb_first_cached(&hists->entries), *next; 479 328 int key = K_RIGHT; 329 + 330 + if (ann->type_stat) 331 + print_annotate_data_stat(&ann_data_stat); 332 + if (ann->insn_stat) 333 + print_annotate_item_stat(&ann_insn_stat, "Instruction"); 480 334 481 335 while (nd) { 482 336 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); ··· 515 359 continue; 516 360 } 517 361 362 + if (ann->data_type) { 363 + /* skip unknown type */ 364 + if (he->mem_type->histograms == NULL) 365 + goto find_next; 366 + 367 + if (ann->target_data_type) { 368 + const char *type_name = 
he->mem_type->self.type_name; 369 + 370 + /* skip 'struct ' prefix in the type name */ 371 + if (strncmp(ann->target_data_type, "struct ", 7) && 372 + !strncmp(type_name, "struct ", 7)) 373 + type_name += 7; 374 + 375 + /* skip 'union ' prefix in the type name */ 376 + if (strncmp(ann->target_data_type, "union ", 6) && 377 + !strncmp(type_name, "union ", 6)) 378 + type_name += 6; 379 + 380 + if (strcmp(ann->target_data_type, type_name)) 381 + goto find_next; 382 + } 383 + 384 + print_annotated_data_header(he, evsel); 385 + print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0); 386 + printf("\n"); 387 + goto find_next; 388 + } 389 + 518 390 if (use_browser == 2) { 519 391 int ret; 520 392 int (*annotate)(struct hist_entry *he, 521 393 struct evsel *evsel, 522 - struct annotation_options *options, 523 394 struct hist_browser_timer *hbt); 524 395 525 396 annotate = dlsym(perf_gtk_handle, ··· 556 373 return; 557 374 } 558 375 559 - ret = annotate(he, evsel, &ann->opts, NULL); 376 + ret = annotate(he, evsel, NULL); 560 377 if (!ret || !ann->skip_missing) 561 378 return; 562 379 563 380 /* skip missing symbols */ 564 381 nd = rb_next(nd); 565 382 } else if (use_browser == 1) { 566 - key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); 383 + key = hist_entry__tui_annotate(he, evsel, NULL); 567 384 568 385 switch (key) { 569 386 case -1: ··· 605 422 goto out; 606 423 } 607 424 608 - if (!ann->opts.objdump_path) { 425 + if (!annotate_opts.objdump_path) { 609 426 ret = perf_env__lookup_objdump(&session->header.env, 610 - &ann->opts.objdump_path); 427 + &annotate_opts.objdump_path); 611 428 if (ret) 612 429 goto out; 613 430 } ··· 640 457 evsel__reset_sample_bit(pos, CALLCHAIN); 641 458 evsel__output_resort(pos, NULL); 642 459 643 - if (symbol_conf.event_group && !evsel__is_group_leader(pos)) 460 + /* 461 + * An event group needs to display other events too. 462 + * Let's delay printing until other events are processed. 
463 + */ 464 + if (symbol_conf.event_group) { 465 + if (!evsel__is_group_leader(pos)) { 466 + struct hists *leader_hists; 467 + 468 + leader_hists = evsel__hists(evsel__leader(pos)); 469 + hists__match(leader_hists, hists); 470 + hists__link(leader_hists, hists); 471 + } 644 472 continue; 473 + } 645 474 646 475 hists__find_annotations(hists, pos, ann); 647 476 } ··· 662 467 if (total_nr_samples == 0) { 663 468 ui__error("The %s data has no samples!\n", session->data->path); 664 469 goto out; 470 + } 471 + 472 + /* Display group events together */ 473 + evlist__for_each_entry(session->evlist, pos) { 474 + struct hists *hists = evsel__hists(pos); 475 + u32 nr_samples = hists->stats.nr_samples; 476 + 477 + if (nr_samples == 0) 478 + continue; 479 + 480 + if (!symbol_conf.event_group || !evsel__is_group_leader(pos)) 481 + continue; 482 + 483 + hists__find_annotations(hists, pos, ann); 665 484 } 666 485 667 486 if (use_browser == 2) { ··· 701 492 double pcnt = strtof(str, NULL); 702 493 703 494 ann->min_percent = pcnt; 495 + return 0; 496 + } 497 + 498 + static int parse_data_type(const struct option *opt, const char *str, int unset) 499 + { 500 + struct perf_annotate *ann = opt->value; 501 + 502 + ann->data_type = !unset; 503 + if (str) 504 + ann->target_data_type = strdup(str); 505 + 704 506 return 0; 705 507 } 706 508 ··· 778 558 "file", "vmlinux pathname"), 779 559 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 780 560 "load module symbols - WARNING: use only with -k and LIVE kernel"), 781 - OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, 561 + OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines, 782 562 "print matching source lines (may be slow)"), 783 - OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, 563 + OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path, 784 564 "Don't shorten the displayed pathnames"), 785 565 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, 786 566 "Skip symbols that cannot be annotated"), ··· 791 
571 OPT_CALLBACK(0, "symfs", NULL, "directory", 792 572 "Look for files with symbols relative to this directory", 793 573 symbol__config_symfs), 794 - OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, 574 + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, 795 575 "Interleave source code with assembly code (default)"), 796 - OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, 576 + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, 797 577 "Display raw encoding of assembly instructions (default)"), 798 578 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 799 579 "Specify disassembler style (e.g. -M intel for intel syntax)"), 800 - OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", 580 + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", 801 581 "Add prefix to source file path names in programs (with --prefix-strip)"), 802 - OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", 582 + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", 803 583 "Strip first N entries of source file path name in programs (with --prefix)"), 804 584 OPT_STRING(0, "objdump", &objdump_path, "path", 805 585 "objdump binary to use for disassembly and annotations"), ··· 818 598 OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", 819 599 "'always' (default), 'never' or 'auto' only applicable to --stdio mode", 820 600 stdio__config_color, "always"), 821 - OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", 601 + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", 822 602 "Set percent type local/global-period/hits", 823 603 annotate_parse_percent_type), 824 604 OPT_CALLBACK(0, "percent-limit", &annotate, "percent", ··· 826 606 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", 827 607 "Instruction Tracing options\n" ITRACE_HELP, 828 608 itrace_parse_synth_opts), 829 - 609 + OPT_CALLBACK_OPTARG(0, "data-type", &annotate, NULL, "name", 610 + "Show data type annotate for the 
memory accesses", 611 + parse_data_type), 612 + OPT_BOOLEAN(0, "type-stat", &annotate.type_stat, 613 + "Show stats for the data type annotation"), 614 + OPT_BOOLEAN(0, "insn-stat", &annotate.insn_stat, 615 + "Show instruction stats for the data type annotation"), 830 616 OPT_END() 831 617 }; 832 618 int ret; ··· 840 614 set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); 841 615 set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); 842 616 843 - annotation_options__init(&annotate.opts); 617 + annotation_options__init(); 844 618 845 619 ret = hists__init(); 846 620 if (ret < 0) 847 621 return ret; 848 622 849 - annotation_config__init(&annotate.opts); 623 + annotation_config__init(); 850 624 851 625 argc = parse_options(argc, argv, options, annotate_usage, 0); 852 626 if (argc) { ··· 861 635 } 862 636 863 637 if (disassembler_style) { 864 - annotate.opts.disassembler_style = strdup(disassembler_style); 865 - if (!annotate.opts.disassembler_style) 638 + annotate_opts.disassembler_style = strdup(disassembler_style); 639 + if (!annotate_opts.disassembler_style) 866 640 return -ENOMEM; 867 641 } 868 642 if (objdump_path) { 869 - annotate.opts.objdump_path = strdup(objdump_path); 870 - if (!annotate.opts.objdump_path) 643 + annotate_opts.objdump_path = strdup(objdump_path); 644 + if (!annotate_opts.objdump_path) 871 645 return -ENOMEM; 872 646 } 873 647 if (addr2line_path) { ··· 876 650 return -ENOMEM; 877 651 } 878 652 879 - if (annotate_check_args(&annotate.opts) < 0) 653 + if (annotate_check_args() < 0) 880 654 return -EINVAL; 881 655 882 656 #ifdef HAVE_GTK2_SUPPORT 883 657 if (symbol_conf.show_nr_samples && annotate.use_gtk) { 884 658 pr_err("--show-nr-samples is not available in --gtk mode at this time\n"); 885 659 return ret; 660 + } 661 + #endif 662 + 663 + #ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT 664 + if (annotate.data_type) { 665 + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); 666 + return 
-ENOTSUP; 886 667 } 887 668 #endif 888 669 ··· 935 702 use_browser = 2; 936 703 #endif 937 704 705 + /* FIXME: only support stdio for now */ 706 + if (annotate.data_type) { 707 + use_browser = 0; 708 + annotate_opts.annotate_src = false; 709 + symbol_conf.annotate_data_member = true; 710 + symbol_conf.annotate_data_sample = true; 711 + } 712 + 938 713 setup_browser(true); 939 714 940 715 /* ··· 950 709 * symbol, we do not care about the processes in annotate, 951 710 * set sort order to avoid repeated output. 952 711 */ 953 - sort_order = "dso,symbol"; 712 + if (annotate.data_type) 713 + sort_order = "dso,type"; 714 + else 715 + sort_order = "dso,symbol"; 954 716 955 717 /* 956 718 * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle ··· 975 731 #ifndef NDEBUG 976 732 perf_session__delete(annotate.session); 977 733 #endif 978 - annotation_options__exit(&annotate.opts); 734 + annotation_options__exit(); 979 735 980 736 return ret; 981 737 }
+1 -1
tools/perf/builtin-c2c.c
··· 2320 2320 nodes[node] = set; 2321 2321 2322 2322 /* empty node, skip */ 2323 - if (perf_cpu_map__empty(map)) 2323 + if (perf_cpu_map__has_any_cpu_or_is_empty(map)) 2324 2324 continue; 2325 2325 2326 2326 perf_cpu_map__for_each_cpu(cpu, idx, map) {
+1 -1
tools/perf/builtin-ftrace.c
··· 333 333 334 334 static int reset_tracing_cpu(void) 335 335 { 336 - struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); 336 + struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus(); 337 337 int ret; 338 338 339 339 ret = set_tracing_cpumask(cpumap);
+6
tools/perf/builtin-inject.c
··· 2265 2265 "perf inject [<options>]", 2266 2266 NULL 2267 2267 }; 2268 + 2269 + if (!inject.itrace_synth_opts.set) { 2270 + /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ 2271 + symbol_conf.lazy_load_kernel_maps = true; 2272 + } 2273 + 2268 2274 #ifndef HAVE_JITDUMP 2269 2275 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2270 2276 #endif
+3 -1
tools/perf/builtin-lock.c
··· 2285 2285 else 2286 2286 ev_name = strdup(contention_tracepoints[j].name); 2287 2287 2288 - if (!ev_name) 2288 + if (!ev_name) { 2289 + free(rec_argv); 2289 2290 return -ENOMEM; 2291 + } 2290 2292 2291 2293 rec_argv[i++] = "-e"; 2292 2294 rec_argv[i++] = ev_name;
+45 -48
tools/perf/builtin-record.c
··· 270 270 271 271 static int record__aio_enabled(struct record *rec); 272 272 static int record__comp_enabled(struct record *rec); 273 - static size_t zstd_compress(struct perf_session *session, struct mmap *map, 273 + static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 274 274 void *dst, size_t dst_size, void *src, size_t src_size); 275 275 276 276 #ifdef HAVE_AIO_SUPPORT ··· 405 405 */ 406 406 407 407 if (record__comp_enabled(aio->rec)) { 408 - size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 409 - mmap__mmap_len(map) - aio->size, 410 - buf, size); 408 + ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 409 + mmap__mmap_len(map) - aio->size, 410 + buf, size); 411 + if (compressed < 0) 412 + return (int)compressed; 413 + 414 + size = compressed; 411 415 } else { 412 416 memcpy(aio->data + aio->size, buf, size); 413 417 } ··· 637 633 struct record *rec = to; 638 634 639 635 if (record__comp_enabled(rec)) { 640 - size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 636 + ssize_t compressed = zstd_compress(rec->session, map, map->data, 637 + mmap__mmap_len(map), bf, size); 638 + 639 + if (compressed < 0) 640 + return (int)compressed; 641 + 642 + size = compressed; 641 643 bf = map->data; 642 644 } 643 645 ··· 1360 1350 evlist__for_each_entry(evlist, pos) { 1361 1351 try_again: 1362 1352 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1363 - if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1353 + if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1364 1354 if (verbose > 0) 1365 1355 ui__warning("%s\n", msg); 1366 1356 goto try_again; ··· 1537 1527 return size; 1538 1528 } 1539 1529 1540 - static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1530 + static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1541 1531 void *dst, size_t dst_size, void *src, size_t src_size) 1542 1532 { 
1543 - size_t compressed; 1533 + ssize_t compressed; 1544 1534 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1545 1535 struct zstd_data *zstd_data = &session->zstd_data; 1546 1536 ··· 1549 1539 1550 1540 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1551 1541 max_record_size, process_comp_header); 1542 + if (compressed < 0) 1543 + return compressed; 1552 1544 1553 1545 if (map && map->file) { 1554 1546 thread->bytes_transferred += src_size; ··· 1924 1912 static void record__read_lost_samples(struct record *rec) 1925 1913 { 1926 1914 struct perf_session *session = rec->session; 1927 - struct perf_record_lost_samples *lost; 1915 + struct perf_record_lost_samples *lost = NULL; 1928 1916 struct evsel *evsel; 1929 1917 1930 1918 /* there was an error during record__open */ 1931 1919 if (session->evlist == NULL) 1932 1920 return; 1933 - 1934 - lost = zalloc(PERF_SAMPLE_MAX_SIZE); 1935 - if (lost == NULL) { 1936 - pr_debug("Memory allocation failed\n"); 1937 - return; 1938 - } 1939 - 1940 - lost->header.type = PERF_RECORD_LOST_SAMPLES; 1941 1921 1942 1922 evlist__for_each_entry(session->evlist, evsel) { 1943 1923 struct xyarray *xy = evsel->core.sample_id; ··· 1953 1949 } 1954 1950 1955 1951 if (count.lost) { 1952 + if (!lost) { 1953 + lost = zalloc(sizeof(*lost) + 1954 + session->machines.host.id_hdr_size); 1955 + if (!lost) { 1956 + pr_debug("Memory allocation failed\n"); 1957 + return; 1958 + } 1959 + lost->header.type = PERF_RECORD_LOST_SAMPLES; 1960 + } 1956 1961 __record__save_lost_samples(rec, evsel, lost, 1957 1962 x, y, count.lost, 0); 1958 1963 } ··· 1969 1956 } 1970 1957 1971 1958 lost_count = perf_bpf_filter__lost_count(evsel); 1972 - if (lost_count) 1959 + if (lost_count) { 1960 + if (!lost) { 1961 + lost = zalloc(sizeof(*lost) + 1962 + session->machines.host.id_hdr_size); 1963 + if (!lost) { 1964 + pr_debug("Memory allocation failed\n"); 1965 + return; 1966 + } 1967 + 
lost->header.type = PERF_RECORD_LOST_SAMPLES; 1968 + } 1973 1969 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, 1974 1970 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 1971 + } 1975 1972 } 1976 1973 out: 1977 1974 free(lost); ··· 2239 2216 } 2240 2217 } 2241 2218 2242 - static void record__uniquify_name(struct record *rec) 2243 - { 2244 - struct evsel *pos; 2245 - struct evlist *evlist = rec->evlist; 2246 - char *new_name; 2247 - int ret; 2248 - 2249 - if (perf_pmus__num_core_pmus() == 1) 2250 - return; 2251 - 2252 - evlist__for_each_entry(evlist, pos) { 2253 - if (!evsel__is_hybrid(pos)) 2254 - continue; 2255 - 2256 - if (strchr(pos->name, '/')) 2257 - continue; 2258 - 2259 - ret = asprintf(&new_name, "%s/%s/", 2260 - pos->pmu_name, pos->name); 2261 - if (ret) { 2262 - free(pos->name); 2263 - pos->name = new_name; 2264 - } 2265 - } 2266 - } 2267 - 2268 2219 static int record__terminate_thread(struct record_thread *thread_data) 2269 2220 { 2270 2221 int err; ··· 2472 2475 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2473 2476 rec->opts.sample_id = true; 2474 2477 2475 - record__uniquify_name(rec); 2478 + evlist__uniquify_name(rec->evlist); 2476 2479 2477 2480 /* Debug message used by test scripts */ 2478 2481 pr_debug3("perf record opening and mmapping events\n"); ··· 3577 3580 if (cpu_map__is_dummy(cpus)) 3578 3581 return 0; 3579 3582 3580 - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3581 - if (cpu.cpu == -1) 3582 - continue; 3583 + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3583 3584 /* Return ENODEV is input cpu is greater than max cpu */ 3584 3585 if ((unsigned long)cpu.cpu > mask->nbits) 3585 3586 return -ENODEV; ··· 3984 3989 # undef set_nobuild 3985 3990 #endif 3986 3991 3992 + /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 3993 + symbol_conf.lazy_load_kernel_maps = true; 3987 3994 rec->opts.affinity = PERF_AFFINITY_SYS; 3988 3995 3989 3996 rec->evlist = evlist__new();
+74 -39
tools/perf/builtin-report.c
··· 96 96 bool stitch_lbr; 97 97 bool disable_order; 98 98 bool skip_empty; 99 + bool data_type; 99 100 int max_stack; 100 101 struct perf_read_values show_threads_values; 101 - struct annotation_options annotation_opts; 102 102 const char *pretty_printing_style; 103 103 const char *cpu_list; 104 104 const char *symbol_filter_str; ··· 171 171 struct mem_info *mi; 172 172 struct branch_info *bi; 173 173 174 - if (!ui__has_annotation() && !rep->symbol_ipc) 174 + if (!ui__has_annotation() && !rep->symbol_ipc && !rep->data_type) 175 175 return 0; 176 176 177 177 if (sort__mode == SORT_MODE__BRANCH) { ··· 541 541 evlist__for_each_entry(evlist, pos) { 542 542 ret = report__browse_block_hists(&rep->block_reports[i++].hist, 543 543 rep->min_percent, pos, 544 - &rep->session->header.env, 545 - &rep->annotation_opts); 544 + &rep->session->header.env); 546 545 if (ret != 0) 547 546 return ret; 548 547 } ··· 573 574 574 575 if (rep->total_cycles_mode) { 575 576 report__browse_block_hists(&rep->block_reports[i++].hist, 576 - rep->min_percent, pos, 577 - NULL, NULL); 577 + rep->min_percent, pos, NULL); 578 578 continue; 579 579 } 580 580 ··· 668 670 } 669 671 670 672 ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, 671 - &session->header.env, true, &rep->annotation_opts); 673 + &session->header.env, true); 672 674 /* 673 675 * Usually "ret" is the last pressed key, and we only 674 676 * care if the key notifies us to switch data file. 
··· 743 745 if (rep->symbol_ipc && sym && !sym->annotate2) { 744 746 struct evsel *evsel = hists_to_evsel(he->hists); 745 747 746 - symbol__annotate2(&he->ms, evsel, &rep->annotation_opts, NULL); 748 + symbol__annotate2(&he->ms, evsel, NULL); 747 749 } 748 750 749 751 return 0; ··· 857 859 return tasks_list(parent_task, machine); 858 860 } 859 861 862 + struct maps__fprintf_task_args { 863 + int indent; 864 + FILE *fp; 865 + size_t printed; 866 + }; 867 + 868 + static int maps__fprintf_task_cb(struct map *map, void *data) 869 + { 870 + struct maps__fprintf_task_args *args = data; 871 + const struct dso *dso = map__dso(map); 872 + u32 prot = map__prot(map); 873 + int ret; 874 + 875 + ret = fprintf(args->fp, 876 + "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", 877 + args->indent, "", map__start(map), map__end(map), 878 + prot & PROT_READ ? 'r' : '-', 879 + prot & PROT_WRITE ? 'w' : '-', 880 + prot & PROT_EXEC ? 'x' : '-', 881 + map__flags(map) ? 's' : 'p', 882 + map__pgoff(map), 883 + dso->id.ino, dso->name); 884 + 885 + if (ret < 0) 886 + return ret; 887 + 888 + args->printed += ret; 889 + return 0; 890 + } 891 + 860 892 static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) 861 893 { 862 - size_t printed = 0; 863 - struct map_rb_node *rb_node; 894 + struct maps__fprintf_task_args args = { 895 + .indent = indent, 896 + .fp = fp, 897 + .printed = 0, 898 + }; 864 899 865 - maps__for_each_entry(maps, rb_node) { 866 - struct map *map = rb_node->map; 867 - const struct dso *dso = map__dso(map); 868 - u32 prot = map__prot(map); 900 + maps__for_each_map(maps, maps__fprintf_task_cb, &args); 869 901 870 - printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", 871 - indent, "", map__start(map), map__end(map), 872 - prot & PROT_READ ? 'r' : '-', 873 - prot & PROT_WRITE ? 'w' : '-', 874 - prot & PROT_EXEC ? 'x' : '-', 875 - map__flags(map) ? 
's' : 'p', 876 - map__pgoff(map), 877 - dso->id.ino, dso->name); 878 - } 879 - 880 - return printed; 902 + return args.printed; 881 903 } 882 904 883 905 static void task__print_level(struct task *task, FILE *fp, int level) ··· 1359 1341 "list of cpus to profile"), 1360 1342 OPT_BOOLEAN('I', "show-info", &report.show_full_info, 1361 1343 "Display extended information about perf.data file"), 1362 - OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, 1344 + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, 1363 1345 "Interleave source code with assembly code (default)"), 1364 - OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, 1346 + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, 1365 1347 "Display raw encoding of assembly instructions (default)"), 1366 1348 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1367 1349 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1368 - OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", 1350 + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", 1369 1351 "Add prefix to source file path names in programs (with --prefix-strip)"), 1370 - OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", 1352 + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", 1371 1353 "Strip first N entries of source file path name in programs (with --prefix)"), 1372 1354 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 1373 1355 "Show a column with the sum of periods"), ··· 1419 1401 "Time span of interest (start,stop)"), 1420 1402 OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, 1421 1403 "Show inline function"), 1422 - OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", 1404 + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", 1423 1405 "Set percent type local/global-period/hits", 1424 1406 annotate_parse_percent_type), 1425 1407 OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, 
"Show times in nanosecs"), ··· 1444 1426 if (ret < 0) 1445 1427 goto exit; 1446 1428 1447 - annotation_options__init(&report.annotation_opts); 1429 + /* 1430 + * tasks_mode require access to exited threads to list those that are in 1431 + * the data file. Off-cpu events are synthesized after other events and 1432 + * reference exited threads. 1433 + */ 1434 + symbol_conf.keep_exited_threads = true; 1435 + 1436 + annotation_options__init(); 1448 1437 1449 1438 ret = perf_config(report__config, &report); 1450 1439 if (ret) ··· 1470 1445 } 1471 1446 1472 1447 if (disassembler_style) { 1473 - report.annotation_opts.disassembler_style = strdup(disassembler_style); 1474 - if (!report.annotation_opts.disassembler_style) 1448 + annotate_opts.disassembler_style = strdup(disassembler_style); 1449 + if (!annotate_opts.disassembler_style) 1475 1450 return -ENOMEM; 1476 1451 } 1477 1452 if (objdump_path) { 1478 - report.annotation_opts.objdump_path = strdup(objdump_path); 1479 - if (!report.annotation_opts.objdump_path) 1453 + annotate_opts.objdump_path = strdup(objdump_path); 1454 + if (!annotate_opts.objdump_path) 1480 1455 return -ENOMEM; 1481 1456 } 1482 1457 if (addr2line_path) { ··· 1485 1460 return -ENOMEM; 1486 1461 } 1487 1462 1488 - if (annotate_check_args(&report.annotation_opts) < 0) { 1463 + if (annotate_check_args() < 0) { 1489 1464 ret = -EINVAL; 1490 1465 goto exit; 1491 1466 } ··· 1640 1615 sort_order = NULL; 1641 1616 } 1642 1617 1618 + if (sort_order && strstr(sort_order, "type")) { 1619 + report.data_type = true; 1620 + annotate_opts.annotate_src = false; 1621 + 1622 + #ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT 1623 + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); 1624 + goto error; 1625 + #endif 1626 + } 1627 + 1643 1628 if (strcmp(input_name, "-") != 0) 1644 1629 setup_browser(true); 1645 1630 else ··· 1708 1673 * so don't allocate extra space that won't be used in the stdio 1709 1674 * implementation. 
1710 1675 */ 1711 - if (ui__has_annotation() || report.symbol_ipc || 1676 + if (ui__has_annotation() || report.symbol_ipc || report.data_type || 1712 1677 report.total_cycles_mode) { 1713 1678 ret = symbol__annotation_init(); 1714 1679 if (ret < 0) ··· 1727 1692 */ 1728 1693 symbol_conf.priv_size += sizeof(u32); 1729 1694 } 1730 - annotation_config__init(&report.annotation_opts); 1695 + annotation_config__init(); 1731 1696 } 1732 1697 1733 1698 if (symbol__init(&session->header.env) < 0) ··· 1781 1746 zstd_fini(&(session->zstd_data)); 1782 1747 perf_session__delete(session); 1783 1748 exit: 1784 - annotation_options__exit(&report.annotation_opts); 1749 + annotation_options__exit(); 1785 1750 free(sort_order_help); 1786 1751 free(field_order_help); 1787 1752 return ret;
+19 -14
tools/perf/builtin-stat.c
··· 653 653 if ((evsel__leader(counter) != counter) || 654 654 !(counter->core.leader->nr_members > 1)) 655 655 return COUNTER_SKIP; 656 - } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { 656 + } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) { 657 657 if (verbose > 0) 658 658 ui__warning("%s\n", msg); 659 659 return COUNTER_RETRY; ··· 1204 1204 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1205 1205 "list of cpus to monitor in system-wide"), 1206 1206 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1207 - "disable CPU count aggregation", AGGR_NONE), 1208 - OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), 1207 + "disable aggregation across CPUs or PMUs", AGGR_NONE), 1208 + OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode, 1209 + "disable aggregation the same as -A or -no-aggr", AGGR_NONE), 1209 1210 OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge, 1210 1211 "Merge identical named hybrid events"), 1211 1212 OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", ··· 1256 1255 OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, 1257 1256 "don't try to share events between metrics in a group"), 1258 1257 OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold, 1259 - "don't try to share events between metrics in a group "), 1258 + "disable adding events for the metric threshold calculation"), 1260 1259 OPT_BOOLEAN(0, "topdown", &topdown_run, 1261 1260 "measure top-down statistics"), 1262 1261 OPT_UINTEGER(0, "td-level", &stat_config.topdown_level, ··· 1317 1316 * be the first online CPU in the cache domain else use the 1318 1317 * first online CPU of the cache domain as the ID. 
1319 1318 */ 1320 - if (perf_cpu_map__empty(cpu_map)) 1319 + if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map)) 1321 1320 id = cpu.cpu; 1322 1321 else 1323 1322 id = perf_cpu_map__cpu(cpu_map, 0).cpu; ··· 1623 1622 * taking the highest cpu number to be the size of 1624 1623 * the aggregation translate cpumap. 1625 1624 */ 1626 - if (!perf_cpu_map__empty(evsel_list->core.user_requested_cpus)) 1625 + if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) 1627 1626 nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; 1628 1627 else 1629 1628 nr = 0; ··· 2290 2289 2291 2290 perf_event__read_stat_config(&stat_config, &event->stat_config); 2292 2291 2293 - if (perf_cpu_map__empty(st->cpus)) { 2292 + if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) { 2294 2293 if (st->aggr_mode != AGGR_UNSET) 2295 2294 pr_warning("warning: processing task data, aggregation mode not set\n"); 2296 2295 } else if (st->aggr_mode != AGGR_UNSET) { ··· 2696 2695 */ 2697 2696 if (metrics) { 2698 2697 const char *pmu = parse_events_option_args.pmu_filter ?: "all"; 2698 + int ret = metricgroup__parse_groups(evsel_list, pmu, metrics, 2699 + stat_config.metric_no_group, 2700 + stat_config.metric_no_merge, 2701 + stat_config.metric_no_threshold, 2702 + stat_config.user_requested_cpu_list, 2703 + stat_config.system_wide, 2704 + &stat_config.metric_events); 2699 2705 2700 - metricgroup__parse_groups(evsel_list, pmu, metrics, 2701 - stat_config.metric_no_group, 2702 - stat_config.metric_no_merge, 2703 - stat_config.metric_no_threshold, 2704 - stat_config.user_requested_cpu_list, 2705 - stat_config.system_wide, 2706 - &stat_config.metric_events); 2707 2706 zfree(&metrics); 2707 + if (ret) { 2708 + status = ret; 2709 + goto out; 2710 + } 2708 2711 } 2709 2712 2710 2713 if (add_default_attributes())
+26 -26
tools/perf/builtin-top.c
··· 147 147 return err; 148 148 } 149 149 150 - err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); 150 + err = symbol__annotate(&he->ms, evsel, NULL); 151 151 if (err == 0) { 152 152 top->sym_filter_entry = he; 153 153 } else { ··· 261 261 goto out_unlock; 262 262 263 263 printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); 264 - printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); 264 + printf(" Events Pcnt (>=%d%%)\n", annotate_opts.min_pcnt); 265 265 266 - more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); 266 + more = symbol__annotate_printf(&he->ms, top->sym_evsel); 267 267 268 268 if (top->evlist->enabled) { 269 269 if (top->zero) ··· 450 450 451 451 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); 452 452 453 - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); 453 + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt); 454 454 fprintf(stdout, "\t[s] annotate symbol. 
\t(%s)\n", name?: "NULL"); 455 455 fprintf(stdout, "\t[S] stop annotation.\n"); 456 456 ··· 553 553 prompt_integer(&top->count_filter, "Enter display event count filter"); 554 554 break; 555 555 case 'F': 556 - prompt_percent(&top->annotation_opts.min_pcnt, 556 + prompt_percent(&annotate_opts.min_pcnt, 557 557 "Enter details display event filter (percent)"); 558 558 break; 559 559 case 'K': ··· 646 646 } 647 647 648 648 ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, 649 - &top->session->header.env, !top->record_opts.overwrite, 650 - &top->annotation_opts); 649 + &top->session->header.env, !top->record_opts.overwrite); 651 650 if (ret == K_RELOAD) { 652 651 top->zero = true; 653 652 goto repeat; ··· 1026 1027 1027 1028 evlist__for_each_entry(evlist, counter) { 1028 1029 try_again: 1029 - if (evsel__open(counter, top->evlist->core.user_requested_cpus, 1030 - top->evlist->core.threads) < 0) { 1030 + if (evsel__open(counter, counter->core.cpus, 1031 + counter->core.threads) < 0) { 1031 1032 1032 1033 /* 1033 1034 * Specially handle overwrite fall back. 
··· 1043 1044 perf_top_overwrite_fallback(top, counter)) 1044 1045 goto try_again; 1045 1046 1046 - if (evsel__fallback(counter, errno, msg, sizeof(msg))) { 1047 + if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) { 1047 1048 if (verbose > 0) 1048 1049 ui__warning("%s\n", msg); 1049 1050 goto try_again; ··· 1240 1241 pthread_t thread, thread_process; 1241 1242 int ret; 1242 1243 1243 - if (!top->annotation_opts.objdump_path) { 1244 + if (!annotate_opts.objdump_path) { 1244 1245 ret = perf_env__lookup_objdump(&top->session->header.env, 1245 - &top->annotation_opts.objdump_path); 1246 + &annotate_opts.objdump_path); 1246 1247 if (ret) 1247 1248 return ret; 1248 1249 } ··· 1298 1299 } 1299 1300 } 1300 1301 1302 + evlist__uniquify_name(top->evlist); 1301 1303 ret = perf_top__start_counters(top); 1302 1304 if (ret) 1303 1305 return ret; ··· 1536 1536 "only consider symbols in these comms"), 1537 1537 OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 1538 1538 "only consider these symbols"), 1539 - OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, 1539 + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, 1540 1540 "Interleave source code with assembly code (default)"), 1541 - OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, 1541 + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, 1542 1542 "Display raw encoding of assembly instructions (default)"), 1543 1543 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, 1544 1544 "Enable kernel symbol demangling"), ··· 1549 1549 "addr2line binary to use for line numbers"), 1550 1550 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1551 1551 "Specify disassembler style (e.g. 
-M intel for intel syntax)"), 1552 - OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", 1552 + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", 1553 1553 "Add prefix to source file path names in programs (with --prefix-strip)"), 1554 - OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", 1554 + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", 1555 1555 "Strip first N entries of source file path name in programs (with --prefix)"), 1556 1556 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), 1557 1557 OPT_CALLBACK(0, "percent-limit", &top, "percent", ··· 1609 1609 if (status < 0) 1610 1610 return status; 1611 1611 1612 - annotation_options__init(&top.annotation_opts); 1612 + annotation_options__init(); 1613 1613 1614 - top.annotation_opts.min_pcnt = 5; 1615 - top.annotation_opts.context = 4; 1614 + annotate_opts.min_pcnt = 5; 1615 + annotate_opts.context = 4; 1616 1616 1617 1617 top.evlist = evlist__new(); 1618 1618 if (top.evlist == NULL) ··· 1642 1642 usage_with_options(top_usage, options); 1643 1643 1644 1644 if (disassembler_style) { 1645 - top.annotation_opts.disassembler_style = strdup(disassembler_style); 1646 - if (!top.annotation_opts.disassembler_style) 1645 + annotate_opts.disassembler_style = strdup(disassembler_style); 1646 + if (!annotate_opts.disassembler_style) 1647 1647 return -ENOMEM; 1648 1648 } 1649 1649 if (objdump_path) { 1650 - top.annotation_opts.objdump_path = strdup(objdump_path); 1651 - if (!top.annotation_opts.objdump_path) 1650 + annotate_opts.objdump_path = strdup(objdump_path); 1651 + if (!annotate_opts.objdump_path) 1652 1652 return -ENOMEM; 1653 1653 } 1654 1654 if (addr2line_path) { ··· 1661 1661 if (status) 1662 1662 goto out_delete_evlist; 1663 1663 1664 - if (annotate_check_args(&top.annotation_opts) < 0) 1664 + if (annotate_check_args() < 0) 1665 1665 goto out_delete_evlist; 1666 1666 1667 1667 if (!top.evlist->core.nr_entries) { ··· 1787 1787 if (status < 0) 
1788 1788 goto out_delete_evlist; 1789 1789 1790 - annotation_config__init(&top.annotation_opts); 1790 + annotation_config__init(); 1791 1791 1792 1792 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1793 1793 status = symbol__init(NULL); ··· 1840 1840 out_delete_evlist: 1841 1841 evlist__delete(top.evlist); 1842 1842 perf_session__delete(top.session); 1843 - annotation_options__exit(&top.annotation_opts); 1843 + annotation_options__exit(); 1844 1844 1845 1845 return status; 1846 1846 }
+2 -4
tools/perf/builtin-trace.c
··· 2470 2470 static const char *errno_to_name(struct evsel *evsel, int err) 2471 2471 { 2472 2472 struct perf_env *env = evsel__env(evsel); 2473 - const char *arch_name = perf_env__arch(env); 2474 2473 2475 - return arch_syscalls__strerrno(arch_name, err); 2474 + return perf_env__arch_strerrno(env, err); 2476 2475 } 2477 2476 2478 2477 static int trace__sys_exit(struct trace *trace, struct evsel *evsel, ··· 4263 4264 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 4264 4265 4265 4266 if (trace->errno_summary && stats->nr_failures) { 4266 - const char *arch_name = perf_env__arch(trace->host->env); 4267 4267 int e; 4268 4268 4269 4269 for (e = 0; e < stats->max_errno; ++e) { 4270 4270 if (stats->errnos[e] != 0) 4271 - fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); 4271 + fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]); 4272 4272 } 4273 4273 } 4274 4274 }
+80 -6
tools/perf/perf-archive.sh
··· 4 4 # Arnaldo Carvalho de Melo <acme@redhat.com> 5 5 6 6 PERF_DATA=perf.data 7 - if [ $# -ne 0 ] ; then 8 - PERF_DATA=$1 7 + PERF_SYMBOLS=perf.symbols 8 + PERF_ALL=perf.all 9 + ALL=0 10 + UNPACK=0 11 + 12 + while [ $# -gt 0 ] ; do 13 + if [ $1 == "--all" ]; then 14 + ALL=1 15 + shift 16 + elif [ $1 == "--unpack" ]; then 17 + UNPACK=1 18 + shift 19 + else 20 + PERF_DATA=$1 21 + UNPACK_TAR=$1 22 + shift 23 + fi 24 + done 25 + 26 + if [ $UNPACK -eq 1 ]; then 27 + if [ ! -z "$UNPACK_TAR" ]; then # tar given as an argument 28 + if [ ! -e "$UNPACK_TAR" ]; then 29 + echo "Provided file $UNPACK_TAR does not exist" 30 + exit 1 31 + fi 32 + TARGET="$UNPACK_TAR" 33 + else # search for perf tar in the current directory 34 + TARGET=`find . -regex "\./perf.*\.tar\.bz2"` 35 + TARGET_NUM=`echo -n "$TARGET" | grep -c '^'` 36 + 37 + if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then 38 + echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET" 39 + echo "Provide the requested file as an argument" 40 + exit 1 41 + else 42 + echo "Found target file for unpacking: $TARGET" 43 + fi 44 + fi 45 + 46 + if [[ "$TARGET" =~ (\./)?$PERF_ALL.*.tar.bz2 ]]; then # perf tar generated by --all option 47 + TAR_CONTENTS=`tar tvf "$TARGET" | tr -s " " | cut -d " " -f 6` 48 + VALID_TAR=`echo "$TAR_CONTENTS" | grep "$PERF_SYMBOLS.tar.bz2" | wc -l` # check if it contains a sub-tar perf.symbols 49 + if [ $VALID_TAR -ne 1 ]; then 50 + echo "Error: $TARGET file is not valid (contains zero or multiple sub-tar files with debug symbols)" 51 + exit 1 52 + fi 53 + 54 + INTERSECT=`comm -12 <(ls) <(echo "$TAR_CONTENTS") | tr "\n" " "` # check for overwriting 55 + if [ ! -z "$INTERSECT" ]; then # prompt if file(s) already exist in the current directory 56 + echo "File(s) ${INTERSECT::-1} already exist in the current directory." 57 + while true; do 58 + read -p 'Do you wish to overwrite them? 
' yn 59 + case $yn in 60 + [Yy]* ) break;; 61 + [Nn]* ) exit 1;; 62 + * ) echo "Please answer yes or no.";; 63 + esac 64 + done 65 + fi 66 + 67 + # unzip the perf.data file in the current working directory and debug symbols in ~/.debug directory 68 + tar xvf $TARGET && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug 69 + 70 + else # perf tar generated by perf archive (contains only debug symbols) 71 + tar xvf $TARGET -C ~/.debug 72 + fi 73 + exit 0 9 74 fi 10 75 11 76 # ··· 104 39 echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST 105 40 done 106 41 107 - tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST 108 - rm $MANIFEST $BUILDIDS || true 42 + if [ $ALL -eq 1 ]; then # pack perf.data file together with tar containing debug symbols 43 + HOSTNAME=$(hostname) 44 + DATE=$(date '+%Y%m%d-%H%M%S') 45 + tar cjf $PERF_SYMBOLS.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST 46 + tar cjf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 $PERF_DATA $PERF_SYMBOLS.tar.bz2 47 + rm $PERF_SYMBOLS.tar.bz2 $MANIFEST $BUILDIDS || true 48 + else # pack only the debug symbols 49 + tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST 50 + rm $MANIFEST $BUILDIDS || true 51 + fi 52 + 109 53 echo -e "Now please run:\n" 110 - echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" 111 - echo "wherever you need to run 'perf report' on." 54 + echo -e "$ perf archive --unpack\n" 55 + echo "or unpack the tar manually wherever you need to run 'perf report' on." 112 56 exit 0
+30
tools/perf/perf.c
··· 39 39 #include <linux/zalloc.h> 40 40 41 41 static int use_pager = -1; 42 + static FILE *debug_fp = NULL; 42 43 43 44 struct cmd_struct { 44 45 const char *cmd; ··· 163 162 } 164 163 } 165 164 165 + static int set_debug_file(const char *path) 166 + { 167 + debug_fp = fopen(path, "w"); 168 + if (!debug_fp) { 169 + fprintf(stderr, "Open debug file '%s' failed: %s\n", 170 + path, strerror(errno)); 171 + return -1; 172 + } 173 + 174 + debug_set_file(debug_fp); 175 + return 0; 176 + } 177 + 166 178 struct option options[] = { 167 179 OPT_ARGUMENT("help", "help"), 168 180 OPT_ARGUMENT("version", "version"), ··· 188 174 OPT_ARGUMENT("list-cmds", "list-cmds"), 189 175 OPT_ARGUMENT("list-opts", "list-opts"), 190 176 OPT_ARGUMENT("debug", "debug"), 177 + OPT_ARGUMENT("debug-file", "debug-file"), 191 178 OPT_END() 192 179 }; 193 180 ··· 302 287 303 288 (*argv)++; 304 289 (*argc)--; 290 + } else if (!strcmp(cmd, "--debug-file")) { 291 + if (*argc < 2) { 292 + fprintf(stderr, "No path given for --debug-file.\n"); 293 + usage(perf_usage_string); 294 + } 295 + 296 + if (set_debug_file((*argv)[1])) 297 + usage(perf_usage_string); 298 + 299 + (*argv)++; 300 + (*argc)--; 301 + 305 302 } else { 306 303 fprintf(stderr, "Unknown option: %s\n", cmd); 307 304 usage(perf_usage_string); ··· 574 547 fprintf(stderr, "Failed to run command '%s': %s\n", 575 548 cmd, str_error_r(errno, sbuf, sizeof(sbuf))); 576 549 out: 550 + if (debug_fp) 551 + fclose(debug_fp); 552 + 577 553 return 1; 578 554 }
+1 -1
tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
··· 110 110 { 111 111 "PublicDescription": "Flushes due to memory hazards", 112 112 "EventCode": "0x121", 113 - "EventName": "BPU_FLUSH_MEM_FAULT", 113 + "EventName": "GPC_FLUSH_MEM_FAULT", 114 114 "BriefDescription": "Flushes due to memory hazards" 115 115 }, 116 116 {
+125
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "BR_IMMED_SPEC" 4 + }, 5 + { 6 + "ArchStdEvent": "BR_RETURN_SPEC" 7 + }, 8 + { 9 + "ArchStdEvent": "BR_INDIRECT_SPEC" 10 + }, 11 + { 12 + "ArchStdEvent": "BR_MIS_PRED" 13 + }, 14 + { 15 + "ArchStdEvent": "BR_PRED" 16 + }, 17 + { 18 + "PublicDescription": "Instruction architecturally executed, branch not taken", 19 + "EventCode": "0x8107", 20 + "EventName": "BR_SKIP_RETIRED", 21 + "BriefDescription": "Instruction architecturally executed, branch not taken" 22 + }, 23 + { 24 + "PublicDescription": "Instruction architecturally executed, immediate branch taken", 25 + "EventCode": "0x8108", 26 + "EventName": "BR_IMMED_TAKEN_RETIRED", 27 + "BriefDescription": "Instruction architecturally executed, immediate branch taken" 28 + }, 29 + { 30 + "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired", 31 + "EventCode": "0x810c", 32 + "EventName": "BR_INDNR_TAKEN_RETIRED", 33 + "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired" 34 + }, 35 + { 36 + "PublicDescription": "Instruction architecturally executed, predicted immediate branch", 37 + "EventCode": "0x8110", 38 + "EventName": "BR_IMMED_PRED_RETIRED", 39 + "BriefDescription": "Instruction architecturally executed, predicted immediate branch" 40 + }, 41 + { 42 + "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch", 43 + "EventCode": "0x8111", 44 + "EventName": "BR_IMMED_MIS_PRED_RETIRED", 45 + "BriefDescription": "Instruction architecturally executed, mispredicted immediate branch" 46 + }, 47 + { 48 + "PublicDescription": "Instruction architecturally executed, predicted indirect branch", 49 + "EventCode": "0x8112", 50 + "EventName": "BR_IND_PRED_RETIRED", 51 + "BriefDescription": "Instruction architecturally executed, predicted indirect branch" 52 + }, 53 + { 54 + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch", 55 + 
"EventCode": "0x8113", 56 + "EventName": "BR_IND_MIS_PRED_RETIRED", 57 + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch" 58 + }, 59 + { 60 + "PublicDescription": "Instruction architecturally executed, predicted procedure return", 61 + "EventCode": "0x8114", 62 + "EventName": "BR_RETURN_PRED_RETIRED", 63 + "BriefDescription": "Instruction architecturally executed, predicted procedure return" 64 + }, 65 + { 66 + "PublicDescription": "Instruction architecturally executed, mispredicted procedure return", 67 + "EventCode": "0x8115", 68 + "EventName": "BR_RETURN_MIS_PRED_RETIRED", 69 + "BriefDescription": "Instruction architecturally executed, mispredicted procedure return" 70 + }, 71 + { 72 + "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return", 73 + "EventCode": "0x8116", 74 + "EventName": "BR_INDNR_PRED_RETIRED", 75 + "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return" 76 + }, 77 + { 78 + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return", 79 + "EventCode": "0x8117", 80 + "EventName": "BR_INDNR_MIS_PRED_RETIRED", 81 + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return" 82 + }, 83 + { 84 + "PublicDescription": "Instruction architecturally executed, predicted branch, taken", 85 + "EventCode": "0x8118", 86 + "EventName": "BR_TAKEN_PRED_RETIRED", 87 + "BriefDescription": "Instruction architecturally executed, predicted branch, taken" 88 + }, 89 + { 90 + "PublicDescription": "Instruction architecturally executed, mispredicted branch, taken", 91 + "EventCode": "0x8119", 92 + "EventName": "BR_TAKEN_MIS_PRED_RETIRED", 93 + "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken" 94 + }, 95 + { 96 + "PublicDescription": "Instruction architecturally executed, predicted branch, not taken", 97 + 
"EventCode": "0x811a", 98 + "EventName": "BR_SKIP_PRED_RETIRED", 99 + "BriefDescription": "Instruction architecturally executed, predicted branch, not taken" 100 + }, 101 + { 102 + "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken", 103 + "EventCode": "0x811b", 104 + "EventName": "BR_SKIP_MIS_PRED_RETIRED", 105 + "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken" 106 + }, 107 + { 108 + "PublicDescription": "Instruction architecturally executed, predicted branch", 109 + "EventCode": "0x811c", 110 + "EventName": "BR_PRED_RETIRED", 111 + "BriefDescription": "Instruction architecturally executed, predicted branch" 112 + }, 113 + { 114 + "PublicDescription": "Instruction architecturally executed, indirect branch", 115 + "EventCode": "0x811d", 116 + "EventName": "BR_IND_RETIRED", 117 + "BriefDescription": "Instruction architecturally executed, indirect branch" 118 + }, 119 + { 120 + "PublicDescription": "Branch Record captured.", 121 + "EventCode": "0x811f", 122 + "EventName": "BRB_FILTRATE", 123 + "BriefDescription": "Branch Record captured." 124 + } 125 + ]
+20
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "CPU_CYCLES" 4 + }, 5 + { 6 + "ArchStdEvent": "BUS_CYCLES" 7 + }, 8 + { 9 + "ArchStdEvent": "BUS_ACCESS_RD" 10 + }, 11 + { 12 + "ArchStdEvent": "BUS_ACCESS_WR" 13 + }, 14 + { 15 + "ArchStdEvent": "BUS_ACCESS" 16 + }, 17 + { 18 + "ArchStdEvent": "CNT_CYCLES" 19 + } 20 + ]
+206
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "L1D_CACHE_RD" 4 + }, 5 + { 6 + "ArchStdEvent": "L1D_CACHE_WR" 7 + }, 8 + { 9 + "ArchStdEvent": "L1D_CACHE_REFILL_RD" 10 + }, 11 + { 12 + "ArchStdEvent": "L1D_CACHE_INVAL" 13 + }, 14 + { 15 + "ArchStdEvent": "L1D_TLB_REFILL_RD" 16 + }, 17 + { 18 + "ArchStdEvent": "L1D_TLB_REFILL_WR" 19 + }, 20 + { 21 + "ArchStdEvent": "L2D_CACHE_RD" 22 + }, 23 + { 24 + "ArchStdEvent": "L2D_CACHE_WR" 25 + }, 26 + { 27 + "ArchStdEvent": "L2D_CACHE_REFILL_RD" 28 + }, 29 + { 30 + "ArchStdEvent": "L2D_CACHE_REFILL_WR" 31 + }, 32 + { 33 + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" 34 + }, 35 + { 36 + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" 37 + }, 38 + { 39 + "ArchStdEvent": "L2D_CACHE_INVAL" 40 + }, 41 + { 42 + "ArchStdEvent": "L1I_CACHE_REFILL" 43 + }, 44 + { 45 + "ArchStdEvent": "L1I_TLB_REFILL" 46 + }, 47 + { 48 + "ArchStdEvent": "L1D_CACHE_REFILL" 49 + }, 50 + { 51 + "ArchStdEvent": "L1D_CACHE" 52 + }, 53 + { 54 + "ArchStdEvent": "L1D_TLB_REFILL" 55 + }, 56 + { 57 + "ArchStdEvent": "L1I_CACHE" 58 + }, 59 + { 60 + "ArchStdEvent": "L2D_CACHE" 61 + }, 62 + { 63 + "ArchStdEvent": "L2D_CACHE_REFILL" 64 + }, 65 + { 66 + "ArchStdEvent": "L2D_CACHE_WB" 67 + }, 68 + { 69 + "ArchStdEvent": "L1D_TLB" 70 + }, 71 + { 72 + "ArchStdEvent": "L1I_TLB" 73 + }, 74 + { 75 + "ArchStdEvent": "L2D_TLB_REFILL" 76 + }, 77 + { 78 + "ArchStdEvent": "L2I_TLB_REFILL" 79 + }, 80 + { 81 + "ArchStdEvent": "L2D_TLB" 82 + }, 83 + { 84 + "ArchStdEvent": "L2I_TLB" 85 + }, 86 + { 87 + "ArchStdEvent": "DTLB_WALK" 88 + }, 89 + { 90 + "ArchStdEvent": "ITLB_WALK" 91 + }, 92 + { 93 + "ArchStdEvent": "L1D_CACHE_REFILL_WR" 94 + }, 95 + { 96 + "ArchStdEvent": "L1D_CACHE_LMISS_RD" 97 + }, 98 + { 99 + "ArchStdEvent": "L1I_CACHE_LMISS" 100 + }, 101 + { 102 + "ArchStdEvent": "L2D_CACHE_LMISS_RD" 103 + }, 104 + { 105 + "PublicDescription": "Level 1 data or unified cache demand access", 106 + "EventCode": "0x8140", 107 + "EventName": "L1D_CACHE_RW", 108 + "BriefDescription": "Level 1 data or unified cache 
demand access" 109 + }, 110 + { 111 + "PublicDescription": "Level 1 data or unified cache preload or prefetch", 112 + "EventCode": "0x8142", 113 + "EventName": "L1D_CACHE_PRFM", 114 + "BriefDescription": "Level 1 data or unified cache preload or prefetch" 115 + }, 116 + { 117 + "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch", 118 + "EventCode": "0x8146", 119 + "EventName": "L1D_CACHE_REFILL_PRFM", 120 + "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch" 121 + }, 122 + { 123 + "ArchStdEvent": "L1D_TLB_RD" 124 + }, 125 + { 126 + "ArchStdEvent": "L1D_TLB_WR" 127 + }, 128 + { 129 + "ArchStdEvent": "L2D_TLB_REFILL_RD" 130 + }, 131 + { 132 + "ArchStdEvent": "L2D_TLB_REFILL_WR" 133 + }, 134 + { 135 + "ArchStdEvent": "L2D_TLB_RD" 136 + }, 137 + { 138 + "ArchStdEvent": "L2D_TLB_WR" 139 + }, 140 + { 141 + "PublicDescription": "L1D TLB miss", 142 + "EventCode": "0xD600", 143 + "EventName": "L1D_TLB_MISS", 144 + "BriefDescription": "L1D TLB miss" 145 + }, 146 + { 147 + "PublicDescription": "Level 1 prefetcher, load prefetch requests generated", 148 + "EventCode": "0xd606", 149 + "EventName": "L1_PREFETCH_LD_GEN", 150 + "BriefDescription": "Level 1 prefetcher, load prefetch requests generated" 151 + }, 152 + { 153 + "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache", 154 + "EventCode": "0xd607", 155 + "EventName": "L1_PREFETCH_LD_FILL", 156 + "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache" 157 + }, 158 + { 159 + "PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated", 160 + "EventCode": "0xd608", 161 + "EventName": "L1_PREFETCH_L2_REQ", 162 + "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated" 163 + }, 164 + { 165 + "PublicDescription": "L1 prefetcher, distance was reset", 166 + "EventCode": "0xd609", 167 + "EventName": "L1_PREFETCH_DIST_RST", 168 + "BriefDescription": "L1 prefetcher, distance was 
reset" 169 + }, 170 + { 171 + "PublicDescription": "L1 prefetcher, distance was increased", 172 + "EventCode": "0xd60a", 173 + "EventName": "L1_PREFETCH_DIST_INC", 174 + "BriefDescription": "L1 prefetcher, distance was increased" 175 + }, 176 + { 177 + "PublicDescription": "Level 1 prefetcher, table entry is trained", 178 + "EventCode": "0xd60b", 179 + "EventName": "L1_PREFETCH_ENTRY_TRAINED", 180 + "BriefDescription": "Level 1 prefetcher, table entry is trained" 181 + }, 182 + { 183 + "PublicDescription": "L1 data cache refill - Read or Write", 184 + "EventCode": "0xd60e", 185 + "EventName": "L1D_CACHE_REFILL_RW", 186 + "BriefDescription": "L1 data cache refill - Read or Write" 187 + }, 188 + { 189 + "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills", 190 + "EventCode": "0xD701", 191 + "EventName": "L2C_INST_REFILL", 192 + "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills" 193 + }, 194 + { 195 + "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills", 196 + "EventCode": "0xD702", 197 + "EventName": "L2C_DATA_REFILL", 198 + "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills" 199 + }, 200 + { 201 + "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated", 202 + "EventCode": "0xD703", 203 + "EventName": "L2_PREFETCH_REQ", 204 + "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated" 205 + } 206 + ]
+464
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json
··· 1 + [ 2 + { 3 + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", 4 + "EventCode": "0x10A", 5 + "EventName": "L2_PREFETCH_REFILL", 6 + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" 7 + }, 8 + { 9 + "PublicDescription": "Level 2 prefetch requests, late", 10 + "EventCode": "0x10B", 11 + "EventName": "L2_PREFETCH_UPGRADE", 12 + "BriefDescription": "Level 2 prefetch requests, late" 13 + }, 14 + { 15 + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", 16 + "EventCode": "0x110", 17 + "EventName": "BPU_HIT_BTB", 18 + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" 19 + }, 20 + { 21 + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", 22 + "EventCode": "0x111", 23 + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", 24 + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" 25 + }, 26 + { 27 + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", 28 + "EventCode": "0x112", 29 + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", 30 + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" 31 + }, 32 + { 33 + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", 34 + "EventCode": "0x113", 35 + "EventName": "BPU_HIT_RSB", 36 + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" 37 + }, 38 + { 39 + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", 40 + "EventCode": "0x114", 41 + "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB", 42 + "BriefDescription": "Predictable unconditional branch 
speculatively executed that did not hit any level of BTB" 43 + }, 44 + { 45 + "PublicDescription": "Predictable branch speculatively executed, unpredicted", 46 + "EventCode": "0x115", 47 + "EventName": "BPU_BRANCH_NO_HIT", 48 + "BriefDescription": "Predictable branch speculatively executed, unpredicted" 49 + }, 50 + { 51 + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", 52 + "EventCode": "0x116", 53 + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", 54 + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" 55 + }, 56 + { 57 + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", 58 + "EventCode": "0x117", 59 + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", 60 + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" 61 + }, 62 + { 63 + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", 64 + "EventCode": "0x118", 65 + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", 66 + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" 67 + }, 68 + { 69 + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", 70 + "EventCode": "0x119", 71 + "EventName": "BPU_HIT_RSB_AND_MISPREDICT", 72 + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" 73 + }, 74 + { 75 + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", 76 + 
"EventCode": "0x11a", 77 + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", 78 + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" 79 + }, 80 + { 81 + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", 82 + "EventCode": "0x11b", 83 + "EventName": "BPU_NO_PREDICTION_MISPREDICT", 84 + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" 85 + }, 86 + { 87 + "PublicDescription": "Preditable branch update the BTB region buffer entry", 88 + "EventCode": "0x11c", 89 + "EventName": "BPU_BTB_UPDATE", 90 + "BriefDescription": "Preditable branch update the BTB region buffer entry" 91 + }, 92 + { 93 + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", 94 + "EventCode": "0x11d", 95 + "EventName": "BPU_RSB_FULL_STALL", 96 + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" 97 + }, 98 + { 99 + "PublicDescription": "Macro-ops speculatively decoded", 100 + "EventCode": "0x11f", 101 + "EventName": "ICF_INST_SPEC_DECODE", 102 + "BriefDescription": "Macro-ops speculatively decoded" 103 + }, 104 + { 105 + "PublicDescription": "Flushes", 106 + "EventCode": "0x120", 107 + "EventName": "GPC_FLUSH", 108 + "BriefDescription": "Flushes" 109 + }, 110 + { 111 + "PublicDescription": "Flushes due to memory hazards", 112 + "EventCode": "0x121", 113 + "EventName": "GPC_FLUSH_MEM_FAULT", 114 + "BriefDescription": "Flushes due to memory hazards" 115 + }, 116 + { 117 + "PublicDescription": "ETM extout bit 0", 118 + "EventCode": "0x141", 119 + "EventName": "MSC_ETM_EXTOUT0", 120 + "BriefDescription": "ETM extout bit 0" 121 + }, 122 + { 123 + "PublicDescription": "ETM extout bit 1", 124 + "EventCode": "0x142", 125 + "EventName": "MSC_ETM_EXTOUT1", 126 + "BriefDescription": "ETM extout bit 1" 127 + }, 128 + { 129 + 
"PublicDescription": "ETM extout bit 2", 130 + "EventCode": "0x143", 131 + "EventName": "MSC_ETM_EXTOUT2", 132 + "BriefDescription": "ETM extout bit 2" 133 + }, 134 + { 135 + "PublicDescription": "ETM extout bit 3", 136 + "EventCode": "0x144", 137 + "EventName": "MSC_ETM_EXTOUT3", 138 + "BriefDescription": "ETM extout bit 3" 139 + }, 140 + { 141 + "PublicDescription": "Bus request sn", 142 + "EventCode": "0x156", 143 + "EventName": "L2C_SNOOP", 144 + "BriefDescription": "Bus request sn" 145 + }, 146 + { 147 + "PublicDescription": "L2 TXDAT LCRD blocked", 148 + "EventCode": "0x169", 149 + "EventName": "L2C_DAT_CRD_STALL", 150 + "BriefDescription": "L2 TXDAT LCRD blocked" 151 + }, 152 + { 153 + "PublicDescription": "L2 TXRSP LCRD blocked", 154 + "EventCode": "0x16a", 155 + "EventName": "L2C_RSP_CRD_STALL", 156 + "BriefDescription": "L2 TXRSP LCRD blocked" 157 + }, 158 + { 159 + "PublicDescription": "L2 TXREQ LCRD blocked", 160 + "EventCode": "0x16b", 161 + "EventName": "L2C_REQ_CRD_STALL", 162 + "BriefDescription": "L2 TXREQ LCRD blocked" 163 + }, 164 + { 165 + "PublicDescription": "Early mispredict", 166 + "EventCode": "0xD100", 167 + "EventName": "ICF_EARLY_MIS_PRED", 168 + "BriefDescription": "Early mispredict" 169 + }, 170 + { 171 + "PublicDescription": "FEQ full cycles", 172 + "EventCode": "0xD101", 173 + "EventName": "ICF_FEQ_FULL", 174 + "BriefDescription": "FEQ full cycles" 175 + }, 176 + { 177 + "PublicDescription": "Instruction FIFO Full", 178 + "EventCode": "0xD102", 179 + "EventName": "ICF_INST_FIFO_FULL", 180 + "BriefDescription": "Instruction FIFO Full" 181 + }, 182 + { 183 + "PublicDescription": "L1I TLB miss", 184 + "EventCode": "0xD103", 185 + "EventName": "L1I_TLB_MISS", 186 + "BriefDescription": "L1I TLB miss" 187 + }, 188 + { 189 + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", 190 + "EventCode": "0xD104", 191 + "EventName": "ICF_STALL", 192 + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" 193 + }, 194 + { 195 + 
"PublicDescription": "PC FIFO Full", 196 + "EventCode": "0xD105", 197 + "EventName": "ICF_PC_FIFO_FULL", 198 + "BriefDescription": "PC FIFO Full" 199 + }, 200 + { 201 + "PublicDescription": "Stall due to BOB ID", 202 + "EventCode": "0xD200", 203 + "EventName": "IDR_STALL_BOB_ID", 204 + "BriefDescription": "Stall due to BOB ID" 205 + }, 206 + { 207 + "PublicDescription": "Dispatch stall due to LOB entries", 208 + "EventCode": "0xD201", 209 + "EventName": "IDR_STALL_LOB_ID", 210 + "BriefDescription": "Dispatch stall due to LOB entries" 211 + }, 212 + { 213 + "PublicDescription": "Dispatch stall due to SOB entries", 214 + "EventCode": "0xD202", 215 + "EventName": "IDR_STALL_SOB_ID", 216 + "BriefDescription": "Dispatch stall due to SOB entries" 217 + }, 218 + { 219 + "PublicDescription": "Dispatch stall due to IXU scheduler entries", 220 + "EventCode": "0xD203", 221 + "EventName": "IDR_STALL_IXU_SCHED", 222 + "BriefDescription": "Dispatch stall due to IXU scheduler entries" 223 + }, 224 + { 225 + "PublicDescription": "Dispatch stall due to FSU scheduler entries", 226 + "EventCode": "0xD204", 227 + "EventName": "IDR_STALL_FSU_SCHED", 228 + "BriefDescription": "Dispatch stall due to FSU scheduler entries" 229 + }, 230 + { 231 + "PublicDescription": "Dispatch stall due to ROB entries", 232 + "EventCode": "0xD205", 233 + "EventName": "IDR_STALL_ROB_ID", 234 + "BriefDescription": "Dispatch stall due to ROB entries" 235 + }, 236 + { 237 + "PublicDescription": "Dispatch stall due to flush", 238 + "EventCode": "0xD206", 239 + "EventName": "IDR_STALL_FLUSH", 240 + "BriefDescription": "Dispatch stall due to flush" 241 + }, 242 + { 243 + "PublicDescription": "Dispatch stall due to WFI", 244 + "EventCode": "0xD207", 245 + "EventName": "IDR_STALL_WFI", 246 + "BriefDescription": "Dispatch stall due to WFI" 247 + }, 248 + { 249 + "PublicDescription": "Number of SWOB drains triggered by timeout", 250 + "EventCode": "0xD208", 251 + "EventName": "IDR_STALL_SWOB_TIMEOUT", 252 + 
"BriefDescription": "Number of SWOB drains triggered by timeout" 253 + }, 254 + { 255 + "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", 256 + "EventCode": "0xD209", 257 + "EventName": "IDR_STALL_SWOB_RAW", 258 + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" 259 + }, 260 + { 261 + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", 262 + "EventCode": "0xD20A", 263 + "EventName": "IDR_STALL_SWOB_FULL", 264 + "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" 265 + }, 266 + { 267 + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", 268 + "EventCode": "0xD20B", 269 + "EventName": "STALL_FRONTEND_CACHE", 270 + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" 271 + }, 272 + { 273 + "PublicDescription": "Dispatch stall due to L1 data cache miss", 274 + "EventCode": "0xD20D", 275 + "EventName": "STALL_BACKEND_CACHE", 276 + "BriefDescription": "Dispatch stall due to L1 data cache miss" 277 + }, 278 + { 279 + "PublicDescription": "Dispatch stall due to lack of any core resource", 280 + "EventCode": "0xD20F", 281 + "EventName": "STALL_BACKEND_RESOURCE", 282 + "BriefDescription": "Dispatch stall due to lack of any core resource" 283 + }, 284 + { 285 + "PublicDescription": "Instructions issued by the scheduler", 286 + "EventCode": "0xD300", 287 + "EventName": "IXU_NUM_UOPS_ISSUED", 288 + "BriefDescription": "Instructions issued by the scheduler" 289 + }, 290 + { 291 + "PublicDescription": "Any uop issued was canceled for any reason", 292 + "EventCode": "0xD301", 293 + "EventName": "IXU_ISSUE_CANCEL", 294 + "BriefDescription": "Any uop issued was canceled for any reason" 295 + }, 
296 + { 297 + "PublicDescription": "A load wakeup to the scheduler has been canceled", 298 + "EventCode": "0xD302", 299 + "EventName": "IXU_LOAD_CANCEL", 300 + "BriefDescription": "A load wakeup to the scheduler has been canceled" 301 + }, 302 + { 303 + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", 304 + "EventCode": "0xD303", 305 + "EventName": "IXU_SLOW_CANCEL", 306 + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" 307 + }, 308 + { 309 + "PublicDescription": "Uops issued by the scheduler on IXA", 310 + "EventCode": "0xD304", 311 + "EventName": "IXU_IXA_ISSUED", 312 + "BriefDescription": "Uops issued by the scheduler on IXA" 313 + }, 314 + { 315 + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", 316 + "EventCode": "0xD305", 317 + "EventName": "IXU_IXA_PAR0_ISSUED", 318 + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" 319 + }, 320 + { 321 + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", 322 + "EventCode": "0xD306", 323 + "EventName": "IXU_IXA_PAR1_ISSUED", 324 + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" 325 + }, 326 + { 327 + "PublicDescription": "Uops issued by the scheduler on IXB", 328 + "EventCode": "0xD307", 329 + "EventName": "IXU_IXB_ISSUED", 330 + "BriefDescription": "Uops issued by the scheduler on IXB" 331 + }, 332 + { 333 + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", 334 + "EventCode": "0xD308", 335 + "EventName": "IXU_IXB_PAR0_ISSUED", 336 + "BriefDescription": "Uops issued by the scheduler on IXB Par 0" 337 + }, 338 + { 339 + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", 340 + "EventCode": "0xD309", 341 + "EventName": "IXU_IXB_PAR1_ISSUED", 342 + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" 343 + }, 344 + { 345 + "PublicDescription": "Uops issued by the scheduler on IXC", 346 + "EventCode": "0xD30A", 347 + "EventName": "IXU_IXC_ISSUED", 
348 + "BriefDescription": "Uops issued by the scheduler on IXC" 349 + }, 350 + { 351 + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", 352 + "EventCode": "0xD30B", 353 + "EventName": "IXU_IXC_PAR0_ISSUED", 354 + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" 355 + }, 356 + { 357 + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", 358 + "EventCode": "0xD30C", 359 + "EventName": "IXU_IXC_PAR1_ISSUED", 360 + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" 361 + }, 362 + { 363 + "PublicDescription": "Uops issued by the scheduler on IXD", 364 + "EventCode": "0xD30D", 365 + "EventName": "IXU_IXD_ISSUED", 366 + "BriefDescription": "Uops issued by the scheduler on IXD" 367 + }, 368 + { 369 + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", 370 + "EventCode": "0xD30E", 371 + "EventName": "IXU_IXD_PAR0_ISSUED", 372 + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" 373 + }, 374 + { 375 + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", 376 + "EventCode": "0xD30F", 377 + "EventName": "IXU_IXD_PAR1_ISSUED", 378 + "BriefDescription": "Uops issued by the scheduler on IXD Par 1" 379 + }, 380 + { 381 + "PublicDescription": "Uops issued by the FSU scheduler", 382 + "EventCode": "0xD400", 383 + "EventName": "FSU_ISSUED", 384 + "BriefDescription": "Uops issued by the FSU scheduler" 385 + }, 386 + { 387 + "PublicDescription": "Uops issued by the scheduler on FSX", 388 + "EventCode": "0xD401", 389 + "EventName": "FSU_FSX_ISSUED", 390 + "BriefDescription": "Uops issued by the scheduler on FSX" 391 + }, 392 + { 393 + "PublicDescription": "Uops issued by the scheduler on FSY", 394 + "EventCode": "0xD402", 395 + "EventName": "FSU_FSY_ISSUED", 396 + "BriefDescription": "Uops issued by the scheduler on FSY" 397 + }, 398 + { 399 + "PublicDescription": "Uops issued by the scheduler on FSZ", 400 + "EventCode": "0xD403", 401 + "EventName": "FSU_FSZ_ISSUED", 402 + 
"BriefDescription": "Uops issued by the scheduler on FSZ" 403 + }, 404 + { 405 + "PublicDescription": "Uops canceled (load cancels)", 406 + "EventCode": "0xD404", 407 + "EventName": "FSU_CANCEL", 408 + "BriefDescription": "Uops canceled (load cancels)" 409 + }, 410 + { 411 + "PublicDescription": "Count scheduler stalls due to divide/sqrt", 412 + "EventCode": "0xD405", 413 + "EventName": "FSU_DIV_SQRT_STALL", 414 + "BriefDescription": "Count scheduler stalls due to divide/sqrt" 415 + }, 416 + { 417 + "PublicDescription": "Number of SWOB drains", 418 + "EventCode": "0xD500", 419 + "EventName": "GPC_SWOB_DRAIN", 420 + "BriefDescription": "Number of SWOB drains" 421 + }, 422 + { 423 + "PublicDescription": "GPC detected a Breakpoint instruction match", 424 + "EventCode": "0xD501", 425 + "EventName": "BREAKPOINT_MATCH", 426 + "BriefDescription": "GPC detected a Breakpoint instruction match" 427 + }, 428 + { 429 + "PublicDescription": "Core progress monitor triggered", 430 + "EventCode": "0xd502", 431 + "EventName": "GPC_CPM_TRIGGER", 432 + "BriefDescription": "Core progress monitor triggered" 433 + }, 434 + { 435 + "PublicDescription": "Fill buffer full", 436 + "EventCode": "0xD601", 437 + "EventName": "OFB_FULL", 438 + "BriefDescription": "Fill buffer full" 439 + }, 440 + { 441 + "PublicDescription": "Load satisified from store forwarded data", 442 + "EventCode": "0xD605", 443 + "EventName": "LD_FROM_ST_FWD", 444 + "BriefDescription": "Load satisified from store forwarded data" 445 + }, 446 + { 447 + "PublicDescription": "Store retirement pipe stall", 448 + "EventCode": "0xD60C", 449 + "EventName": "LSU_ST_RETIRE_STALL", 450 + "BriefDescription": "Store retirement pipe stall" 451 + }, 452 + { 453 + "PublicDescription": "LSU detected a Watchpoint data match", 454 + "EventCode": "0xD60D", 455 + "EventName": "WATCHPOINT_MATCH", 456 + "BriefDescription": "LSU detected a Watchpoint data match" 457 + }, 458 + { 459 + "PublicDescription": "Counts cycles that MSC is telling GPC 
to stall commit due to ETM ISTALL feature", 460 + "EventCode": "0xda00", 461 + "EventName": "MSC_ETM_COMMIT_STALL", 462 + "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature" 463 + } 464 + ]
+47
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "EXC_UNDEF" 4 + }, 5 + { 6 + "ArchStdEvent": "EXC_SVC" 7 + }, 8 + { 9 + "ArchStdEvent": "EXC_PABORT" 10 + }, 11 + { 12 + "ArchStdEvent": "EXC_DABORT" 13 + }, 14 + { 15 + "ArchStdEvent": "EXC_IRQ" 16 + }, 17 + { 18 + "ArchStdEvent": "EXC_FIQ" 19 + }, 20 + { 21 + "ArchStdEvent": "EXC_HVC" 22 + }, 23 + { 24 + "ArchStdEvent": "EXC_TRAP_PABORT" 25 + }, 26 + { 27 + "ArchStdEvent": "EXC_TRAP_DABORT" 28 + }, 29 + { 30 + "ArchStdEvent": "EXC_TRAP_OTHER" 31 + }, 32 + { 33 + "ArchStdEvent": "EXC_TRAP_IRQ" 34 + }, 35 + { 36 + "ArchStdEvent": "EXC_TRAP_FIQ" 37 + }, 38 + { 39 + "ArchStdEvent": "EXC_TAKEN" 40 + }, 41 + { 42 + "ArchStdEvent": "EXC_RETURN" 43 + }, 44 + { 45 + "ArchStdEvent": "EXC_SMC" 46 + } 47 + ]
+128
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "SW_INCR" 4 + }, 5 + { 6 + "ArchStdEvent": "ST_RETIRED" 7 + }, 8 + { 9 + "ArchStdEvent": "LD_SPEC" 10 + }, 11 + { 12 + "ArchStdEvent": "ST_SPEC" 13 + }, 14 + { 15 + "ArchStdEvent": "LDST_SPEC" 16 + }, 17 + { 18 + "ArchStdEvent": "DP_SPEC" 19 + }, 20 + { 21 + "ArchStdEvent": "ASE_SPEC" 22 + }, 23 + { 24 + "ArchStdEvent": "VFP_SPEC" 25 + }, 26 + { 27 + "ArchStdEvent": "PC_WRITE_SPEC" 28 + }, 29 + { 30 + "ArchStdEvent": "BR_IMMED_RETIRED" 31 + }, 32 + { 33 + "ArchStdEvent": "BR_RETURN_RETIRED" 34 + }, 35 + { 36 + "ArchStdEvent": "CRYPTO_SPEC" 37 + }, 38 + { 39 + "ArchStdEvent": "ISB_SPEC" 40 + }, 41 + { 42 + "ArchStdEvent": "DSB_SPEC" 43 + }, 44 + { 45 + "ArchStdEvent": "DMB_SPEC" 46 + }, 47 + { 48 + "ArchStdEvent": "RC_LD_SPEC" 49 + }, 50 + { 51 + "ArchStdEvent": "RC_ST_SPEC" 52 + }, 53 + { 54 + "ArchStdEvent": "INST_RETIRED" 55 + }, 56 + { 57 + "ArchStdEvent": "CID_WRITE_RETIRED" 58 + }, 59 + { 60 + "ArchStdEvent": "PC_WRITE_RETIRED" 61 + }, 62 + { 63 + "ArchStdEvent": "INST_SPEC" 64 + }, 65 + { 66 + "ArchStdEvent": "TTBR_WRITE_RETIRED" 67 + }, 68 + { 69 + "ArchStdEvent": "BR_RETIRED" 70 + }, 71 + { 72 + "ArchStdEvent": "BR_MIS_PRED_RETIRED" 73 + }, 74 + { 75 + "ArchStdEvent": "OP_RETIRED" 76 + }, 77 + { 78 + "ArchStdEvent": "OP_SPEC" 79 + }, 80 + { 81 + "PublicDescription": "Operation speculatively executed - ASE Scalar", 82 + "EventCode": "0xd210", 83 + "EventName": "ASE_SCALAR_SPEC", 84 + "BriefDescription": "Operation speculatively executed - ASE Scalar" 85 + }, 86 + { 87 + "PublicDescription": "Operation speculatively executed - ASE Vector", 88 + "EventCode": "0xd211", 89 + "EventName": "ASE_VECTOR_SPEC", 90 + "BriefDescription": "Operation speculatively executed - ASE Vector" 91 + }, 92 + { 93 + "PublicDescription": "Barrier speculatively executed, CSDB", 94 + "EventCode": "0x7f", 95 + "EventName": "CSDB_SPEC", 96 + "BriefDescription": "Barrier speculatively executed, CSDB" 97 + }, 98 + { 99 + "PublicDescription": "Prefetch 
sent to L2.", 100 + "EventCode": "0xd106", 101 + "EventName": "ICF_PREFETCH_DISPATCH", 102 + "BriefDescription": "Prefetch sent to L2." 103 + }, 104 + { 105 + "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.", 106 + "EventCode": "0xd107", 107 + "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE", 108 + "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades." 109 + }, 110 + { 111 + "PublicDescription": "Prefetch request missed TLB.", 112 + "EventCode": "0xd108", 113 + "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS", 114 + "BriefDescription": "Prefetch request missed TLB." 115 + }, 116 + { 117 + "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.", 118 + "EventCode": "0xd109", 119 + "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE", 120 + "BriefDescription": "Prefetch request dropped since duplicate was found in TLB." 121 + }, 122 + { 123 + "PublicDescription": "Prefetch request dropped since it was found in cache.", 124 + "EventCode": "0xd10a", 125 + "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT", 126 + "BriefDescription": "Prefetch request dropped since it was found in cache." 127 + } 128 + ]
+14
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "LDREX_SPEC" 4 + }, 5 + { 6 + "ArchStdEvent": "STREX_PASS_SPEC" 7 + }, 8 + { 9 + "ArchStdEvent": "STREX_FAIL_SPEC" 10 + }, 11 + { 12 + "ArchStdEvent": "STREX_SPEC" 13 + } 14 + ]
+41
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "LD_RETIRED" 4 + }, 5 + { 6 + "ArchStdEvent": "MEM_ACCESS_RD" 7 + }, 8 + { 9 + "ArchStdEvent": "MEM_ACCESS_WR" 10 + }, 11 + { 12 + "ArchStdEvent": "LD_ALIGN_LAT" 13 + }, 14 + { 15 + "ArchStdEvent": "ST_ALIGN_LAT" 16 + }, 17 + { 18 + "ArchStdEvent": "MEM_ACCESS" 19 + }, 20 + { 21 + "ArchStdEvent": "MEMORY_ERROR" 22 + }, 23 + { 24 + "ArchStdEvent": "LDST_ALIGN_LAT" 25 + }, 26 + { 27 + "ArchStdEvent": "MEM_ACCESS_CHECKED" 28 + }, 29 + { 30 + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" 31 + }, 32 + { 33 + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" 34 + }, 35 + { 36 + "PublicDescription": "Flushes due to memory hazards", 37 + "EventCode": "0x121", 38 + "EventName": "BPU_FLUSH_MEM_FAULT", 39 + "BriefDescription": "Flushes due to memory hazards" 40 + } 41 + ]
+442
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
··· 1 + [ 2 + { 3 + "MetricName": "branch_miss_pred_rate", 4 + "MetricExpr": "BR_MIS_PRED / BR_PRED", 5 + "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch", 6 + "MetricGroup": "branch", 7 + "ScaleUnit": "100%" 8 + }, 9 + { 10 + "MetricName": "bus_utilization", 11 + "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)", 12 + "BriefDescription": "Core-to-uncore bus utilization", 13 + "MetricGroup": "Bus", 14 + "ScaleUnit": "100percent of bus cycles" 15 + }, 16 + { 17 + "MetricName": "l1d_cache_miss_ratio", 18 + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", 19 + "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.", 20 + "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness", 21 + "ScaleUnit": "1per cache access" 22 + }, 23 + { 24 + "MetricName": "l1i_cache_miss_ratio", 25 + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", 26 + "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.", 27 + "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness", 28 + "ScaleUnit": "1per cache access" 29 + }, 30 + { 31 + "MetricName": "Miss_Ratio;l1d_cache_read_miss", 32 + "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD", 33 + "BriefDescription": "L1D cache read miss rate", 34 + "MetricGroup": "Cache", 35 + "ScaleUnit": "1per cache read access" 36 + }, 37 + { 38 + "MetricName": "l2_cache_miss_ratio", 39 + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", 40 + "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. 
This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.", 41 + "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness", 42 + "ScaleUnit": "1per cache access" 43 + }, 44 + { 45 + "MetricName": "l1i_cache_read_miss_rate", 46 + "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE", 47 + "BriefDescription": "L1I cache read miss rate", 48 + "MetricGroup": "Cache", 49 + "ScaleUnit": "1per cache access" 50 + }, 51 + { 52 + "MetricName": "l2d_cache_read_miss_rate", 53 + "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD", 54 + "BriefDescription": "L2 cache read miss rate", 55 + "MetricGroup": "Cache", 56 + "ScaleUnit": "1per cache read access" 57 + }, 58 + { 59 + "MetricName": "l1d_cache_miss_mpki", 60 + "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED", 61 + "BriefDescription": "Misses per thousand instructions (data)", 62 + "MetricGroup": "Cache", 63 + "ScaleUnit": "1MPKI" 64 + }, 65 + { 66 + "MetricName": "l1i_cache_miss_mpki", 67 + "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED", 68 + "BriefDescription": "Misses per thousand instructions (instruction)", 69 + "MetricGroup": "Cache", 70 + "ScaleUnit": "1MPKI" 71 + }, 72 + { 73 + "MetricName": "simd_percentage", 74 + "MetricExpr": "ASE_SPEC / INST_SPEC", 75 + "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.", 76 + "MetricGroup": "Operation_Mix", 77 + "ScaleUnit": "100percent of operations" 78 + }, 79 + { 80 + "MetricName": "crypto_percentage", 81 + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", 82 + "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.", 83 + "MetricGroup": "Operation_Mix", 84 + "ScaleUnit": "100percent of operations" 85 + }, 86 + { 87 + "MetricName": "gflops", 88 + "MetricExpr": 
"VFP_SPEC / (duration_time * 1e9)", 89 + "BriefDescription": "Giga-floating point operations per second", 90 + "MetricGroup": "InstructionMix" 91 + }, 92 + { 93 + "MetricName": "integer_dp_percentage", 94 + "MetricExpr": "DP_SPEC / INST_SPEC", 95 + "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.", 96 + "MetricGroup": "Operation_Mix", 97 + "ScaleUnit": "100percent of operations" 98 + }, 99 + { 100 + "MetricName": "ipc", 101 + "MetricExpr": "INST_RETIRED / CPU_CYCLES", 102 + "BriefDescription": "This metric measures the number of instructions retired per cycle.", 103 + "MetricGroup": "General", 104 + "ScaleUnit": "1per cycle" 105 + }, 106 + { 107 + "MetricName": "load_percentage", 108 + "MetricExpr": "LD_SPEC / INST_SPEC", 109 + "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.", 110 + "MetricGroup": "Operation_Mix", 111 + "ScaleUnit": "100percent of operations" 112 + }, 113 + { 114 + "MetricName": "load_store_spec_rate", 115 + "MetricExpr": "LDST_SPEC / INST_SPEC", 116 + "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed", 117 + "MetricGroup": "Operation_Mix", 118 + "ScaleUnit": "100percent of operations" 119 + }, 120 + { 121 + "MetricName": "retired_mips", 122 + "MetricExpr": "INST_RETIRED / (duration_time * 1e6)", 123 + "BriefDescription": "Millions of instructions per second", 124 + "MetricGroup": "InstructionMix" 125 + }, 126 + { 127 + "MetricName": "spec_utilization_mips", 128 + "MetricExpr": "INST_SPEC / (duration_time * 1e6)", 129 + "BriefDescription": "Millions of instructions per second", 130 + "MetricGroup": "PEutilization" 131 + }, 132 + { 133 + "MetricName": "pc_write_spec_rate", 134 + "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", 135 + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions 
speculatively executed", 136 + "MetricGroup": "Operation_Mix", 137 + "ScaleUnit": "100percent of operations" 138 + }, 139 + { 140 + "MetricName": "store_percentage", 141 + "MetricExpr": "ST_SPEC / INST_SPEC", 142 + "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.", 143 + "MetricGroup": "Operation_Mix", 144 + "ScaleUnit": "100percent of operations" 145 + }, 146 + { 147 + "MetricName": "scalar_fp_percentage", 148 + "MetricExpr": "VFP_SPEC / INST_SPEC", 149 + "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.", 150 + "MetricGroup": "Operation_Mix", 151 + "ScaleUnit": "100percent of operations" 152 + }, 153 + { 154 + "MetricName": "retired_rate", 155 + "MetricExpr": "OP_RETIRED / OP_SPEC", 156 + "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)", 157 + "MetricGroup": "General", 158 + "ScaleUnit": "100%" 159 + }, 160 + { 161 + "MetricName": "wasted", 162 + "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))", 163 + "BriefDescription": "Of all the micro-operations issued, what proportion are lost", 164 + "MetricGroup": "General", 165 + "ScaleUnit": "100%" 166 + }, 167 + { 168 + "MetricName": "wasted_rate", 169 + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", 170 + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)", 171 + "MetricGroup": "General", 172 + "ScaleUnit": "100%" 173 + }, 174 + { 175 + "MetricName": "stall_backend_cache_rate", 176 + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", 177 + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss", 178 + "MetricGroup": "Stall", 179 + "ScaleUnit": "100percent of cycles" 180 + }, 181 + { 182 + "MetricName": "stall_backend_resource_rate", 183 + "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES", 184 + "BriefDescription": "Proportion of cycles 
stalled and no operations issued to backend and resource full", 185 + "MetricGroup": "Stall", 186 + "ScaleUnit": "100percent of cycles" 187 + }, 188 + { 189 + "MetricName": "stall_backend_tlb_rate", 190 + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", 191 + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss", 192 + "MetricGroup": "Stall", 193 + "ScaleUnit": "100percent of cycles" 194 + }, 195 + { 196 + "MetricName": "stall_frontend_cache_rate", 197 + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", 198 + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", 199 + "MetricGroup": "Stall", 200 + "ScaleUnit": "100percent of cycles" 201 + }, 202 + { 203 + "MetricName": "stall_frontend_tlb_rate", 204 + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", 205 + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", 206 + "MetricGroup": "Stall", 207 + "ScaleUnit": "100percent of cycles" 208 + }, 209 + { 210 + "MetricName": "dtlb_walk_ratio", 211 + "MetricExpr": "DTLB_WALK / L1D_TLB", 212 + "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.", 213 + "MetricGroup": "Miss_Ratio;DTLB_Effectiveness", 214 + "ScaleUnit": "1per TLB access" 215 + }, 216 + { 217 + "MetricName": "itlb_walk_ratio", 218 + "MetricExpr": "ITLB_WALK / L1I_TLB", 219 + "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. 
This gives an indication of the effectiveness of the instruction TLB accesses.", 220 + "MetricGroup": "Miss_Ratio;ITLB_Effectiveness", 221 + "ScaleUnit": "1per TLB access" 222 + }, 223 + { 224 + "ArchStdEvent": "backend_bound" 225 + }, 226 + { 227 + "ArchStdEvent": "frontend_bound", 228 + "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)" 229 + }, 230 + { 231 + "MetricName": "slots_lost_misspeculation_fraction", 232 + "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", 233 + "BriefDescription": "Fraction of slots lost due to misspeculation", 234 + "DefaultMetricgroupName": "TopdownL1", 235 + "MetricGroup": "Default;TopdownL1", 236 + "ScaleUnit": "100percent of slots" 237 + }, 238 + { 239 + "MetricName": "retired_fraction", 240 + "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", 241 + "BriefDescription": "Fraction of slots retiring, useful work", 242 + "DefaultMetricgroupName": "TopdownL1", 243 + "MetricGroup": "Default;TopdownL1", 244 + "ScaleUnit": "100percent of slots" 245 + }, 246 + { 247 + "MetricName": "backend_core", 248 + "MetricExpr": "(backend_bound / 100) - backend_memory", 249 + "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues", 250 + "MetricGroup": "TopdownL2", 251 + "ScaleUnit": "100%" 252 + }, 253 + { 254 + "MetricName": "backend_memory", 255 + "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES", 256 + "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)", 257 + "MetricGroup": "TopdownL2", 258 + "ScaleUnit": "100%" 259 + }, 260 + { 261 + "MetricName": "branch_mispredict", 262 + "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction", 263 + "BriefDescription": "Fraction of slots lost due to branch misprediction", 264 + "MetricGroup": "TopdownL2", 265 + "ScaleUnit": "1percent of slots" 266 + }, 267 + { 268 + "MetricName": 
"frontend_bandwidth", 269 + "MetricExpr": "frontend_bound - frontend_latency", 270 + "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", 271 + "MetricGroup": "TopdownL2", 272 + "ScaleUnit": "1percent of slots" 273 + }, 274 + { 275 + "MetricName": "frontend_latency", 276 + "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES", 277 + "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch", 278 + "MetricGroup": "TopdownL2", 279 + "ScaleUnit": "100percent of slots" 280 + }, 281 + { 282 + "MetricName": "other_miss_pred", 283 + "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict", 284 + "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation", 285 + "MetricGroup": "TopdownL2", 286 + "ScaleUnit": "1percent of slots" 287 + }, 288 + { 289 + "MetricName": "pipe_utilization", 290 + "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))", 291 + "BriefDescription": "Fraction of execute slots utilized", 292 + "MetricGroup": "TopdownL2", 293 + "ScaleUnit": "1percent of slots" 294 + }, 295 + { 296 + "MetricName": "d_cache_l2_miss_rate", 297 + "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES", 298 + "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss", 299 + "MetricGroup": "TopdownL3", 300 + "ScaleUnit": "100percent of cycles" 301 + }, 302 + { 303 + "MetricName": "d_cache_miss_rate", 304 + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", 305 + "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss", 306 + "MetricGroup": "TopdownL3", 307 + "ScaleUnit": "100percent of cycles" 308 + }, 309 + { 310 + "MetricName": "d_tlb_miss_rate", 311 + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", 312 + "BriefDescription": 
"Fraction of cycles the CPU was stalled due to data TLB miss", 313 + "MetricGroup": "TopdownL3", 314 + "ScaleUnit": "100percent of cycles" 315 + }, 316 + { 317 + "MetricName": "fsu_pipe_utilization", 318 + "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)", 319 + "BriefDescription": "Fraction of FSU execute slots utilized", 320 + "MetricGroup": "TopdownL3", 321 + "ScaleUnit": "100percent of slots" 322 + }, 323 + { 324 + "MetricName": "i_cache_miss_rate", 325 + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", 326 + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss", 327 + "MetricGroup": "TopdownL3", 328 + "ScaleUnit": "100percent of slots" 329 + }, 330 + { 331 + "MetricName": "i_tlb_miss_rate", 332 + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", 333 + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss", 334 + "MetricGroup": "TopdownL3", 335 + "ScaleUnit": "100percent of slots" 336 + }, 337 + { 338 + "MetricName": "ixu_pipe_utilization", 339 + "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)", 340 + "BriefDescription": "Fraction of IXU execute slots utilized", 341 + "MetricGroup": "TopdownL3", 342 + "ScaleUnit": "100percent of slots" 343 + }, 344 + { 345 + "MetricName": "stall_recovery_rate", 346 + "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES", 347 + "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery", 348 + "MetricGroup": "TopdownL3", 349 + "ScaleUnit": "100percent of slots" 350 + }, 351 + { 352 + "MetricName": "stall_fsu_sched_rate", 353 + "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES", 354 + "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full", 355 + "MetricGroup": "TopdownL4", 356 + "ScaleUnit": "100percent of cycles" 357 + }, 358 + { 359 + "MetricName": "stall_ixu_sched_rate", 360 + "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES", 361 + "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full", 362 + 
"MetricGroup": "TopdownL4", 363 + "ScaleUnit": "100percent of cycles" 364 + }, 365 + { 366 + "MetricName": "stall_lob_id_rate", 367 + "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES", 368 + "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full", 369 + "MetricGroup": "TopdownL4", 370 + "ScaleUnit": "100percent of cycles" 371 + }, 372 + { 373 + "MetricName": "stall_rob_id_rate", 374 + "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES", 375 + "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full", 376 + "MetricGroup": "TopdownL4", 377 + "ScaleUnit": "100percent of cycles" 378 + }, 379 + { 380 + "MetricName": "stall_sob_id_rate", 381 + "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES", 382 + "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full", 383 + "MetricGroup": "TopdownL4", 384 + "ScaleUnit": "100percent of cycles" 385 + }, 386 + { 387 + "MetricName": "l1d_cache_access_demand", 388 + "MetricExpr": "L1D_CACHE_RW / L1D_CACHE", 389 + "BriefDescription": "L1D cache access - demand", 390 + "MetricGroup": "Cache", 391 + "ScaleUnit": "100percent of cache acceses" 392 + }, 393 + { 394 + "MetricName": "l1d_cache_access_prefetces", 395 + "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", 396 + "BriefDescription": "L1D cache access - prefetch", 397 + "MetricGroup": "Cache", 398 + "ScaleUnit": "100percent of cache acceses" 399 + }, 400 + { 401 + "MetricName": "l1d_cache_demand_misses", 402 + "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE", 403 + "BriefDescription": "L1D cache demand misses", 404 + "MetricGroup": "Cache", 405 + "ScaleUnit": "100percent of cache acceses" 406 + }, 407 + { 408 + "MetricName": "l1d_cache_demand_misses_read", 409 + "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE", 410 + "BriefDescription": "L1D cache demand misses - read", 411 + "MetricGroup": "Cache", 412 + "ScaleUnit": "100percent of cache acceses" 413 + }, 414 + { 415 + "MetricName": "l1d_cache_demand_misses_write", 416 + "MetricExpr": 
"L1D_CACHE_REFILL_WR / L1D_CACHE", 417 + "BriefDescription": "L1D cache demand misses - write", 418 + "MetricGroup": "Cache", 419 + "ScaleUnit": "100percent of cache acceses" 420 + }, 421 + { 422 + "MetricName": "l1d_cache_prefetch_misses", 423 + "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE", 424 + "BriefDescription": "L1D cache prefetch misses", 425 + "MetricGroup": "Cache", 426 + "ScaleUnit": "100percent of cache acceses" 427 + }, 428 + { 429 + "MetricName": "ase_scalar_mix", 430 + "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC", 431 + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations", 432 + "MetricGroup": "Instructions", 433 + "ScaleUnit": "100percent of cache acceses" 434 + }, 435 + { 436 + "MetricName": "ase_vector_mix", 437 + "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC", 438 + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations", 439 + "MetricGroup": "Instructions", 440 + "ScaleUnit": "100percent of cache acceses" 441 + } 442 + ]
+170
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json
··· 1 + [ 2 + { 3 + "PublicDescription": "Level 2 data translation buffer allocation", 4 + "EventCode": "0xD800", 5 + "EventName": "MMU_D_OTB_ALLOC", 6 + "BriefDescription": "Level 2 data translation buffer allocation" 7 + }, 8 + { 9 + "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry", 10 + "EventCode": "0xd801", 11 + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", 12 + "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry" 13 + }, 14 + { 15 + "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry", 16 + "EventCode": "0xd802", 17 + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK", 18 + "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry" 19 + }, 20 + { 21 + "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry", 22 + "EventCode": "0xd803", 23 + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", 24 + "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry" 25 + }, 26 + { 27 + "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry", 28 + "EventCode": "0xd804", 29 + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", 30 + "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry" 31 + }, 32 + { 33 + "PublicDescription": "Data TLB translation cache hit on S2L1 walk cache entry", 34 + "EventCode": "0xd805", 35 + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", 36 + "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry" 37 + }, 38 + { 39 + "PublicDescription": "Data TLB translation cache hit on S2L0 walk cache entry", 40 + "EventCode": "0xd806", 41 + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", 42 + "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry" 43 + }, 44 + { 45 + "PublicDescription": "Data-side S1 page walk cache lookup", 46 + "EventCode": "0xd807", 47 + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", 48 + "BriefDescription": "Data-side S1 page 
walk cache lookup" 49 + }, 50 + { 51 + "PublicDescrition": "Data-side S1 page walk cache refill", 52 + "EventCode": "0xd808", 53 + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", 54 + "BriefDescription": "Data-side S1 page walk cache refill" 55 + }, 56 + { 57 + "PublicDescrition": "Data-side S2 page walk cache lookup", 58 + "EventCode": "0xd809", 59 + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", 60 + "BriefDescription": "Data-side S2 page walk cache lookup" 61 + }, 62 + { 63 + "PublicDescrition": "Data-side S2 page walk cache refill", 64 + "EventCode": "0xd80a", 65 + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", 66 + "BriefDescription": "Data-side S2 page walk cache refill" 67 + }, 68 + { 69 + "PublicDescription": "Data-side S1 table walk fault", 70 + "EventCode": "0xD80B", 71 + "EventName": "MMU_D_S1_WALK_FAULT", 72 + "BriefDescription": "Data-side S1 table walk fault" 73 + }, 74 + { 75 + "PublicDescription": "Data-side S2 table walk fault", 76 + "EventCode": "0xD80C", 77 + "EventName": "MMU_D_S2_WALK_FAULT", 78 + "BriefDescription": "Data-side S2 table walk fault" 79 + }, 80 + { 81 + "PublicDescription": "Data-side table walk steps or descriptor fetches", 82 + "EventCode": "0xD80D", 83 + "EventName": "MMU_D_WALK_STEPS", 84 + "BriefDescription": "Data-side table walk steps or descriptor fetches" 85 + }, 86 + { 87 + "PublicDescription": "Level 2 instruction translation buffer allocation", 88 + "EventCode": "0xD900", 89 + "EventName": "MMU_I_OTB_ALLOC", 90 + "BriefDescription": "Level 2 instruction translation buffer allocation" 91 + }, 92 + { 93 + "PublicDescrition": "Instruction TLB translation cache hit on S1L2 walk cache entry", 94 + "EventCode": "0xd901", 95 + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", 96 + "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry" 97 + }, 98 + { 99 + "PublicDescrition": "Instruction TLB translation cache hit on S1L1 walk cache entry", 100 + "EventCode": "0xd902", 101 + "EventName": 
"MMU_I_TRANS_CACHE_HIT_S1L1_WALK", 102 + "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry" 103 + }, 104 + { 105 + "PublicDescrition": "Instruction TLB translation cache hit on S1L0 walk cache entry", 106 + "EventCode": "0xd903", 107 + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", 108 + "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry" 109 + }, 110 + { 111 + "PublicDescrition": "Instruction TLB translation cache hit on S2L2 walk cache entry", 112 + "EventCode": "0xd904", 113 + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", 114 + "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry" 115 + }, 116 + { 117 + "PublicDescrition": "Instruction TLB translation cache hit on S2L1 walk cache entry", 118 + "EventCode": "0xd905", 119 + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", 120 + "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry" 121 + }, 122 + { 123 + "PublicDescrition": "Instruction TLB translation cache hit on S2L0 walk cache entry", 124 + "EventCode": "0xd906", 125 + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", 126 + "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry" 127 + }, 128 + { 129 + "PublicDescrition": "Instruction-side S1 page walk cache lookup", 130 + "EventCode": "0xd907", 131 + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", 132 + "BriefDescription": "Instruction-side S1 page walk cache lookup" 133 + }, 134 + { 135 + "PublicDescrition": "Instruction-side S1 page walk cache refill", 136 + "EventCode": "0xd908", 137 + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", 138 + "BriefDescription": "Instruction-side S1 page walk cache refill" 139 + }, 140 + { 141 + "PublicDescrition": "Instruction-side S2 page walk cache lookup", 142 + "EventCode": "0xd909", 143 + "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP", 144 + "BriefDescription": "Instruction-side S2 page walk cache lookup" 145 + }, 146 + { 147 + 
"PublicDescrition": "Instruction-side S2 page walk cache refill", 148 + "EventCode": "0xd90a", 149 + "EventName": "MMU_I_S2_WALK_CACHE_REFILL", 150 + "BriefDescription": "Instruction-side S2 page walk cache refill" 151 + }, 152 + { 153 + "PublicDescription": "Instruction-side S1 table walk fault", 154 + "EventCode": "0xD90B", 155 + "EventName": "MMU_I_S1_WALK_FAULT", 156 + "BriefDescription": "Instruction-side S1 table walk fault" 157 + }, 158 + { 159 + "PublicDescription": "Instruction-side S2 table walk fault", 160 + "EventCode": "0xD90C", 161 + "EventName": "MMU_I_S2_WALK_FAULT", 162 + "BriefDescription": "Instruction-side S2 table walk fault" 163 + }, 164 + { 165 + "PublicDescription": "Instruction-side table walk steps or descriptor fetches", 166 + "EventCode": "0xD90D", 167 + "EventName": "MMU_I_WALK_STEPS", 168 + "BriefDescription": "Instruction-side table walk steps or descriptor fetches" 169 + } 170 + ]
+41
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "STALL_FRONTEND", 4 + "Errata": "Errata AC03_CPU_29", 5 + "BriefDescription": "Impacted by errata, use metrics instead -" 6 + }, 7 + { 8 + "ArchStdEvent": "STALL_BACKEND" 9 + }, 10 + { 11 + "ArchStdEvent": "STALL", 12 + "Errata": "Errata AC03_CPU_29", 13 + "BriefDescription": "Impacted by errata, use metrics instead -" 14 + }, 15 + { 16 + "ArchStdEvent": "STALL_SLOT_BACKEND" 17 + }, 18 + { 19 + "ArchStdEvent": "STALL_SLOT_FRONTEND", 20 + "Errata": "Errata AC03_CPU_29", 21 + "BriefDescription": "Impacted by errata, use metrics instead -" 22 + }, 23 + { 24 + "ArchStdEvent": "STALL_SLOT" 25 + }, 26 + { 27 + "ArchStdEvent": "STALL_BACKEND_MEM" 28 + }, 29 + { 30 + "PublicDescription": "Frontend stall cycles, TLB", 31 + "EventCode": "0x815c", 32 + "EventName": "STALL_FRONTEND_TLB", 33 + "BriefDescription": "Frontend stall cycles, TLB" 34 + }, 35 + { 36 + "PublicDescription": "Backend stall cycles, TLB", 37 + "EventCode": "0x8167", 38 + "EventName": "STALL_BACKEND_TLB", 39 + "BriefDescription": "Backend stall cycles, TLB" 40 + } 41 + ]
+14
tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "SAMPLE_POP" 4 + }, 5 + { 6 + "ArchStdEvent": "SAMPLE_FEED" 7 + }, 8 + { 9 + "ArchStdEvent": "SAMPLE_FILTRATE" 10 + }, 11 + { 12 + "ArchStdEvent": "SAMPLE_COLLISION" 13 + } 14 + ]
+1 -1
tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json
··· 107 107 "EventName": "hnf_qos_hh_retry", 108 108 "EventidCode": "0xe", 109 109 "NodeType": "0x5", 110 - "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN‑F.", 110 + "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN-F.", 111 111 "Unit": "arm_cmn", 112 112 "Compat": "(434|436|43c|43a).*" 113 113 },
+1
tools/perf/pmu-events/arch/arm64/mapfile.csv
··· 42 42 0x00000000480fd010,v1,hisilicon/hip08,core 43 43 0x00000000500f0000,v1,ampere/emag,core 44 44 0x00000000c00fac30,v1,ampere/ampereone,core 45 + 0x00000000c00fac40,v1,ampere/ampereonex,core
+1 -2
tools/perf/pmu-events/arch/powerpc/mapfile.csv
··· 11 11 # 12 12 # Multiple PVRs could map to a single JSON file. 13 13 # 14 - 15 - # Power8 entries 16 14 0x004[bcd][[:xdigit:]]{4},1,power8,core 15 + 0x0066[[:xdigit:]]{4},1,power8,core 17 16 0x004e[[:xdigit:]]{4},1,power9,core 18 17 0x0080[[:xdigit:]]{4},1,power10,core
+14 -4
tools/perf/pmu-events/arch/powerpc/power10/datasource.json
··· 100 100 "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss." 101 101 }, 102 102 { 103 + "EventCode": "0x0003C0000000C040", 104 + "EventName": "PM_DATA_FROM_L2MISS_DSRC", 105 + "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss." 106 + }, 107 + { 103 108 "EventCode": "0x000380000010C040", 104 109 "EventName": "PM_INST_FROM_L2MISS_ALL", 105 110 "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss or prefetch reload." ··· 166 161 }, 167 162 { 168 163 "EventCode": "0x000780000000C040", 169 - "EventName": "PM_INST_FROM_L3MISS", 164 + "EventName": "PM_INST_FROM_L3MISS_DSRC", 170 165 "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss." 166 + }, 167 + { 168 + "EventCode": "0x0007C0000000C040", 169 + "EventName": "PM_DATA_FROM_L3MISS_DSRC", 170 + "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss." 171 171 }, 172 172 { 173 173 "EventCode": "0x000780000010C040", ··· 991 981 }, 992 982 { 993 983 "EventCode": "0x0003C0000000C142", 994 - "EventName": "PM_MRK_DATA_FROM_L2MISS", 984 + "EventName": "PM_MRK_DATA_FROM_L2MISS_DSRC", 995 985 "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction." 996 986 }, 997 987 { ··· 1056 1046 }, 1057 1047 { 1058 1048 "EventCode": "0x000780000000C142", 1059 - "EventName": "PM_MRK_INST_FROM_L3MISS", 1049 + "EventName": "PM_MRK_INST_FROM_L3MISS_DSRC", 1060 1050 "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." 
1061 1051 }, 1062 1052 { 1063 1053 "EventCode": "0x0007C0000000C142", 1064 - "EventName": "PM_MRK_DATA_FROM_L3MISS", 1054 + "EventName": "PM_MRK_DATA_FROM_L3MISS_DSRC", 1065 1055 "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." 1066 1056 }, 1067 1057 {
+2
tools/perf/pmu-events/arch/riscv/mapfile.csv
··· 15 15 # 16 16 #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 17 17 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core 18 + 0x5b7-0x0-0x0,v1,thead/c900-legacy,core 19 + 0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core
+172
tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json
··· 1 + [ 2 + { 3 + "EventName": "ACCESS_MMU_STLB", 4 + "EventCode": "0x1", 5 + "BriefDescription": "access MMU STLB" 6 + }, 7 + { 8 + "EventName": "MISS_MMU_STLB", 9 + "EventCode": "0x2", 10 + "BriefDescription": "miss MMU STLB" 11 + }, 12 + { 13 + "EventName": "ACCESS_MMU_PTE_C", 14 + "EventCode": "0x3", 15 + "BriefDescription": "access MMU PTE-Cache" 16 + }, 17 + { 18 + "EventName": "MISS_MMU_PTE_C", 19 + "EventCode": "0x4", 20 + "BriefDescription": "miss MMU PTE-Cache" 21 + }, 22 + { 23 + "EventName": "ROB_FLUSH", 24 + "EventCode": "0x5", 25 + "BriefDescription": "ROB flush (all kinds of exceptions)" 26 + }, 27 + { 28 + "EventName": "BTB_PREDICTION_MISS", 29 + "EventCode": "0x6", 30 + "BriefDescription": "BTB prediction miss" 31 + }, 32 + { 33 + "EventName": "ITLB_MISS", 34 + "EventCode": "0x7", 35 + "BriefDescription": "ITLB miss" 36 + }, 37 + { 38 + "EventName": "SYNC_DEL_FETCH_G", 39 + "EventCode": "0x8", 40 + "BriefDescription": "SYNC delivery a fetch-group" 41 + }, 42 + { 43 + "EventName": "ICACHE_MISS", 44 + "EventCode": "0x9", 45 + "BriefDescription": "ICache miss" 46 + }, 47 + { 48 + "EventName": "BPU_BR_RETIRE", 49 + "EventCode": "0xA", 50 + "BriefDescription": "condition branch instruction retire" 51 + }, 52 + { 53 + "EventName": "BPU_BR_MISS", 54 + "EventCode": "0xB", 55 + "BriefDescription": "condition branch instruction miss" 56 + }, 57 + { 58 + "EventName": "RET_INS_RETIRE", 59 + "EventCode": "0xC", 60 + "BriefDescription": "return instruction retire" 61 + }, 62 + { 63 + "EventName": "RET_INS_MISS", 64 + "EventCode": "0xD", 65 + "BriefDescription": "return instruction miss" 66 + }, 67 + { 68 + "EventName": "INDIRECT_JR_MISS", 69 + "EventCode": "0xE", 70 + "BriefDescription": "indirect JR instruction miss (include without target)" 71 + }, 72 + { 73 + "EventName": "IBUF_VAL_ID_NORDY", 74 + "EventCode": "0xF", 75 + "BriefDescription": "IBUF valid while ID not ready" 76 + }, 77 + { 78 + "EventName": "IBUF_NOVAL_ID_RDY", 79 + "EventCode": "0x10", 80 + 
"BriefDescription": "IBUF not valid while ID ready" 81 + }, 82 + { 83 + "EventName": "REN_INT_PHY_REG_NORDY", 84 + "EventCode": "0x11", 85 + "BriefDescription": "REN integer physical register file is not ready" 86 + }, 87 + { 88 + "EventName": "REN_FP_PHY_REG_NORDY", 89 + "EventCode": "0x12", 90 + "BriefDescription": "REN floating point physical register file is not ready" 91 + }, 92 + { 93 + "EventName": "REN_CP_NORDY", 94 + "EventCode": "0x13", 95 + "BriefDescription": "REN checkpoint is not ready" 96 + }, 97 + { 98 + "EventName": "DEC_VAL_ROB_NORDY", 99 + "EventCode": "0x14", 100 + "BriefDescription": "DEC is valid and ROB is not ready" 101 + }, 102 + { 103 + "EventName": "OOD_FLUSH_LS_DEP", 104 + "EventCode": "0x15", 105 + "BriefDescription": "out of order flush due to load/store dependency" 106 + }, 107 + { 108 + "EventName": "BRU_RET_IJR_INS", 109 + "EventCode": "0x16", 110 + "BriefDescription": "BRU retire an IJR instruction" 111 + }, 112 + { 113 + "EventName": "ACCESS_DTLB", 114 + "EventCode": "0x17", 115 + "BriefDescription": "access DTLB" 116 + }, 117 + { 118 + "EventName": "MISS_DTLB", 119 + "EventCode": "0x18", 120 + "BriefDescription": "miss DTLB" 121 + }, 122 + { 123 + "EventName": "LOAD_INS_DCACHE", 124 + "EventCode": "0x19", 125 + "BriefDescription": "load instruction access DCache" 126 + }, 127 + { 128 + "EventName": "LOAD_INS_MISS_DCACHE", 129 + "EventCode": "0x1A", 130 + "BriefDescription": "load instruction miss DCache" 131 + }, 132 + { 133 + "EventName": "STORE_INS_DCACHE", 134 + "EventCode": "0x1B", 135 + "BriefDescription": "store/amo instruction access DCache" 136 + }, 137 + { 138 + "EventName": "STORE_INS_MISS_DCACHE", 139 + "EventCode": "0x1C", 140 + "BriefDescription": "store/amo instruction miss DCache" 141 + }, 142 + { 143 + "EventName": "LOAD_SCACHE", 144 + "EventCode": "0x1D", 145 + "BriefDescription": "load access SCache" 146 + }, 147 + { 148 + "EventName": "STORE_SCACHE", 149 + "EventCode": "0x1E", 150 + "BriefDescription": "store 
access SCache" 151 + }, 152 + { 153 + "EventName": "LOAD_MISS_SCACHE", 154 + "EventCode": "0x1F", 155 + "BriefDescription": "load miss SCache" 156 + }, 157 + { 158 + "EventName": "STORE_MISS_SCACHE", 159 + "EventCode": "0x20", 160 + "BriefDescription": "store miss SCache" 161 + }, 162 + { 163 + "EventName": "L2C_PF_REQ", 164 + "EventCode": "0x21", 165 + "BriefDescription": "L2C data-prefetcher request" 166 + }, 167 + { 168 + "EventName": "L2C_PF_HIT", 169 + "EventCode": "0x22", 170 + "BriefDescription": "L2C data-prefetcher hit" 171 + } 172 + ]
+68
tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "FW_MISALIGNED_LOAD" 4 + }, 5 + { 6 + "ArchStdEvent": "FW_MISALIGNED_STORE" 7 + }, 8 + { 9 + "ArchStdEvent": "FW_ACCESS_LOAD" 10 + }, 11 + { 12 + "ArchStdEvent": "FW_ACCESS_STORE" 13 + }, 14 + { 15 + "ArchStdEvent": "FW_ILLEGAL_INSN" 16 + }, 17 + { 18 + "ArchStdEvent": "FW_SET_TIMER" 19 + }, 20 + { 21 + "ArchStdEvent": "FW_IPI_SENT" 22 + }, 23 + { 24 + "ArchStdEvent": "FW_IPI_RECEIVED" 25 + }, 26 + { 27 + "ArchStdEvent": "FW_FENCE_I_SENT" 28 + }, 29 + { 30 + "ArchStdEvent": "FW_FENCE_I_RECEIVED" 31 + }, 32 + { 33 + "ArchStdEvent": "FW_SFENCE_VMA_SENT" 34 + }, 35 + { 36 + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" 37 + }, 38 + { 39 + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" 40 + }, 41 + { 42 + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" 43 + }, 44 + { 45 + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" 46 + }, 47 + { 48 + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" 49 + }, 50 + { 51 + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" 52 + }, 53 + { 54 + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" 55 + }, 56 + { 57 + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" 58 + }, 59 + { 60 + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" 61 + }, 62 + { 63 + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" 64 + }, 65 + { 66 + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" 67 + } 68 + ]
+67
tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json
··· 1 + [ 2 + { 3 + "EventName": "L1_ICACHE_ACCESS", 4 + "EventCode": "0x00000001", 5 + "BriefDescription": "L1 instruction cache access" 6 + }, 7 + { 8 + "EventName": "L1_ICACHE_MISS", 9 + "EventCode": "0x00000002", 10 + "BriefDescription": "L1 instruction cache miss" 11 + }, 12 + { 13 + "EventName": "ITLB_MISS", 14 + "EventCode": "0x00000003", 15 + "BriefDescription": "I-UTLB miss" 16 + }, 17 + { 18 + "EventName": "DTLB_MISS", 19 + "EventCode": "0x00000004", 20 + "BriefDescription": "D-UTLB miss" 21 + }, 22 + { 23 + "EventName": "JTLB_MISS", 24 + "EventCode": "0x00000005", 25 + "BriefDescription": "JTLB miss" 26 + }, 27 + { 28 + "EventName": "L1_DCACHE_READ_ACCESS", 29 + "EventCode": "0x0000000c", 30 + "BriefDescription": "L1 data cache read access" 31 + }, 32 + { 33 + "EventName": "L1_DCACHE_READ_MISS", 34 + "EventCode": "0x0000000d", 35 + "BriefDescription": "L1 data cache read miss" 36 + }, 37 + { 38 + "EventName": "L1_DCACHE_WRITE_ACCESS", 39 + "EventCode": "0x0000000e", 40 + "BriefDescription": "L1 data cache write access" 41 + }, 42 + { 43 + "EventName": "L1_DCACHE_WRITE_MISS", 44 + "EventCode": "0x0000000f", 45 + "BriefDescription": "L1 data cache write miss" 46 + }, 47 + { 48 + "EventName": "LL_CACHE_READ_ACCESS", 49 + "EventCode": "0x00000010", 50 + "BriefDescription": "LL Cache read access" 51 + }, 52 + { 53 + "EventName": "LL_CACHE_READ_MISS", 54 + "EventCode": "0x00000011", 55 + "BriefDescription": "LL Cache read miss" 56 + }, 57 + { 58 + "EventName": "LL_CACHE_WRITE_ACCESS", 59 + "EventCode": "0x00000012", 60 + "BriefDescription": "LL Cache write access" 61 + }, 62 + { 63 + "EventName": "LL_CACHE_WRITE_MISS", 64 + "EventCode": "0x00000013", 65 + "BriefDescription": "LL Cache write miss" 66 + } 67 + ]
+68
tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
··· 1 + [ 2 + { 3 + "ArchStdEvent": "FW_MISALIGNED_LOAD" 4 + }, 5 + { 6 + "ArchStdEvent": "FW_MISALIGNED_STORE" 7 + }, 8 + { 9 + "ArchStdEvent": "FW_ACCESS_LOAD" 10 + }, 11 + { 12 + "ArchStdEvent": "FW_ACCESS_STORE" 13 + }, 14 + { 15 + "ArchStdEvent": "FW_ILLEGAL_INSN" 16 + }, 17 + { 18 + "ArchStdEvent": "FW_SET_TIMER" 19 + }, 20 + { 21 + "ArchStdEvent": "FW_IPI_SENT" 22 + }, 23 + { 24 + "ArchStdEvent": "FW_IPI_RECEIVED" 25 + }, 26 + { 27 + "ArchStdEvent": "FW_FENCE_I_SENT" 28 + }, 29 + { 30 + "ArchStdEvent": "FW_FENCE_I_RECEIVED" 31 + }, 32 + { 33 + "ArchStdEvent": "FW_SFENCE_VMA_SENT" 34 + }, 35 + { 36 + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" 37 + }, 38 + { 39 + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" 40 + }, 41 + { 42 + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" 43 + }, 44 + { 45 + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" 46 + }, 47 + { 48 + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" 49 + }, 50 + { 51 + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" 52 + }, 53 + { 54 + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" 55 + }, 56 + { 57 + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" 58 + }, 59 + { 60 + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" 61 + }, 62 + { 63 + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" 64 + }, 65 + { 66 + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" 67 + } 68 + ]
+72
tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json
··· 1 + [ 2 + { 3 + "EventName": "INST_BRANCH_MISPREDICT", 4 + "EventCode": "0x00000006", 5 + "BriefDescription": "Mispredicted branch instructions" 6 + }, 7 + { 8 + "EventName": "INST_BRANCH", 9 + "EventCode": "0x00000007", 10 + "BriefDescription": "Retired branch instructions" 11 + }, 12 + { 13 + "EventName": "INST_JMP_MISPREDICT", 14 + "EventCode": "0x00000008", 15 + "BriefDescription": "Indirect branch mispredict" 16 + }, 17 + { 18 + "EventName": "INST_JMP", 19 + "EventCode": "0x00000009", 20 + "BriefDescription": "Retired jmp instructions" 21 + }, 22 + { 23 + "EventName": "INST_STORE", 24 + "EventCode": "0x0000000b", 25 + "BriefDescription": "Retired store instructions" 26 + }, 27 + { 28 + "EventName": "INST_ALU", 29 + "EventCode": "0x0000001d", 30 + "BriefDescription": "Retired ALU instructions" 31 + }, 32 + { 33 + "EventName": "INST_LDST", 34 + "EventCode": "0x0000001e", 35 + "BriefDescription": "Retired Load/Store instructions" 36 + }, 37 + { 38 + "EventName": "INST_VECTOR", 39 + "EventCode": "0x0000001f", 40 + "BriefDescription": "Retired Vector instructions" 41 + }, 42 + { 43 + "EventName": "INST_CSR", 44 + "EventCode": "0x00000020", 45 + "BriefDescription": "Retired CSR instructions" 46 + }, 47 + { 48 + "EventName": "INST_SYNC", 49 + "EventCode": "0x00000021", 50 + "BriefDescription": "Retired sync instructions (AMO/LR/SC instructions)" 51 + }, 52 + { 53 + "EventName": "INST_UNALIGNED_ACCESS", 54 + "EventCode": "0x00000022", 55 + "BriefDescription": "Retired Store/Load instructions with unaligned memory access" 56 + }, 57 + { 58 + "EventName": "INST_ECALL", 59 + "EventCode": "0x00000025", 60 + "BriefDescription": "Retired ecall instructions" 61 + }, 62 + { 63 + "EventName": "INST_LONG_JP", 64 + "EventCode": "0x00000026", 65 + "BriefDescription": "Retired long jump instructions" 66 + }, 67 + { 68 + "EventName": "INST_FP", 69 + "EventCode": "0x0000002a", 70 + "BriefDescription": "Retired FPU instructions" 71 + } 72 + ]
+80
tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json
··· 1 + [ 2 + { 3 + "EventName": "LSU_SPEC_FAIL", 4 + "EventCode": "0x0000000a", 5 + "BriefDescription": "LSU speculation fail" 6 + }, 7 + { 8 + "EventName": "IDU_RF_PIPE_FAIL", 9 + "EventCode": "0x00000014", 10 + "BriefDescription": "Instruction decode unit launch pipeline failed in RF state" 11 + }, 12 + { 13 + "EventName": "IDU_RF_REG_FAIL", 14 + "EventCode": "0x00000015", 15 + "BriefDescription": "Instruction decode unit launch register file fail in RF state" 16 + }, 17 + { 18 + "EventName": "IDU_RF_INSTRUCTION", 19 + "EventCode": "0x00000016", 20 + "BriefDescription": "retired instruction count of Instruction decode unit in RF (Register File) stage" 21 + }, 22 + { 23 + "EventName": "LSU_4K_STALL", 24 + "EventCode": "0x00000017", 25 + "BriefDescription": "LSU stall times for long distance data access (Over 4K)", 26 + "PublicDescription": "This stall occurs when translate virtual address with page offset over 4k" 27 + }, 28 + { 29 + "EventName": "LSU_OTHER_STALL", 30 + "EventCode": "0x00000018", 31 + "BriefDescription": "LSU stall times for other reasons (except the 4k stall)" 32 + }, 33 + { 34 + "EventName": "LSU_SQ_OTHER_DIS", 35 + "EventCode": "0x00000019", 36 + "BriefDescription": "LSU store queue discard others" 37 + }, 38 + { 39 + "EventName": "LSU_SQ_DATA_DISCARD", 40 + "EventCode": "0x0000001a", 41 + "BriefDescription": "LSU store queue discard data (uops)" 42 + }, 43 + { 44 + "EventName": "BRANCH_DIRECTION_MISPREDICTION", 45 + "EventCode": "0x0000001b", 46 + "BriefDescription": "Branch misprediction in BTB" 47 + }, 48 + { 49 + "EventName": "BRANCH_DIRECTION_PREDICTION", 50 + "EventCode": "0x0000001c", 51 + "BriefDescription": "All branch prediction in BTB", 52 + "PublicDescription": "This event including both successful prediction and failed prediction in BTB" 53 + }, 54 + { 55 + "EventName": "INTERRUPT_ACK_COUNT", 56 + "EventCode": "0x00000023", 57 + "BriefDescription": "acknowledged interrupt count" 58 + }, 59 + { 60 + "EventName": 
"INTERRUPT_OFF_CYCLE", 61 + "EventCode": "0x00000024", 62 + "BriefDescription": "PLIC arbitration time when the interrupt is not responded", 63 + "PublicDescription": "The arbitration time is recorded while meeting any of the following:\n- CPU is M-mode and MIE == 0\n- CPU is S-mode and delegation and SIE == 0\n" 64 + }, 65 + { 66 + "EventName": "IFU_STALLED_CYCLE", 67 + "EventCode": "0x00000027", 68 + "BriefDescription": "Number of stall cycles of the instruction fetch unit (IFU)." 69 + }, 70 + { 71 + "EventName": "IDU_STALLED_CYCLE", 72 + "EventCode": "0x00000028", 73 + "BriefDescription": "hpcp_backend_stall Number of stall cycles of the instruction decoding unit (IDU) and next-level pipeline unit." 74 + }, 75 + { 76 + "EventName": "SYNC_STALL", 77 + "EventCode": "0x00000029", 78 + "BriefDescription": "Sync instruction stall cycle fence/fence.i/sync/sfence" 79 + } 80 + ]
+8 -7
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
··· 70 70 "ScaleUnit": "100%" 71 71 }, 72 72 { 73 - "BriefDescription": "Uncore frequency per die [GHZ]", 74 - "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", 75 - "MetricGroup": "SoC", 76 - "MetricName": "UNCORE_FREQ" 77 - }, 78 - { 79 73 "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", 80 74 "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", 81 75 "MetricGroup": "smi", ··· 802 808 "MetricThreshold": "tma_store_fwd_blk > 0.05", 803 809 "ScaleUnit": "100%", 804 810 "Unit": "cpu_atom" 811 + }, 812 + { 813 + "BriefDescription": "Uncore frequency per die [GHZ]", 814 + "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", 815 + "MetricGroup": "SoC", 816 + "MetricName": "UNCORE_FREQ", 817 + "Unit": "cpu_core" 805 818 }, 806 819 { 807 820 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.", ··· 1839 1838 }, 1840 1839 { 1841 1840 "BriefDescription": "Average number of parallel data read requests to external memory", 1842 - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", 1841 + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", 1843 1842 "MetricGroup": "Mem;MemoryBW;SoC", 1844 1843 "MetricName": "tma_info_system_mem_parallel_reads", 1845 1844 "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+101
tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json
··· 1 + [ 2 + { 3 + "EventName": "umc_mem_clk", 4 + "PublicDescription": "Number of memory clock cycles.", 5 + "EventCode": "0x00", 6 + "PerPkg": "1", 7 + "Unit": "UMCPMC" 8 + }, 9 + { 10 + "EventName": "umc_act_cmd.all", 11 + "PublicDescription": "Number of ACTIVATE commands sent.", 12 + "EventCode": "0x05", 13 + "PerPkg": "1", 14 + "Unit": "UMCPMC" 15 + }, 16 + { 17 + "EventName": "umc_act_cmd.rd", 18 + "PublicDescription": "Number of ACTIVATE commands sent for reads.", 19 + "EventCode": "0x05", 20 + "RdWrMask": "0x1", 21 + "PerPkg": "1", 22 + "Unit": "UMCPMC" 23 + }, 24 + { 25 + "EventName": "umc_act_cmd.wr", 26 + "PublicDescription": "Number of ACTIVATE commands sent for writes.", 27 + "EventCode": "0x05", 28 + "RdWrMask": "0x2", 29 + "PerPkg": "1", 30 + "Unit": "UMCPMC" 31 + }, 32 + { 33 + "EventName": "umc_pchg_cmd.all", 34 + "PublicDescription": "Number of PRECHARGE commands sent.", 35 + "EventCode": "0x06", 36 + "PerPkg": "1", 37 + "Unit": "UMCPMC" 38 + }, 39 + { 40 + "EventName": "umc_pchg_cmd.rd", 41 + "PublicDescription": "Number of PRECHARGE commands sent for reads.", 42 + "EventCode": "0x06", 43 + "RdWrMask": "0x1", 44 + "PerPkg": "1", 45 + "Unit": "UMCPMC" 46 + }, 47 + { 48 + "EventName": "umc_pchg_cmd.wr", 49 + "PublicDescription": "Number of PRECHARGE commands sent for writes.", 50 + "EventCode": "0x06", 51 + "RdWrMask": "0x2", 52 + "PerPkg": "1", 53 + "Unit": "UMCPMC" 54 + }, 55 + { 56 + "EventName": "umc_cas_cmd.all", 57 + "PublicDescription": "Number of CAS commands sent.", 58 + "EventCode": "0x0a", 59 + "PerPkg": "1", 60 + "Unit": "UMCPMC" 61 + }, 62 + { 63 + "EventName": "umc_cas_cmd.rd", 64 + "PublicDescription": "Number of CAS commands sent for reads.", 65 + "EventCode": "0x0a", 66 + "RdWrMask": "0x1", 67 + "PerPkg": "1", 68 + "Unit": "UMCPMC" 69 + }, 70 + { 71 + "EventName": "umc_cas_cmd.wr", 72 + "PublicDescription": "Number of CAS commands sent for writes.", 73 + "EventCode": "0x0a", 74 + "RdWrMask": "0x2", 75 + "PerPkg": "1", 76 + "Unit": 
"UMCPMC" 77 + }, 78 + { 79 + "EventName": "umc_data_slot_clks.all", 80 + "PublicDescription": "Number of clocks used by the data bus.", 81 + "EventCode": "0x14", 82 + "PerPkg": "1", 83 + "Unit": "UMCPMC" 84 + }, 85 + { 86 + "EventName": "umc_data_slot_clks.rd", 87 + "PublicDescription": "Number of clocks used by the data bus for reads.", 88 + "EventCode": "0x14", 89 + "RdWrMask": "0x1", 90 + "PerPkg": "1", 91 + "Unit": "UMCPMC" 92 + }, 93 + { 94 + "EventName": "umc_data_slot_clks.wr", 95 + "PublicDescription": "Number of clocks used by the data bus for writes.", 96 + "EventCode": "0x14", 97 + "RdWrMask": "0x2", 98 + "PerPkg": "1", 99 + "Unit": "UMCPMC" 100 + } 101 + ]
+84
tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
··· 330 330 "MetricGroup": "data_fabric", 331 331 "PerPkg": "1", 332 332 "ScaleUnit": "6.103515625e-5MiB" 333 + }, 334 + { 335 + "MetricName": "umc_data_bus_utilization", 336 + "BriefDescription": "Memory controller data bus utilization.", 337 + "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)", 338 + "MetricGroup": "memory_controller", 339 + "PerPkg": "1", 340 + "ScaleUnit": "100%" 341 + }, 342 + { 343 + "MetricName": "umc_cas_cmd_rate", 344 + "BriefDescription": "Memory controller CAS command rate.", 345 + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", 346 + "MetricGroup": "memory_controller", 347 + "PerPkg": "1" 348 + }, 349 + { 350 + "MetricName": "umc_cas_cmd_read_ratio", 351 + "BriefDescription": "Ratio of memory controller CAS commands for reads.", 352 + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", 353 + "MetricGroup": "memory_controller", 354 + "PerPkg": "1", 355 + "ScaleUnit": "100%" 356 + }, 357 + { 358 + "MetricName": "umc_cas_cmd_write_ratio", 359 + "BriefDescription": "Ratio of memory controller CAS commands for writes.", 360 + "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)", 361 + "MetricGroup": "memory_controller", 362 + "PerPkg": "1", 363 + "ScaleUnit": "100%" 364 + }, 365 + { 366 + "MetricName": "umc_mem_read_bandwidth", 367 + "BriefDescription": "Estimated memory read bandwidth.", 368 + "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time", 369 + "MetricGroup": "memory_controller", 370 + "PerPkg": "1", 371 + "ScaleUnit": "1MB/s" 372 + }, 373 + { 374 + "MetricName": "umc_mem_write_bandwidth", 375 + "BriefDescription": "Estimated memory write bandwidth.", 376 + "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time", 377 + "MetricGroup": "memory_controller", 378 + "PerPkg": "1", 379 + "ScaleUnit": "1MB/s" 380 + }, 381 + { 382 + "MetricName": "umc_mem_bandwidth", 383 + "BriefDescription": "Estimated combined memory bandwidth.", 384 + "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time", 385 + "MetricGroup": "memory_controller", 386 + "PerPkg": "1", 387 + "ScaleUnit": "1MB/s" 388 + }, 389 + { 390 + "MetricName": "umc_cas_cmd_read_ratio", 391 + "BriefDescription": "Ratio of memory controller CAS commands for reads.", 392 + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", 393 + "MetricGroup": "memory_controller", 394 + "PerPkg": "1", 395 + "ScaleUnit": "100%" 396 + }, 397 + { 398 + "MetricName": "umc_cas_cmd_rate", 399 + "BriefDescription": "Memory controller CAS command rate.", 400 + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", 401 + "MetricGroup": "memory_controller", 402 + "PerPkg": "1" 403 + }, 404 + { 405 + "MetricName": "umc_activate_cmd_rate", 406 + "BriefDescription": "Memory controller ACTIVATE command rate.", 407 + "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)", 408 + "MetricGroup": "memory_controller", 409 + "PerPkg": "1" 410 + }, 411 + { 412 + "MetricName": "umc_precharge_cmd_rate", 413 + "BriefDescription": "Memory controller PRECHARGE command rate.", 414 + "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)", 415 + "MetricGroup": "memory_controller", 416 + "PerPkg": "1" 333 417 } 334 418 ]
+6
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
··· 1863 1863 "ScaleUnit": "1GHz" 1864 1864 }, 1865 1865 { 1866 + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", 1867 + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1868 + "MetricName": "upi_data_receive_bw", 1869 + "ScaleUnit": "1MB/s" 1870 + }, 1871 + { 1866 1872 "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", 1867 1873 "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1868 1874 "MetricName": "upi_data_transmit_bw",
+24 -3
tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
··· 23 23 "UMask": "0x10" 24 24 }, 25 25 { 26 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", 26 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", 27 27 "EventCode": "0xb3", 28 28 "EventName": "FP_ARITH_DISPATCHED.PORT_0", 29 29 "SampleAfterValue": "2000003", 30 30 "UMask": "0x1" 31 31 }, 32 32 { 33 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", 33 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", 34 34 "EventCode": "0xb3", 35 35 "EventName": "FP_ARITH_DISPATCHED.PORT_1", 36 36 "SampleAfterValue": "2000003", 37 37 "UMask": "0x2" 38 38 }, 39 39 { 40 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", 40 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", 41 41 "EventCode": "0xb3", 42 42 "EventName": "FP_ARITH_DISPATCHED.PORT_5", 43 + "SampleAfterValue": "2000003", 44 + "UMask": "0x4" 45 + }, 46 + { 47 + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", 48 + "EventCode": "0xb3", 49 + "EventName": "FP_ARITH_DISPATCHED.V0", 50 + "SampleAfterValue": "2000003", 51 + "UMask": "0x1" 52 + }, 53 + { 54 + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", 55 + "EventCode": "0xb3", 56 + "EventName": "FP_ARITH_DISPATCHED.V1", 57 + "SampleAfterValue": "2000003", 58 + "UMask": "0x2" 59 + }, 60 + { 61 + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", 62 + "EventCode": "0xb3", 63 + "EventName": "FP_ARITH_DISPATCHED.V2", 43 64 "SampleAfterValue": "2000003", 44 65 "UMask": "0x4" 45 66 },
+1 -17
tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
··· 1 1 [ 2 2 { 3 - "BriefDescription": "AMX retired arithmetic BF16 operations.", 4 - "EventCode": "0xce", 5 - "EventName": "AMX_OPS_RETIRED.BF16", 6 - "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. SW to use operation multiplier of 4", 7 - "SampleAfterValue": "1000003", 8 - "UMask": "0x2" 9 - }, 10 - { 11 - "BriefDescription": "AMX retired arithmetic integer 8-bit operations.", 12 - "EventCode": "0xce", 13 - "EventName": "AMX_OPS_RETIRED.INT8", 14 - "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.", 15 - "SampleAfterValue": "1000003", 16 - "UMask": "0x1" 17 - }, 18 - { 19 3 "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE", 20 4 "CounterMask": "1", 21 5 "Deprecated": "1", ··· 489 505 "UMask": "0x1" 490 506 }, 491 507 { 492 - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", 508 + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", 493 509 "EventCode": "0xad", 494 510 "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", 495 511 "MSRIndex": "0x3F7",
+4 -4
tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
··· 4825 4825 "Unit": "M3UPI" 4826 4826 }, 4827 4827 { 4828 - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", 4828 + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", 4829 4829 "EventCode": "0x47", 4830 4830 "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", 4831 4831 "PerPkg": "1", 4832 - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", 4832 + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", 4833 4833 "UMask": "0x1", 4834 4834 "Unit": "MDF" 4835 4835 }, ··· 4861 4861 "Unit": "MDF" 4862 4862 }, 4863 4863 { 4864 - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)", 4864 + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", 4865 4865 "EventCode": "0x47", 4866 4866 "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", 4867 4867 "PerPkg": "1", 4868 - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", 4868 + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", 4869 4869 "UMask": "0x4", 4870 4870 "Unit": "MDF" 4871 4871 },
+30
tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
··· 1186 1186 "Unit": "IIO" 1187 1187 }, 1188 1188 { 1189 + "BriefDescription": ": IOTLB Hits to a 1G Page", 1190 + "EventCode": "0x40", 1191 + "EventName": "UNC_IIO_IOMMU0.1G_HITS", 1192 + "PerPkg": "1", 1193 + "PortMask": "0x0000", 1194 + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", 1195 + "UMask": "0x10", 1196 + "Unit": "IIO" 1197 + }, 1198 + { 1199 + "BriefDescription": ": IOTLB Hits to a 2M Page", 1200 + "EventCode": "0x40", 1201 + "EventName": "UNC_IIO_IOMMU0.2M_HITS", 1202 + "PerPkg": "1", 1203 + "PortMask": "0x0000", 1204 + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", 1205 + "UMask": "0x8", 1206 + "Unit": "IIO" 1207 + }, 1208 + { 1209 + "BriefDescription": ": IOTLB Hits to a 4K Page", 1210 + "EventCode": "0x40", 1211 + "EventName": "UNC_IIO_IOMMU0.4K_HITS", 1212 + "PerPkg": "1", 1213 + "PortMask": "0x0000", 1214 + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", 1215 + "UMask": "0x4", 1216 + "Unit": "IIO" 1217 + }, 1218 + { 1189 1219 "BriefDescription": ": Context cache hits", 1190 1220 "EventCode": "0x40", 1191 1221 "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
+6
tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
··· 1847 1847 "ScaleUnit": "1GHz" 1848 1848 }, 1849 1849 { 1850 + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", 1851 + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1852 + "MetricName": "upi_data_receive_bw", 1853 + "ScaleUnit": "1MB/s" 1854 + }, 1855 + { 1850 1856 "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", 1851 1857 "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1852 1858 "MetricName": "upi_data_transmit_bw",
+1 -1
tools/perf/pmu-events/arch/x86/icelakex/other.json
··· 19 19 "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", 20 20 "EventCode": "0x28", 21 21 "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", 22 - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.", 22 + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.", 23 23 "SampleAfterValue": "200003", 24 24 "UMask": "0x20" 25 25 },
+1 -1
tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
··· 519 519 "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", 520 520 "EventCode": "0x5e", 521 521 "EventName": "RS_EVENTS.EMPTY_CYCLES", 522 - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)", 522 + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", 523 523 "SampleAfterValue": "1000003", 524 524 "UMask": "0x1" 525 525 },
+3 -3
tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
··· 38 38 "EventCode": "0x10", 39 39 "EventName": "UNC_I_COHERENT_OPS.CLFLUSH", 40 40 "PerPkg": "1", 41 - "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP", 41 + "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP", 42 42 "UMask": "0x80", 43 43 "Unit": "IRP" 44 44 }, ··· 65 65 "EventCode": "0x10", 66 66 "EventName": "UNC_I_COHERENT_OPS.WBMTOI", 67 67 "PerPkg": "1", 68 - "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP", 68 + "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP", 69 69 "UMask": "0x40", 70 70 "Unit": "IRP" 71 71 }, ··· 454 454 "EventCode": "0x11", 455 455 "EventName": "UNC_I_TRANSACTIONS.WRITES", 456 456 "PerPkg": "1", 457 - "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.", 457 + "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.", 458 458 "UMask": "0x2", 459 459 "Unit": "IRP" 460 460 },
+3 -3
tools/perf/pmu-events/arch/x86/mapfile.csv
··· 7 7 GenuineIntel-6-4F,v22,broadwellx,core 8 8 GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core 9 9 GenuineIntel-6-9[6C],v1.04,elkhartlake,core 10 - GenuineIntel-6-CF,v1.01,emeraldrapids,core 10 + GenuineIntel-6-CF,v1.02,emeraldrapids,core 11 11 GenuineIntel-6-5[CF],v13,goldmont,core 12 12 GenuineIntel-6-7A,v1.01,goldmontplus,core 13 13 GenuineIntel-6-B6,v1.00,grandridge,core ··· 15 15 GenuineIntel-6-(3C|45|46),v33,haswell,core 16 16 GenuineIntel-6-3F,v28,haswellx,core 17 17 GenuineIntel-6-7[DE],v1.19,icelake,core 18 - GenuineIntel-6-6[AC],v1.21,icelakex,core 18 + GenuineIntel-6-6[AC],v1.23,icelakex,core 19 19 GenuineIntel-6-3A,v24,ivybridge,core 20 20 GenuineIntel-6-3E,v24,ivytown,core 21 21 GenuineIntel-6-2D,v24,jaketown,core ··· 26 26 GenuineIntel-6-2E,v4,nehalemex,core 27 27 GenuineIntel-6-A7,v1.01,rocketlake,core 28 28 GenuineIntel-6-2A,v19,sandybridge,core 29 - GenuineIntel-6-8F,v1.16,sapphirerapids,core 29 + GenuineIntel-6-8F,v1.17,sapphirerapids,core 30 30 GenuineIntel-6-AF,v1.00,sierraforest,core 31 31 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core 32 32 GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
+1 -1
tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
··· 985 985 }, 986 986 { 987 987 "BriefDescription": "Average number of parallel data read requests to external memory", 988 - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", 988 + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", 989 989 "MetricGroup": "Mem;MemoryBW;SoC", 990 990 "MetricName": "tma_info_system_mem_parallel_reads", 991 991 "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+24 -3
tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
··· 23 23 "UMask": "0x10" 24 24 }, 25 25 { 26 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", 26 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", 27 27 "EventCode": "0xb3", 28 28 "EventName": "FP_ARITH_DISPATCHED.PORT_0", 29 29 "SampleAfterValue": "2000003", 30 30 "UMask": "0x1" 31 31 }, 32 32 { 33 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", 33 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", 34 34 "EventCode": "0xb3", 35 35 "EventName": "FP_ARITH_DISPATCHED.PORT_1", 36 36 "SampleAfterValue": "2000003", 37 37 "UMask": "0x2" 38 38 }, 39 39 { 40 - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", 40 + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", 41 41 "EventCode": "0xb3", 42 42 "EventName": "FP_ARITH_DISPATCHED.PORT_5", 43 + "SampleAfterValue": "2000003", 44 + "UMask": "0x4" 45 + }, 46 + { 47 + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", 48 + "EventCode": "0xb3", 49 + "EventName": "FP_ARITH_DISPATCHED.V0", 50 + "SampleAfterValue": "2000003", 51 + "UMask": "0x1" 52 + }, 53 + { 54 + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", 55 + "EventCode": "0xb3", 56 + "EventName": "FP_ARITH_DISPATCHED.V1", 57 + "SampleAfterValue": "2000003", 58 + "UMask": "0x2" 59 + }, 60 + { 61 + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", 62 + "EventCode": "0xb3", 63 + "EventName": "FP_ARITH_DISPATCHED.V2", 43 64 "SampleAfterValue": "2000003", 44 65 "UMask": "0x4" 45 66 },
+1 -1
tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
··· 505 505 "UMask": "0x1" 506 506 }, 507 507 { 508 - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", 508 + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", 509 509 "EventCode": "0xad", 510 510 "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", 511 511 "MSRIndex": "0x3F7",
+6
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
··· 1965 1965 "ScaleUnit": "1GHz" 1966 1966 }, 1967 1967 { 1968 + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", 1969 + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1970 + "MetricName": "upi_data_receive_bw", 1971 + "ScaleUnit": "1MB/s" 1972 + }, 1973 + { 1968 1974 "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", 1969 1975 "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1970 1976 "MetricName": "upi_data_transmit_bw",
+4 -4
tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
··· 4825 4825 "Unit": "M3UPI" 4826 4826 }, 4827 4827 { 4828 - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", 4828 + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", 4829 4829 "EventCode": "0x47", 4830 4830 "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", 4831 4831 "PerPkg": "1", 4832 - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", 4832 + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", 4833 4833 "UMask": "0x1", 4834 4834 "Unit": "MDF" 4835 4835 }, ··· 4861 4861 "Unit": "MDF" 4862 4862 }, 4863 4863 { 4864 - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)", 4864 + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", 4865 4865 "EventCode": "0x47", 4866 4866 "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", 4867 4867 "PerPkg": "1", 4868 - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", 4868 + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", 4869 4869 "UMask": "0x4", 4870 4870 "Unit": "MDF" 4871 4871 },
+30
tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
··· 1250 1250 "Unit": "IIO" 1251 1251 }, 1252 1252 { 1253 + "BriefDescription": ": IOTLB Hits to a 1G Page", 1254 + "EventCode": "0x40", 1255 + "EventName": "UNC_IIO_IOMMU0.1G_HITS", 1256 + "PerPkg": "1", 1257 + "PortMask": "0x0000", 1258 + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", 1259 + "UMask": "0x10", 1260 + "Unit": "IIO" 1261 + }, 1262 + { 1263 + "BriefDescription": ": IOTLB Hits to a 2M Page", 1264 + "EventCode": "0x40", 1265 + "EventName": "UNC_IIO_IOMMU0.2M_HITS", 1266 + "PerPkg": "1", 1267 + "PortMask": "0x0000", 1268 + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", 1269 + "UMask": "0x8", 1270 + "Unit": "IIO" 1271 + }, 1272 + { 1273 + "BriefDescription": ": IOTLB Hits to a 4K Page", 1274 + "EventCode": "0x40", 1275 + "EventName": "UNC_IIO_IOMMU0.4K_HITS", 1276 + "PerPkg": "1", 1277 + "PortMask": "0x0000", 1278 + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", 1279 + "UMask": "0x4", 1280 + "Unit": "IIO" 1281 + }, 1282 + { 1253 1283 "BriefDescription": ": Context cache hits", 1254 1284 "EventCode": "0x40", 1255 1285 "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
+6
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
··· 1807 1807 "ScaleUnit": "1GHz" 1808 1808 }, 1809 1809 { 1810 + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", 1811 + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1812 + "MetricName": "upi_data_receive_bw", 1813 + "ScaleUnit": "1MB/s" 1814 + }, 1815 + { 1810 1816 "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", 1811 1817 "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", 1812 1818 "MetricName": "upi_data_transmit_bw",
+3 -1
tools/perf/pmu-events/jevents.py
··· 83 83 """Return the length of s a C string 84 84 85 85 This doesn't handle all escape characters properly. It first assumes 86 - all \ are for escaping, it then adjusts as it will have over counted 86 + all \\ are for escaping, it then adjusts as it will have over counted 87 87 \\. The code uses \000 rather than \0 as a terminator as an adjacent 88 88 number would be folded into a string of \0 (ie. "\0" + "5" doesn't 89 89 equal a terminator followed by the number 5 but the escape of ··· 286 286 'imx8_ddr': 'imx8_ddr', 287 287 'L3PMC': 'amd_l3', 288 288 'DFPMC': 'amd_df', 289 + 'UMCPMC': 'amd_umc', 289 290 'cpu_core': 'cpu_core', 290 291 'cpu_atom': 'cpu_atom', 291 292 'ali_drw': 'ali_drw', ··· 355 354 ('SampleAfterValue', 'period='), 356 355 ('UMask', 'umask='), 357 356 ('NodeType', 'type='), 357 + ('RdWrMask', 'rdwrmask='), 358 358 ] 359 359 for key, value in event_fields: 360 360 if key in jd and jd[key] != '0':
+16 -14
tools/perf/scripts/python/arm-cs-trace-disasm.py
··· 45 45 # Initialize global dicts and regular expression 46 46 disasm_cache = dict() 47 47 cpu_data = dict() 48 - disasm_re = re.compile("^\s*([0-9a-fA-F]+):") 49 - disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") 48 + disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") 49 + disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") 50 50 cache_size = 64*1024 51 51 52 52 glb_source_file_name = None ··· 188 188 dso_end = get_optional(param_dict, "dso_map_end") 189 189 symbol = get_optional(param_dict, "symbol") 190 190 191 + cpu = sample["cpu"] 192 + ip = sample["ip"] 193 + addr = sample["addr"] 194 + 195 + # Initialize CPU data if it's empty, and directly return back 196 + # if this is the first tracing event for this CPU. 197 + if (cpu_data.get(str(cpu) + 'addr') == None): 198 + cpu_data[str(cpu) + 'addr'] = addr 199 + return 200 + 201 + 191 202 if (options.verbose == True): 192 203 print("Event type: %s" % name) 193 204 print_sample(sample) ··· 218 207 219 208 # Don't proceed if this event is not a branch sample, . 220 209 if (name[0:8] != "branches"): 221 - return 222 - 223 - cpu = sample["cpu"] 224 - ip = sample["ip"] 225 - addr = sample["addr"] 226 - 227 - # Initialize CPU data if it's empty, and directly return back 228 - # if this is the first tracing event for this CPU. 229 - if (cpu_data.get(str(cpu) + 'addr') == None): 230 - cpu_data[str(cpu) + 'addr'] = addr 231 210 return 232 211 233 212 # The format for packet is: ··· 259 258 260 259 if (options.objdump_name != None): 261 260 # It doesn't need to decrease virtual memory offset for disassembly 262 - # for kernel dso, so in this case we set vm_start to zero. 263 - if (dso == "[kernel.kallsyms]"): 261 + # for kernel dso and executable file dso, so in this case we set 262 + # vm_start to zero. 263 + if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): 264 264 dso_vm_start = 0 265 265 else: 266 266 dso_vm_start = int(dso_start)
+1 -1
tools/perf/scripts/python/compaction-times.py
··· 260 260 261 261 comm_re = None 262 262 pid_re = None 263 - pid_regex = "^(\d*)-(\d*)$|^(\d*)$" 263 + pid_regex = r"^(\d*)-(\d*)$|^(\d*)$" 264 264 265 265 opt_proc = popt.DISP_DFL 266 266 opt_disp = topt.DISP_ALL
+2 -2
tools/perf/scripts/python/exported-sql-viewer.py
··· 677 677 # sqlite supports GLOB (text only) which uses * and ? and is case sensitive 678 678 if not self.glb.dbref.is_sqlite3: 679 679 # Escape % and _ 680 - s = value.replace("%", "\%") 681 - s = s.replace("_", "\_") 680 + s = value.replace("%", "\\%") 681 + s = s.replace("_", "\\_") 682 682 # Translate * and ? into SQL LIKE pattern characters % and _ 683 683 trans = string.maketrans("*?", "%_") 684 684 match = " LIKE '" + str(s).translate(trans) + "'"
+14
tools/perf/tests/Build
··· 77 77 CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls 78 78 79 79 perf-y += workloads/ 80 + 81 + ifdef SHELLCHECK 82 + SHELL_TESTS := $(shell find tests/shell -executable -type f -name '*.sh') 83 + TEST_LOGS := $(SHELL_TESTS:tests/shell/%=shell/%.shellcheck_log) 84 + else 85 + SHELL_TESTS := 86 + TEST_LOGS := 87 + endif 88 + 89 + $(OUTPUT)%.shellcheck_log: % 90 + $(call rule_mkdir) 91 + $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) 92 + 93 + perf-y += $(TEST_LOGS)
+1 -1
tools/perf/tests/attr.c
··· 188 188 if (perf_pmus__num_core_pmus() > 1) { 189 189 /* 190 190 * TODO: Attribute tests hard code the PMU type. If there are >1 191 - * core PMU then each PMU will have a different type whic 191 + * core PMU then each PMU will have a different type which 192 192 * requires additional support. 193 193 */ 194 194 pr_debug("Skip test on hybrid systems");
+1 -1
tools/perf/tests/attr/base-record
··· 6 6 cpu=* 7 7 type=0|1 8 8 size=136 9 - config=0 9 + config=0|1 10 10 sample_period=* 11 11 sample_type=263 12 12 read_format=0|4|20
+1 -1
tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
··· 6 6 ret = 129 7 7 test_ret = true 8 8 arch = aarch64 9 - auxv = auxv["AT_HWCAP"] & 0x200000 == 0 9 + auxv = auxv["AT_HWCAP"] & 0x400000 == 0
+1 -1
tools/perf/tests/attr/test-record-user-regs-sve-aarch64
··· 6 6 ret = 1 7 7 test_ret = true 8 8 arch = aarch64 9 - auxv = auxv["AT_HWCAP"] & 0x200000 == 0x200000 9 + auxv = auxv["AT_HWCAP"] & 0x400000 == 0x400000 10 10 kernel_since = 6.1 11 11 12 12 [event:base-record]
+15 -2
tools/perf/tests/builtin-test.c
··· 14 14 #include <sys/wait.h> 15 15 #include <sys/stat.h> 16 16 #include "builtin.h" 17 + #include "config.h" 17 18 #include "hist.h" 18 19 #include "intlist.h" 19 20 #include "tests.h" ··· 33 32 34 33 static bool dont_fork; 35 34 const char *dso_to_test; 35 + const char *test_objdump_path = "objdump"; 36 36 37 37 /* 38 38 * List of architecture specific tests. Not a weak symbol as the array length is ··· 62 60 &suite__pmu, 63 61 &suite__pmu_events, 64 62 &suite__dso_data, 65 - &suite__dso_data_cache, 66 - &suite__dso_data_reopen, 67 63 &suite__perf_evsel__roundtrip_name_test, 68 64 #ifdef HAVE_LIBTRACEEVENT 69 65 &suite__perf_evsel__tp_sched_test, ··· 513 513 return -1; 514 514 } 515 515 516 + static int perf_test__config(const char *var, const char *value, 517 + void *data __maybe_unused) 518 + { 519 + if (!strcmp(var, "annotate.objdump")) 520 + test_objdump_path = value; 521 + 522 + return 0; 523 + } 524 + 516 525 int cmd_test(int argc, const char **argv) 517 526 { 518 527 const char *test_usage[] = { ··· 538 529 "Do not fork for testcase"), 539 530 OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), 540 531 OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), 532 + OPT_STRING(0, "objdump", &test_objdump_path, "path", 533 + "objdump binary to use for disassembly and annotations"), 541 534 OPT_END() 542 535 }; 543 536 const char * const test_subcommands[] = { "list", NULL }; ··· 548 537 549 538 if (ret < 0) 550 539 return ret; 540 + 541 + perf_config(perf_test__config, NULL); 551 542 552 543 /* Unbuffered output */ 553 544 setvbuf(stdout, NULL, _IONBF, 0);
+25 -55
tools/perf/tests/code-reading.c
··· 185 185 int ret; 186 186 187 187 fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; 188 - ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, 188 + ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len, 189 189 filename); 190 190 if (ret <= 0 || (size_t)ret >= sizeof(cmd)) 191 191 return -1; ··· 511 511 } 512 512 } 513 513 514 - #ifdef __s390x__ 515 - #include "header.h" // for get_cpuid() 516 - #endif 517 - 518 - static const char *do_determine_event(bool excl_kernel) 519 - { 520 - const char *event = excl_kernel ? "cycles:u" : "cycles"; 521 - 522 - #ifdef __s390x__ 523 - char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; 524 - unsigned int family; 525 - int ret, cpum_cf_a; 526 - 527 - if (get_cpuid(cpuid, sizeof(cpuid))) 528 - goto out_clocks; 529 - ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, 530 - model, cpum_cf_v, &cpum_cf_a); 531 - if (ret != 5) /* Not available */ 532 - goto out_clocks; 533 - if (excl_kernel && (cpum_cf_a & 4)) 534 - return event; 535 - if (!excl_kernel && (cpum_cf_a & 2)) 536 - return event; 537 - 538 - /* Fall through: missing authorization */ 539 - out_clocks: 540 - event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; 541 - 542 - #endif 543 - return event; 544 - } 545 - 546 514 static void do_something(void) 547 515 { 548 516 fs_something(); ··· 551 583 int err = -1, ret; 552 584 pid_t pid; 553 585 struct map *map; 554 - bool have_vmlinux, have_kcore, excl_kernel = false; 586 + bool have_vmlinux, have_kcore; 555 587 struct dso *dso; 588 + const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL }; 589 + int evidx = 0; 556 590 557 591 pid = getpid(); 558 592 ··· 588 618 589 619 /* No point getting kernel events if there is no kernel object */ 590 620 if (!have_vmlinux && !have_kcore) 591 621 excl_kernel = true; 592 622 593 623 threads = thread_map__new_by_tid(pid); 594 624 if (!threads) { ··· 610 640 goto out_put; 611 641 } 612 642 613 - cpus = perf_cpu_map__new(NULL); 643 + cpus = perf_cpu_map__new_online_cpus(); 614 644 if (!cpus) { 615 645 pr_debug("perf_cpu_map__new failed\n"); 616 646 goto out_put; 617 647 } 618 648 619 - while (1) { 649 + while (events[evidx]) { 620 650 const char *str; 621 651 622 652 evlist = evlist__new(); ··· 627 657 628 658 perf_evlist__set_maps(&evlist->core, cpus, threads); 629 659 630 - str = do_determine_event(excl_kernel); 660 + str = events[evidx]; 631 661 pr_debug("Parsing event '%s'\n", str); 632 662 ret = parse_event(evlist, str); 633 663 if (ret < 0) { ··· 645 675 646 676 ret = evlist__open(evlist); 647 677 if (ret < 0) { 648 - if (!excl_kernel) { 649 - excl_kernel = true; 650 - /* 651 - * Both cpus and threads are now owned by evlist 652 - * and will be freed by following perf_evlist__set_maps 653 - * call. Getting reference to keep them alive. 654 - */ 655 - perf_cpu_map__get(cpus); 656 - perf_thread_map__get(threads); 657 - perf_evlist__set_maps(&evlist->core, NULL, NULL); 658 - evlist__delete(evlist); 659 - evlist = NULL; 660 - continue; 661 - } 678 + evidx++; 662 679 663 680 if (events[evidx] == NULL && verbose > 0) { 664 681 char errbuf[512]; 665 682 evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 666 683 pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); 667 684 } 668 685 669 686 goto out_put; 686 + /* 687 + * Both cpus and threads are now owned by evlist 688 + * and will be freed by following perf_evlist__set_maps 689 + * call. Getting reference to keep them alive. 690 + */ 691 + perf_cpu_map__get(cpus); 692 + perf_thread_map__get(threads); 693 + perf_evlist__set_maps(&evlist->core, NULL, NULL); 694 + evlist__delete(evlist); 695 + evlist = NULL; 696 + continue; 670 697 } 671 698 break; 672 699 } 700 + 701 + if (events[evidx] == NULL) 702 + goto out_put; 673 703 674 704 ret = evlist__mmap(evlist, UINT_MAX); 675 705 if (ret < 0) { ··· 691 721 err = TEST_CODE_READING_NO_KERNEL_OBJ; 692 722 else if (!have_vmlinux && !try_kcore) 693 723 err = TEST_CODE_READING_NO_VMLINUX; 694 - else if (excl_kernel) 724 + else if (strstr(events[evidx], ":u")) 695 725 err = TEST_CODE_READING_NO_ACCESS; 696 726 else 697 727 err = TEST_CODE_READING_OK;
+1 -1
tools/perf/tests/cpumap.c
··· 213 213 214 214 static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 215 215 { 216 - struct perf_cpu_map *any = perf_cpu_map__dummy_new(); 216 + struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); 217 217 struct perf_cpu_map *one = perf_cpu_map__new("1"); 218 218 struct perf_cpu_map *two = perf_cpu_map__new("2"); 219 219 struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
+12 -3
tools/perf/tests/dso-data.c
··· 394 394 return 0; 395 395 } 396 396 397 - DEFINE_SUITE("DSO data read", dso_data); 398 - DEFINE_SUITE("DSO data cache", dso_data_cache); 399 - DEFINE_SUITE("DSO data reopen", dso_data_reopen); 397 + 398 + static struct test_case tests__dso_data[] = { 399 + TEST_CASE("read", dso_data), 400 + TEST_CASE("cache", dso_data_cache), 401 + TEST_CASE("reopen", dso_data_reopen), 402 + { .name = NULL, } 403 + }; 404 + 405 + struct test_suite suite__dso_data = { 406 + .desc = "DSO data tests", 407 + .test_cases = tests__dso_data, 408 + };
+1 -1
tools/perf/tests/keep-tracking.c
··· 81 81 threads = thread_map__new(-1, getpid(), UINT_MAX); 82 82 CHECK_NOT_NULL__(threads); 83 83 84 - cpus = perf_cpu_map__new(NULL); 84 + cpus = perf_cpu_map__new_online_cpus(); 85 85 CHECK_NOT_NULL__(cpus); 86 86 87 87 evlist = evlist__new();
+1 -1
tools/perf/tests/make
··· 183 183 # run += make_install_pdf 184 184 run += make_minimal 185 185 186 - old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| egrep -q "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") 186 + old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| grep -q -E "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") 187 187 188 188 ifneq ($(old_libbpf),) 189 189 run += make_libbpf_dynamic
+38 -23
tools/perf/tests/maps.c
··· 14 14 u64 end; 15 15 }; 16 16 17 + struct check_maps_cb_args { 18 + struct map_def *merged; 19 + unsigned int i; 20 + }; 21 + 22 + static int check_maps_cb(struct map *map, void *data) 23 + { 24 + struct check_maps_cb_args *args = data; 25 + struct map_def *merged = &args->merged[args->i]; 26 + 27 + if (map__start(map) != merged->start || 28 + map__end(map) != merged->end || 29 + strcmp(map__dso(map)->name, merged->name) || 30 + refcount_read(map__refcnt(map)) != 1) { 31 + return 1; 32 + } 33 + args->i++; 34 + return 0; 35 + } 36 + 37 + static int failed_cb(struct map *map, void *data __maybe_unused) 38 + { 39 + pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", 40 + map__start(map), 41 + map__end(map), 42 + map__dso(map)->name, 43 + refcount_read(map__refcnt(map))); 44 + 45 + return 0; 46 + } 47 + 17 48 static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps) 18 49 { 19 - struct map_rb_node *rb_node; 20 - unsigned int i = 0; 21 50 bool failed = false; 22 51 23 52 if (maps__nr_maps(maps) != size) { 24 53 pr_debug("Expected %d maps, got %d", size, maps__nr_maps(maps)); 25 54 failed = true; 26 55 } else { 27 - maps__for_each_entry(maps, rb_node) { 28 - struct map *map = rb_node->map; 29 - 30 - if (map__start(map) != merged[i].start || 31 - map__end(map) != merged[i].end || 32 - strcmp(map__dso(map)->name, merged[i].name) || 33 - refcount_read(map__refcnt(map)) != 1) { 34 - failed = true; 35 - } 36 - i++; 37 - } 56 + struct check_maps_cb_args args = { 57 + .merged = merged, 58 + .i = 0, 59 + }; 60 + failed = maps__for_each_map(maps, check_maps_cb, &args); 38 61 } 39 62 if (failed) { 40 63 pr_debug("Expected:\n"); 41 - for (i = 0; i < size; i++) { 64 + for (unsigned int i = 0; i < size; i++) { 42 65 pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: 1\n", 43 66 merged[i].start, merged[i].end, merged[i].name); 44 67 } 45 68 pr_debug("Got:\n"); 46 - maps__for_each_entry(maps, rb_node) { 47 - struct map 
*map = rb_node->map; 48 - 49 - pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", 50 - map__start(map), 51 - map__end(map), 52 - map__dso(map)->name, 53 - refcount_read(map__refcnt(map))); 54 - } 69 + maps__for_each_map(maps, failed_cb, NULL); 55 70 } 56 71 return failed ? TEST_FAIL : TEST_OK; 57 72 }
+1 -1
tools/perf/tests/mmap-basic.c
··· 52 52 return -1; 53 53 } 54 54 55 - cpus = perf_cpu_map__new(NULL); 55 + cpus = perf_cpu_map__new_online_cpus(); 56 56 if (cpus == NULL) { 57 57 pr_debug("perf_cpu_map__new\n"); 58 58 goto out_free_threads;
+1 -1
tools/perf/tests/openat-syscall-all-cpus.c
··· 37 37 return -1; 38 38 } 39 39 40 - cpus = perf_cpu_map__new(NULL); 40 + cpus = perf_cpu_map__new_online_cpus(); 41 41 if (cpus == NULL) { 42 42 pr_debug("perf_cpu_map__new\n"); 43 43 goto out_thread_map_delete;
+187 -73
tools/perf/tests/parse-events.c
··· 162 162 return TEST_OK; 163 163 } 164 164 165 + 166 + static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name) 167 + { 168 + struct perf_pmu *pmu; 169 + 170 + if (evsel->attr.type == PERF_TYPE_HARDWARE) { 171 + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id)); 172 + return 0; 173 + } 174 + pmu = perf_pmus__find_by_type(evsel->attr.type); 175 + 176 + TEST_ASSERT_VAL("unexpected PMU type", pmu); 177 + TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name)); 178 + return 0; 179 + } 180 + 165 181 static int test__checkevent_symbolic_name(struct evlist *evlist) 166 182 { 167 183 struct perf_evsel *evsel; ··· 185 169 TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); 186 170 187 171 perf_evlist__for_each_evsel(&evlist->core, evsel) { 188 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); 189 - TEST_ASSERT_VAL("wrong config", 190 - test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 172 + int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 173 + 174 + if (ret) 175 + return ret; 191 176 } 177 + 192 178 return TEST_OK; 193 179 } 194 180 ··· 201 183 TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); 202 184 203 185 perf_evlist__for_each_evsel(&evlist->core, evsel) { 204 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); 205 - TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 186 + int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 187 + 188 + if (ret) 189 + return ret; 206 190 /* 207 191 * The period value gets configured within evlist__config, 208 192 * while this test executes only parse events method. ··· 881 861 evlist__nr_groups(evlist) == num_core_entries()); 882 862 883 863 for (int i = 0; i < num_core_entries(); i++) { 864 + int ret; 865 + 884 866 /* instructions:k */ 885 867 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 886 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 887 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 868 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 869 + if (ret) 870 + return ret; 871 + 888 872 TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); 889 873 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 890 874 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 902 878 903 879 /* cycles:upp */ 904 880 evsel = evsel__next(evsel); 905 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 906 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 881 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 882 + if (ret) 883 + return ret; 884 + 907 885 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 908 886 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 909 887 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 933 907 TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist)); 934 908 935 909 evlist__for_each_entry(evlist, evsel) { 910 + int ret; 911 + 936 912 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) { 937 913 /* faults + :ku modifier */ 938 914 leader = evsel; ··· 967 939 continue; 968 940 } 969 941 /* cycles:k */ 970 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 971 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 942 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 943 + if (ret) 944 + return ret; 945 + 972 946 TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); 973 947 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 974 948 TEST_ASSERT_VAL("wrong exclude_hv", 
evsel->core.attr.exclude_hv); ··· 987 957 static int test__group3(struct evlist *evlist __maybe_unused) 988 958 { 989 959 struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL; 960 + int ret; 990 961 991 962 TEST_ASSERT_VAL("wrong number of entries", 992 963 evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2)); ··· 1076 1045 continue; 1077 1046 } 1078 1047 /* instructions:u */ 1079 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1080 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 1048 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 1049 + if (ret) 1050 + return ret; 1051 + 1081 1052 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1082 1053 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1083 1054 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1103 1070 num_core_entries() == evlist__nr_groups(evlist)); 1104 1071 1105 1072 for (int i = 0; i < num_core_entries(); i++) { 1073 + int ret; 1074 + 1106 1075 /* cycles:u + p */ 1107 1076 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1108 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1109 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1077 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1078 + if (ret) 1079 + return ret; 1080 + 1110 1081 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1111 1082 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1112 1083 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1126 1089 1127 1090 /* instructions:kp + p */ 1128 1091 evsel = evsel__next(evsel); 1129 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1130 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 1092 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 1093 + if (ret) 1094 + return ret; 1095 + 1131 1096 TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); 1132 1097 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1133 1098 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1147 1108 static int test__group5(struct evlist *evlist __maybe_unused) 1148 1109 { 1149 1110 struct evsel *evsel = NULL, *leader; 1111 + int ret; 1150 1112 1151 1113 TEST_ASSERT_VAL("wrong number of entries", 1152 1114 evlist->core.nr_entries == (5 * num_core_entries())); ··· 1157 1117 for (int i = 0; i < num_core_entries(); i++) { 1158 1118 /* cycles + G */ 1159 1119 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1160 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1161 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1120 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1121 + if (ret) 1122 + return ret; 1123 + 1162 1124 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1163 1125 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1164 1126 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1175 1133 1176 1134 /* instructions + G */ 1177 1135 evsel = evsel__next(evsel); 1178 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1179 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 1136 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 1137 + if (ret) 1138 + return ret; 1139 + 1180 1140 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1181 1141 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1182 1142 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1192 1148 for (int i = 0; i < num_core_entries(); i++) { 1193 1149 /* cycles:G */ 1194 1150 evsel = leader = evsel__next(evsel); 1195 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1196 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1151 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1152 + if (ret) 1153 + return ret; 1154 + 1197 1155 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1198 1156 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1199 1157 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1210 1164 1211 1165 /* instructions:G */ 1212 1166 evsel = evsel__next(evsel); 1213 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == 
evsel->core.attr.type); 1214 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 1167 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 1168 + if (ret) 1169 + return ret; 1170 + 1215 1171 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1216 1172 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1217 1173 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1226 1178 for (int i = 0; i < num_core_entries(); i++) { 1227 1179 /* cycles */ 1228 1180 evsel = evsel__next(evsel); 1229 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1230 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1181 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1182 + if (ret) 1183 + return ret; 1184 + 1231 1185 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1232 1186 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1233 1187 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1251 1201 evlist__nr_groups(evlist) == num_core_entries()); 1252 1202 1253 1203 for (int i = 0; i < num_core_entries(); i++) { 1204 + int ret; 1205 + 1254 1206 /* cycles + :H group modifier */ 1255 1207 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1256 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1257 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1208 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1209 + if (ret) 1210 + return ret; 1211 + 1258 1212 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1259 1213 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1260 1214 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1272 1218 1273 1219 /* cache-misses:G + :H group modifier */ 1274 1220 evsel = evsel__next(evsel); 1275 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1276 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1221 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1222 + if (ret) 1223 + return ret; 1224 + 1277 1225 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1278 1226 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1279 1227 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1298 1242 evlist__nr_groups(evlist) == num_core_entries()); 1299 1243 1300 1244 for (int i = 0; i < num_core_entries(); i++) { 1245 + int ret; 1246 + 1301 1247 /* cycles + :G group modifier */ 1302 1248 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1303 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1304 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1249 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1250 + if (ret) 1251 + return ret; 1252 + 1305 1253 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1306 1254 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1307 1255 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1319 1259 1320 1260 /* cache-misses:H + :G group modifier */ 1321 1261 evsel = evsel__next(evsel); 1322 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1323 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1262 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1263 + if (ret) 1264 + return ret; 1265 + 1324 1266 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1325 1267 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1326 1268 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1345 1283 evlist__nr_groups(evlist) == num_core_entries()); 1346 1284 1347 1285 for (int i = 0; i < num_core_entries(); i++) { 1286 + int ret; 1287 + 1348 1288 /* cycles:G + :u group modifier */ 1349 1289 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1350 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1351 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1290 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1291 + if (ret) 1292 + return ret; 1293 + 1352 1294 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1353 1295 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1354 1296 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1366 1300 1367 1301 /* cache-misses:H + :u group modifier */ 1368 1302 evsel = evsel__next(evsel); 1369 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1370 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1303 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1304 + if (ret) 1305 + return ret; 1306 + 1371 1307 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1372 1308 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1373 1309 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1392 1324 evlist__nr_groups(evlist) == num_core_entries()); 1393 1325 1394 1326 for (int i = 0; i < num_core_entries(); i++) { 1327 + int ret; 1328 + 1395 1329 /* cycles:G + :uG group modifier */ 1396 1330 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1397 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1398 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1331 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1332 + if (ret) 1333 + return ret; 1334 + 1399 1335 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1400 1336 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1401 1337 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1413 1341 1414 1342 /* cache-misses:H + :uG group modifier */ 1415 1343 evsel = evsel__next(evsel); 1416 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1417 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1344 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1345 + if (ret) 1346 + return ret; 1347 + 1418 1348 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1419 1349 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1420 1350 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1437 1363 evlist->core.nr_entries == (3 * num_core_entries())); 1438 1364 1439 1365 for (int i = 0; i < num_core_entries(); i++) { 1366 + int ret; 1367 + 1440 1368 /* cycles - sampling group leader */ 1441 1369 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1442 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1443 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1370 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1371 + if (ret) 1372 + return ret; 1373 + 1444 1374 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1445 1375 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1446 1376 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1457 1379 1458 1380 /* cache-misses - not sampling */ 1459 1381 evsel = evsel__next(evsel); 1460 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1461 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1382 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1383 + if (ret) 1384 + return ret; 1385 + 1462 1386 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1463 1387 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1464 1388 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1472 1392 1473 1393 /* branch-misses - not sampling */ 1474 1394 evsel = evsel__next(evsel); 1475 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1476 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); 1395 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); 1396 + if (ret) 1397 + return ret; 1398 + 1477 1399 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1478 1400 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1479 1401 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); ··· 1497 1415 evlist->core.nr_entries == (2 * num_core_entries())); 1498 1416 1499 1417 for (int i = 0; i < num_core_entries(); i++) { 1418 + int ret; 
1419 + 1500 1420 /* instructions - sampling group leader */ 1501 1421 evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); 1502 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1503 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); 1422 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); 1423 + if (ret) 1424 + return ret; 1425 + 1504 1426 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1505 1427 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1506 1428 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1517 1431 1518 1432 /* branch-misses - not sampling */ 1519 1433 evsel = evsel__next(evsel); 1520 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1521 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); 1434 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); 1435 + if (ret) 1436 + return ret; 1437 + 1522 1438 TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1523 1439 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1524 1440 TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); ··· 1560 1472 evlist->core.nr_entries == (3 * num_core_entries())); 1561 1473 1562 1474 for (int i = 0; i < num_core_entries(); i++) { 1475 + int ret; 1476 + 1563 1477 /* cycles - group leader */ 1564 1478 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1565 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1566 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1479 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1480 + if (ret) 1481 + return ret; 1482 + 1567 1483 TEST_ASSERT_VAL("wrong group name", !evsel->group_name); 1568 1484 TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 1569 1485 /* TODO: The group modifier is not copied to the split group leader. */ ··· 1576 1484 1577 1485 /* cache-misses - can not be pinned, but will go on with the leader */ 1578 1486 evsel = evsel__next(evsel); 1579 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1580 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1487 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1488 + if (ret) 1489 + return ret; 1490 + 1581 1491 TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); 1582 1492 1583 1493 /* branch-misses - ditto */ 1584 1494 evsel = evsel__next(evsel); 1585 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); 1495 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); 1496 + if (ret) 1497 + return ret; 1498 + 1586 1499 TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); 1587 1500 } 1588 1501 return TEST_OK; ··· 1614 1517 evlist->core.nr_entries == 3 * num_core_entries()); 1615 1518 1616 1519 for (int i = 0; i < num_core_entries(); i++) { 1520 + int ret; 1521 + 1617 1522 /* cycles - group leader */ 1618 1523 evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); 1619 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1620 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1524 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1525 + if (ret) 1526 + return ret; 1527 + 1621 1528 TEST_ASSERT_VAL("wrong group name", !evsel->group_name); 1622 1529 TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); 1623 1530 /* TODO: The group modifier is not copied to the split group leader. */ ··· 1630 1529 1631 1530 /* cache-misses - can not be pinned, but will go on with the leader */ 1632 1531 evsel = evsel__next(evsel); 1633 - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); 1634 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); 1532 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); 1533 + if (ret) 1534 + return ret; 1535 + 1635 1536 TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); 1636 1537 1637 1538 /* branch-misses - ditto */ 1638 1539 evsel = evsel__next(evsel); 1639 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); 1540 + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); 1541 + if (ret) 1542 + return ret; 1543 + 1640 1544 TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); 1641 1545 } 1642 1546 return TEST_OK; ··· 1783 1677 static int test__sym_event_slash(struct evlist *evlist) 1784 1678 { 1785 1679 struct evsel *evsel = evlist__first(evlist); 1680 + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1786 1681 1787 - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); 1788 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1682 + if (ret) 1683 + return ret; 1684 + 1789 1685 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); 1790 
1686 return TEST_OK; 1791 1687 } ··· 1795 1687 static int test__sym_event_dc(struct evlist *evlist) 1796 1688 { 1797 1689 struct evsel *evsel = evlist__first(evlist); 1690 + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1798 1691 1799 - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); 1800 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1692 + if (ret) 1693 + return ret; 1694 + 1801 1695 TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); 1802 1696 return TEST_OK; 1803 1697 } ··· 1807 1697 static int test__term_equal_term(struct evlist *evlist) 1808 1698 { 1809 1699 struct evsel *evsel = evlist__first(evlist); 1700 + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1810 1701 1811 - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); 1812 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1702 + if (ret) 1703 + return ret; 1704 + 1813 1705 TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); 1814 1706 return TEST_OK; 1815 1707 } ··· 1819 1707 static int test__term_equal_legacy(struct evlist *evlist) 1820 1708 { 1821 1709 struct evsel *evsel = evlist__first(evlist); 1710 + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); 1822 1711 1823 - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); 1824 - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); 1712 + if (ret) 1713 + return ret; 1714 + 1825 1715 TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); 1826 1716 return TEST_OK; 1827 1717 } ··· 2663 2549 if (strchr(ent->d_name, '.')) 2664 2550 continue; 2665 2551 2666 - /* exclude parametrized ones (name contains '?') */ 2552 + /* exclude parameterized ones (name contains '?') */ 2667 2553 n = snprintf(pmu_event, sizeof(pmu_event), "%s%s", path, ent->d_name); 2668 2554 if (n >= 
PATH_MAX) { 2669 2555 pr_err("pmu event name crossed PATH_MAX(%d) size\n", PATH_MAX); ··· 2692 2578 fclose(file); 2693 2579 2694 2580 if (is_event_parameterized == 1) { 2695 - pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event); 2581 + pr_debug("skipping parameterized PMU event: %s which contains ?\n", pmu_event); 2696 2582 continue; 2697 2583 } 2698 2584
+1 -1
tools/perf/tests/perf-time-to-tsc.c
··· 93 93 threads = thread_map__new(-1, getpid(), UINT_MAX); 94 94 CHECK_NOT_NULL__(threads); 95 95 96 - cpus = perf_cpu_map__new(NULL); 96 + cpus = perf_cpu_map__new_online_cpus(); 97 97 CHECK_NOT_NULL__(cpus); 98 98 99 99 evlist = evlist__new();
-1
tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
··· 42 42 int main(int argc, char **argv) 43 43 { 44 44 unsigned long i, len, size, thr; 45 - pthread_t threads[256]; 46 45 struct args args[256]; 47 46 long long v; 48 47
-1
tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
··· 57 57 int main(int argc, char **argv) 58 58 { 59 59 unsigned int i, len, thr; 60 - pthread_t threads[256]; 61 60 struct args args[256]; 62 61 63 62 if (argc < 3) {
-1
tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
··· 51 51 int main(int argc, char **argv) 52 52 { 53 53 unsigned int i, thr; 54 - pthread_t threads[256]; 55 54 struct args args[256]; 56 55 57 56 if (argc < 2) {
+108
tools/perf/tests/shell/diff.sh
··· 1 + #!/bin/sh 2 + # perf diff tests 3 + # SPDX-License-Identifier: GPL-2.0 4 + 5 + set -e 6 + 7 + err=0 8 + perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 9 + perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 10 + perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 11 + testprog="perf test -w thloop" 12 + 13 + shelldir=$(dirname "$0") 14 + # shellcheck source=lib/perf_has_symbol.sh 15 + . "${shelldir}"/lib/perf_has_symbol.sh 16 + 17 + testsym="test_loop" 18 + 19 + skip_test_missing_symbol ${testsym} 20 + 21 + cleanup() { 22 + rm -rf "${perfdata1}" 23 + rm -rf "${perfdata1}".old 24 + rm -rf "${perfdata2}" 25 + rm -rf "${perfdata2}".old 26 + rm -rf "${perfdata3}" 27 + rm -rf "${perfdata3}".old 28 + 29 + trap - EXIT TERM INT 30 + } 31 + 32 + trap_cleanup() { 33 + cleanup 34 + exit 1 35 + } 36 + trap trap_cleanup EXIT TERM INT 37 + 38 + make_data() { 39 + file="$1" 40 + if ! perf record -o "${file}" ${testprog} 2> /dev/null 41 + then 42 + echo "Workload record [Failed record]" 43 + echo 1 44 + return 45 + fi 46 + if ! perf report -i "${file}" -q | grep -q "${testsym}" 47 + then 48 + echo "Workload record [Failed missing output]" 49 + echo 1 50 + return 51 + fi 52 + echo 0 53 + } 54 + 55 + test_two_files() { 56 + echo "Basic two file diff test" 57 + err=$(make_data "${perfdata1}") 58 + if [ $err != 0 ] 59 + then 60 + return 61 + fi 62 + err=$(make_data "${perfdata2}") 63 + if [ $err != 0 ] 64 + then 65 + return 66 + fi 67 + 68 + if ! 
perf diff "${perfdata1}" "${perfdata2}" | grep -q "${testsym}" 69 + then 70 + echo "Basic two file diff test [Failed diff]" 71 + err=1 72 + return 73 + fi 74 + echo "Basic two file diff test [Success]" 75 + } 76 + 77 + test_three_files() { 78 + echo "Basic three file diff test" 79 + err=$(make_data "${perfdata1}") 80 + if [ $err != 0 ] 81 + then 82 + return 83 + fi 84 + err=$(make_data "${perfdata2}") 85 + if [ $err != 0 ] 86 + then 87 + return 88 + fi 89 + err=$(make_data "${perfdata3}") 90 + if [ $err != 0 ] 91 + then 92 + return 93 + fi 94 + 95 + if ! perf diff "${perfdata1}" "${perfdata2}" "${perfdata3}" | grep -q "${testsym}" 96 + then 97 + echo "Basic three file diff test [Failed diff]" 98 + err=1 99 + return 100 + fi 101 + echo "Basic three file diff test [Success]" 102 + } 103 + 104 + test_two_files 105 + test_three_files 106 + 107 + cleanup 108 + exit $err
+21
tools/perf/tests/shell/lib/perf_has_symbol.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + perf_has_symbol() 5 + { 6 + if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then 7 + echo "perf does have symbol '$1'" 8 + return 0 9 + fi 10 + echo "perf does not have symbol '$1'" 11 + return 1 12 + } 13 + 14 + skip_test_missing_symbol() 15 + { 16 + if ! perf_has_symbol "$1" ; then 17 + echo "perf is missing symbols - skipping test" 18 + exit 2 19 + fi 20 + return 0 21 + }
+16
tools/perf/tests/shell/lib/setup_python.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + if [ "x$PYTHON" = "x" ] 5 + then 6 + python3 --version >/dev/null 2>&1 && PYTHON=python3 7 + fi 8 + if [ "x$PYTHON" = "x" ] 9 + then 10 + python --version >/dev/null 2>&1 && PYTHON=python 11 + fi 12 + if [ "x$PYTHON" = "x" ] 13 + then 14 + echo Skipping test, python not detected please set environment variable PYTHON. 15 + exit 2 16 + fi
+19
tools/perf/tests/shell/list.sh
··· 1 + #!/bin/sh 2 + # perf list tests 3 + # SPDX-License-Identifier: GPL-2.0 4 + 5 + set -e 6 + err=0 7 + 8 + shelldir=$(dirname "$0") 9 + # shellcheck source=lib/setup_python.sh 10 + . "${shelldir}"/lib/setup_python.sh 11 + 12 + test_list_json() { 13 + echo "Json output test" 14 + perf list -j | $PYTHON -m json.tool 15 + echo "Json output test [Success]" 16 + } 17 + 18 + test_list_json 19 + exit $err
+8 -1
tools/perf/tests/shell/pipe_test.sh
··· 2 2 # perf pipe recording and injection test 3 3 # SPDX-License-Identifier: GPL-2.0 4 4 5 + shelldir=$(dirname "$0") 6 + # shellcheck source=lib/perf_has_symbol.sh 7 + . "${shelldir}"/lib/perf_has_symbol.sh 8 + 9 + sym="noploop" 10 + 11 + skip_test_missing_symbol ${sym} 12 + 5 13 data=$(mktemp /tmp/perf.data.XXXXXX) 6 14 prog="perf test -w noploop" 7 15 task="perf" 8 - sym="noploop" 9 16 10 17 if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then 11 18 echo "cannot find the test file in the perf report"
+4 -1
tools/perf/tests/shell/record+probe_libc_inet_pton.sh
··· 45 45 ;; 46 46 ppc64|ppc64le) 47 47 eventattr='max-stack=4' 48 - echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected 48 + # Add gaih_inet to expected backtrace only if it is part of libc. 49 + if nm $libc | grep -F -q gaih_inet.; then 50 + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected 51 + fi 49 52 echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected 50 53 echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected 51 54 ;;
+37 -1
tools/perf/tests/shell/record.sh
··· 8 8 # shellcheck source=lib/waiting.sh 9 9 . "${shelldir}"/lib/waiting.sh 10 10 11 + # shellcheck source=lib/perf_has_symbol.sh 12 + . "${shelldir}"/lib/perf_has_symbol.sh 13 + 14 + testsym="test_loop" 15 + 16 + skip_test_missing_symbol ${testsym} 17 + 11 18 err=0 12 19 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 13 20 testprog="perf test -w thloop" 14 - testsym="test_loop" 21 + cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" 22 + br_cntr_file="/caps/branch_counter_nr" 23 + br_cntr_output="branch stack counters" 15 24 16 25 cleanup() { 17 26 rm -rf "${perfdata}" ··· 164 155 echo "Basic target workload test [Success]" 165 156 } 166 157 158 + test_branch_counter() { 159 + echo "Basic branch counter test" 160 + # Check if the branch counter feature is supported 161 + for dir in $cpu_pmu_dir 162 + do 163 + if [ ! -e "$dir$br_cntr_file" ] 164 + then 165 + echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]" 166 + return 167 + fi 168 + done 169 + if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null 170 + then 171 + echo "Basic branch counter test [Failed record]" 172 + err=1 173 + return 174 + fi 175 + if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output" 176 + then 177 + echo "Basic branch record test [Failed missing output]" 178 + err=1 179 + return 180 + fi 181 + echo "Basic branch counter test [Success]" 182 + } 183 + 167 184 test_per_thread 168 185 test_register_capture 169 186 test_system_wide 170 187 test_workload 188 + test_branch_counter 171 189 172 190 cleanup 173 191 exit $err
+2 -2
tools/perf/tests/shell/record_offcpu.sh
··· 77 77 err=1 78 78 return 79 79 fi 80 - # each process waits for read and write, so it should be more than 800 events 80 + # each process waits at least for poll, so it should be more than 400 events 81 81 if ! perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \ 82 - awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }' 82 + awk -F ";" '{ if (NF > 3 && int($3) < 400) exit 1; }' 83 83 then 84 84 echo "Child task off-cpu test [Failed invalid output]" 85 85 err=1
+66
tools/perf/tests/shell/script.sh
··· 1 + #!/bin/sh 2 + # perf script tests 3 + # SPDX-License-Identifier: GPL-2.0 4 + 5 + set -e 6 + 7 + temp_dir=$(mktemp -d /tmp/perf-test-script.XXXXXXXXXX) 8 + 9 + perfdatafile="${temp_dir}/perf.data" 10 + db_test="${temp_dir}/db_test.py" 11 + 12 + err=0 13 + 14 + cleanup() 15 + { 16 + trap - EXIT TERM INT 17 + sane=$(echo "${temp_dir}" | cut -b 1-21) 18 + if [ "${sane}" = "/tmp/perf-test-script" ] ; then 19 + echo "--- Cleaning up ---" 20 + rm -f "${temp_dir}/"* 21 + rmdir "${temp_dir}" 22 + fi 23 + } 24 + 25 + trap_cleanup() 26 + { 27 + cleanup 28 + exit 1 29 + } 30 + 31 + trap trap_cleanup EXIT TERM INT 32 + 33 + 34 + test_db() 35 + { 36 + echo "DB test" 37 + 38 + # Check if python script is supported 39 + libpython=$(perf version --build-options | grep python | grep -cv OFF) 40 + if [ "${libpython}" != "1" ] ; then 41 + echo "SKIP: python scripting is not supported" 42 + err=2 43 + return 44 + fi 45 + 46 + cat << "_end_of_file_" > "${db_test}" 47 + perf_db_export_mode = True 48 + perf_db_export_calls = False 49 + perf_db_export_callchains = True 50 + 51 + def sample_table(*args): 52 + print(f'sample_table({args})') 53 + 54 + def call_path_table(*args): 55 + print(f'call_path_table({args}') 56 + _end_of_file_ 57 + perf record -g -o "${perfdatafile}" true 58 + perf script -i "${perfdatafile}" -s "${db_test}" 59 + echo "DB test [Success]" 60 + } 61 + 62 + test_db 63 + 64 + cleanup 65 + 66 + exit $err
+3 -13
tools/perf/tests/shell/stat+json_output.sh
··· 8 8 9 9 skip_test=0 10 10 11 + shelldir=$(dirname "$0") 12 + # shellcheck source=lib/setup_python.sh 13 + . "${shelldir}"/lib/setup_python.sh 11 14 pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py 12 - if [ "x$PYTHON" == "x" ] 13 - then 14 - if which python3 > /dev/null 15 - then 16 - PYTHON=python3 17 - elif which python > /dev/null 18 - then 19 - PYTHON=python 20 - else 21 - echo Skipping test, python not detected please set environment variable PYTHON. 22 - exit 2 23 - fi 24 - fi 25 15 26 16 stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX) 27 17
+1 -1
tools/perf/tests/shell/stat_all_pmu.sh
··· 4 4 5 5 set -e 6 6 7 - # Test all PMU events; however exclude parametrized ones (name contains '?') 7 + # Test all PMU events; however exclude parameterized ones (name contains '?') 8 8 for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do 9 9 echo "Testing $p" 10 10 result=$(perf stat -e "$p" true 2>&1)
+4 -10
tools/perf/tests/shell/stat_metrics_values.sh
··· 1 1 #!/bin/bash 2 2 # perf metrics value validation 3 3 # SPDX-License-Identifier: GPL-2.0 4 - if [ "x$PYTHON" == "x" ] 5 - then 6 - if which python3 > /dev/null 7 - then 8 - PYTHON=python3 9 - else 10 - echo Skipping test, python3 not detected please set environment variable PYTHON. 11 - exit 2 12 - fi 13 - fi 4 + 5 + shelldir=$(dirname "$0") 6 + # shellcheck source=lib/setup_python.sh 7 + . "${shelldir}"/lib/setup_python.sh 14 8 15 9 grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } 16 10
+6
tools/perf/tests/shell/test_arm_callgraph_fp.sh
··· 2 2 # Check Arm64 callgraphs are complete in fp mode 3 3 # SPDX-License-Identifier: GPL-2.0 4 4 5 + shelldir=$(dirname "$0") 6 + # shellcheck source=lib/perf_has_symbol.sh 7 + . "${shelldir}"/lib/perf_has_symbol.sh 8 + 5 9 lscpu | grep -q "aarch64" || exit 2 10 + 11 + skip_test_missing_symbol leafloop 6 12 7 13 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 8 14 TEST_PROGRAM="perf test -w leafloop"
+6
tools/perf/tests/shell/test_brstack.sh
··· 4 4 # SPDX-License-Identifier: GPL-2.0 5 5 # German Gomez <german.gomez@arm.com>, 2022 6 6 7 + shelldir=$(dirname "$0") 8 + # shellcheck source=lib/perf_has_symbol.sh 9 + . "${shelldir}"/lib/perf_has_symbol.sh 10 + 7 11 # skip the test if the hardware doesn't support branch stack sampling 8 12 # and if the architecture doesn't support filter types: any,save_type,u 9 13 if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then 10 14 echo "skip: system doesn't support filter types: any,save_type,u" 11 15 exit 2 12 16 fi 17 + 18 + skip_test_missing_symbol brstack_bench 13 19 14 20 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) 15 21 TESTPROG="perf test -w brstack"
+14 -2
tools/perf/tests/shell/test_data_symbol.sh
··· 4 4 # SPDX-License-Identifier: GPL-2.0 5 5 # Leo Yan <leo.yan@linaro.org>, 2022 6 6 7 + shelldir=$(dirname "$0") 8 + # shellcheck source=lib/waiting.sh 9 + . "${shelldir}"/lib/waiting.sh 10 + 11 + # shellcheck source=lib/perf_has_symbol.sh 12 + . "${shelldir}"/lib/perf_has_symbol.sh 13 + 7 14 skip_if_no_mem_event() { 8 15 perf mem record -e list 2>&1 | grep -E -q 'available' && return 0 9 16 return 2 ··· 18 11 19 12 skip_if_no_mem_event || exit 2 20 13 14 + skip_test_missing_symbol buf1 15 + 21 16 TEST_PROGRAM="perf test -w datasym" 22 17 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 18 + ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX) 23 19 24 20 check_result() { 25 21 # The memory report format is as below: ··· 60 50 # specific CPU and test in per-CPU mode. 61 51 is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo) 62 52 if (($is_amd >= 1)); then 63 - perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM & 53 + perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" & 64 54 else 65 - perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM & 55 + perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}" & 66 56 fi 67 57 68 58 PERFPID=$! 59 + 60 + wait_for_perf_to_start ${PERFPID} "${ERR_FILE}" 69 61 70 62 sleep 1 71 63
+3 -10
tools/perf/tests/shell/test_perf_data_converter_json.sh
··· 6 6 7 7 err=0 8 8 9 - if [ "$PYTHON" = "" ] ; then 10 - if which python3 > /dev/null ; then 11 - PYTHON=python3 12 - elif which python > /dev/null ; then 13 - PYTHON=python 14 - else 15 - echo Skipping test, python not detected please set environment variable PYTHON. 16 - exit 2 17 - fi 18 - fi 9 + shelldir=$(dirname "$0") 10 + # shellcheck source=lib/setup_python.sh 11 + . "${shelldir}"/lib/setup_python.sh 19 12 20 13 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 21 14 result=$(mktemp /tmp/__perf_test.output.json.XXXXX)
+84 -22
tools/perf/tests/sigtrap.c
··· 57 57 #ifdef HAVE_BPF_SKEL 58 58 #include <bpf/btf.h> 59 59 60 + static struct btf *btf; 61 + 62 + static bool btf__available(void) 63 + { 64 + if (btf == NULL) 65 + btf = btf__load_vmlinux_btf(); 66 + 67 + return btf != NULL; 68 + } 69 + 70 + static void btf__exit(void) 71 + { 72 + btf__free(btf); 73 + btf = NULL; 74 + } 75 + 76 + static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name) 77 + { 78 + const struct btf_type *t = btf__type_by_id(btf, type_id); 79 + const struct btf_member *m; 80 + int i; 81 + 82 + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 83 + const char *current_member_name = btf__name_by_offset(btf, m->name_off); 84 + if (!strcmp(current_member_name, member_name)) 85 + return m; 86 + } 87 + 88 + return NULL; 89 + } 90 + 60 91 static bool attr_has_sigtrap(void) 61 92 { 62 - bool ret = false; 63 - struct btf *btf; 64 - const struct btf_type *t; 65 - const struct btf_member *m; 66 - const char *name; 67 - int i, id; 93 + int id; 68 94 69 - btf = btf__load_vmlinux_btf(); 70 - if (btf == NULL) { 95 + if (!btf__available()) { 71 96 /* should be an old kernel */ 72 97 return false; 73 98 } 74 99 75 100 id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT); 76 101 if (id < 0) 77 - goto out; 102 + return false; 78 103 79 - t = btf__type_by_id(btf, id); 80 - for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 81 - name = btf__name_by_offset(btf, m->name_off); 82 - if (!strcmp(name, "sigtrap")) { 83 - ret = true; 84 - break; 85 - } 86 - } 87 - out: 88 - btf__free(btf); 89 - return ret; 104 + return __btf_type__find_member_by_name(id, "sigtrap") != NULL; 105 + } 106 + 107 + static bool kernel_with_sleepable_spinlocks(void) 108 + { 109 + const struct btf_member *member; 110 + const struct btf_type *type; 111 + const char *type_name; 112 + int id; 113 + 114 + if (!btf__available()) 115 + return false; 116 + 117 + id = btf__find_by_name_kind(btf, "spinlock", 
BTF_KIND_STRUCT); 118 + if (id < 0) 119 + return false; 120 + 121 + // Only RT has a "lock" member for "struct spinlock" 122 + member = __btf_type__find_member_by_name(id, "lock"); 123 + if (member == NULL) 124 + return false; 125 + 126 + // But check its type as well 127 + type = btf__type_by_id(btf, member->type); 128 + if (!type || !btf_is_struct(type)) 129 + return false; 130 + 131 + type_name = btf__name_by_offset(btf, type->name_off); 132 + return type_name && !strcmp(type_name, "rt_mutex_base"); 90 133 } 91 134 #else /* !HAVE_BPF_SKEL */ 92 135 static bool attr_has_sigtrap(void) ··· 151 108 } 152 109 153 110 return ret; 111 + } 112 + 113 + static bool kernel_with_sleepable_spinlocks(void) 114 + { 115 + return false; 116 + } 117 + 118 + static void btf__exit(void) 119 + { 154 120 } 155 121 #endif /* HAVE_BPF_SKEL */ 156 122 ··· 199 147 200 148 static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) 201 149 { 202 - int ret; 150 + int ret, expected_sigtraps; 203 151 204 152 ctx.iterate_on = 3000; 205 153 ··· 208 156 ret = run_test_threads(threads, barrier); 209 157 TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); 210 158 211 - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); 159 + expected_sigtraps = NUM_THREADS * ctx.iterate_on; 160 + 161 + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) { 162 + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n", 163 + expected_sigtraps, ctx.signal_count); 164 + pr_debug("See https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/\n"); 165 + return TEST_SKIP; 166 + } else 167 + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps); 168 + 212 169 TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); 213 170 TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == 
&ctx.iterate_on); 214 171 #if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ ··· 282 221 sigaction(SIGTRAP, &oldact, NULL); 283 222 out: 284 223 pthread_barrier_destroy(&barrier); 224 + btf__exit(); 285 225 return ret; 286 226 } 287 227
+1 -1
tools/perf/tests/sw-clock.c
··· 62 62 } 63 63 evlist__add(evlist, evsel); 64 64 65 - cpus = perf_cpu_map__dummy_new(); 65 + cpus = perf_cpu_map__new_any_cpu(); 66 66 threads = thread_map__new_by_tid(getpid()); 67 67 if (!cpus || !threads) { 68 68 err = -ENOMEM;
+1 -1
tools/perf/tests/switch-tracking.c
··· 351 351 goto out_err; 352 352 } 353 353 354 - cpus = perf_cpu_map__new(NULL); 354 + cpus = perf_cpu_map__new_online_cpus(); 355 355 if (!cpus) { 356 356 pr_debug("perf_cpu_map__new failed!\n"); 357 357 goto out_err;
+1 -1
tools/perf/tests/task-exit.c
··· 70 70 * evlist__prepare_workload we'll fill in the only thread 71 71 * we're monitoring, the one forked there. 72 72 */ 73 - cpus = perf_cpu_map__dummy_new(); 73 + cpus = perf_cpu_map__new_any_cpu(); 74 74 threads = thread_map__new_by_tid(-1); 75 75 if (!cpus || !threads) { 76 76 err = -ENOMEM;
+1
tools/perf/tests/tests.h
··· 207 207 DECLARE_WORKLOAD(datasym); 208 208 209 209 extern const char *dso_to_test; 210 + extern const char *test_objdump_path; 210 211 211 212 #endif /* TESTS_H */
+1 -1
tools/perf/tests/topology.c
··· 215 215 if (session_write_header(path)) 216 216 goto free_path; 217 217 218 - map = perf_cpu_map__new(NULL); 218 + map = perf_cpu_map__new_online_cpus(); 219 219 if (map == NULL) { 220 220 pr_debug("failed to get system cpumap\n"); 221 221 goto free_path;
+98 -83
tools/perf/tests/vmlinux-kallsyms.c
··· 112 112 return false; 113 113 } 114 114 115 + struct test__vmlinux_matches_kallsyms_cb_args { 116 + struct machine kallsyms; 117 + struct map *vmlinux_map; 118 + bool header_printed; 119 + }; 120 + 121 + static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data) 122 + { 123 + struct test__vmlinux_matches_kallsyms_cb_args *args = data; 124 + struct dso *dso = map__dso(map); 125 + /* 126 + * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while 127 + * the kernel will have the path for the vmlinux file being used, so use 128 + * the short name, less descriptive but the same ("[kernel]" in both 129 + * cases. 130 + */ 131 + struct map *pair = maps__find_by_name(args->kallsyms.kmaps, 132 + (dso->kernel ? dso->short_name : dso->name)); 133 + 134 + if (pair) 135 + map__set_priv(pair, 1); 136 + else { 137 + if (!args->header_printed) { 138 + pr_info("WARN: Maps only in vmlinux:\n"); 139 + args->header_printed = true; 140 + } 141 + map__fprintf(map, stderr); 142 + } 143 + return 0; 144 + } 145 + 146 + static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data) 147 + { 148 + struct test__vmlinux_matches_kallsyms_cb_args *args = data; 149 + struct map *pair; 150 + u64 mem_start = map__unmap_ip(args->vmlinux_map, map__start(map)); 151 + u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map)); 152 + 153 + pair = maps__find(args->kallsyms.kmaps, mem_start); 154 + if (pair == NULL || map__priv(pair)) 155 + return 0; 156 + 157 + if (map__start(pair) == mem_start) { 158 + struct dso *dso = map__dso(map); 159 + 160 + if (!args->header_printed) { 161 + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); 162 + args->header_printed = true; 163 + } 164 + 165 + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", 166 + map__start(map), map__end(map), map__pgoff(map), dso->name); 167 + if (mem_end != map__end(pair)) 168 + pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, 169 + map__start(pair), 
map__end(pair), map__pgoff(pair)); 170 + pr_info(" %s\n", dso->name); 171 + map__set_priv(pair, 1); 172 + } 173 + return 0; 174 + } 175 + 176 + static int test__vmlinux_matches_kallsyms_cb3(struct map *map, void *data) 177 + { 178 + struct test__vmlinux_matches_kallsyms_cb_args *args = data; 179 + 180 + if (!map__priv(map)) { 181 + if (!args->header_printed) { 182 + pr_info("WARN: Maps only in kallsyms:\n"); 183 + args->header_printed = true; 184 + } 185 + map__fprintf(map, stderr); 186 + } 187 + return 0; 188 + } 189 + 115 190 static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, 116 191 int subtest __maybe_unused) 117 192 { 118 193 int err = TEST_FAIL; 119 194 struct rb_node *nd; 120 195 struct symbol *sym; 121 - struct map *kallsyms_map, *vmlinux_map; 122 - struct map_rb_node *rb_node; 123 - struct machine kallsyms, vmlinux; 196 + struct map *kallsyms_map; 197 + struct machine vmlinux; 124 198 struct maps *maps; 125 199 u64 mem_start, mem_end; 126 - bool header_printed; 200 + struct test__vmlinux_matches_kallsyms_cb_args args; 127 201 128 202 /* 129 203 * Step 1: ··· 205 131 * Init the machines that will hold kernel, modules obtained from 206 132 * both vmlinux + .ko files and from /proc/kallsyms split by modules. 207 133 */ 208 - machine__init(&kallsyms, "", HOST_KERNEL_ID); 134 + machine__init(&args.kallsyms, "", HOST_KERNEL_ID); 209 135 machine__init(&vmlinux, "", HOST_KERNEL_ID); 210 136 211 137 maps = machine__kernel_maps(&vmlinux); ··· 217 143 * load /proc/kallsyms. Also create the modules maps from /proc/modules 218 144 * and find the .ko files that match them in /lib/modules/`uname -r`/. 
219 145 */ 220 - if (machine__create_kernel_maps(&kallsyms) < 0) { 146 + if (machine__create_kernel_maps(&args.kallsyms) < 0) { 221 147 pr_debug("machine__create_kernel_maps failed"); 222 148 err = TEST_SKIP; 223 149 goto out; ··· 234 160 * be compacted against the list of modules found in the "vmlinux" 235 161 * code and with the one got from /proc/modules from the "kallsyms" code. 236 162 */ 237 - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { 163 + if (machine__load_kallsyms(&args.kallsyms, "/proc/kallsyms") <= 0) { 238 164 pr_debug("machine__load_kallsyms failed"); 239 165 err = TEST_SKIP; 240 166 goto out; ··· 248 174 * to see if the running kernel was relocated by checking if it has the 249 175 * same value in the vmlinux file we load. 250 176 */ 251 - kallsyms_map = machine__kernel_map(&kallsyms); 177 + kallsyms_map = machine__kernel_map(&args.kallsyms); 252 178 253 179 /* 254 180 * Step 5: ··· 260 186 goto out; 261 187 } 262 188 263 - vmlinux_map = machine__kernel_map(&vmlinux); 189 + args.vmlinux_map = machine__kernel_map(&vmlinux); 264 190 265 191 /* 266 192 * Step 6: ··· 287 213 * in the kallsyms dso. For the ones that are in both, check its names and 288 214 * end addresses too. 
289 215 */ 290 - map__for_each_symbol(vmlinux_map, sym, nd) { 216 + map__for_each_symbol(args.vmlinux_map, sym, nd) { 291 217 struct symbol *pair, *first_pair; 292 218 293 219 sym = rb_entry(nd, struct symbol, rb_node); ··· 295 221 if (sym->start == sym->end) 296 222 continue; 297 223 298 - mem_start = map__unmap_ip(vmlinux_map, sym->start); 299 - mem_end = map__unmap_ip(vmlinux_map, sym->end); 224 + mem_start = map__unmap_ip(args.vmlinux_map, sym->start); 225 + mem_end = map__unmap_ip(args.vmlinux_map, sym->end); 300 226 301 - first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); 227 + first_pair = machine__find_kernel_symbol(&args.kallsyms, mem_start, NULL); 302 228 pair = first_pair; 303 229 304 230 if (pair && UM(pair->start) == mem_start) { ··· 327 253 */ 328 254 continue; 329 255 } else { 330 - pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); 256 + pair = machine__find_kernel_symbol_by_name(&args.kallsyms, 257 + sym->name, NULL); 331 258 if (pair) { 332 259 if (UM(pair->start) == mem_start) 333 260 goto next_pair; ··· 342 267 343 268 continue; 344 269 } 345 - } else if (mem_start == map__end(kallsyms.vmlinux_map)) { 270 + } else if (mem_start == map__end(args.kallsyms.vmlinux_map)) { 346 271 /* 347 272 * Ignore aliases to _etext, i.e. to the end of the kernel text area, 348 273 * such as __indirect_thunk_end. ··· 364 289 if (verbose <= 0) 365 290 goto out; 366 291 367 - header_printed = false; 292 + args.header_printed = false; 293 + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb1, &args); 368 294 369 - maps__for_each_entry(maps, rb_node) { 370 - struct map *map = rb_node->map; 371 - struct dso *dso = map__dso(map); 372 - /* 373 - * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while 374 - * the kernel will have the path for the vmlinux file being used, 375 - * so use the short name, less descriptive but the same ("[kernel]" in 376 - * both cases. 
377 - */ 378 - struct map *pair = maps__find_by_name(kallsyms.kmaps, (dso->kernel ? 379 - dso->short_name : 380 - dso->name)); 381 - if (pair) { 382 - map__set_priv(pair, 1); 383 - } else { 384 - if (!header_printed) { 385 - pr_info("WARN: Maps only in vmlinux:\n"); 386 - header_printed = true; 387 - } 388 - map__fprintf(map, stderr); 389 - } 390 - } 295 + args.header_printed = false; 296 + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb2, &args); 391 297 392 - header_printed = false; 298 + args.header_printed = false; 299 + maps = machine__kernel_maps(&args.kallsyms); 300 + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb3, &args); 393 301 394 - maps__for_each_entry(maps, rb_node) { 395 - struct map *pair, *map = rb_node->map; 396 - 397 - mem_start = map__unmap_ip(vmlinux_map, map__start(map)); 398 - mem_end = map__unmap_ip(vmlinux_map, map__end(map)); 399 - 400 - pair = maps__find(kallsyms.kmaps, mem_start); 401 - if (pair == NULL || map__priv(pair)) 402 - continue; 403 - 404 - if (map__start(pair) == mem_start) { 405 - struct dso *dso = map__dso(map); 406 - 407 - if (!header_printed) { 408 - pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); 409 - header_printed = true; 410 - } 411 - 412 - pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", 413 - map__start(map), map__end(map), map__pgoff(map), dso->name); 414 - if (mem_end != map__end(pair)) 415 - pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, 416 - map__start(pair), map__end(pair), map__pgoff(pair)); 417 - pr_info(" %s\n", dso->name); 418 - map__set_priv(pair, 1); 419 - } 420 - } 421 - 422 - header_printed = false; 423 - 424 - maps = machine__kernel_maps(&kallsyms); 425 - 426 - maps__for_each_entry(maps, rb_node) { 427 - struct map *map = rb_node->map; 428 - 429 - if (!map__priv(map)) { 430 - if (!header_printed) { 431 - pr_info("WARN: Maps only in kallsyms:\n"); 432 - header_printed = true; 433 - } 434 - map__fprintf(map, stderr); 435 - } 436 
- } 437 302 out: 438 - machine__exit(&kallsyms); 303 + machine__exit(&args.kallsyms); 439 304 machine__exit(&vmlinux); 440 305 return err; 441 306 }
+1 -3
tools/perf/tests/workloads/thloop.c
··· 7 7 #include "../tests.h" 8 8 9 9 static volatile sig_atomic_t done; 10 - static volatile unsigned count; 11 10 12 11 /* We want to check this symbol in perf report */ 13 12 noinline void test_loop(void); ··· 18 19 19 20 noinline void test_loop(void) 20 21 { 21 - while (!done) 22 - __atomic_fetch_add(&count, 1, __ATOMIC_RELAXED); 22 + while (!done); 23 23 } 24 24 25 25 static void *thfunc(void *arg)
+6 -4
tools/perf/trace/beauty/arch_errno_names.sh
··· 57 57 archlist="$1" 58 58 default="$2" 59 59 60 - printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n' 60 + printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n' 61 61 printf '{\n' 62 62 for arch in $archlist; do 63 63 printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch") 64 - printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch") 64 + printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch") 65 65 done 66 - printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default") 66 + printf '\treturn errno_to_name__%s;\n' $(arch_string "$default") 67 67 printf '}\n' 68 68 } 69 69 ··· 76 76 77 77 # Create list of architectures that have a specific errno.h. 78 78 archlist="" 79 - for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do 79 + for f in $toolsdir/arch/*/include/uapi/asm/errno.h; do 80 + d=${f%/include/uapi/asm/errno.h} 81 + arch="${d##*/}" 80 82 test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch" 81 83 done 82 84
-2
tools/perf/trace/beauty/beauty.h
··· 251 251 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 252 252 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); 253 253 254 - const char *arch_syscalls__strerrno(const char *arch, int err); 255 - 256 254 #endif /* _PERF_TRACE_BEAUTY_H */
+2 -2
tools/perf/trace/beauty/prctl_option.sh
··· 4 4 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ 5 5 6 6 printf "static const char *prctl_options[] = {\n" 7 - regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' 7 + regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$' 8 8 grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ 9 - sed -r "s/$regex/\2 \1/g" | \ 9 + sed -E "s%$regex%\2 \1%g" | \ 10 10 sort -n | xargs printf "\t[%s] = \"%s\",\n" 11 11 printf "};\n" 12 12
+2 -2
tools/perf/trace/beauty/socket.sh
··· 18 18 printf "};\n\n" 19 19 20 20 printf "static const char *socket_level[] = {\n" 21 - socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?' 21 + socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+/.*)?' 22 22 23 23 grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \ 24 - sed -r "s/$socket_level_regex/\2 \1/g" | \ 24 + sed -E "s%$socket_level_regex%\2 \1%g" | \ 25 25 sort -n | xargs printf "\t[%s] = \"%s\",\n" 26 26 printf "};\n\n" 27 27
+48 -56
tools/perf/ui/browsers/annotate.c
··· 27 27 struct rb_node *curr_hot; 28 28 struct annotation_line *selection; 29 29 struct arch *arch; 30 - struct annotation_options *opts; 31 30 bool searching_backwards; 32 31 char search_bf[128]; 33 32 }; ··· 37 38 return symbol__annotation(ms->sym); 38 39 } 39 40 40 - static bool disasm_line__filter(struct ui_browser *browser, void *entry) 41 + static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) 41 42 { 42 - struct annotation *notes = browser__annotation(browser); 43 43 struct annotation_line *al = list_entry(entry, struct annotation_line, node); 44 - return annotation_line__filter(al, notes); 44 + return annotation_line__filter(al); 45 45 } 46 46 47 47 static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current) ··· 95 97 struct annotation_write_ops ops = { 96 98 .first_line = row == 0, 97 99 .current_entry = is_current_entry, 98 - .change_color = (!notes->options->hide_src_code && 100 + .change_color = (!annotate_opts.hide_src_code && 99 101 (!is_current_entry || 100 102 (browser->use_navkeypressed && 101 103 !browser->navkeypressed))), ··· 112 114 if (!browser->navkeypressed) 113 115 ops.width += 1; 114 116 115 - annotation_line__write(al, notes, &ops, ab->opts); 117 + annotation_line__write(al, notes, &ops); 116 118 117 119 if (ops.current_entry) 118 120 ab->selection = al; ··· 126 128 127 129 while (pos && pos->al.offset == -1) { 128 130 pos = list_prev_entry(pos, al.node); 129 - if (!ab->opts->hide_src_code) 131 + if (!annotate_opts.hide_src_code) 130 132 diff++; 131 133 } 132 134 ··· 186 188 * name right after the '<' token and probably treating this like a 187 189 * 'call' instruction. 
188 190 */ 189 - target = notes->offsets[cursor->ops.target.offset]; 191 + target = notes->src->offsets[cursor->ops.target.offset]; 190 192 if (target == NULL) { 191 193 ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n", 192 194 cursor->ops.target.offset); 193 195 return; 194 196 } 195 197 196 - if (notes->options->hide_src_code) { 198 + if (annotate_opts.hide_src_code) { 197 199 from = cursor->al.idx_asm; 198 200 to = target->idx_asm; 199 201 } else { ··· 222 224 int ret = ui_browser__list_head_refresh(browser); 223 225 int pcnt_width = annotation__pcnt_width(notes); 224 226 225 - if (notes->options->jump_arrows) 227 + if (annotate_opts.jump_arrows) 226 228 annotate_browser__draw_current_jump(browser); 227 229 228 230 ui_browser__set_color(browser, HE_COLORSET_NORMAL); ··· 256 258 parent = *p; 257 259 l = rb_entry(parent, struct annotation_line, rb_node); 258 260 259 - if (disasm__cmp(al, l, browser->opts->percent_type) < 0) 261 + if (disasm__cmp(al, l, annotate_opts.percent_type) < 0) 260 262 p = &(*p)->rb_left; 261 263 else 262 264 p = &(*p)->rb_right; ··· 268 270 static void annotate_browser__set_top(struct annotate_browser *browser, 269 271 struct annotation_line *pos, u32 idx) 270 272 { 271 - struct annotation *notes = browser__annotation(&browser->b); 272 273 unsigned back; 273 274 274 275 ui_browser__refresh_dimensions(&browser->b); ··· 277 280 while (browser->b.top_idx != 0 && back != 0) { 278 281 pos = list_entry(pos->node.prev, struct annotation_line, node); 279 282 280 - if (annotation_line__filter(pos, notes)) 283 + if (annotation_line__filter(pos)) 281 284 continue; 282 285 283 286 --browser->b.top_idx; ··· 291 294 static void annotate_browser__set_rb_top(struct annotate_browser *browser, 292 295 struct rb_node *nd) 293 296 { 294 - struct annotation *notes = browser__annotation(&browser->b); 295 297 struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node); 296 298 u32 idx = pos->idx; 297 
299 298 - if (notes->options->hide_src_code) 300 + if (annotate_opts.hide_src_code) 299 301 idx = pos->idx_asm; 300 302 annotate_browser__set_top(browser, pos, idx); 301 303 browser->curr_hot = nd; ··· 327 331 double percent; 328 332 329 333 percent = annotation_data__percent(&pos->al.data[i], 330 - browser->opts->percent_type); 334 + annotate_opts.percent_type); 331 335 332 336 if (max_percent < percent) 333 337 max_percent = percent; 334 338 } 335 339 336 - if (max_percent < 0.01 && pos->al.ipc == 0) { 340 + if (max_percent < 0.01 && (!pos->al.cycles || pos->al.cycles->ipc == 0)) { 337 341 RB_CLEAR_NODE(&pos->al.rb_node); 338 342 continue; 339 343 } ··· 376 380 browser->b.seek(&browser->b, offset, SEEK_CUR); 377 381 al = list_entry(browser->b.top, struct annotation_line, node); 378 382 379 - if (notes->options->hide_src_code) { 383 + if (annotate_opts.hide_src_code) { 380 384 if (al->idx_asm < offset) 381 385 offset = al->idx; 382 386 383 - browser->b.nr_entries = notes->nr_entries; 384 - notes->options->hide_src_code = false; 387 + browser->b.nr_entries = notes->src->nr_entries; 388 + annotate_opts.hide_src_code = false; 385 389 browser->b.seek(&browser->b, -offset, SEEK_CUR); 386 390 browser->b.top_idx = al->idx - offset; 387 391 browser->b.index = al->idx; ··· 398 402 if (al->idx_asm < offset) 399 403 offset = al->idx_asm; 400 404 401 - browser->b.nr_entries = notes->nr_asm_entries; 402 - notes->options->hide_src_code = true; 405 + browser->b.nr_entries = notes->src->nr_asm_entries; 406 + annotate_opts.hide_src_code = true; 403 407 browser->b.seek(&browser->b, -offset, SEEK_CUR); 404 408 browser->b.top_idx = al->idx_asm - offset; 405 409 browser->b.index = al->idx_asm; ··· 431 435 { 432 436 struct annotation *notes = browser__annotation(browser); 433 437 ui_browser__reset_index(browser); 434 - browser->nr_entries = notes->nr_asm_entries; 438 + browser->nr_entries = notes->src->nr_asm_entries; 435 439 } 436 440 437 441 static int sym_title(struct symbol *sym, 
struct map *map, char *title, ··· 479 483 target_ms.map = ms->map; 480 484 target_ms.sym = dl->ops.target.sym; 481 485 annotation__unlock(notes); 482 - symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); 483 - sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); 486 + symbol__tui_annotate(&target_ms, evsel, hbt); 487 + sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type); 484 488 ui_browser__show_title(&browser->b, title); 485 489 return true; 486 490 } ··· 496 500 list_for_each_entry(pos, &notes->src->source, al.node) { 497 501 if (pos->al.offset == offset) 498 502 return pos; 499 - if (!annotation_line__filter(&pos->al, notes)) 503 + if (!annotation_line__filter(&pos->al)) 500 504 ++*idx; 501 505 } 502 506 ··· 540 544 541 545 *idx = browser->b.index; 542 546 list_for_each_entry_continue(al, &notes->src->source, node) { 543 - if (annotation_line__filter(al, notes)) 547 + if (annotation_line__filter(al)) 544 548 continue; 545 549 546 550 ++*idx; ··· 577 581 578 582 *idx = browser->b.index; 579 583 list_for_each_entry_continue_reverse(al, &notes->src->source, node) { 580 - if (annotation_line__filter(al, notes)) 584 + if (annotation_line__filter(al)) 581 585 continue; 582 586 583 587 --*idx; ··· 655 659 656 660 static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) 657 661 { 658 - struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 659 662 struct map_symbol *ms = browser->priv; 660 663 struct symbol *sym = ms->sym; 661 664 char symbol_dso[SYM_TITLE_MAX_SIZE]; ··· 662 667 if (ui_browser__show(browser, title, help) < 0) 663 668 return -1; 664 669 665 - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); 670 + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type); 666 671 667 672 ui_browser__gotorc_title(browser, 0, 0); 668 673 ui_browser__set_color(browser, HE_COLORSET_ROOT); ··· 804 
809 annotate_browser__show(&browser->b, title, help); 805 810 continue; 806 811 case 'k': 807 - notes->options->show_linenr = !notes->options->show_linenr; 812 + annotate_opts.show_linenr = !annotate_opts.show_linenr; 808 813 continue; 809 814 case 'l': 810 815 annotate_browser__show_full_location (&browser->b); ··· 817 822 ui_helpline__puts(help); 818 823 continue; 819 824 case 'o': 820 - notes->options->use_offset = !notes->options->use_offset; 825 + annotate_opts.use_offset = !annotate_opts.use_offset; 821 826 annotation__update_column_widths(notes); 822 827 continue; 823 828 case 'O': 824 - if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) 825 - notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; 829 + if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL) 830 + annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL; 826 831 continue; 827 832 case 'j': 828 - notes->options->jump_arrows = !notes->options->jump_arrows; 833 + annotate_opts.jump_arrows = !annotate_opts.jump_arrows; 829 834 continue; 830 835 case 'J': 831 - notes->options->show_nr_jumps = !notes->options->show_nr_jumps; 836 + annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps; 832 837 annotation__update_column_widths(notes); 833 838 continue; 834 839 case '/': ··· 855 860 browser->b.height, 856 861 browser->b.index, 857 862 browser->b.top_idx, 858 - notes->nr_asm_entries); 863 + notes->src->nr_asm_entries); 859 864 } 860 865 continue; 861 866 case K_ENTER: ··· 879 884 continue; 880 885 } 881 886 case 'P': 882 - map_symbol__annotation_dump(ms, evsel, browser->opts); 887 + map_symbol__annotation_dump(ms, evsel); 883 888 continue; 884 889 case 't': 885 890 if (symbol_conf.show_total_period) { ··· 892 897 annotation__update_column_widths(notes); 893 898 continue; 894 899 case 'c': 895 - if (notes->options->show_minmax_cycle) 896 - notes->options->show_minmax_cycle = false; 900 + if (annotate_opts.show_minmax_cycle) 901 + annotate_opts.show_minmax_cycle = 
false; 897 902 else 898 - notes->options->show_minmax_cycle = true; 903 + annotate_opts.show_minmax_cycle = true; 899 904 annotation__update_column_widths(notes); 900 905 continue; 901 906 case 'p': 902 907 case 'b': 903 - switch_percent_type(browser->opts, key == 'b'); 908 + switch_percent_type(&annotate_opts, key == 'b'); 904 909 hists__scnprintf_title(hists, title, sizeof(title)); 905 910 annotate_browser__show(&browser->b, title, help); 906 911 continue; ··· 927 932 } 928 933 929 934 int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, 930 - struct hist_browser_timer *hbt, 931 - struct annotation_options *opts) 935 + struct hist_browser_timer *hbt) 932 936 { 933 - return symbol__tui_annotate(ms, evsel, hbt, opts); 937 + return symbol__tui_annotate(ms, evsel, hbt); 934 938 } 935 939 936 940 int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, 937 - struct hist_browser_timer *hbt, 938 - struct annotation_options *opts) 941 + struct hist_browser_timer *hbt) 939 942 { 940 943 /* reset abort key so that it can get Ctrl-C as a key */ 941 944 SLang_reset_tty(); 942 945 SLang_init_tty(0, 0, 0); 946 + SLtty_set_suspend_state(true); 943 947 944 - return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); 948 + return map_symbol__tui_annotate(&he->ms, evsel, hbt); 945 949 } 946 950 947 951 int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, 948 - struct hist_browser_timer *hbt, 949 - struct annotation_options *opts) 952 + struct hist_browser_timer *hbt) 950 953 { 951 954 struct symbol *sym = ms->sym; 952 955 struct annotation *notes = symbol__annotation(sym); ··· 958 965 .priv = ms, 959 966 .use_navkeypressed = true, 960 967 }, 961 - .opts = opts, 962 968 }; 963 969 struct dso *dso; 964 970 int ret = -1, err; ··· 971 979 return -1; 972 980 973 981 if (not_annotated) { 974 - err = symbol__annotate2(ms, evsel, opts, &browser.arch); 982 + err = symbol__annotate2(ms, evsel, &browser.arch); 975 983 if (err) { 976 984 
char msg[BUFSIZ]; 977 985 dso->annotate_warned = true; ··· 983 991 984 992 ui_helpline__push("Press ESC to exit"); 985 993 986 - browser.b.width = notes->max_line_len; 987 - browser.b.nr_entries = notes->nr_entries; 994 + browser.b.width = notes->src->max_line_len; 995 + browser.b.nr_entries = notes->src->nr_entries; 988 996 browser.b.entries = &notes->src->source, 989 997 browser.b.width += 18; /* Percentage */ 990 998 991 - if (notes->options->hide_src_code) 999 + if (annotate_opts.hide_src_code) 992 1000 ui_browser__init_asm_mode(&browser.b); 993 1001 994 1002 ret = annotate_browser__run(&browser, evsel, hbt); ··· 998 1006 999 1007 out_free_offsets: 1000 1008 if(not_annotated) 1001 - zfree(&notes->offsets); 1009 + zfree(&notes->src->offsets); 1002 1010 return ret; 1003 1011 }
+14 -22
tools/perf/ui/browsers/hists.c
··· 2250 2250 static struct hist_browser * 2251 2251 perf_evsel_browser__new(struct evsel *evsel, 2252 2252 struct hist_browser_timer *hbt, 2253 - struct perf_env *env, 2254 - struct annotation_options *annotation_opts) 2253 + struct perf_env *env) 2255 2254 { 2256 2255 struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); 2257 2256 ··· 2258 2259 browser->hbt = hbt; 2259 2260 browser->env = env; 2260 2261 browser->title = hists_browser__scnprintf_title; 2261 - browser->annotation_opts = annotation_opts; 2262 2262 } 2263 2263 return browser; 2264 2264 } ··· 2430 2432 struct hist_entry *he; 2431 2433 int err; 2432 2434 2433 - if (!browser->annotation_opts->objdump_path && 2434 - perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) 2435 + if (!annotate_opts.objdump_path && 2436 + perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path)) 2435 2437 return 0; 2436 2438 2437 2439 notes = symbol__annotation(act->ms.sym); ··· 2443 2445 else 2444 2446 evsel = hists_to_evsel(browser->hists); 2445 2447 2446 - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, 2447 - browser->annotation_opts); 2448 + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); 2448 2449 he = hist_browser__selected_entry(browser); 2449 2450 /* 2450 2451 * offer option to annotate the other branch source or target ··· 2940 2943 2941 2944 static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, 2942 2945 bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, 2943 - struct perf_env *env, bool warn_lost_event, 2944 - struct annotation_options *annotation_opts) 2946 + struct perf_env *env, bool warn_lost_event) 2945 2947 { 2946 2948 struct hists *hists = evsel__hists(evsel); 2947 - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); 2949 + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); 2948 2950 struct branch_info *bi = NULL; 
2949 2951 #define MAX_OPTIONS 16 2950 2952 char *options[MAX_OPTIONS]; ··· 3000 3004 /* reset abort key so that it can get Ctrl-C as a key */ 3001 3005 SLang_reset_tty(); 3002 3006 SLang_init_tty(0, 0, 0); 3007 + SLtty_set_suspend_state(true); 3003 3008 3004 3009 if (min_pcnt) 3005 3010 browser->min_pcnt = min_pcnt; ··· 3395 3398 struct evsel_menu { 3396 3399 struct ui_browser b; 3397 3400 struct evsel *selection; 3398 - struct annotation_options *annotation_opts; 3399 3401 bool lost_events, lost_events_warned; 3400 3402 float min_pcnt; 3401 3403 struct perf_env *env; ··· 3495 3499 hbt->timer(hbt->arg); 3496 3500 key = evsel__hists_browse(pos, nr_events, help, true, hbt, 3497 3501 menu->min_pcnt, menu->env, 3498 - warn_lost_event, 3499 - menu->annotation_opts); 3502 + warn_lost_event); 3500 3503 ui_browser__show_title(&menu->b, title); 3501 3504 switch (key) { 3502 3505 case K_TAB: ··· 3552 3557 3553 3558 static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, 3554 3559 struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, 3555 - bool warn_lost_event, struct annotation_options *annotation_opts) 3560 + bool warn_lost_event) 3556 3561 { 3557 3562 struct evsel *pos; 3558 3563 struct evsel_menu menu = { ··· 3567 3572 }, 3568 3573 .min_pcnt = min_pcnt, 3569 3574 .env = env, 3570 - .annotation_opts = annotation_opts, 3571 3575 }; 3572 3576 3573 3577 ui_helpline__push("Press ESC to exit"); ··· 3601 3607 } 3602 3608 3603 3609 int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, 3604 - float min_pcnt, struct perf_env *env, bool warn_lost_event, 3605 - struct annotation_options *annotation_opts) 3610 + float min_pcnt, struct perf_env *env, bool warn_lost_event) 3606 3611 { 3607 3612 int nr_entries = evlist->core.nr_entries; 3608 3613 ··· 3610 3617 struct evsel *first = evlist__first(evlist); 3611 3618 3612 3619 return evsel__hists_browse(first, nr_entries, help, false, 
hbt, min_pcnt, 3613 - env, warn_lost_event, annotation_opts); 3620 + env, warn_lost_event); 3614 3621 } 3615 3622 } 3616 3623 ··· 3628 3635 } 3629 3636 3630 3637 return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, 3631 - warn_lost_event, annotation_opts); 3638 + warn_lost_event); 3632 3639 } 3633 3640 3634 3641 static int block_hists_browser__title(struct hist_browser *browser, char *bf, ··· 3647 3654 } 3648 3655 3649 3656 int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, 3650 - float min_percent, struct perf_env *env, 3651 - struct annotation_options *annotation_opts) 3657 + float min_percent, struct perf_env *env) 3652 3658 { 3653 3659 struct hists *hists = &bh->block_hists; 3654 3660 struct hist_browser *browser; ··· 3664 3672 browser->title = block_hists_browser__title; 3665 3673 browser->min_pcnt = min_percent; 3666 3674 browser->env = env; 3667 - browser->annotation_opts = annotation_opts; 3668 3675 3669 3676 /* reset abort key so that it can get Ctrl-C as a key */ 3670 3677 SLang_reset_tty(); 3671 3678 SLang_init_tty(0, 0, 0); 3679 + SLtty_set_suspend_state(true); 3672 3680 3673 3681 memset(&action, 0, sizeof(action)); 3674 3682
-2
tools/perf/ui/browsers/hists.h
··· 4 4 5 5 #include "ui/browser.h" 6 6 7 - struct annotation_options; 8 7 struct evsel; 9 8 10 9 struct hist_browser { ··· 14 15 struct hist_browser_timer *hbt; 15 16 struct pstack *pstack; 16 17 struct perf_env *env; 17 - struct annotation_options *annotation_opts; 18 18 struct evsel *block_evsel; 19 19 int print_seq; 20 20 bool show_dso;
+1
tools/perf/ui/browsers/scripts.c
··· 166 166 printf("\033[c\033[H\033[J"); 167 167 fflush(stdout); 168 168 SLang_init_tty(0, 0, 0); 169 + SLtty_set_suspend_state(true); 169 170 SLsmg_refresh(); 170 171 } 171 172
+2 -4
tools/perf/ui/gtk/annotate.c
··· 162 162 } 163 163 164 164 static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, 165 - struct annotation_options *options, 166 165 struct hist_browser_timer *hbt) 167 166 { 168 167 struct dso *dso = map__dso(ms->map); ··· 175 176 if (dso->annotate_warned) 176 177 return -1; 177 178 178 - err = symbol__annotate(ms, evsel, options, NULL); 179 + err = symbol__annotate(ms, evsel, NULL); 179 180 if (err) { 180 181 char msg[BUFSIZ]; 181 182 dso->annotate_warned = true; ··· 243 244 244 245 int hist_entry__gtk_annotate(struct hist_entry *he, 245 246 struct evsel *evsel, 246 - struct annotation_options *options, 247 247 struct hist_browser_timer *hbt) 248 248 { 249 - return symbol__gtk_annotate(&he->ms, evsel, options, hbt); 249 + return symbol__gtk_annotate(&he->ms, evsel, hbt); 250 250 } 251 251 252 252 void perf_gtk__show_annotations(void)
-2
tools/perf/ui/gtk/gtk.h
··· 56 56 struct evlist; 57 57 struct hist_entry; 58 58 struct hist_browser_timer; 59 - struct annotation_options; 60 59 61 60 int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, 62 61 struct hist_browser_timer *hbt, float min_pcnt); 63 62 int hist_entry__gtk_annotate(struct hist_entry *he, 64 63 struct evsel *evsel, 65 - struct annotation_options *options, 66 64 struct hist_browser_timer *hbt); 67 65 void perf_gtk__show_annotations(void); 68 66
+22
tools/perf/ui/tui/setup.c
··· 2 2 #include <signal.h> 3 3 #include <stdbool.h> 4 4 #include <stdlib.h> 5 + #include <termios.h> 5 6 #include <unistd.h> 6 7 #include <linux/kernel.h> 7 8 #ifdef HAVE_BACKTRACE_SUPPORT 8 9 #include <execinfo.h> 9 10 #endif 10 11 12 + #include "../../util/color.h" 11 13 #include "../../util/debug.h" 12 14 #include "../browser.h" 13 15 #include "../helpline.h" ··· 123 121 exit(0); 124 122 } 125 123 124 + static void ui__sigcont(int sig) 125 + { 126 + static struct termios tty; 127 + 128 + if (sig == SIGTSTP) { 129 + while (tcgetattr(SLang_TT_Read_FD, &tty) == -1 && errno == EINTR) 130 + ; 131 + while (write(SLang_TT_Read_FD, PERF_COLOR_RESET, sizeof(PERF_COLOR_RESET) - 1) == -1 && errno == EINTR) 132 + ; 133 + raise(SIGSTOP); 134 + } else { 135 + while (tcsetattr(SLang_TT_Read_FD, TCSADRAIN, &tty) == -1 && errno == EINTR) 136 + ; 137 + raise(SIGWINCH); 138 + } 139 + } 140 + 126 141 int ui__init(void) 127 142 { 128 143 int err; ··· 154 135 err = SLang_init_tty(-1, 0, 0); 155 136 if (err < 0) 156 137 goto out; 138 + SLtty_set_suspend_state(true); 157 139 158 140 err = SLkp_init(); 159 141 if (err < 0) { ··· 169 149 signal(SIGINT, ui__signal); 170 150 signal(SIGQUIT, ui__signal); 171 151 signal(SIGTERM, ui__signal); 152 + signal(SIGTSTP, ui__sigcont); 153 + signal(SIGCONT, ui__sigcont); 172 154 173 155 perf_error__register(&perf_tui_eops); 174 156
+2
tools/perf/util/Build
··· 195 195 perf-$(CONFIG_DWARF) += probe-finder.o 196 196 perf-$(CONFIG_DWARF) += dwarf-aux.o 197 197 perf-$(CONFIG_DWARF) += dwarf-regs.o 198 + perf-$(CONFIG_DWARF) += debuginfo.o 199 + perf-$(CONFIG_DWARF) += annotate-data.o 198 200 199 201 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 200 202 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
+405
tools/perf/util/annotate-data.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Convert sample address to data type using DWARF debug info. 4 + * 5 + * Written by Namhyung Kim <namhyung@kernel.org> 6 + */ 7 + 8 + #include <stdio.h> 9 + #include <stdlib.h> 10 + #include <inttypes.h> 11 + 12 + #include "annotate-data.h" 13 + #include "debuginfo.h" 14 + #include "debug.h" 15 + #include "dso.h" 16 + #include "evsel.h" 17 + #include "evlist.h" 18 + #include "map.h" 19 + #include "map_symbol.h" 20 + #include "strbuf.h" 21 + #include "symbol.h" 22 + #include "symbol_conf.h" 23 + 24 + /* 25 + * Compare type name and size to maintain them in a tree. 26 + * I'm not sure if DWARF would have information of a single type in many 27 + * different places (compilation units). If not, it could compare the 28 + * offset of the type entry in the .debug_info section. 29 + */ 30 + static int data_type_cmp(const void *_key, const struct rb_node *node) 31 + { 32 + const struct annotated_data_type *key = _key; 33 + struct annotated_data_type *type; 34 + 35 + type = rb_entry(node, struct annotated_data_type, node); 36 + 37 + if (key->self.size != type->self.size) 38 + return key->self.size - type->self.size; 39 + return strcmp(key->self.type_name, type->self.type_name); 40 + } 41 + 42 + static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) 43 + { 44 + struct annotated_data_type *a, *b; 45 + 46 + a = rb_entry(node_a, struct annotated_data_type, node); 47 + b = rb_entry(node_b, struct annotated_data_type, node); 48 + 49 + if (a->self.size != b->self.size) 50 + return a->self.size < b->self.size; 51 + return strcmp(a->self.type_name, b->self.type_name) < 0; 52 + } 53 + 54 + /* Recursively add new members for struct/union */ 55 + static int __add_member_cb(Dwarf_Die *die, void *arg) 56 + { 57 + struct annotated_member *parent = arg; 58 + struct annotated_member *member; 59 + Dwarf_Die member_type, die_mem; 60 + Dwarf_Word size, loc; 61 + Dwarf_Attribute attr; 62 + struct strbuf sb; 63 + 
int tag; 64 + 65 + if (dwarf_tag(die) != DW_TAG_member) 66 + return DIE_FIND_CB_SIBLING; 67 + 68 + member = zalloc(sizeof(*member)); 69 + if (member == NULL) 70 + return DIE_FIND_CB_END; 71 + 72 + strbuf_init(&sb, 32); 73 + die_get_typename(die, &sb); 74 + 75 + die_get_real_type(die, &member_type); 76 + if (dwarf_aggregate_size(&member_type, &size) < 0) 77 + size = 0; 78 + 79 + if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) 80 + loc = 0; 81 + else 82 + dwarf_formudata(&attr, &loc); 83 + 84 + member->type_name = strbuf_detach(&sb, NULL); 85 + /* member->var_name can be NULL */ 86 + if (dwarf_diename(die)) 87 + member->var_name = strdup(dwarf_diename(die)); 88 + member->size = size; 89 + member->offset = loc + parent->offset; 90 + INIT_LIST_HEAD(&member->children); 91 + list_add_tail(&member->node, &parent->children); 92 + 93 + tag = dwarf_tag(&member_type); 94 + switch (tag) { 95 + case DW_TAG_structure_type: 96 + case DW_TAG_union_type: 97 + die_find_child(&member_type, __add_member_cb, member, &die_mem); 98 + break; 99 + default: 100 + break; 101 + } 102 + return DIE_FIND_CB_SIBLING; 103 + } 104 + 105 + static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) 106 + { 107 + Dwarf_Die die_mem; 108 + 109 + die_find_child(type, __add_member_cb, &parent->self, &die_mem); 110 + } 111 + 112 + static void delete_members(struct annotated_member *member) 113 + { 114 + struct annotated_member *child, *tmp; 115 + 116 + list_for_each_entry_safe(child, tmp, &member->children, node) { 117 + list_del(&child->node); 118 + delete_members(child); 119 + free(child->type_name); 120 + free(child->var_name); 121 + free(child); 122 + } 123 + } 124 + 125 + static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, 126 + Dwarf_Die *type_die) 127 + { 128 + struct annotated_data_type *result = NULL; 129 + struct annotated_data_type key; 130 + struct rb_node *node; 131 + struct strbuf sb; 132 + char *type_name; 133 + Dwarf_Word 
size; 134 + 135 + strbuf_init(&sb, 32); 136 + if (die_get_typename_from_type(type_die, &sb) < 0) 137 + strbuf_add(&sb, "(unknown type)", 14); 138 + type_name = strbuf_detach(&sb, NULL); 139 + dwarf_aggregate_size(type_die, &size); 140 + 141 + /* Check existing nodes in dso->data_types tree */ 142 + key.self.type_name = type_name; 143 + key.self.size = size; 144 + node = rb_find(&key, &dso->data_types, data_type_cmp); 145 + if (node) { 146 + result = rb_entry(node, struct annotated_data_type, node); 147 + free(type_name); 148 + return result; 149 + } 150 + 151 + /* If not, add a new one */ 152 + result = zalloc(sizeof(*result)); 153 + if (result == NULL) { 154 + free(type_name); 155 + return NULL; 156 + } 157 + 158 + result->self.type_name = type_name; 159 + result->self.size = size; 160 + INIT_LIST_HEAD(&result->self.children); 161 + 162 + if (symbol_conf.annotate_data_member) 163 + add_member_types(result, type_die); 164 + 165 + rb_add(&result->node, &dso->data_types, data_type_less); 166 + return result; 167 + } 168 + 169 + static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) 170 + { 171 + Dwarf_Off off, next_off; 172 + size_t header_size; 173 + 174 + if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) 175 + return cu_die; 176 + 177 + /* 178 + * There are some kernels don't have full aranges and contain only a few 179 + * aranges entries. Fallback to iterate all CU entries in .debug_info 180 + * in case it's missing. 
181 + */ 182 + off = 0; 183 + while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, 184 + NULL, NULL, NULL) == 0) { 185 + if (dwarf_offdie(di->dbg, off + header_size, cu_die) && 186 + dwarf_haspc(cu_die, pc)) 187 + return true; 188 + 189 + off = next_off; 190 + } 191 + return false; 192 + } 193 + 194 + /* The type info will be saved in @type_die */ 195 + static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) 196 + { 197 + Dwarf_Word size; 198 + 199 + /* Get the type of the variable */ 200 + if (die_get_real_type(var_die, type_die) == NULL) { 201 + pr_debug("variable has no type\n"); 202 + ann_data_stat.no_typeinfo++; 203 + return -1; 204 + } 205 + 206 + /* 207 + * It expects a pointer type for a memory access. 208 + * Convert to a real type it points to. 209 + */ 210 + if (dwarf_tag(type_die) != DW_TAG_pointer_type || 211 + die_get_real_type(type_die, type_die) == NULL) { 212 + pr_debug("no pointer or no type\n"); 213 + ann_data_stat.no_typeinfo++; 214 + return -1; 215 + } 216 + 217 + /* Get the size of the actual type */ 218 + if (dwarf_aggregate_size(type_die, &size) < 0) { 219 + pr_debug("type size is unknown\n"); 220 + ann_data_stat.invalid_size++; 221 + return -1; 222 + } 223 + 224 + /* Minimal sanity check */ 225 + if ((unsigned)offset >= size) { 226 + pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); 227 + ann_data_stat.bad_offset++; 228 + return -1; 229 + } 230 + 231 + return 0; 232 + } 233 + 234 + /* The result will be saved in @type_die */ 235 + static int find_data_type_die(struct debuginfo *di, u64 pc, 236 + int reg, int offset, Dwarf_Die *type_die) 237 + { 238 + Dwarf_Die cu_die, var_die; 239 + Dwarf_Die *scopes = NULL; 240 + int ret = -1; 241 + int i, nr_scopes; 242 + 243 + /* Get a compile_unit for this address */ 244 + if (!find_cu_die(di, pc, &cu_die)) { 245 + pr_debug("cannot find CU for address %" PRIx64 "\n", pc); 246 + ann_data_stat.no_cuinfo++; 247 + return -1; 248 + } 249 + 250 + /* Get a 
list of nested scopes - i.e. (inlined) functions and blocks. */ 251 + nr_scopes = die_get_scopes(&cu_die, pc, &scopes); 252 + 253 + /* Search from the inner-most scope to the outer */ 254 + for (i = nr_scopes - 1; i >= 0; i--) { 255 + /* Look up variables/parameters in this scope */ 256 + if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die)) 257 + continue; 258 + 259 + /* Found a variable, see if it's correct */ 260 + ret = check_variable(&var_die, type_die, offset); 261 + goto out; 262 + } 263 + if (ret < 0) 264 + ann_data_stat.no_var++; 265 + 266 + out: 267 + free(scopes); 268 + return ret; 269 + } 270 + 271 + /** 272 + * find_data_type - Return a data type at the location 273 + * @ms: map and symbol at the location 274 + * @ip: instruction address of the memory access 275 + * @reg: register that holds the base address 276 + * @offset: offset from the base address 277 + * 278 + * This functions searches the debug information of the binary to get the data 279 + * type it accesses. The exact location is expressed by (ip, reg, offset). 280 + * It return %NULL if not found. 281 + */ 282 + struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, 283 + int reg, int offset) 284 + { 285 + struct annotated_data_type *result = NULL; 286 + struct dso *dso = map__dso(ms->map); 287 + struct debuginfo *di; 288 + Dwarf_Die type_die; 289 + u64 pc; 290 + 291 + di = debuginfo__new(dso->long_name); 292 + if (di == NULL) { 293 + pr_debug("cannot get the debug info\n"); 294 + return NULL; 295 + } 296 + 297 + /* 298 + * IP is a relative instruction address from the start of the map, as 299 + * it can be randomized/relocated, it needs to translate to PC which is 300 + * a file address for DWARF processing. 
301 + */ 302 + pc = map__rip_2objdump(ms->map, ip); 303 + if (find_data_type_die(di, pc, reg, offset, &type_die) < 0) 304 + goto out; 305 + 306 + result = dso__findnew_data_type(dso, &type_die); 307 + 308 + out: 309 + debuginfo__delete(di); 310 + return result; 311 + } 312 + 313 + static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) 314 + { 315 + int i; 316 + size_t sz = sizeof(struct type_hist); 317 + 318 + sz += sizeof(struct type_hist_entry) * adt->self.size; 319 + 320 + /* Allocate a table of pointers for each event */ 321 + adt->nr_histograms = nr_entries; 322 + adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); 323 + if (adt->histograms == NULL) 324 + return -ENOMEM; 325 + 326 + /* 327 + * Each histogram is allocated for the whole size of the type. 328 + * TODO: Probably we can move the histogram to members. 329 + */ 330 + for (i = 0; i < nr_entries; i++) { 331 + adt->histograms[i] = zalloc(sz); 332 + if (adt->histograms[i] == NULL) 333 + goto err; 334 + } 335 + return 0; 336 + 337 + err: 338 + while (--i >= 0) 339 + free(adt->histograms[i]); 340 + free(adt->histograms); 341 + return -ENOMEM; 342 + } 343 + 344 + static void delete_data_type_histograms(struct annotated_data_type *adt) 345 + { 346 + for (int i = 0; i < adt->nr_histograms; i++) 347 + free(adt->histograms[i]); 348 + free(adt->histograms); 349 + } 350 + 351 + void annotated_data_type__tree_delete(struct rb_root *root) 352 + { 353 + struct annotated_data_type *pos; 354 + 355 + while (!RB_EMPTY_ROOT(root)) { 356 + struct rb_node *node = rb_first(root); 357 + 358 + rb_erase(node, root); 359 + pos = rb_entry(node, struct annotated_data_type, node); 360 + delete_members(&pos->self); 361 + delete_data_type_histograms(pos); 362 + free(pos->self.type_name); 363 + free(pos); 364 + } 365 + } 366 + 367 + /** 368 + * annotated_data_type__update_samples - Update histogram 369 + * @adt: Data type to update 370 + * @evsel: Event to update 371 + * @offset: Offset in 
the type 372 + * @nr_samples: Number of samples at this offset 373 + * @period: Event count at this offset 374 + * 375 + * This function updates type histogram at @ofs for @evsel. Samples are 376 + * aggregated before calling this function so it can be called with more 377 + * than one samples at a certain offset. 378 + */ 379 + int annotated_data_type__update_samples(struct annotated_data_type *adt, 380 + struct evsel *evsel, int offset, 381 + int nr_samples, u64 period) 382 + { 383 + struct type_hist *h; 384 + 385 + if (adt == NULL) 386 + return 0; 387 + 388 + if (adt->histograms == NULL) { 389 + int nr = evsel->evlist->core.nr_entries; 390 + 391 + if (alloc_data_type_histograms(adt, nr) < 0) 392 + return -1; 393 + } 394 + 395 + if (offset < 0 || offset >= adt->self.size) 396 + return -1; 397 + 398 + h = adt->histograms[evsel->core.idx]; 399 + 400 + h->nr_samples += nr_samples; 401 + h->addr[offset].nr_samples += nr_samples; 402 + h->period += period; 403 + h->addr[offset].period += period; 404 + return 0; 405 + }
+143
tools/perf/util/annotate-data.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _PERF_ANNOTATE_DATA_H 3 + #define _PERF_ANNOTATE_DATA_H 4 + 5 + #include <errno.h> 6 + #include <linux/compiler.h> 7 + #include <linux/rbtree.h> 8 + #include <linux/types.h> 9 + 10 + struct evsel; 11 + struct map_symbol; 12 + 13 + /** 14 + * struct annotated_member - Type of member field 15 + * @node: List entry in the parent list 16 + * @children: List head for child nodes 17 + * @type_name: Name of the member type 18 + * @var_name: Name of the member variable 19 + * @offset: Offset from the outer data type 20 + * @size: Size of the member field 21 + * 22 + * This represents a member type in a data type. 23 + */ 24 + struct annotated_member { 25 + struct list_head node; 26 + struct list_head children; 27 + char *type_name; 28 + char *var_name; 29 + int offset; 30 + int size; 31 + }; 32 + 33 + /** 34 + * struct type_hist_entry - Histogram entry per offset 35 + * @nr_samples: Number of samples 36 + * @period: Count of event 37 + */ 38 + struct type_hist_entry { 39 + int nr_samples; 40 + u64 period; 41 + }; 42 + 43 + /** 44 + * struct type_hist - Type histogram for each event 45 + * @nr_samples: Total number of samples in this data type 46 + * @period: Total count of the event in this data type 47 + * @offset: Array of histogram entry 48 + */ 49 + struct type_hist { 50 + u64 nr_samples; 51 + u64 period; 52 + struct type_hist_entry addr[]; 53 + }; 54 + 55 + /** 56 + * struct annotated_data_type - Data type to profile 57 + * @node: RB-tree node for dso->type_tree 58 + * @self: Actual type information 59 + * @nr_histogram: Number of histogram entries 60 + * @histograms: An array of pointers to histograms 61 + * 62 + * This represents a data type accessed by samples in the profile data. 
63 + */ 64 + struct annotated_data_type { 65 + struct rb_node node; 66 + struct annotated_member self; 67 + int nr_histograms; 68 + struct type_hist **histograms; 69 + }; 70 + 71 + extern struct annotated_data_type unknown_type; 72 + 73 + /** 74 + * struct annotated_data_stat - Debug statistics 75 + * @total: Total number of entry 76 + * @no_sym: No symbol or map found 77 + * @no_insn: Failed to get disasm line 78 + * @no_insn_ops: The instruction has no operands 79 + * @no_mem_ops: The instruction has no memory operands 80 + * @no_reg: Failed to extract a register from the operand 81 + * @no_dbginfo: The binary has no debug information 82 + * @no_cuinfo: Failed to find a compile_unit 83 + * @no_var: Failed to find a matching variable 84 + * @no_typeinfo: Failed to get a type info for the variable 85 + * @invalid_size: Failed to get a size info of the type 86 + * @bad_offset: The access offset is out of the type 87 + */ 88 + struct annotated_data_stat { 89 + int total; 90 + int no_sym; 91 + int no_insn; 92 + int no_insn_ops; 93 + int no_mem_ops; 94 + int no_reg; 95 + int no_dbginfo; 96 + int no_cuinfo; 97 + int no_var; 98 + int no_typeinfo; 99 + int invalid_size; 100 + int bad_offset; 101 + }; 102 + extern struct annotated_data_stat ann_data_stat; 103 + 104 + #ifdef HAVE_DWARF_SUPPORT 105 + 106 + /* Returns data type at the location (ip, reg, offset) */ 107 + struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, 108 + int reg, int offset); 109 + 110 + /* Update type access histogram at the given offset */ 111 + int annotated_data_type__update_samples(struct annotated_data_type *adt, 112 + struct evsel *evsel, int offset, 113 + int nr_samples, u64 period); 114 + 115 + /* Release all data type information in the tree */ 116 + void annotated_data_type__tree_delete(struct rb_root *root); 117 + 118 + #else /* HAVE_DWARF_SUPPORT */ 119 + 120 + static inline struct annotated_data_type * 121 + find_data_type(struct map_symbol *ms __maybe_unused, u64 ip 
__maybe_unused, 122 + int reg __maybe_unused, int offset __maybe_unused) 123 + { 124 + return NULL; 125 + } 126 + 127 + static inline int 128 + annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused, 129 + struct evsel *evsel __maybe_unused, 130 + int offset __maybe_unused, 131 + int nr_samples __maybe_unused, 132 + u64 period __maybe_unused) 133 + { 134 + return -1; 135 + } 136 + 137 + static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused) 138 + { 139 + } 140 + 141 + #endif /* HAVE_DWARF_SUPPORT */ 142 + 143 + #endif /* _PERF_ANNOTATE_DATA_H */
+523 -166
tools/perf/util/annotate.c
··· 25 25 #include "units.h" 26 26 #include "debug.h" 27 27 #include "annotate.h" 28 + #include "annotate-data.h" 28 29 #include "evsel.h" 29 30 #include "evlist.h" 30 31 #include "bpf-event.h" 31 32 #include "bpf-utils.h" 32 33 #include "block-range.h" 33 34 #include "string2.h" 35 + #include "dwarf-regs.h" 34 36 #include "util/event.h" 35 37 #include "util/sharded_mutex.h" 36 38 #include "arch/common.h" ··· 58 56 #define UARROW_CHAR ((unsigned char)'-') 59 57 60 58 #include <linux/ctype.h> 59 + 60 + /* global annotation options */ 61 + struct annotation_options annotate_opts; 61 62 62 63 static regex_t file_lineno; 63 64 ··· 90 85 struct { 91 86 char comment_char; 92 87 char skip_functions_char; 88 + char register_char; 89 + char memory_ref_char; 93 90 } objdump; 94 91 }; 95 92 ··· 102 95 static struct ins_ops nop_ops; 103 96 static struct ins_ops lock_ops; 104 97 static struct ins_ops ret_ops; 98 + 99 + /* Data type collection debug statistics */ 100 + struct annotated_data_stat ann_data_stat; 101 + LIST_HEAD(ann_insn_stat); 105 102 106 103 static int arch__grow_instructions(struct arch *arch) 107 104 { ··· 199 188 .insn_suffix = "bwlq", 200 189 .objdump = { 201 190 .comment_char = '#', 191 + .register_char = '%', 192 + .memory_ref_char = '(', 202 193 }, 203 194 }, 204 195 { ··· 353 340 */ 354 341 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 355 342 { 356 - if (ops->raw_comment && c > ops->raw_comment) 343 + if (ops->jump.raw_comment && c > ops->jump.raw_comment) 357 344 return NULL; 358 345 359 - if (ops->raw_func_start && c > ops->raw_func_start) 346 + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 360 347 return NULL; 361 348 362 349 return c; ··· 372 359 const char *c = strchr(ops->raw, ','); 373 360 u64 start, end; 374 361 375 - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); 376 - ops->raw_func_start = strchr(ops->raw, '<'); 362 + ops->jump.raw_comment = strchr(ops->raw, 
arch->objdump.comment_char); 363 + ops->jump.raw_func_start = strchr(ops->raw, '<'); 377 364 378 365 c = validate_comma(c, ops); 379 366 ··· 475 462 ops->target.offset); 476 463 } 477 464 465 + static void jump__delete(struct ins_operands *ops __maybe_unused) 466 + { 467 + /* 468 + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 469 + * raw string, don't free them. 470 + */ 471 + } 472 + 478 473 static struct ins_ops jump_ops = { 474 + .free = jump__delete, 479 475 .parse = jump__parse, 480 476 .scnprintf = jump__scnprintf, 481 477 }; ··· 579 557 .scnprintf = lock__scnprintf, 580 558 }; 581 559 560 + /* 561 + * Check if the operand has more than one registers like x86 SIB addressing: 562 + * 0x1234(%rax, %rbx, 8) 563 + * 564 + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 565 + * the input string after 'memory_ref_char' if exists. 566 + */ 567 + static bool check_multi_regs(struct arch *arch, const char *op) 568 + { 569 + int count = 0; 570 + 571 + if (arch->objdump.register_char == 0) 572 + return false; 573 + 574 + if (arch->objdump.memory_ref_char) { 575 + op = strchr(op, arch->objdump.memory_ref_char); 576 + if (op == NULL) 577 + return false; 578 + } 579 + 580 + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 581 + count++; 582 + op++; 583 + } 584 + 585 + return count > 1; 586 + } 587 + 582 588 static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) 583 589 { 584 590 char *s = strchr(ops->raw, ','), *target, *comment, prev; ··· 634 584 if (ops->source.raw == NULL) 635 585 return -1; 636 586 587 + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 588 + 637 589 target = skip_spaces(++s); 638 590 comment = strchr(s, arch->objdump.comment_char); 639 591 ··· 655 603 656 604 if (ops->target.raw == NULL) 657 605 goto out_free_source; 606 + 607 + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 658 608 659 609 if 
(comment == NULL) 660 610 return 0; ··· 849 795 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 850 796 } 851 797 798 + bool arch__is(struct arch *arch, const char *name) 799 + { 800 + return !strcmp(arch->name, name); 801 + } 802 + 852 803 static struct annotated_source *annotated_source__new(void) 853 804 { 854 805 struct annotated_source *src = zalloc(sizeof(*src)); ··· 869 810 if (src == NULL) 870 811 return; 871 812 zfree(&src->histograms); 872 - zfree(&src->cycles_hist); 873 813 free(src); 874 814 } 875 815 ··· 903 845 return src->histograms ? 0 : -1; 904 846 } 905 847 906 - /* The cycles histogram is lazily allocated. */ 907 - static int symbol__alloc_hist_cycles(struct symbol *sym) 908 - { 909 - struct annotation *notes = symbol__annotation(sym); 910 - const size_t size = symbol__size(sym); 911 - 912 - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); 913 - if (notes->src->cycles_hist == NULL) 914 - return -1; 915 - return 0; 916 - } 917 - 918 848 void symbol__annotate_zero_histograms(struct symbol *sym) 919 849 { 920 850 struct annotation *notes = symbol__annotation(sym); ··· 911 865 if (notes->src != NULL) { 912 866 memset(notes->src->histograms, 0, 913 867 notes->src->nr_histograms * notes->src->sizeof_sym_hist); 914 - if (notes->src->cycles_hist) 915 - memset(notes->src->cycles_hist, 0, 916 - symbol__size(sym) * sizeof(struct cyc_hist)); 868 + } 869 + if (notes->branch && notes->branch->cycles_hist) { 870 + memset(notes->branch->cycles_hist, 0, 871 + symbol__size(sym) * sizeof(struct cyc_hist)); 917 872 } 918 873 annotation__unlock(notes); 919 874 } ··· 1005 958 return 0; 1006 959 } 1007 960 961 + struct annotated_branch *annotation__get_branch(struct annotation *notes) 962 + { 963 + if (notes == NULL) 964 + return NULL; 965 + 966 + if (notes->branch == NULL) 967 + notes->branch = zalloc(sizeof(*notes->branch)); 968 + 969 + return notes->branch; 970 + } 971 + 1008 972 static struct cyc_hist 
*symbol__cycles_hist(struct symbol *sym) 1009 973 { 1010 974 struct annotation *notes = symbol__annotation(sym); 975 + struct annotated_branch *branch; 1011 976 1012 - if (notes->src == NULL) { 1013 - notes->src = annotated_source__new(); 1014 - if (notes->src == NULL) 1015 - return NULL; 1016 - goto alloc_cycles_hist; 977 + branch = annotation__get_branch(notes); 978 + if (branch == NULL) 979 + return NULL; 980 + 981 + if (branch->cycles_hist == NULL) { 982 + const size_t size = symbol__size(sym); 983 + 984 + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); 1017 985 } 1018 986 1019 - if (!notes->src->cycles_hist) { 1020 - alloc_cycles_hist: 1021 - symbol__alloc_hist_cycles(sym); 1022 - } 1023 - 1024 - return notes->src->cycles_hist; 987 + return branch->cycles_hist; 1025 988 } 1026 989 1027 990 struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) ··· 1134 1077 u64 offset; 1135 1078 1136 1079 for (offset = start; offset <= end; offset++) { 1137 - if (notes->offsets[offset]) 1080 + if (notes->src->offsets[offset]) 1138 1081 n_insn++; 1139 1082 } 1140 1083 return n_insn; 1084 + } 1085 + 1086 + static void annotated_branch__delete(struct annotated_branch *branch) 1087 + { 1088 + if (branch) { 1089 + zfree(&branch->cycles_hist); 1090 + free(branch); 1091 + } 1141 1092 } 1142 1093 1143 1094 static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) ··· 1156 1091 1157 1092 n_insn = annotation__count_insn(notes, start, end); 1158 1093 if (n_insn && ch->num && ch->cycles) { 1094 + struct annotated_branch *branch; 1159 1095 float ipc = n_insn / ((double)ch->cycles / (double)ch->num); 1160 1096 1161 1097 /* Hide data when there are too many overlaps. 
*/ ··· 1164 1098 return; 1165 1099 1166 1100 for (offset = start; offset <= end; offset++) { 1167 - struct annotation_line *al = notes->offsets[offset]; 1101 + struct annotation_line *al = notes->src->offsets[offset]; 1168 1102 1169 - if (al && al->ipc == 0.0) { 1170 - al->ipc = ipc; 1103 + if (al && al->cycles && al->cycles->ipc == 0.0) { 1104 + al->cycles->ipc = ipc; 1171 1105 cover_insn++; 1172 1106 } 1173 1107 } 1174 1108 1175 - if (cover_insn) { 1176 - notes->hit_cycles += ch->cycles; 1177 - notes->hit_insn += n_insn * ch->num; 1178 - notes->cover_insn += cover_insn; 1109 + branch = annotation__get_branch(notes); 1110 + if (cover_insn && branch) { 1111 + branch->hit_cycles += ch->cycles; 1112 + branch->hit_insn += n_insn * ch->num; 1113 + branch->cover_insn += cover_insn; 1179 1114 } 1180 1115 } 1181 1116 } 1182 1117 1183 - void annotation__compute_ipc(struct annotation *notes, size_t size) 1118 + static int annotation__compute_ipc(struct annotation *notes, size_t size) 1184 1119 { 1120 + int err = 0; 1185 1121 s64 offset; 1186 1122 1187 - if (!notes->src || !notes->src->cycles_hist) 1188 - return; 1123 + if (!notes->branch || !notes->branch->cycles_hist) 1124 + return 0; 1189 1125 1190 - notes->total_insn = annotation__count_insn(notes, 0, size - 1); 1191 - notes->hit_cycles = 0; 1192 - notes->hit_insn = 0; 1193 - notes->cover_insn = 0; 1126 + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); 1127 + notes->branch->hit_cycles = 0; 1128 + notes->branch->hit_insn = 0; 1129 + notes->branch->cover_insn = 0; 1194 1130 1195 1131 annotation__lock(notes); 1196 1132 for (offset = size - 1; offset >= 0; --offset) { 1197 1133 struct cyc_hist *ch; 1198 1134 1199 - ch = &notes->src->cycles_hist[offset]; 1135 + ch = &notes->branch->cycles_hist[offset]; 1200 1136 if (ch && ch->cycles) { 1201 1137 struct annotation_line *al; 1202 1138 1139 + al = notes->src->offsets[offset]; 1140 + if (al && al->cycles == NULL) { 1141 + al->cycles = 
zalloc(sizeof(*al->cycles)); 1142 + if (al->cycles == NULL) { 1143 + err = ENOMEM; 1144 + break; 1145 + } 1146 + } 1203 1147 if (ch->have_start) 1204 1148 annotation__count_and_fill(notes, ch->start, offset, ch); 1205 - al = notes->offsets[offset]; 1206 1149 if (al && ch->num_aggr) { 1207 - al->cycles = ch->cycles_aggr / ch->num_aggr; 1208 - al->cycles_max = ch->cycles_max; 1209 - al->cycles_min = ch->cycles_min; 1150 + al->cycles->avg = ch->cycles_aggr / ch->num_aggr; 1151 + al->cycles->max = ch->cycles_max; 1152 + al->cycles->min = ch->cycles_min; 1210 1153 } 1211 - notes->have_cycles = true; 1212 1154 } 1213 1155 } 1156 + 1157 + if (err) { 1158 + while (++offset < (s64)size) { 1159 + struct cyc_hist *ch = &notes->branch->cycles_hist[offset]; 1160 + 1161 + if (ch && ch->cycles) { 1162 + struct annotation_line *al = notes->src->offsets[offset]; 1163 + if (al) 1164 + zfree(&al->cycles); 1165 + } 1166 + } 1167 + } 1168 + 1214 1169 annotation__unlock(notes); 1170 + return 0; 1215 1171 } 1216 1172 1217 1173 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, ··· 1313 1225 { 1314 1226 zfree_srcline(&al->path); 1315 1227 zfree(&al->line); 1228 + zfree(&al->cycles); 1316 1229 } 1317 1230 1318 1231 static size_t disasm_line_size(int nr) ··· 1388 1299 void annotation__exit(struct annotation *notes) 1389 1300 { 1390 1301 annotated_source__delete(notes->src); 1302 + annotated_branch__delete(notes->branch); 1391 1303 } 1392 1304 1393 1305 static struct sharded_mutex *sharded_mutex; ··· 1907 1817 struct annotate_args *args) 1908 1818 { 1909 1819 struct annotation *notes = symbol__annotation(sym); 1910 - struct annotation_options *opts = args->options; 1911 1820 struct bpf_prog_linfo *prog_linfo = NULL; 1912 1821 struct bpf_prog_info_node *info_node; 1913 1822 int len = sym->end - sym->start; ··· 2016 1927 prev_buf_size = buf_size; 2017 1928 fflush(s); 2018 1929 2019 - if (!opts->hide_src_code && srcline) { 1930 + if 
(!annotate_opts.hide_src_code && srcline) { 2020 1931 args->offset = -1; 2021 1932 args->line = strdup(srcline); 2022 1933 args->line_nr = 0; ··· 2139 2050 2140 2051 static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 2141 2052 { 2142 - struct annotation_options *opts = args->options; 2053 + struct annotation_options *opts = &annotate_opts; 2143 2054 struct map *map = args->ms.map; 2144 2055 struct dso *dso = map__dso(map); 2145 2056 char *command; ··· 2202 2113 err = asprintf(&command, 2203 2114 "%s %s%s --start-address=0x%016" PRIx64 2204 2115 " --stop-address=0x%016" PRIx64 2205 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", 2116 + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 2206 2117 opts->objdump_path ?: "objdump", 2207 2118 opts->disassembler_style ? "-M " : "", 2208 2119 opts->disassembler_style ?: "", 2209 2120 map__rip_2objdump(map, sym->start), 2210 2121 map__rip_2objdump(map, sym->end), 2122 + opts->show_linenr ? "-l" : "", 2211 2123 opts->show_asm_raw ? "" : "--no-show-raw-insn", 2212 2124 opts->annotate_src ? "-S" : "", 2213 2125 opts->prefix ? 
"--prefix " : "", ··· 2389 2299 annotation__calc_percent(notes, evsel, symbol__size(sym)); 2390 2300 } 2391 2301 2392 - int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, 2393 - struct annotation_options *options, struct arch **parch) 2302 + static int evsel__get_arch(struct evsel *evsel, struct arch **parch) 2394 2303 { 2395 - struct symbol *sym = ms->sym; 2396 - struct annotation *notes = symbol__annotation(sym); 2397 - struct annotate_args args = { 2398 - .evsel = evsel, 2399 - .options = options, 2400 - }; 2401 2304 struct perf_env *env = evsel__env(evsel); 2402 2305 const char *arch_name = perf_env__arch(env); 2403 2306 struct arch *arch; ··· 2399 2316 if (!arch_name) 2400 2317 return errno; 2401 2318 2402 - args.arch = arch = arch__find(arch_name); 2319 + *parch = arch = arch__find(arch_name); 2403 2320 if (arch == NULL) { 2404 2321 pr_err("%s: unsupported arch %s\n", __func__, arch_name); 2405 2322 return ENOTSUP; 2406 2323 } 2407 2324 2408 - if (parch) 2409 - *parch = arch; 2410 - 2411 2325 if (arch->init) { 2412 2326 err = arch->init(arch, env ? 
env->cpuid : NULL); 2413 2327 if (err) { 2414 - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); 2328 + pr_err("%s: failed to initialize %s arch priv area\n", 2329 + __func__, arch->name); 2415 2330 return err; 2416 2331 } 2417 2332 } 2333 + return 0; 2334 + } 2418 2335 2336 + int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, 2337 + struct arch **parch) 2338 + { 2339 + struct symbol *sym = ms->sym; 2340 + struct annotation *notes = symbol__annotation(sym); 2341 + struct annotate_args args = { 2342 + .evsel = evsel, 2343 + .options = &annotate_opts, 2344 + }; 2345 + struct arch *arch = NULL; 2346 + int err; 2347 + 2348 + err = evsel__get_arch(evsel, &arch); 2349 + if (err < 0) 2350 + return err; 2351 + 2352 + if (parch) 2353 + *parch = arch; 2354 + 2355 + args.arch = arch; 2419 2356 args.ms = *ms; 2420 - if (notes->options && notes->options->full_addr) 2357 + if (annotate_opts.full_addr) 2421 2358 notes->start = map__objdump_2mem(ms->map, ms->sym->start); 2422 2359 else 2423 2360 notes->start = map__rip_2objdump(ms->map, ms->sym->start); ··· 2445 2342 return symbol__disassemble(sym, &args); 2446 2343 } 2447 2344 2448 - static void insert_source_line(struct rb_root *root, struct annotation_line *al, 2449 - struct annotation_options *opts) 2345 + static void insert_source_line(struct rb_root *root, struct annotation_line *al) 2450 2346 { 2451 2347 struct annotation_line *iter; 2452 2348 struct rb_node **p = &root->rb_node; 2453 2349 struct rb_node *parent = NULL; 2350 + unsigned int percent_type = annotate_opts.percent_type; 2454 2351 int i, ret; 2455 2352 2456 2353 while (*p != NULL) { ··· 2461 2358 if (ret == 0) { 2462 2359 for (i = 0; i < al->data_nr; i++) { 2463 2360 iter->data[i].percent_sum += annotation_data__percent(&al->data[i], 2464 - opts->percent_type); 2361 + percent_type); 2465 2362 } 2466 2363 return; 2467 2364 } ··· 2474 2371 2475 2372 for (i = 0; i < al->data_nr; i++) { 2476 2373 al->data[i].percent_sum = 
annotation_data__percent(&al->data[i], 2477 - opts->percent_type); 2374 + percent_type); 2478 2375 } 2479 2376 2480 2377 rb_link_node(&al->rb_node, parent, p); ··· 2596 2493 return 0; 2597 2494 } 2598 2495 2599 - int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, 2600 - struct annotation_options *opts) 2496 + int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) 2601 2497 { 2602 2498 struct map *map = ms->map; 2603 2499 struct symbol *sym = ms->sym; ··· 2607 2505 struct annotation *notes = symbol__annotation(sym); 2608 2506 struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); 2609 2507 struct annotation_line *pos, *queue = NULL; 2508 + struct annotation_options *opts = &annotate_opts; 2610 2509 u64 start = map__rip_2objdump(map, sym->start); 2611 2510 int printed = 2, queue_len = 0, addr_fmt_width; 2612 2511 int more = 0; ··· 2736 2633 fputs(s, fp); 2737 2634 } 2738 2635 2739 - static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, 2740 - struct annotation_options *opts) 2636 + static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) 2741 2637 { 2742 2638 struct annotation *notes = symbol__annotation(sym); 2743 2639 struct annotation_write_ops wops = { ··· 2751 2649 struct annotation_line *al; 2752 2650 2753 2651 list_for_each_entry(al, &notes->src->source, node) { 2754 - if (annotation_line__filter(al, notes)) 2652 + if (annotation_line__filter(al)) 2755 2653 continue; 2756 - annotation_line__write(al, notes, &wops, opts); 2654 + annotation_line__write(al, notes, &wops); 2757 2655 fputc('\n', fp); 2758 2656 wops.first_line = false; 2759 2657 } ··· 2761 2659 return 0; 2762 2660 } 2763 2661 2764 - int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, 2765 - struct annotation_options *opts) 2662 + int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) 2766 2663 { 2767 2664 const char *ev_name = evsel__name(evsel); 2768 2665 char buf[1024]; ··· 2783 
2682 2784 2683 fprintf(fp, "%s() %s\nEvent: %s\n\n", 2785 2684 ms->sym->name, map__dso(ms->map)->long_name, ev_name); 2786 - symbol__annotate_fprintf2(ms->sym, fp, opts); 2685 + symbol__annotate_fprintf2(ms->sym, fp); 2787 2686 2788 2687 fclose(fp); 2789 2688 err = 0; ··· 2870 2769 return; 2871 2770 2872 2771 for (offset = 0; offset < size; ++offset) { 2873 - struct annotation_line *al = notes->offsets[offset]; 2772 + struct annotation_line *al = notes->src->offsets[offset]; 2874 2773 struct disasm_line *dl; 2875 2774 2876 2775 dl = disasm_line(al); ··· 2878 2777 if (!disasm_line__is_valid_local_jump(dl, sym)) 2879 2778 continue; 2880 2779 2881 - al = notes->offsets[dl->ops.target.offset]; 2780 + al = notes->src->offsets[dl->ops.target.offset]; 2882 2781 2883 2782 /* 2884 2783 * FIXME: Oops, no jump target? Buggy disassembler? Or do we ··· 2895 2794 void annotation__set_offsets(struct annotation *notes, s64 size) 2896 2795 { 2897 2796 struct annotation_line *al; 2797 + struct annotated_source *src = notes->src; 2898 2798 2899 - notes->max_line_len = 0; 2900 - notes->nr_entries = 0; 2901 - notes->nr_asm_entries = 0; 2799 + src->max_line_len = 0; 2800 + src->nr_entries = 0; 2801 + src->nr_asm_entries = 0; 2902 2802 2903 - list_for_each_entry(al, &notes->src->source, node) { 2803 + list_for_each_entry(al, &src->source, node) { 2904 2804 size_t line_len = strlen(al->line); 2905 2805 2906 - if (notes->max_line_len < line_len) 2907 - notes->max_line_len = line_len; 2908 - al->idx = notes->nr_entries++; 2806 + if (src->max_line_len < line_len) 2807 + src->max_line_len = line_len; 2808 + al->idx = src->nr_entries++; 2909 2809 if (al->offset != -1) { 2910 - al->idx_asm = notes->nr_asm_entries++; 2810 + al->idx_asm = src->nr_asm_entries++; 2911 2811 /* 2912 2812 * FIXME: short term bandaid to cope with assembly 2913 2813 * routines that comes with labels in the same column ··· 2917 2815 * E.g. 
copy_user_generic_unrolled 2918 2816 */ 2919 2817 if (al->offset < size) 2920 - notes->offsets[al->offset] = al; 2818 + notes->src->offsets[al->offset] = al; 2921 2819 } else 2922 2820 al->idx_asm = -1; 2923 2821 } ··· 2960 2858 2961 2859 void annotation__update_column_widths(struct annotation *notes) 2962 2860 { 2963 - if (notes->options->use_offset) 2861 + if (annotate_opts.use_offset) 2964 2862 notes->widths.target = notes->widths.min_addr; 2965 - else if (notes->options->full_addr) 2863 + else if (annotate_opts.full_addr) 2966 2864 notes->widths.target = BITS_PER_LONG / 4; 2967 2865 else 2968 2866 notes->widths.target = notes->widths.max_addr; 2969 2867 2970 2868 notes->widths.addr = notes->widths.target; 2971 2869 2972 - if (notes->options->show_nr_jumps) 2870 + if (annotate_opts.show_nr_jumps) 2973 2871 notes->widths.addr += notes->widths.jumps + 1; 2974 2872 } 2975 2873 2976 2874 void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) 2977 2875 { 2978 - notes->options->full_addr = !notes->options->full_addr; 2876 + annotate_opts.full_addr = !annotate_opts.full_addr; 2979 2877 2980 - if (notes->options->full_addr) 2878 + if (annotate_opts.full_addr) 2981 2879 notes->start = map__objdump_2mem(ms->map, ms->sym->start); 2982 2880 else 2983 2881 notes->start = map__rip_2objdump(ms->map, ms->sym->start); ··· 2986 2884 } 2987 2885 2988 2886 static void annotation__calc_lines(struct annotation *notes, struct map *map, 2989 - struct rb_root *root, 2990 - struct annotation_options *opts) 2887 + struct rb_root *root) 2991 2888 { 2992 2889 struct annotation_line *al; 2993 2890 struct rb_root tmp_root = RB_ROOT; ··· 2999 2898 double percent; 3000 2899 3001 2900 percent = annotation_data__percent(&al->data[i], 3002 - opts->percent_type); 2901 + annotate_opts.percent_type); 3003 2902 3004 2903 if (percent > percent_max) 3005 2904 percent_max = percent; ··· 3010 2909 3011 2910 al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, 
3012 2911 false, true, notes->start + al->offset); 3013 - insert_source_line(&tmp_root, al, opts); 2912 + insert_source_line(&tmp_root, al); 3014 2913 } 3015 2914 3016 2915 resort_source_line(root, &tmp_root); 3017 2916 } 3018 2917 3019 - static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, 3020 - struct annotation_options *opts) 2918 + static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) 3021 2919 { 3022 2920 struct annotation *notes = symbol__annotation(ms->sym); 3023 2921 3024 - annotation__calc_lines(notes, ms->map, root, opts); 2922 + annotation__calc_lines(notes, ms->map, root); 3025 2923 } 3026 2924 3027 - int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, 3028 - struct annotation_options *opts) 2925 + int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) 3029 2926 { 3030 2927 struct dso *dso = map__dso(ms->map); 3031 2928 struct symbol *sym = ms->sym; ··· 3032 2933 char buf[1024]; 3033 2934 int err; 3034 2935 3035 - err = symbol__annotate2(ms, evsel, opts, NULL); 2936 + err = symbol__annotate2(ms, evsel, NULL); 3036 2937 if (err) { 3037 2938 char msg[BUFSIZ]; 3038 2939 ··· 3042 2943 return -1; 3043 2944 } 3044 2945 3045 - if (opts->print_lines) { 3046 - srcline_full_filename = opts->full_path; 3047 - symbol__calc_lines(ms, &source_line, opts); 2946 + if (annotate_opts.print_lines) { 2947 + srcline_full_filename = annotate_opts.full_path; 2948 + symbol__calc_lines(ms, &source_line); 3048 2949 print_summary(&source_line, dso->long_name); 3049 2950 } 3050 2951 3051 2952 hists__scnprintf_title(hists, buf, sizeof(buf)); 3052 2953 fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", 3053 - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); 3054 - symbol__annotate_fprintf2(sym, stdout, opts); 2954 + buf, percent_type_str(annotate_opts.percent_type), sym->name, 2955 + dso->long_name); 2956 + symbol__annotate_fprintf2(sym, stdout); 3055 2957 3056 2958 
annotated_source__purge(symbol__annotation(sym)->src); 3057 2959 3058 2960 return 0; 3059 2961 } 3060 2962 3061 - int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, 3062 - struct annotation_options *opts) 2963 + int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) 3063 2964 { 3064 2965 struct dso *dso = map__dso(ms->map); 3065 2966 struct symbol *sym = ms->sym; 3066 2967 struct rb_root source_line = RB_ROOT; 3067 2968 int err; 3068 2969 3069 - err = symbol__annotate(ms, evsel, opts, NULL); 2970 + err = symbol__annotate(ms, evsel, NULL); 3070 2971 if (err) { 3071 2972 char msg[BUFSIZ]; 3072 2973 ··· 3078 2979 3079 2980 symbol__calc_percent(sym, evsel); 3080 2981 3081 - if (opts->print_lines) { 3082 - srcline_full_filename = opts->full_path; 3083 - symbol__calc_lines(ms, &source_line, opts); 2982 + if (annotate_opts.print_lines) { 2983 + srcline_full_filename = annotate_opts.full_path; 2984 + symbol__calc_lines(ms, &source_line); 3084 2985 print_summary(&source_line, dso->long_name); 3085 2986 } 3086 2987 3087 - symbol__annotate_printf(ms, evsel, opts); 2988 + symbol__annotate_printf(ms, evsel); 3088 2989 3089 2990 annotated_source__purge(symbol__annotation(sym)->src); 3090 2991 ··· 3145 3046 obj__printf(obj, " "); 3146 3047 } 3147 3048 3148 - disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name); 3049 + disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name); 3149 3050 } 3150 3051 3151 3052 static void ipc_coverage_string(char *bf, int size, struct annotation *notes) 3152 3053 { 3153 3054 double ipc = 0.0, coverage = 0.0; 3055 + struct annotated_branch *branch = annotation__get_branch(notes); 3154 3056 3155 - if (notes->hit_cycles) 3156 - ipc = notes->hit_insn / ((double)notes->hit_cycles); 3057 + if (branch && branch->hit_cycles) 3058 + ipc = branch->hit_insn / ((double)branch->hit_cycles); 3157 3059 3158 - if (notes->total_insn) { 3159 - coverage = 
notes->cover_insn * 100.0 / 3160 - ((double)notes->total_insn); 3060 + if (branch && branch->total_insn) { 3061 + coverage = branch->cover_insn * 100.0 / 3062 + ((double)branch->total_insn); 3161 3063 } 3162 3064 3163 3065 scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", ··· 3183 3083 int printed; 3184 3084 3185 3085 if (first_line && (al->offset == -1 || percent_max == 0.0)) { 3186 - if (notes->have_cycles) { 3187 - if (al->ipc == 0.0 && al->cycles == 0) 3086 + if (notes->branch && al->cycles) { 3087 + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) 3188 3088 show_title = true; 3189 3089 } else 3190 3090 show_title = true; ··· 3220 3120 } 3221 3121 } 3222 3122 3223 - if (notes->have_cycles) { 3224 - if (al->ipc) 3225 - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); 3123 + if (notes->branch) { 3124 + if (al->cycles && al->cycles->ipc) 3125 + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); 3226 3126 else if (!show_title) 3227 3127 obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); 3228 3128 else 3229 3129 obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); 3230 3130 3231 - if (!notes->options->show_minmax_cycle) { 3232 - if (al->cycles) 3131 + if (!annotate_opts.show_minmax_cycle) { 3132 + if (al->cycles && al->cycles->avg) 3233 3133 obj__printf(obj, "%*" PRIu64 " ", 3234 - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); 3134 + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); 3235 3135 else if (!show_title) 3236 3136 obj__printf(obj, "%*s", 3237 3137 ANNOTATION__CYCLES_WIDTH, " "); ··· 3245 3145 3246 3146 scnprintf(str, sizeof(str), 3247 3147 "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", 3248 - al->cycles, al->cycles_min, 3249 - al->cycles_max); 3148 + al->cycles->avg, al->cycles->min, 3149 + al->cycles->max); 3250 3150 3251 3151 obj__printf(obj, "%*s ", 3252 3152 ANNOTATION__MINMAX_CYCLES_WIDTH - 1, ··· 3272 3172 if (!*al->line) 3273 3173 obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); 
3274 3174 else if (al->offset == -1) { 3275 - if (al->line_nr && notes->options->show_linenr) 3175 + if (al->line_nr && annotate_opts.show_linenr) 3276 3176 printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); 3277 3177 else 3278 3178 printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); ··· 3282 3182 u64 addr = al->offset; 3283 3183 int color = -1; 3284 3184 3285 - if (!notes->options->use_offset) 3185 + if (!annotate_opts.use_offset) 3286 3186 addr += notes->start; 3287 3187 3288 - if (!notes->options->use_offset) { 3188 + if (!annotate_opts.use_offset) { 3289 3189 printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); 3290 3190 } else { 3291 3191 if (al->jump_sources && 3292 - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { 3293 - if (notes->options->show_nr_jumps) { 3192 + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { 3193 + if (annotate_opts.show_nr_jumps) { 3294 3194 int prev; 3295 3195 printed = scnprintf(bf, sizeof(bf), "%*d ", 3296 3196 notes->widths.jumps, ··· 3304 3204 printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", 3305 3205 notes->widths.target, addr); 3306 3206 } else if (ins__is_call(&disasm_line(al)->ins) && 3307 - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { 3207 + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { 3308 3208 goto print_addr; 3309 - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { 3209 + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { 3310 3210 goto print_addr; 3311 3211 } else { 3312 3212 printed = scnprintf(bf, sizeof(bf), "%-*s ", ··· 3328 3228 } 3329 3229 3330 3230 void annotation_line__write(struct annotation_line *al, struct annotation *notes, 3331 - struct annotation_write_ops *wops, 3332 - struct annotation_options *opts) 3231 + struct annotation_write_ops *wops) 3333 3232 { 3334 3233 __annotation_line__write(al, notes, wops->first_line, wops->current_entry, 
3335 3234 wops->change_color, wops->width, wops->obj, 3336 - opts->percent_type, 3235 + annotate_opts.percent_type, 3337 3236 wops->set_color, wops->set_percent_color, 3338 3237 wops->set_jumps_percent_color, wops->printf, 3339 3238 wops->write_graph); 3340 3239 } 3341 3240 3342 3241 int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, 3343 - struct annotation_options *options, struct arch **parch) 3242 + struct arch **parch) 3344 3243 { 3345 3244 struct symbol *sym = ms->sym; 3346 3245 struct annotation *notes = symbol__annotation(sym); 3347 3246 size_t size = symbol__size(sym); 3348 3247 int nr_pcnt = 1, err; 3349 3248 3350 - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); 3351 - if (notes->offsets == NULL) 3249 + notes->src->offsets = zalloc(size * sizeof(struct annotation_line *)); 3250 + if (notes->src->offsets == NULL) 3352 3251 return ENOMEM; 3353 3252 3354 3253 if (evsel__is_group_event(evsel)) 3355 3254 nr_pcnt = evsel->core.nr_members; 3356 3255 3357 - err = symbol__annotate(ms, evsel, options, parch); 3256 + err = symbol__annotate(ms, evsel, parch); 3358 3257 if (err) 3359 3258 goto out_free_offsets; 3360 - 3361 - notes->options = options; 3362 3259 3363 3260 symbol__calc_percent(sym, evsel); 3364 3261 3365 3262 annotation__set_offsets(notes, size); 3366 3263 annotation__mark_jump_targets(notes, sym); 3367 - annotation__compute_ipc(notes, size); 3264 + 3265 + err = annotation__compute_ipc(notes, size); 3266 + if (err) 3267 + goto out_free_offsets; 3268 + 3368 3269 annotation__init_column_widths(notes, sym); 3369 3270 notes->nr_events = nr_pcnt; 3370 3271 ··· 3375 3274 return 0; 3376 3275 3377 3276 out_free_offsets: 3378 - zfree(&notes->offsets); 3277 + zfree(&notes->src->offsets); 3379 3278 return err; 3380 3279 } 3381 3280 ··· 3438 3337 return 0; 3439 3338 } 3440 3339 3441 - void annotation_options__init(struct annotation_options *opt) 3340 + void annotation_options__init(void) 3442 3341 { 3342 + struct annotation_options 
*opt = &annotate_opts; 3343 + 3443 3344 memset(opt, 0, sizeof(*opt)); 3444 3345 3445 3346 /* Default values. */ ··· 3452 3349 opt->percent_type = PERCENT_PERIOD_LOCAL; 3453 3350 } 3454 3351 3455 - 3456 - void annotation_options__exit(struct annotation_options *opt) 3352 + void annotation_options__exit(void) 3457 3353 { 3458 - zfree(&opt->disassembler_style); 3459 - zfree(&opt->objdump_path); 3354 + zfree(&annotate_opts.disassembler_style); 3355 + zfree(&annotate_opts.objdump_path); 3460 3356 } 3461 3357 3462 - void annotation_config__init(struct annotation_options *opt) 3358 + void annotation_config__init(void) 3463 3359 { 3464 - perf_config(annotation__config, opt); 3360 + perf_config(annotation__config, &annotate_opts); 3465 3361 } 3466 3362 3467 3363 static unsigned int parse_percent_type(char *str1, char *str2) ··· 3484 3382 return type; 3485 3383 } 3486 3384 3487 - int annotate_parse_percent_type(const struct option *opt, const char *_str, 3385 + int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str, 3488 3386 int unset __maybe_unused) 3489 3387 { 3490 - struct annotation_options *opts = opt->value; 3491 3388 unsigned int type; 3492 3389 char *str1, *str2; 3493 3390 int err = -1; ··· 3505 3404 if (type == (unsigned int) -1) 3506 3405 type = parse_percent_type(str2, str1); 3507 3406 if (type != (unsigned int) -1) { 3508 - opts->percent_type = type; 3407 + annotate_opts.percent_type = type; 3509 3408 err = 0; 3510 3409 } 3511 3410 ··· 3514 3413 return err; 3515 3414 } 3516 3415 3517 - int annotate_check_args(struct annotation_options *args) 3416 + int annotate_check_args(void) 3518 3417 { 3418 + struct annotation_options *args = &annotate_opts; 3419 + 3519 3420 if (args->prefix_strip && !args->prefix) { 3520 3421 pr_err("--prefix-strip requires --prefix\n"); 3521 3422 return -1; 3522 3423 } 3523 3424 return 0; 3425 + } 3426 + 3427 + /* 3428 + * Get register number and access offset from the given instruction. 
3429 + * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs 3430 + * to revisit the format when it handles different architecture. 3431 + * Fills @reg and @offset when return 0. 3432 + */ 3433 + static int extract_reg_offset(struct arch *arch, const char *str, 3434 + struct annotated_op_loc *op_loc) 3435 + { 3436 + char *p; 3437 + char *regname; 3438 + 3439 + if (arch->objdump.register_char == 0) 3440 + return -1; 3441 + 3442 + /* 3443 + * It should start from offset, but it's possible to skip 0 3444 + * in the asm. So 0(%rax) should be same as (%rax). 3445 + * 3446 + * However, it also start with a segment select register like 3447 + * %gs:0x18(%rbx). In that case it should skip the part. 3448 + */ 3449 + if (*str == arch->objdump.register_char) { 3450 + while (*str && !isdigit(*str) && 3451 + *str != arch->objdump.memory_ref_char) 3452 + str++; 3453 + } 3454 + 3455 + op_loc->offset = strtol(str, &p, 0); 3456 + 3457 + p = strchr(p, arch->objdump.register_char); 3458 + if (p == NULL) 3459 + return -1; 3460 + 3461 + regname = strdup(p); 3462 + if (regname == NULL) 3463 + return -1; 3464 + 3465 + op_loc->reg = get_dwarf_regnum(regname, 0); 3466 + free(regname); 3467 + return 0; 3468 + } 3469 + 3470 + /** 3471 + * annotate_get_insn_location - Get location of instruction 3472 + * @arch: the architecture info 3473 + * @dl: the target instruction 3474 + * @loc: a buffer to save the data 3475 + * 3476 + * Get detailed location info (register and offset) in the instruction. 3477 + * It needs both source and target operand and whether it accesses a 3478 + * memory location. The offset field is meaningful only when the 3479 + * corresponding mem flag is set. 
3480 + * 3481 + * Some examples on x86: 3482 + * 3483 + * mov (%rax), %rcx # src_reg = rax, src_mem = 1, src_offset = 0 3484 + * # dst_reg = rcx, dst_mem = 0 3485 + * 3486 + * mov 0x18, %r8 # src_reg = -1, dst_reg = r8 3487 + */ 3488 + int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, 3489 + struct annotated_insn_loc *loc) 3490 + { 3491 + struct ins_operands *ops; 3492 + struct annotated_op_loc *op_loc; 3493 + int i; 3494 + 3495 + if (!strcmp(dl->ins.name, "lock")) 3496 + ops = dl->ops.locked.ops; 3497 + else 3498 + ops = &dl->ops; 3499 + 3500 + if (ops == NULL) 3501 + return -1; 3502 + 3503 + memset(loc, 0, sizeof(*loc)); 3504 + 3505 + for_each_insn_op_loc(loc, i, op_loc) { 3506 + const char *insn_str = ops->source.raw; 3507 + 3508 + if (i == INSN_OP_TARGET) 3509 + insn_str = ops->target.raw; 3510 + 3511 + /* Invalidate the register by default */ 3512 + op_loc->reg = -1; 3513 + 3514 + if (insn_str == NULL) 3515 + continue; 3516 + 3517 + if (strchr(insn_str, arch->objdump.memory_ref_char)) { 3518 + op_loc->mem_ref = true; 3519 + extract_reg_offset(arch, insn_str, op_loc); 3520 + } else { 3521 + char *s = strdup(insn_str); 3522 + 3523 + if (s) { 3524 + op_loc->reg = get_dwarf_regnum(s, 0); 3525 + free(s); 3526 + } 3527 + } 3528 + } 3529 + 3530 + return 0; 3531 + } 3532 + 3533 + static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel) 3534 + { 3535 + struct disasm_line *dl, *tmp_dl; 3536 + struct annotation *notes; 3537 + 3538 + notes = symbol__annotation(ms->sym); 3539 + if (!list_empty(&notes->src->source)) 3540 + return; 3541 + 3542 + if (symbol__annotate(ms, evsel, NULL) < 0) 3543 + return; 3544 + 3545 + /* remove non-insn disasm lines for simplicity */ 3546 + list_for_each_entry_safe(dl, tmp_dl, &notes->src->source, al.node) { 3547 + if (dl->al.offset == -1) { 3548 + list_del(&dl->al.node); 3549 + free(dl); 3550 + } 3551 + } 3552 + } 3553 + 3554 + static struct disasm_line *find_disasm_line(struct symbol *sym, u64 
ip) 3555 + { 3556 + struct disasm_line *dl; 3557 + struct annotation *notes; 3558 + 3559 + notes = symbol__annotation(sym); 3560 + 3561 + list_for_each_entry(dl, &notes->src->source, al.node) { 3562 + if (sym->start + dl->al.offset == ip) 3563 + return dl; 3564 + } 3565 + return NULL; 3566 + } 3567 + 3568 + static struct annotated_item_stat *annotate_data_stat(struct list_head *head, 3569 + const char *name) 3570 + { 3571 + struct annotated_item_stat *istat; 3572 + 3573 + list_for_each_entry(istat, head, list) { 3574 + if (!strcmp(istat->name, name)) 3575 + return istat; 3576 + } 3577 + 3578 + istat = zalloc(sizeof(*istat)); 3579 + if (istat == NULL) 3580 + return NULL; 3581 + 3582 + istat->name = strdup(name); 3583 + if (istat->name == NULL) { 3584 + free(istat); 3585 + return NULL; 3586 + } 3587 + 3588 + list_add_tail(&istat->list, head); 3589 + return istat; 3590 + } 3591 + 3592 + /** 3593 + * hist_entry__get_data_type - find data type for given hist entry 3594 + * @he: hist entry 3595 + * 3596 + * This function first annotates the instruction at @he->ip and extracts 3597 + * register and offset info from it. Then it searches the DWARF debug 3598 + * info to get a variable and type information using the address, register, 3599 + * and offset. 
3600 + */ 3601 + struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) 3602 + { 3603 + struct map_symbol *ms = &he->ms; 3604 + struct evsel *evsel = hists_to_evsel(he->hists); 3605 + struct arch *arch; 3606 + struct disasm_line *dl; 3607 + struct annotated_insn_loc loc; 3608 + struct annotated_op_loc *op_loc; 3609 + struct annotated_data_type *mem_type; 3610 + struct annotated_item_stat *istat; 3611 + u64 ip = he->ip; 3612 + int i; 3613 + 3614 + ann_data_stat.total++; 3615 + 3616 + if (ms->map == NULL || ms->sym == NULL) { 3617 + ann_data_stat.no_sym++; 3618 + return NULL; 3619 + } 3620 + 3621 + if (!symbol_conf.init_annotation) { 3622 + ann_data_stat.no_sym++; 3623 + return NULL; 3624 + } 3625 + 3626 + if (evsel__get_arch(evsel, &arch) < 0) { 3627 + ann_data_stat.no_insn++; 3628 + return NULL; 3629 + } 3630 + 3631 + /* Make sure it runs objdump to get disasm of the function */ 3632 + symbol__ensure_annotate(ms, evsel); 3633 + 3634 + /* 3635 + * Get a disasm to extract the location from the insn. 3636 + * This is too slow... 
3637 + */ 3638 + dl = find_disasm_line(ms->sym, ip); 3639 + if (dl == NULL) { 3640 + ann_data_stat.no_insn++; 3641 + return NULL; 3642 + } 3643 + 3644 + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); 3645 + if (istat == NULL) { 3646 + ann_data_stat.no_insn++; 3647 + return NULL; 3648 + } 3649 + 3650 + if (annotate_get_insn_location(arch, dl, &loc) < 0) { 3651 + ann_data_stat.no_insn_ops++; 3652 + istat->bad++; 3653 + return NULL; 3654 + } 3655 + 3656 + for_each_insn_op_loc(&loc, i, op_loc) { 3657 + if (!op_loc->mem_ref) 3658 + continue; 3659 + 3660 + mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); 3661 + if (mem_type) 3662 + istat->good++; 3663 + else 3664 + istat->bad++; 3665 + 3666 + if (symbol_conf.annotate_data_sample) { 3667 + annotated_data_type__update_samples(mem_type, evsel, 3668 + op_loc->offset, 3669 + he->stat.nr_events, 3670 + he->stat.period); 3671 + } 3672 + he->mem_type_off = op_loc->offset; 3673 + return mem_type; 3674 + } 3675 + 3676 + ann_data_stat.no_mem_ops++; 3677 + istat->bad++; 3678 + return NULL; 3524 3679 }
+100 -44
tools/perf/util/annotate.h
··· 23 23 struct perf_sample; 24 24 struct evsel; 25 25 struct symbol; 26 + struct annotated_data_type; 26 27 27 28 struct ins { 28 29 const char *name; ··· 32 31 33 32 struct ins_operands { 34 33 char *raw; 35 - char *raw_comment; 36 - char *raw_func_start; 37 34 struct { 38 35 char *raw; 39 36 char *name; ··· 40 41 s64 offset; 41 42 bool offset_avail; 42 43 bool outside; 44 + bool multi_regs; 43 45 } target; 44 46 union { 45 47 struct { 46 48 char *raw; 47 49 char *name; 48 50 u64 addr; 51 + bool multi_regs; 49 52 } source; 50 53 struct { 51 54 struct ins ins; 52 55 struct ins_operands *ops; 53 56 } locked; 57 + struct { 58 + char *raw_comment; 59 + char *raw_func_start; 60 + } jump; 54 61 }; 55 62 }; 56 63 57 64 struct arch; 65 + 66 + bool arch__is(struct arch *arch, const char *name); 58 67 59 68 struct ins_ops { 60 69 void (*free)(struct ins_operands *ops); ··· 108 101 unsigned int percent_type; 109 102 }; 110 103 104 + extern struct annotation_options annotate_opts; 105 + 111 106 enum { 112 107 ANNOTATION__OFFSET_JUMP_TARGETS = 1, 113 108 ANNOTATION__OFFSET_CALL, ··· 139 130 struct sym_hist_entry he; 140 131 }; 141 132 133 + struct cycles_info { 134 + float ipc; 135 + u64 avg; 136 + u64 max; 137 + u64 min; 138 + }; 139 + 142 140 struct annotation_line { 143 141 struct list_head node; 144 142 struct rb_node rb_node; ··· 153 137 char *line; 154 138 int line_nr; 155 139 char *fileloc; 156 - int jump_sources; 157 - float ipc; 158 - u64 cycles; 159 - u64 cycles_max; 160 - u64 cycles_min; 161 140 char *path; 141 + struct cycles_info *cycles; 142 + int jump_sources; 162 143 u32 idx; 163 144 int idx_asm; 164 145 int data_nr; ··· 227 214 }; 228 215 229 216 void annotation_line__write(struct annotation_line *al, struct annotation *notes, 230 - struct annotation_write_ops *ops, 231 - struct annotation_options *opts); 217 + struct annotation_write_ops *ops); 232 218 233 219 int __annotation__scnprintf_samples_period(struct annotation *notes, 234 220 char *bf, size_t 
size, ··· 276 264 * returns. 277 265 */ 278 266 struct annotated_source { 279 - struct list_head source; 280 - int nr_histograms; 281 - size_t sizeof_sym_hist; 282 - struct cyc_hist *cycles_hist; 283 - struct sym_hist *histograms; 267 + struct list_head source; 268 + size_t sizeof_sym_hist; 269 + struct sym_hist *histograms; 270 + struct annotation_line **offsets; 271 + int nr_histograms; 272 + int nr_entries; 273 + int nr_asm_entries; 274 + u16 max_line_len; 284 275 }; 285 276 286 - struct LOCKABLE annotation { 287 - u64 max_coverage; 288 - u64 start; 277 + struct annotated_branch { 289 278 u64 hit_cycles; 290 279 u64 hit_insn; 291 280 unsigned int total_insn; 292 281 unsigned int cover_insn; 293 - struct annotation_options *options; 294 - struct annotation_line **offsets; 282 + struct cyc_hist *cycles_hist; 283 + u64 max_coverage; 284 + }; 285 + 286 + struct LOCKABLE annotation { 287 + u64 start; 295 288 int nr_events; 296 289 int max_jump_sources; 297 - int nr_entries; 298 - int nr_asm_entries; 299 - u16 max_line_len; 300 290 struct { 301 291 u8 addr; 302 292 u8 jumps; ··· 307 293 u8 max_addr; 308 294 u8 max_ins_name; 309 295 } widths; 310 - bool have_cycles; 311 296 struct annotated_source *src; 297 + struct annotated_branch *branch; 312 298 }; 313 299 314 300 static inline void annotation__init(struct annotation *notes __maybe_unused) ··· 322 308 323 309 static inline int annotation__cycles_width(struct annotation *notes) 324 310 { 325 - if (notes->have_cycles && notes->options->show_minmax_cycle) 311 + if (notes->branch && annotate_opts.show_minmax_cycle) 326 312 return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; 327 313 328 - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; 314 + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; 329 315 } 330 316 331 317 static inline int annotation__pcnt_width(struct annotation *notes) ··· 333 319 return (symbol_conf.show_total_period ? 
12 : 7) * notes->nr_events; 334 320 } 335 321 336 - static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) 322 + static inline bool annotation_line__filter(struct annotation_line *al) 337 323 { 338 - return notes->options->hide_src_code && al->offset == -1; 324 + return annotate_opts.hide_src_code && al->offset == -1; 339 325 } 340 326 341 327 void annotation__set_offsets(struct annotation *notes, s64 size); 342 - void annotation__compute_ipc(struct annotation *notes, size_t size); 343 328 void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); 344 329 void annotation__update_column_widths(struct annotation *notes); 345 330 void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); ··· 362 349 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, 363 350 struct evsel *evsel); 364 351 352 + struct annotated_branch *annotation__get_branch(struct annotation *notes); 353 + 365 354 int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, 366 355 struct addr_map_symbol *start, 367 356 unsigned cycles); ··· 376 361 377 362 int symbol__annotate(struct map_symbol *ms, 378 363 struct evsel *evsel, 379 - struct annotation_options *options, 380 364 struct arch **parch); 381 365 int symbol__annotate2(struct map_symbol *ms, 382 366 struct evsel *evsel, 383 - struct annotation_options *options, 384 367 struct arch **parch); 385 368 386 369 enum symbol_disassemble_errno { ··· 405 392 406 393 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); 407 394 408 - int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, 409 - struct annotation_options *options); 395 + int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); 410 396 void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); 411 397 void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); 412 
398 void annotated_source__purge(struct annotated_source *as); 413 399 414 - int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, 415 - struct annotation_options *opts); 400 + int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); 416 401 417 402 bool ui__has_annotation(void); 418 403 419 - int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); 404 + int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); 420 405 421 - int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); 406 + int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); 422 407 423 408 #ifdef HAVE_SLANG_SUPPORT 424 409 int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, 425 - struct hist_browser_timer *hbt, 426 - struct annotation_options *opts); 410 + struct hist_browser_timer *hbt); 427 411 #else 428 412 static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, 429 413 struct evsel *evsel __maybe_unused, 430 - struct hist_browser_timer *hbt __maybe_unused, 431 - struct annotation_options *opts __maybe_unused) 414 + struct hist_browser_timer *hbt __maybe_unused) 432 415 { 433 416 return 0; 434 417 } 435 418 #endif 436 419 437 - void annotation_options__init(struct annotation_options *opt); 438 - void annotation_options__exit(struct annotation_options *opt); 420 + void annotation_options__init(void); 421 + void annotation_options__exit(void); 439 422 440 - void annotation_config__init(struct annotation_options *opt); 423 + void annotation_config__init(void); 441 424 442 425 int annotate_parse_percent_type(const struct option *opt, const char *_str, 443 426 int unset); 444 427 445 - int annotate_check_args(struct annotation_options *args); 428 + int annotate_check_args(void); 429 + 430 + /** 431 + * struct annotated_op_loc - Location info of instruction operand 432 + * @reg: Register in the operand 
433 + * @offset: Memory access offset in the operand 434 + * @mem_ref: Whether the operand accesses memory 435 + */ 436 + struct annotated_op_loc { 437 + int reg; 438 + int offset; 439 + bool mem_ref; 440 + }; 441 + 442 + enum annotated_insn_ops { 443 + INSN_OP_SOURCE = 0, 444 + INSN_OP_TARGET = 1, 445 + 446 + INSN_OP_MAX, 447 + }; 448 + 449 + /** 450 + * struct annotated_insn_loc - Location info of instruction 451 + * @ops: Array of location info for source and target operands 452 + */ 453 + struct annotated_insn_loc { 454 + struct annotated_op_loc ops[INSN_OP_MAX]; 455 + }; 456 + 457 + #define for_each_insn_op_loc(insn_loc, i, op_loc) \ 458 + for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i]; \ 459 + i < INSN_OP_MAX; \ 460 + i++, op_loc++) 461 + 462 + /* Get detailed location info in the instruction */ 463 + int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, 464 + struct annotated_insn_loc *loc); 465 + 466 + /* Returns a data type from the sample instruction (if any) */ 467 + struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he); 468 + 469 + struct annotated_item_stat { 470 + struct list_head list; 471 + char *name; 472 + int good; 473 + int bad; 474 + }; 475 + extern struct list_head ann_insn_stat; 446 476 447 477 #endif /* __PERF_ANNOTATE_H */
+5 -2
tools/perf/util/auxtrace.c
··· 174 174 struct evlist *evlist, 175 175 struct evsel *evsel, int idx) 176 176 { 177 - bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); 177 + bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); 178 178 179 179 mp->mmap_needed = evsel->needs_auxtrace_mmap; 180 180 ··· 648 648 649 649 static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) 650 650 { 651 - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); 651 + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); 652 652 653 653 if (per_cpu_mmaps) { 654 654 struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); ··· 1637 1637 break; 1638 1638 case 'Z': 1639 1639 synth_opts->timeless_decoding = true; 1640 + break; 1641 + case 'T': 1642 + synth_opts->use_timestamp = true; 1640 1643 break; 1641 1644 case ' ': 1642 1645 case ',':
+3
tools/perf/util/auxtrace.h
··· 99 99 * @remote_access: whether to synthesize remote access events 100 100 * @mem: whether to synthesize memory events 101 101 * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps 102 + * @use_timestamp: use the timestamp trace as kernel time 102 103 * @vm_time_correlation: perform VM Time Correlation 103 104 * @vm_tm_corr_dry_run: VM Time Correlation dry-run 104 105 * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments ··· 147 146 bool remote_access; 148 147 bool mem; 149 148 bool timeless_decoding; 149 + bool use_timestamp; 150 150 bool vm_time_correlation; 151 151 bool vm_tm_corr_dry_run; 152 152 char *vm_tm_corr_args; ··· 680 678 " q: quicker (less detailed) decoding\n" \ 681 679 " A: approximate IPC\n" \ 682 680 " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ 681 + " T: use the timestamp trace as kernel time\n" \ 683 682 " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ 684 683 " concatenate multiple options. Default is iybxwpe or cewp\n" 685 684
+4 -6
tools/perf/util/block-info.c
··· 129 129 al.sym = he->ms.sym; 130 130 131 131 notes = symbol__annotation(he->ms.sym); 132 - if (!notes || !notes->src || !notes->src->cycles_hist) 132 + if (!notes || !notes->branch || !notes->branch->cycles_hist) 133 133 return 0; 134 - ch = notes->src->cycles_hist; 134 + ch = notes->branch->cycles_hist; 135 135 for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { 136 136 if (ch[i].num_aggr) { 137 137 struct block_info *bi; ··· 464 464 } 465 465 466 466 int report__browse_block_hists(struct block_hist *bh, float min_percent, 467 - struct evsel *evsel, struct perf_env *env, 468 - struct annotation_options *annotation_opts) 467 + struct evsel *evsel, struct perf_env *env) 469 468 { 470 469 int ret; 471 470 ··· 476 477 return 0; 477 478 case 1: 478 479 symbol_conf.report_individual_block = true; 479 - ret = block_hists_tui_browse(bh, evsel, min_percent, 480 - env, annotation_opts); 480 + ret = block_hists_tui_browse(bh, evsel, min_percent, env); 481 481 return ret; 482 482 default: 483 483 return -1;
+1 -2
tools/perf/util/block-info.h
··· 78 78 void block_info__free_report(struct block_report *reps, int nr_reps); 79 79 80 80 int report__browse_block_hists(struct block_hist *bh, float min_percent, 81 - struct evsel *evsel, struct perf_env *env, 82 - struct annotation_options *annotation_opts); 81 + struct evsel *evsel, struct perf_env *env); 83 82 84 83 float block_info__total_cycles_percent(struct hist_entry *he); 85 84
+6 -1
tools/perf/util/block-range.c
··· 311 311 double block_range__coverage(struct block_range *br) 312 312 { 313 313 struct symbol *sym; 314 + struct annotated_branch *branch; 314 315 315 316 if (!br) { 316 317 if (block_ranges.blocks) ··· 324 323 if (!sym) 325 324 return -1; 326 325 327 - return (double)br->coverage / symbol__annotation(sym)->max_coverage; 326 + branch = symbol__annotation(sym)->branch; 327 + if (!branch) 328 + return -1; 329 + 330 + return (double)br->coverage / branch->max_coverage; 328 331 }
+7 -4
tools/perf/util/bpf-event.c
··· 386 386 int err; 387 387 int fd; 388 388 389 + if (opts->no_bpf_event) 390 + return 0; 391 + 389 392 event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); 390 393 if (!event) 391 394 return -1; ··· 545 542 return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); 546 543 } 547 544 548 - void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, 549 - struct perf_env *env, 550 - FILE *fp) 545 + void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, 546 + struct perf_env *env, 547 + FILE *fp) 551 548 { 552 549 __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); 553 550 __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); ··· 563 560 if (info->btf_id) { 564 561 struct btf_node *node; 565 562 566 - node = perf_env__find_btf(env, info->btf_id); 563 + node = __perf_env__find_btf(env, info->btf_id); 567 564 if (node) 568 565 btf = btf__new((__u8 *)(node->data), 569 566 node->data_size);
+6 -6
tools/perf/util/bpf-event.h
··· 33 33 int machine__process_bpf(struct machine *machine, union perf_event *event, 34 34 struct perf_sample *sample); 35 35 int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); 36 - void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, 37 - struct perf_env *env, 38 - FILE *fp); 36 + void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, 37 + struct perf_env *env, 38 + FILE *fp); 39 39 #else 40 40 static inline int machine__process_bpf(struct machine *machine __maybe_unused, 41 41 union perf_event *event __maybe_unused, ··· 50 50 return 0; 51 51 } 52 52 53 - static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, 54 - struct perf_env *env __maybe_unused, 55 - FILE *fp __maybe_unused) 53 + static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, 54 + struct perf_env *env __maybe_unused, 55 + FILE *fp __maybe_unused) 56 56 { 57 57 58 58 }
+1 -1
tools/perf/util/bpf_counter.c
··· 455 455 return -1; 456 456 457 457 if (!all_cpu_map) { 458 - all_cpu_map = perf_cpu_map__new(NULL); 458 + all_cpu_map = perf_cpu_map__new_online_cpus(); 459 459 if (!all_cpu_map) 460 460 return -1; 461 461 }
+1 -1
tools/perf/util/bpf_lock_contention.c
··· 318 318 } 319 319 320 320 /* make sure it loads the kernel map */ 321 - map__load(maps__first(machine->kmaps)->map); 321 + maps__load_first(machine->kmaps); 322 322 323 323 prev_key = NULL; 324 324 while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+5 -2
tools/perf/util/compress.h
··· 3 3 #define PERF_COMPRESS_H 4 4 5 5 #include <stdbool.h> 6 + #include <stddef.h> 7 + #include <sys/types.h> 6 8 #ifdef HAVE_ZSTD_SUPPORT 7 9 #include <zstd.h> 8 10 #endif ··· 23 21 #ifdef HAVE_ZSTD_SUPPORT 24 22 ZSTD_CStream *cstream; 25 23 ZSTD_DStream *dstream; 24 + int comp_level; 26 25 #endif 27 26 }; 28 27 ··· 32 29 int zstd_init(struct zstd_data *data, int level); 33 30 int zstd_fini(struct zstd_data *data); 34 31 35 - size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 32 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 36 33 void *src, size_t src_size, size_t max_record_size, 37 34 size_t process_header(void *record, size_t increment)); 38 35 ··· 51 48 } 52 49 53 50 static inline 54 - size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, 51 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, 55 52 void *dst __maybe_unused, size_t dst_size __maybe_unused, 56 53 void *src __maybe_unused, size_t src_size __maybe_unused, 57 54 size_t max_record_size __maybe_unused,
+1 -1
tools/perf/util/cpumap.c
··· 672 672 static struct perf_cpu_map *online; 673 673 674 674 if (!online) 675 - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ 675 + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ 676 676 677 677 return online; 678 678 }
+1 -1
tools/perf/util/cputopo.c
··· 267 267 ncpus = cpu__max_present_cpu().cpu; 268 268 269 269 /* build online CPU map */ 270 - map = perf_cpu_map__new(NULL); 270 + map = perf_cpu_map__new_online_cpus(); 271 271 if (map == NULL) { 272 272 pr_debug("failed to get system cpumap\n"); 273 273 return NULL;
+18 -3
tools/perf/util/cs-etm.c
··· 3346 3346 etm->metadata = metadata; 3347 3347 etm->auxtrace_type = auxtrace_info->type; 3348 3348 3349 - /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3350 - etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3349 + if (etm->synth_opts.use_timestamp) 3350 + /* 3351 + * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, 3352 + * therefore the decoder cannot know if the timestamp trace is 3353 + * same with the kernel time. 3354 + * 3355 + * If a user has knowledge for the working platform and can 3356 + * specify itrace option 'T' to tell decoder to forcely use the 3357 + * traced timestamp as the kernel time. 3358 + */ 3359 + etm->has_virtual_ts = true; 3360 + else 3361 + /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3362 + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3351 3363 3352 3364 if (!etm->has_virtual_ts) 3353 3365 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" 3354 - "The time field of the samples will not be set accurately.\n\n"); 3366 + "The time field of the samples will not be set accurately.\n" 3367 + "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" 3368 + "you can specify the itrace option 'T' for timestamp decoding\n" 3369 + "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); 3355 3370 3356 3371 etm->auxtrace.process_event = cs_etm__process_event; 3357 3372 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
+2 -2
tools/perf/util/db-export.c
··· 253 253 */ 254 254 addr_location__init(&al); 255 255 al.sym = node->ms.sym; 256 - al.map = node->ms.map; 257 - al.maps = thread__maps(thread); 256 + al.map = map__get(node->ms.map); 257 + al.maps = maps__get(thread__maps(thread)); 258 258 al.addr = node->ip; 259 259 260 260 if (al.map && !al.sym)
+15 -7
tools/perf/util/debug.c
··· 38 38 int debug_ordered_events; 39 39 static int redirect_to_stderr; 40 40 int debug_data_convert; 41 - static FILE *debug_file; 41 + static FILE *_debug_file; 42 42 bool debug_display_time; 43 + 44 + FILE *debug_file(void) 45 + { 46 + if (!_debug_file) { 47 + pr_warning_once("debug_file not set"); 48 + debug_set_file(stderr); 49 + } 50 + return _debug_file; 51 + } 43 52 44 53 void debug_set_file(FILE *file) 45 54 { 46 - debug_file = file; 55 + _debug_file = file; 47 56 } 48 57 49 58 void debug_set_display_time(bool set) ··· 87 78 if (use_browser >= 1 && !redirect_to_stderr) { 88 79 ui_helpline__vshow(fmt, args); 89 80 } else { 90 - ret = fprintf_time(debug_file); 91 - ret += vfprintf(debug_file, fmt, args); 81 + ret = fprintf_time(debug_file()); 82 + ret += vfprintf(debug_file(), fmt, args); 92 83 } 93 84 } 94 85 ··· 116 107 nsecs -= secs * NSEC_PER_SEC; 117 108 usecs = nsecs / NSEC_PER_USEC; 118 109 119 - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", 120 - secs, usecs); 121 - ret += vfprintf(stderr, fmt, args); 110 + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); 111 + ret += vfprintf(debug_file(), fmt, args); 122 112 return ret; 123 113 } 124 114
+1
tools/perf/util/debug.h
··· 77 77 int veprintf(int level, int var, const char *fmt, va_list args); 78 78 79 79 int perf_debug_option(const char *str); 80 + FILE *debug_file(void); 80 81 void debug_set_file(FILE *file); 81 82 void debug_set_display_time(bool set); 82 83 void perf_debug_setup(void);
+205
tools/perf/util/debuginfo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * DWARF debug information handling code. Copied from probe-finder.c. 4 + * 5 + * Written by Masami Hiramatsu <mhiramat@redhat.com> 6 + */ 7 + 8 + #include <errno.h> 9 + #include <fcntl.h> 10 + #include <stdio.h> 11 + #include <stdlib.h> 12 + #include <string.h> 13 + #include <unistd.h> 14 + #include <linux/zalloc.h> 15 + 16 + #include "build-id.h" 17 + #include "dso.h" 18 + #include "debug.h" 19 + #include "debuginfo.h" 20 + #include "symbol.h" 21 + 22 + #ifdef HAVE_DEBUGINFOD_SUPPORT 23 + #include <elfutils/debuginfod.h> 24 + #endif 25 + 26 + /* Dwarf FL wrappers */ 27 + static char *debuginfo_path; /* Currently dummy */ 28 + 29 + static const Dwfl_Callbacks offline_callbacks = { 30 + .find_debuginfo = dwfl_standard_find_debuginfo, 31 + .debuginfo_path = &debuginfo_path, 32 + 33 + .section_address = dwfl_offline_section_address, 34 + 35 + /* We use this table for core files too. */ 36 + .find_elf = dwfl_build_id_find_elf, 37 + }; 38 + 39 + /* Get a Dwarf from offline image */ 40 + static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, 41 + const char *path) 42 + { 43 + GElf_Addr dummy; 44 + int fd; 45 + 46 + fd = open(path, O_RDONLY); 47 + if (fd < 0) 48 + return fd; 49 + 50 + dbg->dwfl = dwfl_begin(&offline_callbacks); 51 + if (!dbg->dwfl) 52 + goto error; 53 + 54 + dwfl_report_begin(dbg->dwfl); 55 + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); 56 + if (!dbg->mod) 57 + goto error; 58 + 59 + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); 60 + if (!dbg->dbg) 61 + goto error; 62 + 63 + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); 64 + 65 + dwfl_report_end(dbg->dwfl, NULL, NULL); 66 + 67 + return 0; 68 + error: 69 + if (dbg->dwfl) 70 + dwfl_end(dbg->dwfl); 71 + else 72 + close(fd); 73 + memset(dbg, 0, sizeof(*dbg)); 74 + 75 + return -ENOENT; 76 + } 77 + 78 + static struct debuginfo *__debuginfo__new(const char *path) 79 + { 80 + struct debuginfo *dbg = 
zalloc(sizeof(*dbg)); 81 + if (!dbg) 82 + return NULL; 83 + 84 + if (debuginfo__init_offline_dwarf(dbg, path) < 0) 85 + zfree(&dbg); 86 + if (dbg) 87 + pr_debug("Open Debuginfo file: %s\n", path); 88 + return dbg; 89 + } 90 + 91 + enum dso_binary_type distro_dwarf_types[] = { 92 + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, 93 + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, 94 + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, 95 + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, 96 + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, 97 + DSO_BINARY_TYPE__NOT_FOUND, 98 + }; 99 + 100 + struct debuginfo *debuginfo__new(const char *path) 101 + { 102 + enum dso_binary_type *type; 103 + char buf[PATH_MAX], nil = '\0'; 104 + struct dso *dso; 105 + struct debuginfo *dinfo = NULL; 106 + struct build_id bid; 107 + 108 + /* Try to open distro debuginfo files */ 109 + dso = dso__new(path); 110 + if (!dso) 111 + goto out; 112 + 113 + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ 114 + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) 115 + dso__set_build_id(dso, &bid); 116 + 117 + for (type = distro_dwarf_types; 118 + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; 119 + type++) { 120 + if (dso__read_binary_type_filename(dso, *type, &nil, 121 + buf, PATH_MAX) < 0) 122 + continue; 123 + dinfo = __debuginfo__new(buf); 124 + } 125 + dso__put(dso); 126 + 127 + out: 128 + /* if failed to open all distro debuginfo, open given binary */ 129 + return dinfo ? 
: __debuginfo__new(path); 130 + } 131 + 132 + void debuginfo__delete(struct debuginfo *dbg) 133 + { 134 + if (dbg) { 135 + if (dbg->dwfl) 136 + dwfl_end(dbg->dwfl); 137 + free(dbg); 138 + } 139 + } 140 + 141 + /* For the kernel module, we need a special code to get a DIE */ 142 + int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, 143 + bool adjust_offset) 144 + { 145 + int n, i; 146 + Elf32_Word shndx; 147 + Elf_Scn *scn; 148 + Elf *elf; 149 + GElf_Shdr mem, *shdr; 150 + const char *p; 151 + 152 + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); 153 + if (!elf) 154 + return -EINVAL; 155 + 156 + /* Get the number of relocations */ 157 + n = dwfl_module_relocations(dbg->mod); 158 + if (n < 0) 159 + return -ENOENT; 160 + /* Search the relocation related .text section */ 161 + for (i = 0; i < n; i++) { 162 + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); 163 + if (strcmp(p, ".text") == 0) { 164 + /* OK, get the section header */ 165 + scn = elf_getscn(elf, shndx); 166 + if (!scn) 167 + return -ENOENT; 168 + shdr = gelf_getshdr(scn, &mem); 169 + if (!shdr) 170 + return -ENOENT; 171 + *offs = shdr->sh_addr; 172 + if (adjust_offset) 173 + *offs -= shdr->sh_offset; 174 + } 175 + } 176 + return 0; 177 + } 178 + 179 + #ifdef HAVE_DEBUGINFOD_SUPPORT 180 + int get_source_from_debuginfod(const char *raw_path, 181 + const char *sbuild_id, char **new_path) 182 + { 183 + debuginfod_client *c = debuginfod_begin(); 184 + const char *p = raw_path; 185 + int fd; 186 + 187 + if (!c) 188 + return -ENOMEM; 189 + 190 + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, 191 + 0, p, new_path); 192 + pr_debug("Search %s from debuginfod -> %d\n", p, fd); 193 + if (fd >= 0) 194 + close(fd); 195 + debuginfod_end(c); 196 + if (fd < 0) { 197 + pr_debug("Failed to find %s in debuginfod (%s)\n", 198 + raw_path, sbuild_id); 199 + return -ENOENT; 200 + } 201 + pr_debug("Got a source %s\n", *new_path); 202 + 203 + return 0; 204 + } 205 + #endif /* 
HAVE_DEBUGINFOD_SUPPORT */
+64
tools/perf/util/debuginfo.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + #ifndef _PERF_DEBUGINFO_H 3 + #define _PERF_DEBUGINFO_H 4 + 5 + #include <errno.h> 6 + #include <linux/compiler.h> 7 + 8 + #ifdef HAVE_DWARF_SUPPORT 9 + 10 + #include "dwarf-aux.h" 11 + 12 + /* debug information structure */ 13 + struct debuginfo { 14 + Dwarf *dbg; 15 + Dwfl_Module *mod; 16 + Dwfl *dwfl; 17 + Dwarf_Addr bias; 18 + const unsigned char *build_id; 19 + }; 20 + 21 + /* This also tries to open distro debuginfo */ 22 + struct debuginfo *debuginfo__new(const char *path); 23 + void debuginfo__delete(struct debuginfo *dbg); 24 + 25 + int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, 26 + bool adjust_offset); 27 + 28 + #else /* HAVE_DWARF_SUPPORT */ 29 + 30 + /* dummy debug information structure */ 31 + struct debuginfo { 32 + }; 33 + 34 + static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) 35 + { 36 + return NULL; 37 + } 38 + 39 + static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) 40 + { 41 + } 42 + 43 + static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, 44 + Dwarf_Addr *offs __maybe_unused, 45 + bool adjust_offset __maybe_unused) 46 + { 47 + return -EINVAL; 48 + } 49 + 50 + #endif /* HAVE_DWARF_SUPPORT */ 51 + 52 + #ifdef HAVE_DEBUGINFOD_SUPPORT 53 + int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, 54 + char **new_path); 55 + #else /* HAVE_DEBUGINFOD_SUPPORT */ 56 + static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, 57 + const char *sbuild_id __maybe_unused, 58 + char **new_path __maybe_unused) 59 + { 60 + return -ENOTSUP; 61 + } 62 + #endif /* HAVE_DEBUGINFOD_SUPPORT */ 63 + 64 + #endif /* _PERF_DEBUGINFO_H */
+4
tools/perf/util/dso.c
··· 31 31 #include "debug.h" 32 32 #include "string2.h" 33 33 #include "vdso.h" 34 + #include "annotate-data.h" 34 35 35 36 static const char * const debuglink_paths[] = { 36 37 "%.0s%s", ··· 1328 1327 dso->data.cache = RB_ROOT; 1329 1328 dso->inlined_nodes = RB_ROOT_CACHED; 1330 1329 dso->srclines = RB_ROOT_CACHED; 1330 + dso->data_types = RB_ROOT; 1331 1331 dso->data.fd = -1; 1332 1332 dso->data.status = DSO_DATA_STATUS_UNKNOWN; 1333 1333 dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; ··· 1372 1370 symbols__delete(&dso->symbols); 1373 1371 dso->symbol_names_len = 0; 1374 1372 zfree(&dso->symbol_names); 1373 + annotated_data_type__tree_delete(&dso->data_types); 1374 + 1375 1375 if (dso->short_name_allocated) { 1376 1376 zfree((char **)&dso->short_name); 1377 1377 dso->short_name_allocated = false;
+2
tools/perf/util/dso.h
··· 154 154 size_t symbol_names_len; 155 155 struct rb_root_cached inlined_nodes; 156 156 struct rb_root_cached srclines; 157 + struct rb_root data_types; 158 + 157 159 struct { 158 160 u64 addr; 159 161 struct symbol *symbol;
+232 -17
tools/perf/util/dwarf-aux.c
··· 1051 1051 } 1052 1052 1053 1053 /** 1054 - * die_get_typename - Get the name of given variable DIE 1055 - * @vr_die: a variable DIE 1054 + * die_get_typename_from_type - Get the name of given type DIE 1055 + * @type_die: a type DIE 1056 1056 * @buf: a strbuf for result type name 1057 1057 * 1058 - * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. 1058 + * Get the name of @type_die and stores it to @buf. Return 0 if succeeded. 1059 1059 * and Return -ENOENT if failed to find type name. 1060 1060 * Note that the result will stores typedef name if possible, and stores 1061 1061 * "*(function_type)" if the type is a function pointer. 1062 1062 */ 1063 - int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) 1063 + int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf) 1064 1064 { 1065 - Dwarf_Die type; 1066 1065 int tag, ret; 1067 1066 const char *tmp = ""; 1068 1067 1069 - if (__die_get_real_type(vr_die, &type) == NULL) 1070 - return -ENOENT; 1071 - 1072 - tag = dwarf_tag(&type); 1068 + tag = dwarf_tag(type_die); 1073 1069 if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) 1074 1070 tmp = "*"; 1075 1071 else if (tag == DW_TAG_subroutine_type) { 1076 1072 /* Function pointer */ 1077 1073 return strbuf_add(buf, "(function_type)", 15); 1078 1074 } else { 1079 - const char *name = dwarf_diename(&type); 1075 + const char *name = dwarf_diename(type_die); 1080 1076 1081 1077 if (tag == DW_TAG_union_type) 1082 1078 tmp = "union "; ··· 1085 1089 /* Write a base name */ 1086 1090 return strbuf_addf(buf, "%s%s", tmp, name ?: ""); 1087 1091 } 1088 - ret = die_get_typename(&type, buf); 1089 - return ret ? 
ret : strbuf_addstr(buf, tmp); 1092 + ret = die_get_typename(type_die, buf); 1093 + if (ret < 0) { 1094 + /* void pointer has no type attribute */ 1095 + if (tag == DW_TAG_pointer_type && ret == -ENOENT) 1096 + return strbuf_addf(buf, "void*"); 1097 + 1098 + return ret; 1099 + } 1100 + return strbuf_addstr(buf, tmp); 1101 + } 1102 + 1103 + /** 1104 + * die_get_typename - Get the name of given variable DIE 1105 + * @vr_die: a variable DIE 1106 + * @buf: a strbuf for result type name 1107 + * 1108 + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. 1109 + * and Return -ENOENT if failed to find type name. 1110 + * Note that the result will stores typedef name if possible, and stores 1111 + * "*(function_type)" if the type is a function pointer. 1112 + */ 1113 + int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) 1114 + { 1115 + Dwarf_Die type; 1116 + 1117 + if (__die_get_real_type(vr_die, &type) == NULL) 1118 + return -ENOENT; 1119 + 1120 + return die_get_typename_from_type(&type, buf); 1090 1121 } 1091 1122 1092 1123 /** ··· 1261 1238 out: 1262 1239 return ret; 1263 1240 } 1264 - #else 1265 - int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, 1266 - Dwarf_Die *vr_die __maybe_unused, 1267 - struct strbuf *buf __maybe_unused) 1241 + 1242 + /* Interval parameters for __die_find_var_reg_cb() */ 1243 + struct find_var_data { 1244 + /* Target instruction address */ 1245 + Dwarf_Addr pc; 1246 + /* Target memory address (for global data) */ 1247 + Dwarf_Addr addr; 1248 + /* Target register */ 1249 + unsigned reg; 1250 + /* Access offset, set for global data */ 1251 + int offset; 1252 + }; 1253 + 1254 + /* Max number of registers DW_OP_regN supports */ 1255 + #define DWARF_OP_DIRECT_REGS 32 1256 + 1257 + /* Only checks direct child DIEs in the given scope. 
*/ 1258 + static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) 1268 1259 { 1269 - return -ENOTSUP; 1260 + struct find_var_data *data = arg; 1261 + int tag = dwarf_tag(die_mem); 1262 + ptrdiff_t off = 0; 1263 + Dwarf_Attribute attr; 1264 + Dwarf_Addr base, start, end; 1265 + Dwarf_Op *ops; 1266 + size_t nops; 1267 + 1268 + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) 1269 + return DIE_FIND_CB_SIBLING; 1270 + 1271 + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) 1272 + return DIE_FIND_CB_SIBLING; 1273 + 1274 + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { 1275 + /* Assuming the location list is sorted by address */ 1276 + if (end < data->pc) 1277 + continue; 1278 + if (start > data->pc) 1279 + break; 1280 + 1281 + /* Only match with a simple case */ 1282 + if (data->reg < DWARF_OP_DIRECT_REGS) { 1283 + if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1) 1284 + return DIE_FIND_CB_END; 1285 + } else { 1286 + if (ops->atom == DW_OP_regx && ops->number == data->reg && 1287 + nops == 1) 1288 + return DIE_FIND_CB_END; 1289 + } 1290 + } 1291 + return DIE_FIND_CB_SIBLING; 1292 + } 1293 + 1294 + /** 1295 + * die_find_variable_by_reg - Find a variable saved in a register 1296 + * @sc_die: a scope DIE 1297 + * @pc: the program address to find 1298 + * @reg: the register number to find 1299 + * @die_mem: a buffer to save the resulting DIE 1300 + * 1301 + * Find the variable DIE accessed by the given register. 
1302 + */ 1303 + Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, 1304 + Dwarf_Die *die_mem) 1305 + { 1306 + struct find_var_data data = { 1307 + .pc = pc, 1308 + .reg = reg, 1309 + }; 1310 + return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); 1311 + } 1312 + 1313 + /* Only checks direct child DIEs in the given scope */ 1314 + static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) 1315 + { 1316 + struct find_var_data *data = arg; 1317 + int tag = dwarf_tag(die_mem); 1318 + ptrdiff_t off = 0; 1319 + Dwarf_Attribute attr; 1320 + Dwarf_Addr base, start, end; 1321 + Dwarf_Word size; 1322 + Dwarf_Die type_die; 1323 + Dwarf_Op *ops; 1324 + size_t nops; 1325 + 1326 + if (tag != DW_TAG_variable) 1327 + return DIE_FIND_CB_SIBLING; 1328 + 1329 + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) 1330 + return DIE_FIND_CB_SIBLING; 1331 + 1332 + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { 1333 + if (ops->atom != DW_OP_addr) 1334 + continue; 1335 + 1336 + if (data->addr < ops->number) 1337 + continue; 1338 + 1339 + if (data->addr == ops->number) { 1340 + /* Update offset relative to the start of the variable */ 1341 + data->offset = 0; 1342 + return DIE_FIND_CB_END; 1343 + } 1344 + 1345 + if (die_get_real_type(die_mem, &type_die) == NULL) 1346 + continue; 1347 + 1348 + if (dwarf_aggregate_size(&type_die, &size) < 0) 1349 + continue; 1350 + 1351 + if (data->addr >= ops->number + size) 1352 + continue; 1353 + 1354 + /* Update offset relative to the start of the variable */ 1355 + data->offset = data->addr - ops->number; 1356 + return DIE_FIND_CB_END; 1357 + } 1358 + return DIE_FIND_CB_SIBLING; 1359 + } 1360 + 1361 + /** 1362 + * die_find_variable_by_addr - Find variable located at given address 1363 + * @sc_die: a scope DIE 1364 + * @pc: the program address to find 1365 + * @addr: the data address to find 1366 + * @die_mem: a buffer to save the resulting DIE 1367 + * @offset: 
the offset in the resulting type 1368 + * 1369 + * Find the variable DIE located at the given address (in PC-relative mode). 1370 + * This is usually for global variables. 1371 + */ 1372 + Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, 1373 + Dwarf_Addr addr, Dwarf_Die *die_mem, 1374 + int *offset) 1375 + { 1376 + struct find_var_data data = { 1377 + .pc = pc, 1378 + .addr = addr, 1379 + }; 1380 + Dwarf_Die *result; 1381 + 1382 + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); 1383 + if (result) 1384 + *offset = data.offset; 1385 + return result; 1270 1386 } 1271 1387 #endif 1272 1388 ··· 1586 1424 return; 1587 1425 1588 1426 *entrypc = postprologue_addr; 1427 + } 1428 + 1429 + /* Internal parameters for __die_find_scope_cb() */ 1430 + struct find_scope_data { 1431 + /* Target instruction address */ 1432 + Dwarf_Addr pc; 1433 + /* Number of scopes found [output] */ 1434 + int nr; 1435 + /* Array of scopes found, 0 for the outermost one. [output] */ 1436 + Dwarf_Die *scopes; 1437 + }; 1438 + 1439 + static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) 1440 + { 1441 + struct find_scope_data *data = arg; 1442 + 1443 + if (dwarf_haspc(die_mem, data->pc)) { 1444 + Dwarf_Die *tmp; 1445 + 1446 + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); 1447 + if (tmp == NULL) 1448 + return DIE_FIND_CB_END; 1449 + 1450 + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); 1451 + data->scopes = tmp; 1452 + data->nr++; 1453 + return DIE_FIND_CB_CHILD; 1454 + } 1455 + return DIE_FIND_CB_SIBLING; 1456 + } 1457 + 1458 + /** 1459 + * die_get_scopes - Return a list of scopes including the address 1460 + * @cu_die: a compile unit DIE 1461 + * @pc: the address to find 1462 + * @scopes: the array of DIEs for scopes (result) 1463 + * 1464 + * This function does the same as the dwarf_getscopes() but doesn't follow 1465 + * the origins of inlined functions. It returns the number of scopes saved 1466 + * in the @scopes argument. 
The outer scope will be saved first (index 0) and 1467 + * the last one is the innermost scope at the @pc. 1468 + */ 1469 + int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) 1470 + { 1471 + struct find_scope_data data = { 1472 + .pc = pc, 1473 + }; 1474 + Dwarf_Die die_mem; 1475 + 1476 + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); 1477 + 1478 + *scopes = data.scopes; 1479 + return data.nr; 1589 1480 }
+49 -2
tools/perf/util/dwarf-aux.h
··· 116 116 Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, 117 117 Dwarf_Die *die_mem); 118 118 119 + /* Get the name of given type DIE */ 120 + int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf); 121 + 119 122 /* Get the name of given variable DIE */ 120 123 int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); 121 124 122 125 /* Get the name and type of given variable DIE, stored as "type\tname" */ 123 126 int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); 124 - int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); 125 127 126 128 /* Check if target program is compiled with optimization */ 127 129 bool die_is_optimized_target(Dwarf_Die *cu_die); ··· 132 130 void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, 133 131 Dwarf_Addr *entrypc); 134 132 135 - #endif 133 + /* Get the list of including scopes */ 134 + int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); 135 + 136 + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT 137 + 138 + /* Get byte offset range of given variable DIE */ 139 + int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); 140 + 141 + /* Find a variable saved in the 'reg' at given address */ 142 + Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, 143 + Dwarf_Die *die_mem); 144 + 145 + /* Find a (global) variable located in the 'addr' */ 146 + Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, 147 + Dwarf_Addr addr, Dwarf_Die *die_mem, 148 + int *offset); 149 + 150 + #else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ 151 + 152 + static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, 153 + Dwarf_Die *vr_die __maybe_unused, 154 + struct strbuf *buf __maybe_unused) 155 + { 156 + return -ENOTSUP; 157 + } 158 + 159 + static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, 160 + Dwarf_Addr pc __maybe_unused, 161 + int reg __maybe_unused, 162 + 
Dwarf_Die *die_mem __maybe_unused) 163 + { 164 + return NULL; 165 + } 166 + 167 + static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, 168 + Dwarf_Addr pc __maybe_unused, 169 + Dwarf_Addr addr __maybe_unused, 170 + Dwarf_Die *die_mem __maybe_unused, 171 + int *offset __maybe_unused) 172 + { 173 + return NULL; 174 + } 175 + 176 + #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ 177 + 178 + #endif /* _DWARF_AUX_H */
+34
tools/perf/util/dwarf-regs.c
··· 5 5 * Written by: Masami Hiramatsu <mhiramat@kernel.org> 6 6 */ 7 7 8 + #include <stdlib.h> 9 + #include <string.h> 8 10 #include <debug.h> 9 11 #include <dwarf-regs.h> 10 12 #include <elf.h> 13 + #include <errno.h> 11 14 #include <linux/kernel.h> 12 15 13 16 #ifndef EM_AARCH64 ··· 70 67 pr_err("ELF MACHINE %x is not supported.\n", machine); 71 68 } 72 69 return NULL; 70 + } 71 + 72 + __weak int get_arch_regnum(const char *name __maybe_unused) 73 + { 74 + return -ENOTSUP; 75 + } 76 + 77 + /* Return DWARF register number from architecture register name */ 78 + int get_dwarf_regnum(const char *name, unsigned int machine) 79 + { 80 + char *regname = strdup(name); 81 + int reg = -1; 82 + char *p; 83 + 84 + if (regname == NULL) 85 + return -EINVAL; 86 + 87 + /* For convenience, remove trailing characters */ 88 + p = strpbrk(regname, " ,)"); 89 + if (p) 90 + *p = '\0'; 91 + 92 + switch (machine) { 93 + case EM_NONE: /* Generic arch - use host arch */ 94 + reg = get_arch_regnum(regname); 95 + break; 96 + default: 97 + pr_err("ELF MACHINE %x is not supported.\n", machine); 98 + } 99 + free(regname); 100 + return reg; 73 101 }
+64 -18
tools/perf/util/env.c
··· 3 3 #include "debug.h" 4 4 #include "env.h" 5 5 #include "util/header.h" 6 + #include "linux/compiler.h" 6 7 #include <linux/ctype.h> 7 8 #include <linux/zalloc.h> 8 9 #include "cgroup.h" ··· 13 12 #include <string.h> 14 13 #include "pmus.h" 15 14 #include "strbuf.h" 15 + #include "trace/beauty/beauty.h" 16 16 17 17 struct perf_env perf_env; 18 18 ··· 25 23 void perf_env__insert_bpf_prog_info(struct perf_env *env, 26 24 struct bpf_prog_info_node *info_node) 27 25 { 26 + down_write(&env->bpf_progs.lock); 27 + __perf_env__insert_bpf_prog_info(env, info_node); 28 + up_write(&env->bpf_progs.lock); 29 + } 30 + 31 + void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) 32 + { 28 33 __u32 prog_id = info_node->info_linear->info.id; 29 34 struct bpf_prog_info_node *node; 30 35 struct rb_node *parent = NULL; 31 36 struct rb_node **p; 32 37 33 - down_write(&env->bpf_progs.lock); 34 38 p = &env->bpf_progs.infos.rb_node; 35 39 36 40 while (*p != NULL) { ··· 48 40 p = &(*p)->rb_right; 49 41 } else { 50 42 pr_debug("duplicated bpf prog info %u\n", prog_id); 51 - goto out; 43 + return; 52 44 } 53 45 } 54 46 55 47 rb_link_node(&info_node->rb_node, parent, p); 56 48 rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); 57 49 env->bpf_progs.infos_cnt++; 58 - out: 59 - up_write(&env->bpf_progs.lock); 60 50 } 61 51 62 52 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, ··· 84 78 85 79 bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) 86 80 { 81 + bool ret; 82 + 83 + down_write(&env->bpf_progs.lock); 84 + ret = __perf_env__insert_btf(env, btf_node); 85 + up_write(&env->bpf_progs.lock); 86 + return ret; 87 + } 88 + 89 + bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) 90 + { 87 91 struct rb_node *parent = NULL; 88 92 __u32 btf_id = btf_node->id; 89 93 struct btf_node *node; 90 94 struct rb_node **p; 91 - bool ret = true; 92 95 93 - 
down_write(&env->bpf_progs.lock); 94 96 p = &env->bpf_progs.btfs.rb_node; 95 97 96 98 while (*p != NULL) { ··· 110 96 p = &(*p)->rb_right; 111 97 } else { 112 98 pr_debug("duplicated btf %u\n", btf_id); 113 - ret = false; 114 - goto out; 99 + return false; 115 100 } 116 101 } 117 102 118 103 rb_link_node(&btf_node->rb_node, parent, p); 119 104 rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); 120 105 env->bpf_progs.btfs_cnt++; 121 - out: 122 - up_write(&env->bpf_progs.lock); 123 - return ret; 106 + return true; 124 107 } 125 108 126 109 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) 127 110 { 111 + struct btf_node *res; 112 + 113 + down_read(&env->bpf_progs.lock); 114 + res = __perf_env__find_btf(env, btf_id); 115 + up_read(&env->bpf_progs.lock); 116 + return res; 117 + } 118 + 119 + struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) 120 + { 128 121 struct btf_node *node = NULL; 129 122 struct rb_node *n; 130 123 131 - down_read(&env->bpf_progs.lock); 132 124 n = env->bpf_progs.btfs.rb_node; 133 125 134 126 while (n) { ··· 144 124 else if (btf_id > node->id) 145 125 n = n->rb_right; 146 126 else 147 - goto out; 127 + return node; 148 128 } 149 - node = NULL; 150 - 151 - out: 152 - up_read(&env->bpf_progs.lock); 153 - return node; 129 + return NULL; 154 130 } 155 131 156 132 /* purge data in bpf_progs.infos tree */ ··· 469 453 return normalize_arch(arch_name); 470 454 } 471 455 456 + const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) 457 + { 458 + #if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) 459 + if (env->arch_strerrno == NULL) 460 + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); 461 + 462 + return env->arch_strerrno ? 
env->arch_strerrno(err) : "no arch specific strerrno function"; 463 + #else 464 + return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; 465 + #endif 466 + } 467 + 472 468 const char *perf_env__cpuid(struct perf_env *env) 473 469 { 474 470 int status; ··· 557 529 } 558 530 559 531 return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; 532 + } 533 + 534 + bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) 535 + { 536 + char *pmu_mapping = env->pmu_mappings, *colon; 537 + 538 + for (int i = 0; i < env->nr_pmu_mappings; ++i) { 539 + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') 540 + goto out_error; 541 + 542 + pmu_mapping = colon + 1; 543 + if (strcmp(pmu_mapping, pmu_name) == 0) 544 + return true; 545 + 546 + pmu_mapping += strlen(pmu_mapping) + 1; 547 + } 548 + out_error: 549 + return false; 560 550 } 561 551 562 552 char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+17
tools/perf/util/env.h
··· 46 46 struct pmu_caps { 47 47 int nr_caps; 48 48 unsigned int max_branches; 49 + unsigned int br_cntr_nr; 50 + unsigned int br_cntr_width; 51 + 49 52 char **caps; 50 53 char *pmu_name; 51 54 }; 55 + 56 + typedef const char *(arch_syscalls__strerrno_t)(int err); 57 + 58 + arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); 52 59 53 60 struct perf_env { 54 61 char *hostname; ··· 69 62 unsigned long long total_mem; 70 63 unsigned int msr_pmu_type; 71 64 unsigned int max_branches; 65 + unsigned int br_cntr_nr; 66 + unsigned int br_cntr_width; 72 67 int kernel_is_64_bit; 73 68 74 69 int nr_cmdline; ··· 139 130 */ 140 131 bool enabled; 141 132 } clock; 133 + arch_syscalls__strerrno_t *arch_strerrno; 142 134 }; 143 135 144 136 enum perf_compress_type { ··· 169 159 void cpu_cache_level__free(struct cpu_cache_level *cache); 170 160 171 161 const char *perf_env__arch(struct perf_env *env); 162 + const char *perf_env__arch_strerrno(struct perf_env *env, int err); 172 163 const char *perf_env__cpuid(struct perf_env *env); 173 164 const char *perf_env__raw_arch(struct perf_env *env); 174 165 int perf_env__nr_cpus_avail(struct perf_env *env); 175 166 176 167 void perf_env__init(struct perf_env *env); 168 + void __perf_env__insert_bpf_prog_info(struct perf_env *env, 169 + struct bpf_prog_info_node *info_node); 177 170 void perf_env__insert_bpf_prog_info(struct perf_env *env, 178 171 struct bpf_prog_info_node *info_node); 179 172 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, 180 173 __u32 prog_id); 181 174 bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); 175 + bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); 182 176 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); 177 + struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); 183 178 184 179 int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); 185 180 char 
*perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, 186 181 const char *cap); 182 + 183 + bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); 187 184 #endif /* __PERF_ENV_H */
+2 -2
tools/perf/util/event.c
··· 617 617 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { 618 618 al->level = 'k'; 619 619 maps = machine__kernel_maps(machine); 620 - load_map = true; 620 + load_map = !symbol_conf.lazy_load_kernel_maps; 621 621 } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { 622 622 al->level = '.'; 623 623 } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { 624 624 al->level = 'g'; 625 625 maps = machine__kernel_maps(machine); 626 - load_map = true; 626 + load_map = !symbol_conf.lazy_load_kernel_maps; 627 627 } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { 628 628 al->level = 'u'; 629 629 } else {
+32 -2
tools/perf/util/evlist.c
··· 1056 1056 return -1; 1057 1057 1058 1058 if (target__uses_dummy_map(target)) 1059 - cpus = perf_cpu_map__dummy_new(); 1059 + cpus = perf_cpu_map__new_any_cpu(); 1060 1060 else 1061 1061 cpus = perf_cpu_map__new(target->cpu_list); 1062 1062 ··· 1352 1352 * error, and we may not want to do that fallback to a 1353 1353 * default cpu identity map :-\ 1354 1354 */ 1355 - cpus = perf_cpu_map__new(NULL); 1355 + cpus = perf_cpu_map__new_online_cpus(); 1356 1356 if (!cpus) 1357 1357 goto out; 1358 1358 ··· 2517 2517 perf_cpu_map__put(intersect); 2518 2518 } 2519 2519 perf_cpu_map__put(user_requested_cpus); 2520 + } 2521 + 2522 + void evlist__uniquify_name(struct evlist *evlist) 2523 + { 2524 + char *new_name, empty_attributes[2] = ":", *attributes; 2525 + struct evsel *pos; 2526 + 2527 + if (perf_pmus__num_core_pmus() == 1) 2528 + return; 2529 + 2530 + evlist__for_each_entry(evlist, pos) { 2531 + if (!evsel__is_hybrid(pos)) 2532 + continue; 2533 + 2534 + if (strchr(pos->name, '/')) 2535 + continue; 2536 + 2537 + attributes = strchr(pos->name, ':'); 2538 + if (attributes) 2539 + *attributes = '\0'; 2540 + else 2541 + attributes = empty_attributes; 2542 + 2543 + if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { 2544 + free(pos->name); 2545 + pos->name = new_name; 2546 + } else { 2547 + *attributes = ':'; 2548 + } 2549 + } 2520 2550 }
+1
tools/perf/util/evlist.h
··· 442 442 int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); 443 443 void evlist__check_mem_load_aux(struct evlist *evlist); 444 444 void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); 445 + void evlist__uniquify_name(struct evlist *evlist); 445 446 446 447 #endif /* __PERF_EVLIST_H */
+45 -10
tools/perf/util/evsel.c
··· 1801 1801 1802 1802 if (cpus == NULL) { 1803 1803 if (empty_cpu_map == NULL) { 1804 - empty_cpu_map = perf_cpu_map__dummy_new(); 1804 + empty_cpu_map = perf_cpu_map__new_any_cpu(); 1805 1805 if (empty_cpu_map == NULL) 1806 1806 return -ENOMEM; 1807 1807 } ··· 1832 1832 1833 1833 static void evsel__disable_missing_features(struct evsel *evsel) 1834 1834 { 1835 + if (perf_missing_features.branch_counters) 1836 + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; 1835 1837 if (perf_missing_features.read_lost) 1836 1838 evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; 1837 1839 if (perf_missing_features.weight_struct) { ··· 1887 1885 * Must probe features in the order they were added to the 1888 1886 * perf_event_attr interface. 1889 1887 */ 1890 - if (!perf_missing_features.read_lost && 1888 + if (!perf_missing_features.branch_counters && 1889 + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { 1890 + perf_missing_features.branch_counters = true; 1891 + pr_debug2("switching off branch counters support\n"); 1892 + return true; 1893 + } else if (!perf_missing_features.read_lost && 1891 1894 (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { 1892 1895 perf_missing_features.read_lost = true; 1893 1896 pr_debug2("switching off PERF_FORMAT_LOST support\n"); ··· 2325 2318 return new_val; 2326 2319 } 2327 2320 2321 + static inline bool evsel__has_branch_counters(const struct evsel *evsel) 2322 + { 2323 + struct evsel *cur, *leader = evsel__leader(evsel); 2324 + 2325 + /* The branch counters feature only supports group */ 2326 + if (!leader || !evsel->evlist) 2327 + return false; 2328 + 2329 + evlist__for_each_entry(evsel->evlist, cur) { 2330 + if ((leader == evsel__leader(cur)) && 2331 + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) 2332 + return true; 2333 + } 2334 + return false; 2335 + } 2336 + 2328 2337 int evsel__parse_sample(struct evsel *evsel, union perf_event *event, 2329 2338 struct perf_sample 
*data) 2330 2339 { ··· 2574 2551 2575 2552 OVERFLOW_CHECK(array, sz, max_size); 2576 2553 array = (void *)array + sz; 2554 + 2555 + if (evsel__has_branch_counters(evsel)) { 2556 + OVERFLOW_CHECK_u64(array); 2557 + 2558 + data->branch_stack_cntr = (u64 *)array; 2559 + sz = data->branch_stack->nr * sizeof(u64); 2560 + 2561 + OVERFLOW_CHECK(array, sz, max_size); 2562 + array = (void *)array + sz; 2563 + } 2577 2564 } 2578 2565 2579 2566 if (type & PERF_SAMPLE_REGS_USER) { ··· 2853 2820 2854 2821 #endif 2855 2822 2856 - bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) 2823 + bool evsel__fallback(struct evsel *evsel, struct target *target, int err, 2824 + char *msg, size_t msgsize) 2857 2825 { 2858 2826 int paranoid; 2859 2827 ··· 2862 2828 evsel->core.attr.type == PERF_TYPE_HARDWARE && 2863 2829 evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { 2864 2830 /* 2865 - * If it's cycles then fall back to hrtimer based 2866 - * cpu-clock-tick sw counter, which is always available even if 2867 - * no PMU support. 2831 + * If it's cycles then fall back to hrtimer based cpu-clock sw 2832 + * counter, which is always available even if no PMU support. 2868 2833 * 2869 2834 * PPC returns ENXIO until 2.6.37 (behavior changed with commit 2870 2835 * b0a873e). 2871 2836 */ 2872 - scnprintf(msg, msgsize, "%s", 2873 - "The cycles event is not supported, trying to fall back to cpu-clock-ticks"); 2874 - 2875 2837 evsel->core.attr.type = PERF_TYPE_SOFTWARE; 2876 - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; 2838 + evsel->core.attr.config = target__has_cpu(target) 2839 + ? PERF_COUNT_SW_CPU_CLOCK 2840 + : PERF_COUNT_SW_TASK_CLOCK; 2841 + scnprintf(msg, msgsize, 2842 + "The cycles event is not supported, trying to fall back to %s", 2843 + target__has_cpu(target) ? "cpu-clock" : "task-clock"); 2877 2844 2878 2845 zfree(&evsel->name); 2879 2846 return true;
+3 -1
tools/perf/util/evsel.h
··· 191 191 bool code_page_size; 192 192 bool weight_struct; 193 193 bool read_lost; 194 + bool branch_counters; 194 195 }; 195 196 196 197 extern struct perf_missing_features perf_missing_features; ··· 460 459 evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); 461 460 } 462 461 463 - bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); 462 + bool evsel__fallback(struct evsel *evsel, struct target *target, int err, 463 + char *msg, size_t msgsize); 464 464 int evsel__open_strerror(struct evsel *evsel, struct target *target, 465 465 int err, char *msg, size_t size); 466 466
+3 -3
tools/perf/util/genelf.c
··· 293 293 */ 294 294 phdr = elf_newphdr(e, 1); 295 295 phdr[0].p_type = PT_LOAD; 296 - phdr[0].p_offset = 0; 297 - phdr[0].p_vaddr = 0; 298 - phdr[0].p_paddr = 0; 296 + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; 297 + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; 298 + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; 299 299 phdr[0].p_filesz = csize; 300 300 phdr[0].p_memsz = csize; 301 301 phdr[0].p_flags = PF_X | PF_R;
+33 -10
tools/perf/util/header.c
··· 1444 1444 nodes = new_nodes; 1445 1445 size += 4; 1446 1446 } 1447 - ret = memory_node__read(&nodes[cnt++], idx); 1447 + ret = memory_node__read(&nodes[cnt], idx); 1448 + if (!ret) 1449 + cnt += 1; 1448 1450 } 1449 1451 out: 1450 1452 closedir(dir); ··· 1849 1847 node = rb_entry(next, struct bpf_prog_info_node, rb_node); 1850 1848 next = rb_next(&node->rb_node); 1851 1849 1852 - bpf_event__print_bpf_prog_info(&node->info_linear->info, 1853 - env, fp); 1850 + __bpf_event__print_bpf_prog_info(&node->info_linear->info, 1851 + env, fp); 1854 1852 } 1855 1853 1856 1854 up_read(&env->bpf_progs.lock); ··· 2146 2144 pmu_caps = &ff->ph->env.pmu_caps[i]; 2147 2145 __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, 2148 2146 pmu_caps->pmu_name); 2147 + } 2148 + 2149 + if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && 2150 + perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { 2151 + char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); 2152 + 2153 + if (max_precise != NULL && atoi(max_precise) == 0) 2154 + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); 2149 2155 } 2150 2156 } 2151 2157 ··· 3188 3178 /* after reading from file, translate offset to address */ 3189 3179 bpil_offs_to_addr(info_linear); 3190 3180 info_node->info_linear = info_linear; 3191 - perf_env__insert_bpf_prog_info(env, info_node); 3181 + __perf_env__insert_bpf_prog_info(env, info_node); 3192 3182 } 3193 3183 3194 3184 up_write(&env->bpf_progs.lock); ··· 3235 3225 if (__do_read(ff, node->data, data_size)) 3236 3226 goto out; 3237 3227 3238 - perf_env__insert_btf(env, node); 3228 + __perf_env__insert_btf(env, node); 3239 3229 node = NULL; 3240 3230 } 3241 3231 ··· 3269 3259 } 3270 3260 3271 3261 static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, 3272 - char ***caps, unsigned int *max_branches) 3262 + char ***caps, unsigned int *max_branches, 3263 + unsigned int 
*br_cntr_nr, 3264 + unsigned int *br_cntr_width) 3273 3265 { 3274 3266 char *name, *value, *ptr; 3275 3267 u32 nr_pmu_caps, i; ··· 3306 3294 if (!strcmp(name, "branches")) 3307 3295 *max_branches = atoi(value); 3308 3296 3297 + if (!strcmp(name, "branch_counter_nr")) 3298 + *br_cntr_nr = atoi(value); 3299 + 3300 + if (!strcmp(name, "branch_counter_width")) 3301 + *br_cntr_width = atoi(value); 3302 + 3309 3303 free(value); 3310 3304 free(name); 3311 3305 } ··· 3336 3318 { 3337 3319 int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, 3338 3320 &ff->ph->env.cpu_pmu_caps, 3339 - &ff->ph->env.max_branches); 3321 + &ff->ph->env.max_branches, 3322 + &ff->ph->env.br_cntr_nr, 3323 + &ff->ph->env.br_cntr_width); 3340 3324 3341 3325 if (!ret && !ff->ph->env.cpu_pmu_caps) 3342 3326 pr_debug("cpu pmu capabilities not available\n"); ··· 3367 3347 for (i = 0; i < nr_pmu; i++) { 3368 3348 ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, 3369 3349 &pmu_caps[i].caps, 3370 - &pmu_caps[i].max_branches); 3350 + &pmu_caps[i].max_branches, 3351 + &pmu_caps[i].br_cntr_nr, 3352 + &pmu_caps[i].br_cntr_width); 3371 3353 if (ret) 3372 3354 goto err; 3373 3355 ··· 4391 4369 ret += fprintf(fp, "... "); 4392 4370 4393 4371 map = cpu_map__new_data(&ev->cpus.cpus); 4394 - if (map) 4372 + if (map) { 4395 4373 ret += cpu_map__fprintf(map, fp); 4396 - else 4374 + perf_cpu_map__put(map); 4375 + } else 4397 4376 ret += fprintf(fp, "failed to get cpus\n"); 4398 4377 break; 4399 4378 default:
+1
tools/perf/util/hisi-ptt.c
··· 123 123 if (dump_trace) 124 124 hisi_ptt_dump_event(ptt, data, size); 125 125 126 + free(data); 126 127 return 0; 127 128 } 128 129
+11 -17
tools/perf/util/hist.h
··· 82 82 HISTC_ADDR_TO, 83 83 HISTC_ADDR, 84 84 HISTC_SIMD, 85 + HISTC_TYPE, 86 + HISTC_TYPE_OFFSET, 87 + HISTC_SYMBOL_OFFSET, 85 88 HISTC_NR_COLS, /* Last entry */ 86 89 }; 87 90 ··· 460 457 int refresh; 461 458 }; 462 459 463 - struct annotation_options; 464 460 struct res_sample; 465 461 466 462 enum rstype { ··· 475 473 void attr_to_script(char *buf, struct perf_event_attr *attr); 476 474 477 475 int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, 478 - struct hist_browser_timer *hbt, 479 - struct annotation_options *annotation_opts); 476 + struct hist_browser_timer *hbt); 480 477 481 478 int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, 482 - struct hist_browser_timer *hbt, 483 - struct annotation_options *annotation_opts); 479 + struct hist_browser_timer *hbt); 484 480 485 481 int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, 486 - float min_pcnt, struct perf_env *env, bool warn_lost_event, 487 - struct annotation_options *annotation_options); 482 + float min_pcnt, struct perf_env *env, bool warn_lost_event); 488 483 489 484 int script_browse(const char *script_opt, struct evsel *evsel); 490 485 ··· 491 492 void res_sample_init(void); 492 493 493 494 int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, 494 - float min_percent, struct perf_env *env, 495 - struct annotation_options *annotation_opts); 495 + float min_percent, struct perf_env *env); 496 496 #else 497 497 static inline 498 498 int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, ··· 499 501 struct hist_browser_timer *hbt __maybe_unused, 500 502 float min_pcnt __maybe_unused, 501 503 struct perf_env *env __maybe_unused, 502 - bool warn_lost_event __maybe_unused, 503 - struct annotation_options *annotation_options __maybe_unused) 504 + bool warn_lost_event __maybe_unused) 504 505 { 505 506 return 0; 506 507 } 507 508 static inline int map_symbol__tui_annotate(struct 
map_symbol *ms __maybe_unused, 508 509 struct evsel *evsel __maybe_unused, 509 - struct hist_browser_timer *hbt __maybe_unused, 510 - struct annotation_options *annotation_options __maybe_unused) 510 + struct hist_browser_timer *hbt __maybe_unused) 511 511 { 512 512 return 0; 513 513 } 514 514 515 515 static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, 516 516 struct evsel *evsel __maybe_unused, 517 - struct hist_browser_timer *hbt __maybe_unused, 518 - struct annotation_options *annotation_opts __maybe_unused) 517 + struct hist_browser_timer *hbt __maybe_unused) 519 518 { 520 519 return 0; 521 520 } ··· 536 541 static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, 537 542 struct evsel *evsel __maybe_unused, 538 543 float min_percent __maybe_unused, 539 - struct perf_env *env __maybe_unused, 540 - struct annotation_options *annotation_opts __maybe_unused) 544 + struct perf_env *env __maybe_unused) 541 545 { 542 546 return 0; 543 547 }
+19
tools/perf/util/include/dwarf-regs.h
··· 2 2 #ifndef _PERF_DWARF_REGS_H_ 3 3 #define _PERF_DWARF_REGS_H_ 4 4 5 + #define DWARF_REG_PC 0xd3af9c /* random number */ 6 + #define DWARF_REG_FB 0xd3affb /* random number */ 7 + 5 8 #ifdef HAVE_DWARF_SUPPORT 6 9 const char *get_arch_regstr(unsigned int n); 7 10 /* ··· 13 10 * machine: ELF machine signature (EM_*) 14 11 */ 15 12 const char *get_dwarf_regstr(unsigned int n, unsigned int machine); 13 + 14 + int get_arch_regnum(const char *name); 15 + /* 16 + * get_dwarf_regnum - Returns DWARF regnum from register name 17 + * name: architecture register name 18 + * machine: ELF machine signature (EM_*) 19 + */ 20 + int get_dwarf_regnum(const char *name, unsigned int machine); 21 + 22 + #else /* HAVE_DWARF_SUPPORT */ 23 + 24 + static inline int get_dwarf_regnum(const char *name __maybe_unused, 25 + unsigned int machine __maybe_unused) 26 + { 27 + return -1; 28 + } 16 29 #endif 17 30 18 31 #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+41 -34
tools/perf/util/machine.c
··· 453 453 * Guest code can be found in hypervisor process at the same address 454 454 * so copy host maps. 455 455 */ 456 - err = maps__clone(thread, thread__maps(host_thread)); 456 + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); 457 457 thread__put(host_thread); 458 458 if (err) 459 459 goto out_err; ··· 1285 1285 #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 1286 1286 #define X86_64_ENTRY_TRAMPOLINE 0x6000 1287 1287 1288 + struct machine__map_x86_64_entry_trampolines_args { 1289 + struct maps *kmaps; 1290 + bool found; 1291 + }; 1292 + 1293 + static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) 1294 + { 1295 + struct machine__map_x86_64_entry_trampolines_args *args = data; 1296 + struct map *dest_map; 1297 + struct kmap *kmap = __map__kmap(map); 1298 + 1299 + if (!kmap || !is_entry_trampoline(kmap->name)) 1300 + return 0; 1301 + 1302 + dest_map = maps__find(args->kmaps, map__pgoff(map)); 1303 + if (dest_map != map) 1304 + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); 1305 + 1306 + args->found = true; 1307 + return 0; 1308 + } 1309 + 1288 1310 /* Map x86_64 PTI entry trampolines */ 1289 1311 int machine__map_x86_64_entry_trampolines(struct machine *machine, 1290 1312 struct dso *kernel) 1291 1313 { 1292 - struct maps *kmaps = machine__kernel_maps(machine); 1314 + struct machine__map_x86_64_entry_trampolines_args args = { 1315 + .kmaps = machine__kernel_maps(machine), 1316 + .found = false, 1317 + }; 1293 1318 int nr_cpus_avail, cpu; 1294 - bool found = false; 1295 - struct map_rb_node *rb_node; 1296 1319 u64 pgoff; 1297 1320 1298 1321 /* 1299 1322 * In the vmlinux case, pgoff is a virtual address which must now be 1300 1323 * mapped to a vmlinux offset. 
1301 1324 */ 1302 - maps__for_each_entry(kmaps, rb_node) { 1303 - struct map *dest_map, *map = rb_node->map; 1304 - struct kmap *kmap = __map__kmap(map); 1325 + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); 1305 1326 1306 - if (!kmap || !is_entry_trampoline(kmap->name)) 1307 - continue; 1308 - 1309 - dest_map = maps__find(kmaps, map__pgoff(map)); 1310 - if (dest_map != map) 1311 - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); 1312 - found = true; 1313 - } 1314 - if (found || machine->trampolines_mapped) 1327 + if (args.found || machine->trampolines_mapped) 1315 1328 return 0; 1316 1329 1317 1330 pgoff = find_entry_trampoline(kernel); ··· 1372 1359 if (machine->vmlinux_map == NULL) 1373 1360 return -ENOMEM; 1374 1361 1375 - map__set_map_ip(machine->vmlinux_map, identity__map_ip); 1376 - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); 1362 + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); 1377 1363 return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); 1378 1364 } 1379 1365 ··· 1762 1750 1763 1751 if (end == ~0ULL) { 1764 1752 /* update end address of the kernel map using adjacent module address */ 1765 - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), 1766 - machine__kernel_map(machine)); 1767 - struct map_rb_node *next = map_rb_node__next(rb_node); 1753 + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), 1754 + machine__kernel_map(machine)); 1768 1755 1769 1756 if (next) 1770 - machine__set_kernel_mmap(machine, start, map__start(next->map)); 1757 + machine__set_kernel_mmap(machine, start, map__start(next)); 1771 1758 } 1772 1759 1773 1760 out_put: ··· 2168 2157 if (dump_trace) 2169 2158 perf_event__fprintf_task(event, stdout); 2170 2159 2171 - if (thread != NULL) 2172 - thread__put(thread); 2173 - 2160 + if (thread != NULL) { 2161 + if (symbol_conf.keep_exited_threads) 2162 + thread__set_exited(thread, 
/*exited=*/true); 2163 + else 2164 + machine__remove_thread(machine, thread); 2165 + } 2166 + thread__put(thread); 2174 2167 return 0; 2175 2168 } 2176 2169 ··· 3410 3395 int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) 3411 3396 { 3412 3397 struct maps *maps = machine__kernel_maps(machine); 3413 - struct map_rb_node *pos; 3414 - int err = 0; 3415 3398 3416 - maps__for_each_entry(maps, pos) { 3417 - err = fn(pos->map, priv); 3418 - if (err != 0) { 3419 - break; 3420 - } 3421 - } 3422 - return err; 3399 + return maps__for_each_map(maps, fn, priv); 3423 3400 } 3424 3401 3425 3402 bool machine__is_lock_function(struct machine *machine, u64 addr)
+2 -18
tools/perf/util/map.c
··· 109 109 map__set_pgoff(map, pgoff); 110 110 map__set_reloc(map, 0); 111 111 map__set_dso(map, dso__get(dso)); 112 - map__set_map_ip(map, map__dso_map_ip); 113 - map__set_unmap_ip(map, map__dso_unmap_ip); 112 + map__set_mapping_type(map, MAPPING_TYPE__DSO); 114 113 map__set_erange_warned(map, false); 115 114 refcount_set(map__refcnt(map), 1); 116 115 } ··· 171 172 map__init(result, start, start + len, pgoff, dso); 172 173 173 174 if (anon || no_dso) { 174 - map->map_ip = map->unmap_ip = identity__map_ip; 175 + map->mapping_type = MAPPING_TYPE__IDENTITY; 175 176 176 177 /* 177 178 * Set memory without DSO as loaded. All map__find_* ··· 628 629 return NULL; 629 630 } 630 631 return kmap->kmaps; 631 - } 632 - 633 - u64 map__dso_map_ip(const struct map *map, u64 ip) 634 - { 635 - return ip - map__start(map) + map__pgoff(map); 636 - } 637 - 638 - u64 map__dso_unmap_ip(const struct map *map, u64 ip) 639 - { 640 - return ip + map__start(map) - map__pgoff(map); 641 - } 642 - 643 - u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) 644 - { 645 - return ip; 646 632 }
+43 -40
tools/perf/util/map.h
··· 16 16 struct maps; 17 17 struct machine; 18 18 19 + enum mapping_type { 20 + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ 21 + MAPPING_TYPE__DSO, 22 + /* map__map_ip/map__unmap_ip are just the given ip value. */ 23 + MAPPING_TYPE__IDENTITY, 24 + }; 25 + 19 26 DECLARE_RC_STRUCT(map) { 20 27 u64 start; 21 28 u64 end; 22 - bool erange_warned:1; 23 - bool priv:1; 24 - u32 prot; 25 29 u64 pgoff; 26 30 u64 reloc; 27 - 28 - /* ip -> dso rip */ 29 - u64 (*map_ip)(const struct map *, u64); 30 - /* dso rip -> ip */ 31 - u64 (*unmap_ip)(const struct map *, u64); 32 - 33 31 struct dso *dso; 34 32 refcount_t refcnt; 33 + u32 prot; 35 34 u32 flags; 35 + enum mapping_type mapping_type:8; 36 + bool erange_warned; 37 + bool priv; 36 38 }; 37 39 38 40 struct kmap; ··· 43 41 struct kmap *map__kmap(struct map *map); 44 42 struct maps *map__kmaps(struct map *map); 45 43 46 - /* ip -> dso rip */ 47 - u64 map__dso_map_ip(const struct map *map, u64 ip); 48 - /* dso rip -> ip */ 49 - u64 map__dso_unmap_ip(const struct map *map, u64 ip); 50 - /* Returns ip */ 51 - u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); 52 - 53 44 static inline struct dso *map__dso(const struct map *map) 54 45 { 55 46 return RC_CHK_ACCESS(map)->dso; 56 - } 57 - 58 - static inline u64 map__map_ip(const struct map *map, u64 ip) 59 - { 60 - return RC_CHK_ACCESS(map)->map_ip(map, ip); 61 - } 62 - 63 - static inline u64 map__unmap_ip(const struct map *map, u64 ip) 64 - { 65 - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); 66 - } 67 - 68 - static inline void *map__map_ip_ptr(struct map *map) 69 - { 70 - return RC_CHK_ACCESS(map)->map_ip; 71 - } 72 - 73 - static inline void* map__unmap_ip_ptr(struct map *map) 74 - { 75 - return RC_CHK_ACCESS(map)->unmap_ip; 76 47 } 77 48 78 49 static inline u64 map__start(const struct map *map) ··· 96 121 static inline size_t map__size(const struct map *map) 97 122 { 98 123 return map__end(map) - map__start(map); 124 + } 125 + 126 + /* ip -> dso 
rip */ 127 + static inline u64 map__dso_map_ip(const struct map *map, u64 ip) 128 + { 129 + return ip - map__start(map) + map__pgoff(map); 130 + } 131 + 132 + /* dso rip -> ip */ 133 + static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) 134 + { 135 + return rip + map__start(map) - map__pgoff(map); 136 + } 137 + 138 + static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) 139 + { 140 + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) 141 + return map__dso_map_ip(map, ip_or_rip); 142 + else 143 + return ip_or_rip; 144 + } 145 + 146 + static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) 147 + { 148 + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) 149 + return map__dso_unmap_ip(map, ip_or_rip); 150 + else 151 + return ip_or_rip; 99 152 } 100 153 101 154 /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ ··· 297 294 RC_CHK_ACCESS(map)->dso = dso; 298 295 } 299 296 300 - static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) 297 + static inline void map__set_mapping_type(struct map *map, enum mapping_type type) 301 298 { 302 - RC_CHK_ACCESS(map)->map_ip = map_ip; 299 + RC_CHK_ACCESS(map)->mapping_type = type; 303 300 } 304 301 305 - static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) 302 + static inline enum mapping_type map__mapping_type(struct map *map) 306 303 { 307 - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; 304 + return RC_CHK_ACCESS(map)->mapping_type; 308 305 } 309 306 #endif /* __PERF_MAP_H */
+447 -85
tools/perf/util/maps.c
··· 10 10 #include "ui/ui.h" 11 11 #include "unwind.h" 12 12 13 + struct map_rb_node { 14 + struct rb_node rb_node; 15 + struct map *map; 16 + }; 17 + 18 + #define maps__for_each_entry(maps, map) \ 19 + for (map = maps__first(maps); map; map = map_rb_node__next(map)) 20 + 21 + #define maps__for_each_entry_safe(maps, map, next) \ 22 + for (map = maps__first(maps), next = map_rb_node__next(map); map; \ 23 + map = next, next = map_rb_node__next(map)) 24 + 25 + static struct rb_root *maps__entries(struct maps *maps) 26 + { 27 + return &RC_CHK_ACCESS(maps)->entries; 28 + } 29 + 30 + static struct rw_semaphore *maps__lock(struct maps *maps) 31 + { 32 + return &RC_CHK_ACCESS(maps)->lock; 33 + } 34 + 35 + static struct map **maps__maps_by_name(struct maps *maps) 36 + { 37 + return RC_CHK_ACCESS(maps)->maps_by_name; 38 + } 39 + 40 + static struct map_rb_node *maps__first(struct maps *maps) 41 + { 42 + struct rb_node *first = rb_first(maps__entries(maps)); 43 + 44 + if (first) 45 + return rb_entry(first, struct map_rb_node, rb_node); 46 + return NULL; 47 + } 48 + 49 + static struct map_rb_node *map_rb_node__next(struct map_rb_node *node) 50 + { 51 + struct rb_node *next; 52 + 53 + if (!node) 54 + return NULL; 55 + 56 + next = rb_next(&node->rb_node); 57 + 58 + if (!next) 59 + return NULL; 60 + 61 + return rb_entry(next, struct map_rb_node, rb_node); 62 + } 63 + 64 + static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) 65 + { 66 + struct map_rb_node *rb_node; 67 + 68 + maps__for_each_entry(maps, rb_node) { 69 + if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) 70 + return rb_node; 71 + } 72 + return NULL; 73 + } 74 + 13 75 static void maps__init(struct maps *maps, struct machine *machine) 14 76 { 15 77 refcount_set(maps__refcnt(maps), 1); ··· 258 196 RC_CHK_PUT(maps); 259 197 } 260 198 199 + int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) 200 + { 201 + struct map_rb_node *pos; 202 + int ret = 0; 203 
+ 204 + down_read(maps__lock(maps)); 205 + maps__for_each_entry(maps, pos) { 206 + ret = cb(pos->map, data); 207 + if (ret) 208 + break; 209 + } 210 + up_read(maps__lock(maps)); 211 + return ret; 212 + } 213 + 214 + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) 215 + { 216 + struct map_rb_node *pos, *next; 217 + unsigned int start_nr_maps; 218 + 219 + down_write(maps__lock(maps)); 220 + 221 + start_nr_maps = maps__nr_maps(maps); 222 + maps__for_each_entry_safe(maps, pos, next) { 223 + if (cb(pos->map, data)) { 224 + __maps__remove(maps, pos); 225 + --RC_CHK_ACCESS(maps)->nr_maps; 226 + } 227 + } 228 + if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps)) 229 + __maps__free_maps_by_name(maps); 230 + 231 + up_write(maps__lock(maps)); 232 + } 233 + 261 234 struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) 262 235 { 263 236 struct map *map = maps__find(maps, addr); ··· 307 210 return NULL; 308 211 } 309 212 310 - struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) 311 - { 213 + struct maps__find_symbol_by_name_args { 214 + struct map **mapp; 215 + const char *name; 312 216 struct symbol *sym; 313 - struct map_rb_node *pos; 217 + }; 314 218 315 - down_read(maps__lock(maps)); 219 + static int maps__find_symbol_by_name_cb(struct map *map, void *data) 220 + { 221 + struct maps__find_symbol_by_name_args *args = data; 316 222 317 - maps__for_each_entry(maps, pos) { 318 - sym = map__find_symbol_by_name(pos->map, name); 223 + args->sym = map__find_symbol_by_name(map, args->name); 224 + if (!args->sym) 225 + return 0; 319 226 320 - if (sym == NULL) 321 - continue; 322 - if (!map__contains_symbol(pos->map, sym)) { 323 - sym = NULL; 324 - continue; 325 - } 326 - if (mapp != NULL) 327 - *mapp = pos->map; 328 - goto out; 227 + if (!map__contains_symbol(map, args->sym)) { 228 + args->sym = NULL; 229 + return 0; 329 230 } 330 231 331 - sym = 
NULL; 332 - out: 333 - up_read(maps__lock(maps)); 334 - return sym; 232 + if (args->mapp != NULL) 233 + *args->mapp = map__get(map); 234 + return 1; 235 + } 236 + 237 + struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) 238 + { 239 + struct maps__find_symbol_by_name_args args = { 240 + .mapp = mapp, 241 + .name = name, 242 + .sym = NULL, 243 + }; 244 + 245 + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); 246 + return args.sym; 335 247 } 336 248 337 249 int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) ··· 359 253 return ams->ms.sym ? 0 : -1; 360 254 } 361 255 362 - size_t maps__fprintf(struct maps *maps, FILE *fp) 256 + struct maps__fprintf_args { 257 + FILE *fp; 258 + size_t printed; 259 + }; 260 + 261 + static int maps__fprintf_cb(struct map *map, void *data) 363 262 { 364 - size_t printed = 0; 365 - struct map_rb_node *pos; 263 + struct maps__fprintf_args *args = data; 366 264 367 - down_read(maps__lock(maps)); 368 - 369 - maps__for_each_entry(maps, pos) { 370 - printed += fprintf(fp, "Map:"); 371 - printed += map__fprintf(pos->map, fp); 372 - if (verbose > 2) { 373 - printed += dso__fprintf(map__dso(pos->map), fp); 374 - printed += fprintf(fp, "--\n"); 375 - } 265 + args->printed += fprintf(args->fp, "Map:"); 266 + args->printed += map__fprintf(map, args->fp); 267 + if (verbose > 2) { 268 + args->printed += dso__fprintf(map__dso(map), args->fp); 269 + args->printed += fprintf(args->fp, "--\n"); 376 270 } 377 - 378 - up_read(maps__lock(maps)); 379 - 380 - return printed; 271 + return 0; 381 272 } 382 273 383 - int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) 274 + size_t maps__fprintf(struct maps *maps, FILE *fp) 275 + { 276 + struct maps__fprintf_args args = { 277 + .fp = fp, 278 + .printed = 0, 279 + }; 280 + 281 + maps__for_each_map(maps, maps__fprintf_cb, &args); 282 + 283 + return args.printed; 284 + } 285 + 286 + /* 287 + * Find first map where end > 
map->start. 288 + * Same as find_vma() in kernel. 289 + */ 290 + static struct rb_node *first_ending_after(struct maps *maps, const struct map *map) 384 291 { 385 292 struct rb_root *root; 386 293 struct rb_node *next, *first; 387 - int err = 0; 388 - 389 - down_write(maps__lock(maps)); 390 294 391 295 root = maps__entries(maps); 392 - 393 - /* 394 - * Find first map where end > map->start. 395 - * Same as find_vma() in kernel. 396 - */ 397 296 next = root->rb_node; 398 297 first = NULL; 399 298 while (next) { ··· 412 301 } else 413 302 next = next->rb_right; 414 303 } 304 + return first; 305 + } 415 306 416 - next = first; 307 + /* 308 + * Adds new to maps, if new overlaps existing entries then the existing maps are 309 + * adjusted or removed so that new fits without overlapping any entries. 310 + */ 311 + int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) 312 + { 313 + 314 + struct rb_node *next; 315 + int err = 0; 316 + FILE *fp = debug_file(); 317 + 318 + down_write(maps__lock(maps)); 319 + 320 + next = first_ending_after(maps, new); 417 321 while (next && !err) { 418 322 struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); 419 323 next = rb_next(&pos->rb_node); ··· 437 311 * Stop if current map starts after map->end. 438 312 * Maps are ordered by start: next will not overlap for sure. 
439 313 */ 440 - if (map__start(pos->map) >= map__end(map)) 314 + if (map__start(pos->map) >= map__end(new)) 441 315 break; 442 316 443 317 if (verbose >= 2) { 444 318 445 319 if (use_browser) { 446 320 pr_debug("overlapping maps in %s (disable tui for more info)\n", 447 - map__dso(map)->name); 321 + map__dso(new)->name); 448 322 } else { 449 - fputs("overlapping maps:\n", fp); 450 - map__fprintf(map, fp); 323 + pr_debug("overlapping maps:\n"); 324 + map__fprintf(new, fp); 451 325 map__fprintf(pos->map, fp); 452 326 } 453 327 } 454 328 455 - rb_erase_init(&pos->rb_node, root); 329 + rb_erase_init(&pos->rb_node, maps__entries(maps)); 456 330 /* 457 331 * Now check if we need to create new maps for areas not 458 332 * overlapped by the new map: 459 333 */ 460 - if (map__start(map) > map__start(pos->map)) { 334 + if (map__start(new) > map__start(pos->map)) { 461 335 struct map *before = map__clone(pos->map); 462 336 463 337 if (before == NULL) { ··· 465 339 goto put_map; 466 340 } 467 341 468 - map__set_end(before, map__start(map)); 342 + map__set_end(before, map__start(new)); 469 343 err = __maps__insert(maps, before); 470 344 if (err) { 471 345 map__put(before); ··· 477 351 map__put(before); 478 352 } 479 353 480 - if (map__end(map) < map__end(pos->map)) { 354 + if (map__end(new) < map__end(pos->map)) { 481 355 struct map *after = map__clone(pos->map); 482 356 483 357 if (after == NULL) { ··· 485 359 goto put_map; 486 360 } 487 361 488 - map__set_start(after, map__end(map)); 489 - map__add_pgoff(after, map__end(map) - map__start(pos->map)); 490 - assert(map__map_ip(pos->map, map__end(map)) == 491 - map__map_ip(after, map__end(map))); 362 + map__set_start(after, map__end(new)); 363 + map__add_pgoff(after, map__end(new) - map__start(pos->map)); 364 + assert(map__map_ip(pos->map, map__end(new)) == 365 + map__map_ip(after, map__end(new))); 492 366 err = __maps__insert(maps, after); 493 367 if (err) { 494 368 map__put(after); ··· 502 376 map__put(pos->map); 503 377 
free(pos); 504 378 } 379 + /* Add the map. */ 380 + err = __maps__insert(maps, new); 505 381 up_write(maps__lock(maps)); 506 382 return err; 507 383 } 508 384 509 - /* 510 - * XXX This should not really _copy_ te maps, but refcount them. 511 - */ 512 - int maps__clone(struct thread *thread, struct maps *parent) 385 + int maps__copy_from(struct maps *maps, struct maps *parent) 513 386 { 514 - struct maps *maps = thread__maps(thread); 515 387 int err; 516 388 struct map_rb_node *rb_node; 517 389 ··· 540 416 return err; 541 417 } 542 418 543 - struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) 544 - { 545 - struct map_rb_node *rb_node; 546 - 547 - maps__for_each_entry(maps, rb_node) { 548 - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) 549 - return rb_node; 550 - } 551 - return NULL; 552 - } 553 - 554 419 struct map *maps__find(struct maps *maps, u64 ip) 555 420 { 556 421 struct rb_node *p; ··· 565 452 return m ? m->map : NULL; 566 453 } 567 454 568 - struct map_rb_node *maps__first(struct maps *maps) 455 + static int map__strcmp(const void *a, const void *b) 569 456 { 570 - struct rb_node *first = rb_first(maps__entries(maps)); 457 + const struct map *map_a = *(const struct map **)a; 458 + const struct map *map_b = *(const struct map **)b; 459 + const struct dso *dso_a = map__dso(map_a); 460 + const struct dso *dso_b = map__dso(map_b); 461 + int ret = strcmp(dso_a->short_name, dso_b->short_name); 571 462 572 - if (first) 573 - return rb_entry(first, struct map_rb_node, rb_node); 463 + if (ret == 0 && map_a != map_b) { 464 + /* 465 + * Ensure distinct but name equal maps have an order in part to 466 + * aid reference counting. 
467 + */ 468 + ret = (int)map__start(map_a) - (int)map__start(map_b); 469 + if (ret == 0) 470 + ret = (int)((intptr_t)map_a - (intptr_t)map_b); 471 + } 472 + 473 + return ret; 474 + } 475 + 476 + static int map__strcmp_name(const void *name, const void *b) 477 + { 478 + const struct dso *dso = map__dso(*(const struct map **)b); 479 + 480 + return strcmp(name, dso->short_name); 481 + } 482 + 483 + void __maps__sort_by_name(struct maps *maps) 484 + { 485 + qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); 486 + } 487 + 488 + static int map__groups__sort_by_name_from_rbtree(struct maps *maps) 489 + { 490 + struct map_rb_node *rb_node; 491 + struct map **maps_by_name = realloc(maps__maps_by_name(maps), 492 + maps__nr_maps(maps) * sizeof(struct map *)); 493 + int i = 0; 494 + 495 + if (maps_by_name == NULL) 496 + return -1; 497 + 498 + up_read(maps__lock(maps)); 499 + down_write(maps__lock(maps)); 500 + 501 + RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; 502 + RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); 503 + 504 + maps__for_each_entry(maps, rb_node) 505 + maps_by_name[i++] = map__get(rb_node->map); 506 + 507 + __maps__sort_by_name(maps); 508 + 509 + up_write(maps__lock(maps)); 510 + down_read(maps__lock(maps)); 511 + 512 + return 0; 513 + } 514 + 515 + static struct map *__maps__find_by_name(struct maps *maps, const char *name) 516 + { 517 + struct map **mapp; 518 + 519 + if (maps__maps_by_name(maps) == NULL && 520 + map__groups__sort_by_name_from_rbtree(maps)) 521 + return NULL; 522 + 523 + mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), 524 + sizeof(*mapp), map__strcmp_name); 525 + if (mapp) 526 + return *mapp; 574 527 return NULL; 575 528 } 576 529 577 - struct map_rb_node *map_rb_node__next(struct map_rb_node *node) 530 + struct map *maps__find_by_name(struct maps *maps, const char *name) 578 531 { 579 - struct rb_node *next; 532 + struct map_rb_node *rb_node; 533 + struct map *map; 
580 534 581 - if (!node) 582 - return NULL; 535 + down_read(maps__lock(maps)); 583 536 584 - next = rb_next(&node->rb_node); 585 537 586 - if (!next) 587 - return NULL; 538 + if (RC_CHK_ACCESS(maps)->last_search_by_name) { 539 + const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); 588 540 589 - return rb_entry(next, struct map_rb_node, rb_node); 541 + if (strcmp(dso->short_name, name) == 0) { 542 + map = RC_CHK_ACCESS(maps)->last_search_by_name; 543 + goto out_unlock; 544 + } 545 + } 546 + /* 547 + * If we have maps->maps_by_name, then the name isn't in the rbtree, 548 + * as maps->maps_by_name mirrors the rbtree when lookups by name are 549 + * made. 550 + */ 551 + map = __maps__find_by_name(maps, name); 552 + if (map || maps__maps_by_name(maps) != NULL) 553 + goto out_unlock; 554 + 555 + /* Fallback to traversing the rbtree... */ 556 + maps__for_each_entry(maps, rb_node) { 557 + struct dso *dso; 558 + 559 + map = rb_node->map; 560 + dso = map__dso(map); 561 + if (strcmp(dso->short_name, name) == 0) { 562 + RC_CHK_ACCESS(maps)->last_search_by_name = map; 563 + goto out_unlock; 564 + } 565 + } 566 + map = NULL; 567 + 568 + out_unlock: 569 + up_read(maps__lock(maps)); 570 + return map; 571 + } 572 + 573 + struct map *maps__find_next_entry(struct maps *maps, struct map *map) 574 + { 575 + struct map_rb_node *rb_node = maps__find_node(maps, map); 576 + struct map_rb_node *next = map_rb_node__next(rb_node); 577 + 578 + if (next) 579 + return next->map; 580 + 581 + return NULL; 582 + } 583 + 584 + void maps__fixup_end(struct maps *maps) 585 + { 586 + struct map_rb_node *prev = NULL, *curr; 587 + 588 + down_write(maps__lock(maps)); 589 + 590 + maps__for_each_entry(maps, curr) { 591 + if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map))) 592 + map__set_end(prev->map, map__start(curr->map)); 593 + 594 + prev = curr; 595 + } 596 + 597 + /* 598 + * We still haven't the actual symbols, so guess the 599 + * last map final 
address. 600 + */ 601 + if (curr && !map__end(curr->map)) 602 + map__set_end(curr->map, ~0ULL); 603 + 604 + up_write(maps__lock(maps)); 605 + } 606 + 607 + /* 608 + * Merges map into maps by splitting the new map within the existing map 609 + * regions. 610 + */ 611 + int maps__merge_in(struct maps *kmaps, struct map *new_map) 612 + { 613 + struct map_rb_node *rb_node; 614 + struct rb_node *first; 615 + bool overlaps; 616 + LIST_HEAD(merged); 617 + int err = 0; 618 + 619 + down_read(maps__lock(kmaps)); 620 + first = first_ending_after(kmaps, new_map); 621 + rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL; 622 + overlaps = rb_node && map__start(rb_node->map) < map__end(new_map); 623 + up_read(maps__lock(kmaps)); 624 + 625 + if (!overlaps) 626 + return maps__insert(kmaps, new_map); 627 + 628 + maps__for_each_entry(kmaps, rb_node) { 629 + struct map *old_map = rb_node->map; 630 + 631 + /* no overload with this one */ 632 + if (map__end(new_map) < map__start(old_map) || 633 + map__start(new_map) >= map__end(old_map)) 634 + continue; 635 + 636 + if (map__start(new_map) < map__start(old_map)) { 637 + /* 638 + * |new...... 639 + * |old.... 640 + */ 641 + if (map__end(new_map) < map__end(old_map)) { 642 + /* 643 + * |new......| -> |new..| 644 + * |old....| -> |old....| 645 + */ 646 + map__set_end(new_map, map__start(old_map)); 647 + } else { 648 + /* 649 + * |new.............| -> |new..| |new..| 650 + * |old....| -> |old....| 651 + */ 652 + struct map_list_node *m = map_list_node__new(); 653 + 654 + if (!m) { 655 + err = -ENOMEM; 656 + goto out; 657 + } 658 + 659 + m->map = map__clone(new_map); 660 + if (!m->map) { 661 + free(m); 662 + err = -ENOMEM; 663 + goto out; 664 + } 665 + 666 + map__set_end(m->map, map__start(old_map)); 667 + list_add_tail(&m->node, &merged); 668 + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); 669 + map__set_start(new_map, map__end(old_map)); 670 + } 671 + } else { 672 + /* 673 + * |new...... 
674 + * |old.... 675 + */ 676 + if (map__end(new_map) < map__end(old_map)) { 677 + /* 678 + * |new..| -> x 679 + * |old.........| -> |old.........| 680 + */ 681 + map__put(new_map); 682 + new_map = NULL; 683 + break; 684 + } else { 685 + /* 686 + * |new......| -> |new...| 687 + * |old....| -> |old....| 688 + */ 689 + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); 690 + map__set_start(new_map, map__end(old_map)); 691 + } 692 + } 693 + } 694 + 695 + out: 696 + while (!list_empty(&merged)) { 697 + struct map_list_node *old_node; 698 + 699 + old_node = list_entry(merged.next, struct map_list_node, node); 700 + list_del_init(&old_node->node); 701 + if (!err) 702 + err = maps__insert(kmaps, old_node->map); 703 + map__put(old_node->map); 704 + free(old_node); 705 + } 706 + 707 + if (new_map) { 708 + if (!err) 709 + err = maps__insert(kmaps, new_map); 710 + map__put(new_map); 711 + } 712 + return err; 713 + } 714 + 715 + void maps__load_first(struct maps *maps) 716 + { 717 + struct map_rb_node *first; 718 + 719 + down_read(maps__lock(maps)); 720 + 721 + first = maps__first(maps); 722 + if (first) 723 + map__load(first->map); 724 + 725 + up_read(maps__lock(maps)); 590 726 }
+19 -29
tools/perf/util/maps.h
··· 14 14 struct machine; 15 15 struct map; 16 16 struct maps; 17 - struct thread; 18 17 19 - struct map_rb_node { 20 - struct rb_node rb_node; 18 + struct map_list_node { 19 + struct list_head node; 21 20 struct map *map; 22 21 }; 23 22 24 - struct map_rb_node *maps__first(struct maps *maps); 25 - struct map_rb_node *map_rb_node__next(struct map_rb_node *node); 26 - struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); 23 + static inline struct map_list_node *map_list_node__new(void) 24 + { 25 + return malloc(sizeof(struct map_list_node)); 26 + } 27 + 27 28 struct map *maps__find(struct maps *maps, u64 addr); 28 - 29 - #define maps__for_each_entry(maps, map) \ 30 - for (map = maps__first(maps); map; map = map_rb_node__next(map)) 31 - 32 - #define maps__for_each_entry_safe(maps, map, next) \ 33 - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ 34 - map = next, next = map_rb_node__next(map)) 35 29 36 30 DECLARE_RC_STRUCT(maps) { 37 31 struct rb_root entries; ··· 52 58 53 59 struct maps *maps__new(struct machine *machine); 54 60 bool maps__empty(struct maps *maps); 55 - int maps__clone(struct thread *thread, struct maps *parent); 61 + int maps__copy_from(struct maps *maps, struct maps *parent); 56 62 57 63 struct maps *maps__get(struct maps *maps); 58 64 void maps__put(struct maps *maps); ··· 65 71 66 72 #define maps__zput(map) __maps__zput(&map) 67 73 68 - static inline struct rb_root *maps__entries(struct maps *maps) 69 - { 70 - return &RC_CHK_ACCESS(maps)->entries; 71 - } 74 + /* Iterate over map calling cb for each entry. */ 75 + int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); 76 + /* Iterate over map removing an entry if cb returns true. 
*/ 77 + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); 72 78 73 79 static inline struct machine *maps__machine(struct maps *maps) 74 80 { 75 81 return RC_CHK_ACCESS(maps)->machine; 76 - } 77 - 78 - static inline struct rw_semaphore *maps__lock(struct maps *maps) 79 - { 80 - return &RC_CHK_ACCESS(maps)->lock; 81 - } 82 - 83 - static inline struct map **maps__maps_by_name(struct maps *maps) 84 - { 85 - return RC_CHK_ACCESS(maps)->maps_by_name; 86 82 } 87 83 88 84 static inline unsigned int maps__nr_maps(const struct maps *maps) ··· 109 125 110 126 int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); 111 127 112 - int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); 128 + int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); 113 129 114 130 struct map *maps__find_by_name(struct maps *maps, const char *name); 131 + 132 + struct map *maps__find_next_entry(struct maps *maps, struct map *map); 115 133 116 134 int maps__merge_in(struct maps *kmaps, struct map *new_map); 117 135 118 136 void __maps__sort_by_name(struct maps *maps); 137 + 138 + void maps__fixup_end(struct maps *maps); 139 + 140 + void maps__load_first(struct maps *maps); 119 141 120 142 #endif // __PERF_MAPS_H
+14 -11
tools/perf/util/mem-events.c
··· 100 100 return -1; 101 101 } 102 102 103 - static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) 103 + static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu, 104 + struct perf_mem_event *e) 104 105 { 106 + char sysfs_name[100]; 105 107 char path[PATH_MAX]; 106 108 struct stat st; 107 109 110 + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); 108 111 scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); 109 112 return !stat(path, &st); 110 113 } ··· 123 120 124 121 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 125 122 struct perf_mem_event *e = perf_mem_events__ptr(j); 126 - char sysfs_name[100]; 127 123 struct perf_pmu *pmu = NULL; 128 124 129 125 /* ··· 138 136 * of core PMU. 139 137 */ 140 138 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 141 - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); 142 - e->supported |= perf_mem_event__supported(mnt, sysfs_name); 139 + e->supported |= perf_mem_event__supported(mnt, pmu, e); 140 + if (e->supported) { 141 + found = true; 142 + break; 143 + } 143 144 } 144 - 145 - if (e->supported) 146 - found = true; 147 145 } 148 146 149 147 return found ? 
0 : -ENOENT; ··· 169 167 int idx) 170 168 { 171 169 const char *mnt = sysfs__mount(); 172 - char sysfs_name[100]; 173 170 struct perf_pmu *pmu = NULL; 174 171 175 172 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 176 - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, 177 - pmu->name); 178 - if (!perf_mem_event__supported(mnt, sysfs_name)) { 173 + if (!perf_mem_event__supported(mnt, pmu, e)) { 179 174 pr_err("failed: event '%s' not supported\n", 180 175 perf_mem_events__name(idx, pmu->name)); 181 176 } ··· 182 183 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, 183 184 char **rec_tmp, int *tmp_nr) 184 185 { 186 + const char *mnt = sysfs__mount(); 185 187 int i = *argv_nr, k = 0; 186 188 struct perf_mem_event *e; 187 189 ··· 210 210 211 211 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 212 212 const char *s = perf_mem_events__name(j, pmu->name); 213 + 214 + if (!perf_mem_event__supported(mnt, pmu, e)) 215 + continue; 213 216 214 217 rec_argv[i++] = "-e"; 215 218 if (s) {
+2 -3
tools/perf/util/mmap.c
··· 295 295 296 296 map->core.flush = mp->flush; 297 297 298 - map->comp_level = mp->comp_level; 299 298 #ifndef PYTHON_PERF 300 - if (zstd_init(&map->zstd_data, map->comp_level)) { 299 + if (zstd_init(&map->zstd_data, mp->comp_level)) { 301 300 pr_debug2("failed to init mmap compressor, error %d\n", errno); 302 301 return -1; 303 302 } 304 303 #endif 305 304 306 - if (map->comp_level && !perf_mmap__aio_enabled(map)) { 305 + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { 307 306 map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, 308 307 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); 309 308 if (map->data == MAP_FAILED) {
-1
tools/perf/util/mmap.h
··· 39 39 #endif 40 40 struct mmap_cpu_mask affinity_mask; 41 41 void *data; 42 - int comp_level; 43 42 struct perf_data_file *file; 44 43 struct zstd_data zstd_data; 45 44 };
+1
tools/perf/util/parse-branch-options.c
··· 36 36 BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), 37 37 BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), 38 38 BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), 39 + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), 39 40 BRANCH_END 40 41 }; 41 42
+38 -14
tools/perf/util/parse-events.c
··· 976 976 struct parse_events_error *err) 977 977 { 978 978 if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { 979 - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); 979 + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); 980 980 981 981 if (!pmu) { 982 982 char *err_str; ··· 986 986 err_str, /*help=*/NULL); 987 987 return -EINVAL; 988 988 } 989 - if (perf_pmu__supports_legacy_cache(pmu)) { 989 + /* 990 + * Rewrite the PMU event to a legacy cache one unless the PMU 991 + * doesn't support legacy cache events or the event is present 992 + * within the PMU. 993 + */ 994 + if (perf_pmu__supports_legacy_cache(pmu) && 995 + !perf_pmu__have_event(pmu, term->config)) { 990 996 attr->type = PERF_TYPE_HW_CACHE; 991 997 return parse_events__decode_legacy_cache(term->config, pmu->type, 992 998 &attr->config); 993 - } else 999 + } else { 994 1000 term->type_term = PARSE_EVENTS__TERM_TYPE_USER; 1001 + term->no_value = true; 1002 + } 995 1003 } 996 1004 if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { 997 - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); 1005 + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); 998 1006 999 1007 if (!pmu) { 1000 1008 char *err_str; ··· 1012 1004 err_str, /*help=*/NULL); 1013 1005 return -EINVAL; 1014 1006 } 1015 - attr->type = PERF_TYPE_HARDWARE; 1016 - attr->config = term->val.num; 1017 - if (perf_pmus__supports_extended_type()) 1018 - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; 1007 + /* 1008 + * If the PMU has a sysfs or json event prefer it over 1009 + * legacy. ARM requires this. 
1010 + */ 1011 + if (perf_pmu__have_event(pmu, term->config)) { 1012 + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; 1013 + term->no_value = true; 1014 + } else { 1015 + attr->type = PERF_TYPE_HARDWARE; 1016 + attr->config = term->val.num; 1017 + if (perf_pmus__supports_extended_type()) 1018 + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; 1019 + } 1019 1020 return 0; 1020 1021 } 1021 1022 if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || ··· 1398 1381 YYLTYPE *loc = loc_; 1399 1382 LIST_HEAD(config_terms); 1400 1383 struct parse_events_terms parsed_terms; 1384 + bool alias_rewrote_terms = false; 1401 1385 1402 1386 pmu = parse_state->fake_pmu ?: perf_pmus__find(name); 1403 1387 ··· 1451 1433 return evsel ? 0 : -ENOMEM; 1452 1434 } 1453 1435 1454 - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) { 1436 + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ 1437 + if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { 1438 + parse_events_terms__exit(&parsed_terms); 1439 + return -EINVAL; 1440 + } 1441 + 1442 + /* Look for event names in the terms and rewrite into format based terms. */ 1443 + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, 1444 + &info, &alias_rewrote_terms, err)) { 1455 1445 parse_events_terms__exit(&parsed_terms); 1456 1446 return -EINVAL; 1457 1447 } ··· 1473 1447 strbuf_release(&sb); 1474 1448 } 1475 1449 1476 - /* 1477 - * Configure hardcoded terms first, no need to check 1478 - * return value when called with fail == 0 ;) 1479 - */ 1480 - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { 1450 + /* Configure attr/terms again if an alias was expanded. */ 1451 + if (alias_rewrote_terms && 1452 + config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { 1481 1453 parse_events_terms__exit(&parsed_terms); 1482 1454 return -EINVAL; 1483 1455 }
+2 -2
tools/perf/util/perf_api_probe.c
··· 64 64 struct perf_cpu cpu; 65 65 int ret, i = 0; 66 66 67 - cpus = perf_cpu_map__new(NULL); 67 + cpus = perf_cpu_map__new_online_cpus(); 68 68 if (!cpus) 69 69 return false; 70 70 cpu = perf_cpu_map__cpu(cpus, 0); ··· 140 140 struct perf_cpu cpu; 141 141 int fd; 142 142 143 - cpus = perf_cpu_map__new(NULL); 143 + cpus = perf_cpu_map__new_online_cpus(); 144 144 if (!cpus) 145 145 return false; 146 146
+1
tools/perf/util/perf_event_attr_fprintf.c
··· 55 55 bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), 56 56 bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), 57 57 bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), 58 + bit_name(COUNTERS), 58 59 { .name = NULL, } 59 60 }; 60 61 #undef bit_name
+6 -2
tools/perf/util/pmu.c
··· 1494 1494 * defined for the alias 1495 1495 */ 1496 1496 int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, 1497 - struct perf_pmu_info *info, struct parse_events_error *err) 1497 + struct perf_pmu_info *info, bool *rewrote_terms, 1498 + struct parse_events_error *err) 1498 1499 { 1499 1500 struct parse_events_term *term, *h; 1500 1501 struct perf_pmu_alias *alias; 1501 1502 int ret; 1502 1503 1504 + *rewrote_terms = false; 1503 1505 info->per_pkg = false; 1504 1506 1505 1507 /* ··· 1523 1521 NULL); 1524 1522 return ret; 1525 1523 } 1526 - 1524 + *rewrote_terms = true; 1527 1525 ret = check_info_data(pmu, alias, info, err, term->err_term); 1528 1526 if (ret) 1529 1527 return ret; ··· 1617 1615 1618 1616 bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) 1619 1617 { 1618 + if (!name) 1619 + return false; 1620 1620 if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) 1621 1621 return true; 1622 1622 if (pmu->cpu_aliases_added || !pmu->events_table)
+2 -1
tools/perf/util/pmu.h
··· 201 201 __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); 202 202 int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); 203 203 int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, 204 - struct perf_pmu_info *info, struct parse_events_error *err); 204 + struct perf_pmu_info *info, bool *rewrote_terms, 205 + struct parse_events_error *err); 205 206 int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); 206 207 207 208 int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
+26 -14
tools/perf/util/probe-event.c
··· 149 149 return 0; 150 150 } 151 151 152 + struct kernel_get_module_map_cb_args { 153 + const char *module; 154 + struct map *result; 155 + }; 156 + 157 + static int kernel_get_module_map_cb(struct map *map, void *data) 158 + { 159 + struct kernel_get_module_map_cb_args *args = data; 160 + struct dso *dso = map__dso(map); 161 + const char *short_name = dso->short_name; /* short_name is "[module]" */ 162 + u16 short_name_len = dso->short_name_len; 163 + 164 + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && 165 + args->module[short_name_len - 2] == '\0') { 166 + args->result = map__get(map); 167 + return 1; 168 + } 169 + return 0; 170 + } 171 + 152 172 static struct map *kernel_get_module_map(const char *module) 153 173 { 154 - struct maps *maps = machine__kernel_maps(host_machine); 155 - struct map_rb_node *pos; 174 + struct kernel_get_module_map_cb_args args = { 175 + .module = module, 176 + .result = NULL, 177 + }; 156 178 157 179 /* A file path -- this is an offline module */ 158 180 if (module && strchr(module, '/')) ··· 186 164 return map__get(map); 187 165 } 188 166 189 - maps__for_each_entry(maps, pos) { 190 - /* short_name is "[module]" */ 191 - struct dso *dso = map__dso(pos->map); 192 - const char *short_name = dso->short_name; 193 - u16 short_name_len = dso->short_name_len; 167 + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); 194 168 195 - if (strncmp(short_name + 1, module, 196 - short_name_len - 2) == 0 && 197 - module[short_name_len - 2] == '\0') { 198 - return map__get(pos->map); 199 - } 200 - } 201 - return NULL; 169 + return args.result; 202 170 } 203 171 204 172 struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
+5 -196
tools/perf/util/probe-finder.c
··· 23 23 #include "event.h" 24 24 #include "dso.h" 25 25 #include "debug.h" 26 + #include "debuginfo.h" 26 27 #include "intlist.h" 27 28 #include "strbuf.h" 28 29 #include "strlist.h" ··· 32 31 #include "probe-file.h" 33 32 #include "string2.h" 34 33 35 - #ifdef HAVE_DEBUGINFOD_SUPPORT 36 - #include <elfutils/debuginfod.h> 37 - #endif 38 - 39 34 /* Kprobe tracer basic type is up to u64 */ 40 35 #define MAX_BASIC_TYPE_BITS 64 41 - 42 - /* Dwarf FL wrappers */ 43 - static char *debuginfo_path; /* Currently dummy */ 44 - 45 - static const Dwfl_Callbacks offline_callbacks = { 46 - .find_debuginfo = dwfl_standard_find_debuginfo, 47 - .debuginfo_path = &debuginfo_path, 48 - 49 - .section_address = dwfl_offline_section_address, 50 - 51 - /* We use this table for core files too. */ 52 - .find_elf = dwfl_build_id_find_elf, 53 - }; 54 - 55 - /* Get a Dwarf from offline image */ 56 - static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, 57 - const char *path) 58 - { 59 - GElf_Addr dummy; 60 - int fd; 61 - 62 - fd = open(path, O_RDONLY); 63 - if (fd < 0) 64 - return fd; 65 - 66 - dbg->dwfl = dwfl_begin(&offline_callbacks); 67 - if (!dbg->dwfl) 68 - goto error; 69 - 70 - dwfl_report_begin(dbg->dwfl); 71 - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); 72 - if (!dbg->mod) 73 - goto error; 74 - 75 - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); 76 - if (!dbg->dbg) 77 - goto error; 78 - 79 - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); 80 - 81 - dwfl_report_end(dbg->dwfl, NULL, NULL); 82 - 83 - return 0; 84 - error: 85 - if (dbg->dwfl) 86 - dwfl_end(dbg->dwfl); 87 - else 88 - close(fd); 89 - memset(dbg, 0, sizeof(*dbg)); 90 - 91 - return -ENOENT; 92 - } 93 - 94 - static struct debuginfo *__debuginfo__new(const char *path) 95 - { 96 - struct debuginfo *dbg = zalloc(sizeof(*dbg)); 97 - if (!dbg) 98 - return NULL; 99 - 100 - if (debuginfo__init_offline_dwarf(dbg, path) < 0) 101 - zfree(&dbg); 102 - if (dbg) 103 - pr_debug("Open Debuginfo file: 
%s\n", path); 104 - return dbg; 105 - } 106 - 107 - enum dso_binary_type distro_dwarf_types[] = { 108 - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, 109 - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, 110 - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, 111 - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, 112 - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, 113 - DSO_BINARY_TYPE__NOT_FOUND, 114 - }; 115 - 116 - struct debuginfo *debuginfo__new(const char *path) 117 - { 118 - enum dso_binary_type *type; 119 - char buf[PATH_MAX], nil = '\0'; 120 - struct dso *dso; 121 - struct debuginfo *dinfo = NULL; 122 - struct build_id bid; 123 - 124 - /* Try to open distro debuginfo files */ 125 - dso = dso__new(path); 126 - if (!dso) 127 - goto out; 128 - 129 - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ 130 - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) 131 - dso__set_build_id(dso, &bid); 132 - 133 - for (type = distro_dwarf_types; 134 - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; 135 - type++) { 136 - if (dso__read_binary_type_filename(dso, *type, &nil, 137 - buf, PATH_MAX) < 0) 138 - continue; 139 - dinfo = __debuginfo__new(buf); 140 - } 141 - dso__put(dso); 142 - 143 - out: 144 - /* if failed to open all distro debuginfo, open given binary */ 145 - return dinfo ? 
: __debuginfo__new(path); 146 - } 147 - 148 - void debuginfo__delete(struct debuginfo *dbg) 149 - { 150 - if (dbg) { 151 - if (dbg->dwfl) 152 - dwfl_end(dbg->dwfl); 153 - free(dbg); 154 - } 155 - } 156 36 157 37 /* 158 38 * Probe finder related functions ··· 604 722 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); 605 723 if (ret <= 0 || nops == 0) { 606 724 pf->fb_ops = NULL; 607 - #if _ELFUTILS_PREREQ(0, 142) 725 + #ifdef HAVE_DWARF_CFI_SUPPORT 608 726 } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && 609 727 (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { 610 728 if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && ··· 615 733 free(frame); 616 734 return -ENOENT; 617 735 } 618 - #endif 736 + #endif /* HAVE_DWARF_CFI_SUPPORT */ 619 737 } 620 738 621 739 /* Call finder's callback handler */ ··· 1140 1258 1141 1259 pf->machine = ehdr.e_machine; 1142 1260 1143 - #if _ELFUTILS_PREREQ(0, 142) 1261 + #ifdef HAVE_DWARF_CFI_SUPPORT 1144 1262 do { 1145 1263 GElf_Shdr shdr; 1146 1264 ··· 1150 1268 1151 1269 pf->cfi_dbg = dwarf_getcfi(dbg->dbg); 1152 1270 } while (0); 1153 - #endif 1271 + #endif /* HAVE_DWARF_CFI_SUPPORT */ 1154 1272 1155 1273 ret = debuginfo__find_probe_location(dbg, pf); 1156 1274 return ret; ··· 1559 1677 return (ret < 0) ? 
ret : af.nvls; 1560 1678 } 1561 1679 1562 - /* For the kernel module, we need a special code to get a DIE */ 1563 - int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, 1564 - bool adjust_offset) 1565 - { 1566 - int n, i; 1567 - Elf32_Word shndx; 1568 - Elf_Scn *scn; 1569 - Elf *elf; 1570 - GElf_Shdr mem, *shdr; 1571 - const char *p; 1572 - 1573 - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); 1574 - if (!elf) 1575 - return -EINVAL; 1576 - 1577 - /* Get the number of relocations */ 1578 - n = dwfl_module_relocations(dbg->mod); 1579 - if (n < 0) 1580 - return -ENOENT; 1581 - /* Search the relocation related .text section */ 1582 - for (i = 0; i < n; i++) { 1583 - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); 1584 - if (strcmp(p, ".text") == 0) { 1585 - /* OK, get the section header */ 1586 - scn = elf_getscn(elf, shndx); 1587 - if (!scn) 1588 - return -ENOENT; 1589 - shdr = gelf_getshdr(scn, &mem); 1590 - if (!shdr) 1591 - return -ENOENT; 1592 - *offs = shdr->sh_addr; 1593 - if (adjust_offset) 1594 - *offs -= shdr->sh_offset; 1595 - } 1596 - } 1597 - return 0; 1598 - } 1599 - 1600 1680 /* Reverse search */ 1601 1681 int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, 1602 1682 struct perf_probe_point *ppt) ··· 1853 2009 return (ret < 0) ? 
ret : lf.found; 1854 2010 } 1855 2011 1856 - #ifdef HAVE_DEBUGINFOD_SUPPORT 1857 - /* debuginfod doesn't require the comp_dir but buildid is required */ 1858 - static int get_source_from_debuginfod(const char *raw_path, 1859 - const char *sbuild_id, char **new_path) 1860 - { 1861 - debuginfod_client *c = debuginfod_begin(); 1862 - const char *p = raw_path; 1863 - int fd; 1864 - 1865 - if (!c) 1866 - return -ENOMEM; 1867 - 1868 - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, 1869 - 0, p, new_path); 1870 - pr_debug("Search %s from debuginfod -> %d\n", p, fd); 1871 - if (fd >= 0) 1872 - close(fd); 1873 - debuginfod_end(c); 1874 - if (fd < 0) { 1875 - pr_debug("Failed to find %s in debuginfod (%s)\n", 1876 - raw_path, sbuild_id); 1877 - return -ENOENT; 1878 - } 1879 - pr_debug("Got a source %s\n", *new_path); 1880 - 1881 - return 0; 1882 - } 1883 - #else 1884 - static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, 1885 - const char *sbuild_id __maybe_unused, 1886 - char **new_path __maybe_unused) 1887 - { 1888 - return -ENOTSUP; 1889 - } 1890 - #endif 1891 2012 /* 1892 2013 * Find a src file from a DWARF tag path. Prepend optional source path prefix 1893 2014 * and chop off leading directories that do not exist. Result is passed back as
+1 -18
tools/perf/util/probe-finder.h
··· 24 24 #ifdef HAVE_DWARF_SUPPORT 25 25 26 26 #include "dwarf-aux.h" 27 - 28 - /* TODO: export debuginfo data structure even if no dwarf support */ 29 - 30 - /* debug information structure */ 31 - struct debuginfo { 32 - Dwarf *dbg; 33 - Dwfl_Module *mod; 34 - Dwfl *dwfl; 35 - Dwarf_Addr bias; 36 - const unsigned char *build_id; 37 - }; 38 - 39 - /* This also tries to open distro debuginfo */ 40 - struct debuginfo *debuginfo__new(const char *path); 41 - void debuginfo__delete(struct debuginfo *dbg); 27 + #include "debuginfo.h" 42 28 43 29 /* Find probe_trace_events specified by perf_probe_event from debuginfo */ 44 30 int debuginfo__find_trace_events(struct debuginfo *dbg, ··· 34 48 /* Find a perf_probe_point from debuginfo */ 35 49 int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, 36 50 struct perf_probe_point *ppt); 37 - 38 - int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, 39 - bool adjust_offset); 40 51 41 52 /* Find a line range */ 42 53 int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
+2 -2
tools/perf/util/record.c
··· 237 237 238 238 evsel = evlist__last(temp_evlist); 239 239 240 - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { 241 - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); 240 + if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { 241 + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); 242 242 243 243 if (cpus) 244 244 cpu = perf_cpu_map__cpu(cpus, 0);
+2
tools/perf/util/s390-cpumcf-kernel.h
··· 12 12 #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ 13 13 #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ 14 14 #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ 15 + #define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ 16 + #define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ 15 17 16 18 struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ 17 19 unsigned int def:16; /* 0-15 Data Entry Format */
+106 -12
tools/perf/util/s390-sample-raw.c
··· 51 51 struct cf_trailer_entry *te; 52 52 struct cf_ctrset_entry *cep, ce; 53 53 54 - if (!len) 55 - return false; 56 54 while (offset < len) { 57 55 cep = (struct cf_ctrset_entry *)(buf + offset); 58 56 ce.def = be16_to_cpu(cep->def); ··· 123 125 return 128; 124 126 case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ 125 127 return 448; 128 + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ 129 + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ 130 + return setnr; 126 131 default: 127 132 return -1; 128 133 } ··· 213 212 } 214 213 } 215 214 216 - /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events 217 - * and if the event was triggered by a counter set diagnostic event display 218 - * its raw data. 219 - * The function is only invoked when the dump flag -D is set. 215 + #pragma GCC diagnostic push 216 + #pragma GCC diagnostic ignored "-Wpacked" 217 + #pragma GCC diagnostic ignored "-Wattributes" 218 + /* 219 + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. 220 220 */ 221 - void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) 221 + struct pai_data { /* Event number and value */ 222 + u16 event_nr; 223 + u64 event_val; 224 + } __packed; 225 + 226 + #pragma GCC diagnostic pop 227 + 228 + /* 229 + * Test for valid raw data. At least one PAI event should be in the raw 230 + * data section. 
231 + */ 232 + static bool s390_pai_all_test(struct perf_sample *sample) 222 233 { 234 + size_t len = sample->raw_size; 235 + 236 + if (len < 0xa) 237 + return false; 238 + return true; 239 + } 240 + 241 + static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) 242 + { 243 + size_t len = sample->raw_size, offset = 0; 244 + unsigned char *p = sample->raw_data; 245 + const char *color = PERF_COLOR_BLUE; 246 + struct pai_data pai_data; 247 + char *ev_name; 248 + 249 + while (offset < len) { 250 + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); 251 + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); 252 + p += sizeof(pai_data.event_nr); 253 + offset += sizeof(pai_data.event_nr); 254 + 255 + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); 256 + pai_data.event_val = be64_to_cpu(pai_data.event_val); 257 + p += sizeof(pai_data.event_val); 258 + offset += sizeof(pai_data.event_val); 259 + 260 + ev_name = get_counter_name(evsel->core.attr.config, 261 + pai_data.event_nr, evsel->pmu); 262 + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", 263 + pai_data.event_nr, ev_name ?: "<unknown>", 264 + pai_data.event_val); 265 + free(ev_name); 266 + 267 + if (offset + 0xa > len) 268 + break; 269 + } 270 + color_fprintf(stdout, color, "\n"); 271 + } 272 + 273 + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events 274 + * and if the event was triggered by a 275 + * - counter set diagnostic event 276 + * - processor activity assist (PAI) crypto counter event 277 + * - processor activity assist (PAI) neural network processor assist (NNPA) 278 + * counter event 279 + * display its raw data. 280 + * The function is only invoked when the dump flag -D is set. 281 + * 282 + * Function evlist__s390_sample_raw() is defined as call back after it has 283 + * been verified that the perf.data file was created on s390 platform. 
284 + */ 285 + void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, 286 + struct perf_sample *sample) 287 + { 288 + const char *pai_name; 223 289 struct evsel *evsel; 224 290 225 291 if (event->header.type != PERF_RECORD_SAMPLE) 226 292 return; 227 293 228 294 evsel = evlist__event2evsel(evlist, event); 229 - if (evsel == NULL || 230 - evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) 295 + if (!evsel) 296 + return; 297 + 298 + /* Check for raw data in sample */ 299 + if (!sample->raw_size || !sample->raw_data) 231 300 return; 232 301 233 302 /* Display raw data on screen */ 234 - if (!s390_cpumcfdg_testctr(sample)) { 235 - pr_err("Invalid counter set data encountered\n"); 303 + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { 304 + if (!evsel->pmu) 305 + evsel->pmu = perf_pmus__find("cpum_cf"); 306 + if (!s390_cpumcfdg_testctr(sample)) 307 + pr_err("Invalid counter set data encountered\n"); 308 + else 309 + s390_cpumcfdg_dump(evsel->pmu, sample); 236 310 return; 237 311 } 238 - s390_cpumcfdg_dump(evsel->pmu, sample); 312 + 313 + switch (evsel->core.attr.config) { 314 + case PERF_EVENT_PAI_NNPA_ALL: 315 + pai_name = "NNPA_ALL"; 316 + break; 317 + case PERF_EVENT_PAI_CRYPTO_ALL: 318 + pai_name = "CRYPTO_ALL"; 319 + break; 320 + default: 321 + return; 322 + } 323 + 324 + if (!s390_pai_all_test(sample)) { 325 + pr_err("Invalid %s raw data encountered\n", pai_name); 326 + } else { 327 + if (!evsel->pmu) 328 + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); 329 + s390_pai_all_dump(evsel, sample); 330 + } 239 331 }
+1
tools/perf/util/sample.h
··· 113 113 void *raw_data; 114 114 struct ip_callchain *callchain; 115 115 struct branch_stack *branch_stack; 116 + u64 *branch_stack_cntr; 116 117 struct regs_dump user_regs; 117 118 struct regs_dump intr_regs; 118 119 struct stack_dump user_stack;
+3
tools/perf/util/scripting-engines/trace-event-perl.c
··· 490 490 scripting_context->session = session; 491 491 492 492 command_line = malloc((argc + 2) * sizeof(const char *)); 493 + if (!command_line) 494 + return -ENOMEM; 495 + 493 496 command_line[0] = ""; 494 497 command_line[1] = script; 495 498 for (i = 2; i < argc + 2; i++)
+15 -2
tools/perf/util/scripting-engines/trace-event-python.c
··· 353 353 354 354 if (is_array) { 355 355 list = PyList_New(field->arraylen); 356 + if (!list) 357 + Py_FatalError("couldn't create Python list"); 356 358 item_size = field->size / field->arraylen; 357 359 n_items = field->arraylen; 358 360 } else { ··· 756 754 } 757 755 } 758 756 759 - static void set_regs_in_dict(PyObject *dict, 757 + static int set_regs_in_dict(PyObject *dict, 760 758 struct perf_sample *sample, 761 759 struct evsel *evsel) 762 760 { ··· 772 770 */ 773 771 int size = __sw_hweight64(attr->sample_regs_intr) * 28; 774 772 char *bf = malloc(size); 773 + if (!bf) 774 + return -1; 775 775 776 776 regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); 777 777 ··· 785 781 pydict_set_item_string_decref(dict, "uregs", 786 782 _PyUnicode_FromString(bf)); 787 783 free(bf); 784 + 785 + return 0; 788 786 } 789 787 790 788 static void set_sym_in_dict(PyObject *dict, struct addr_location *al, ··· 926 920 PyLong_FromUnsignedLongLong(sample->cyc_cnt)); 927 921 } 928 922 929 - set_regs_in_dict(dict, sample, evsel); 923 + if (set_regs_in_dict(dict, sample, evsel)) 924 + Py_FatalError("Failed to setting regs in dict"); 930 925 931 926 return dict; 932 927 } ··· 1925 1918 scripting_context->session = session; 1926 1919 #if PY_MAJOR_VERSION < 3 1927 1920 command_line = malloc((argc + 1) * sizeof(const char *)); 1921 + if (!command_line) 1922 + return -1; 1923 + 1928 1924 command_line[0] = script; 1929 1925 for (i = 1; i < argc + 1; i++) 1930 1926 command_line[i] = argv[i - 1]; 1931 1927 PyImport_AppendInittab(name, initperf_trace_context); 1932 1928 #else 1933 1929 command_line = malloc((argc + 1) * sizeof(wchar_t *)); 1930 + if (!command_line) 1931 + return -1; 1932 + 1934 1933 command_line[0] = Py_DecodeLocale(script, NULL); 1935 1934 for (i = 1; i < argc + 1; i++) 1936 1935 command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+18 -2
tools/perf/util/session.c
··· 115 115 return -1; 116 116 } 117 117 118 + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { 119 + /* Auxiliary events may reference exited threads, hold onto dead ones. */ 120 + symbol_conf.keep_exited_threads = true; 121 + } 122 + 118 123 if (perf_data__is_pipe(data)) 119 124 return 0; 120 125 ··· 1155 1150 i, callchain->ips[i]); 1156 1151 } 1157 1152 1158 - static void branch_stack__printf(struct perf_sample *sample, bool callstack) 1153 + static void branch_stack__printf(struct perf_sample *sample, 1154 + struct evsel *evsel) 1159 1155 { 1160 1156 struct branch_entry *entries = perf_sample__branch_entries(sample); 1157 + bool callstack = evsel__has_branch_callstack(evsel); 1158 + u64 *branch_stack_cntr = sample->branch_stack_cntr; 1159 + struct perf_env *env = evsel__env(evsel); 1161 1160 uint64_t i; 1162 1161 1163 1162 if (!callstack) { ··· 1202 1193 printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from); 1203 1194 } 1204 1195 } 1196 + } 1197 + 1198 + if (branch_stack_cntr) { 1199 + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", 1200 + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); 1201 + for (i = 0; i < sample->branch_stack->nr; i++) 1202 + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); 1205 1203 } 1206 1204 } 1207 1205 ··· 1371 1355 callchain__printf(evsel, sample); 1372 1356 1373 1357 if (evsel__has_br_stack(evsel)) 1374 - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); 1358 + branch_stack__printf(sample, evsel); 1375 1359 1376 1360 if (sample_type & PERF_SAMPLE_REGS_USER) 1377 1361 regs_user__printf(sample, arch);
+207 -9
tools/perf/util/sort.c
··· 24 24 #include "strbuf.h" 25 25 #include "mem-events.h" 26 26 #include "annotate.h" 27 + #include "annotate-data.h" 27 28 #include "event.h" 28 29 #include "time-utils.h" 29 30 #include "cgroup.h" ··· 419 418 .se_width_idx = HISTC_SYMBOL, 420 419 }; 421 420 421 + /* --sort symoff */ 422 + 423 + static int64_t 424 + sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) 425 + { 426 + int64_t ret; 427 + 428 + ret = sort__sym_cmp(left, right); 429 + if (ret) 430 + return ret; 431 + 432 + return left->ip - right->ip; 433 + } 434 + 435 + static int64_t 436 + sort__symoff_sort(struct hist_entry *left, struct hist_entry *right) 437 + { 438 + int64_t ret; 439 + 440 + ret = sort__sym_sort(left, right); 441 + if (ret) 442 + return ret; 443 + 444 + return left->ip - right->ip; 445 + } 446 + 447 + static int 448 + hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) 449 + { 450 + struct symbol *sym = he->ms.sym; 451 + 452 + if (sym == NULL) 453 + return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip); 454 + 455 + return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start); 456 + } 457 + 458 + struct sort_entry sort_sym_offset = { 459 + .se_header = "Symbol Offset", 460 + .se_cmp = sort__symoff_cmp, 461 + .se_sort = sort__symoff_sort, 462 + .se_snprintf = hist_entry__symoff_snprintf, 463 + .se_filter = hist_entry__sym_filter, 464 + .se_width_idx = HISTC_SYMBOL_OFFSET, 465 + }; 466 + 422 467 /* --sort srcline */ 423 468 424 469 char *hist_entry__srcline(struct hist_entry *he) ··· 630 583 { 631 584 632 585 struct symbol *sym = he->ms.sym; 633 - struct annotation *notes; 586 + struct annotated_branch *branch; 634 587 double ipc = 0.0, coverage = 0.0; 635 588 char tmp[64]; 636 589 637 590 if (!sym) 638 591 return repsep_snprintf(bf, size, "%-*s", width, "-"); 639 592 640 - notes = symbol__annotation(sym); 593 + branch = symbol__annotation(sym)->branch; 641 594 642 - 
if (notes->hit_cycles) 643 - ipc = notes->hit_insn / ((double)notes->hit_cycles); 595 + if (branch && branch->hit_cycles) 596 + ipc = branch->hit_insn / ((double)branch->hit_cycles); 644 597 645 - if (notes->total_insn) { 646 - coverage = notes->cover_insn * 100.0 / 647 - ((double)notes->total_insn); 598 + if (branch && branch->total_insn) { 599 + coverage = branch->cover_insn * 100.0 / 600 + ((double)branch->total_insn); 648 601 } 649 602 650 603 snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); ··· 2141 2094 .se_width_idx = HISTC_DSO_SIZE, 2142 2095 }; 2143 2096 2144 - /* --sort dso_size */ 2097 + /* --sort addr */ 2145 2098 2146 2099 static int64_t 2147 2100 sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) ··· 2176 2129 .se_cmp = sort__addr_cmp, 2177 2130 .se_snprintf = hist_entry__addr_snprintf, 2178 2131 .se_width_idx = HISTC_ADDR, 2132 + }; 2133 + 2134 + /* --sort type */ 2135 + 2136 + struct annotated_data_type unknown_type = { 2137 + .self = { 2138 + .type_name = (char *)"(unknown)", 2139 + .children = LIST_HEAD_INIT(unknown_type.self.children), 2140 + }, 2141 + }; 2142 + 2143 + static int64_t 2144 + sort__type_cmp(struct hist_entry *left, struct hist_entry *right) 2145 + { 2146 + return sort__addr_cmp(left, right); 2147 + } 2148 + 2149 + static void sort__type_init(struct hist_entry *he) 2150 + { 2151 + if (he->mem_type) 2152 + return; 2153 + 2154 + he->mem_type = hist_entry__get_data_type(he); 2155 + if (he->mem_type == NULL) { 2156 + he->mem_type = &unknown_type; 2157 + he->mem_type_off = 0; 2158 + } 2159 + } 2160 + 2161 + static int64_t 2162 + sort__type_collapse(struct hist_entry *left, struct hist_entry *right) 2163 + { 2164 + struct annotated_data_type *left_type = left->mem_type; 2165 + struct annotated_data_type *right_type = right->mem_type; 2166 + 2167 + if (!left_type) { 2168 + sort__type_init(left); 2169 + left_type = left->mem_type; 2170 + } 2171 + 2172 + if (!right_type) { 2173 + sort__type_init(right); 2174 + 
right_type = right->mem_type; 2175 + } 2176 + 2177 + return strcmp(left_type->self.type_name, right_type->self.type_name); 2178 + } 2179 + 2180 + static int64_t 2181 + sort__type_sort(struct hist_entry *left, struct hist_entry *right) 2182 + { 2183 + return sort__type_collapse(left, right); 2184 + } 2185 + 2186 + static int hist_entry__type_snprintf(struct hist_entry *he, char *bf, 2187 + size_t size, unsigned int width) 2188 + { 2189 + return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name); 2190 + } 2191 + 2192 + struct sort_entry sort_type = { 2193 + .se_header = "Data Type", 2194 + .se_cmp = sort__type_cmp, 2195 + .se_collapse = sort__type_collapse, 2196 + .se_sort = sort__type_sort, 2197 + .se_init = sort__type_init, 2198 + .se_snprintf = hist_entry__type_snprintf, 2199 + .se_width_idx = HISTC_TYPE, 2200 + }; 2201 + 2202 + /* --sort typeoff */ 2203 + 2204 + static int64_t 2205 + sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) 2206 + { 2207 + struct annotated_data_type *left_type = left->mem_type; 2208 + struct annotated_data_type *right_type = right->mem_type; 2209 + int64_t ret; 2210 + 2211 + if (!left_type) { 2212 + sort__type_init(left); 2213 + left_type = left->mem_type; 2214 + } 2215 + 2216 + if (!right_type) { 2217 + sort__type_init(right); 2218 + right_type = right->mem_type; 2219 + } 2220 + 2221 + ret = strcmp(left_type->self.type_name, right_type->self.type_name); 2222 + if (ret) 2223 + return ret; 2224 + return left->mem_type_off - right->mem_type_off; 2225 + } 2226 + 2227 + static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, 2228 + int offset, bool first) 2229 + { 2230 + struct annotated_member *child; 2231 + 2232 + if (list_empty(&m->children)) 2233 + return; 2234 + 2235 + list_for_each_entry(child, &m->children, node) { 2236 + if (child->offset <= offset && offset < child->offset + child->size) { 2237 + int len = 0; 2238 + 2239 + /* It can have anonymous struct/union members */ 
2240 + if (child->var_name) { 2241 + len = scnprintf(buf, sz, "%s%s", 2242 + first ? "" : ".", child->var_name); 2243 + first = false; 2244 + } 2245 + 2246 + fill_member_name(buf + len, sz - len, child, offset, first); 2247 + return; 2248 + } 2249 + } 2250 + } 2251 + 2252 + static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, 2253 + size_t size, unsigned int width __maybe_unused) 2254 + { 2255 + struct annotated_data_type *he_type = he->mem_type; 2256 + char buf[4096]; 2257 + 2258 + buf[0] = '\0'; 2259 + if (list_empty(&he_type->self.children)) 2260 + snprintf(buf, sizeof(buf), "no field"); 2261 + else 2262 + fill_member_name(buf, sizeof(buf), &he_type->self, 2263 + he->mem_type_off, true); 2264 + buf[4095] = '\0'; 2265 + 2266 + return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name, 2267 + he->mem_type_off, buf); 2268 + } 2269 + 2270 + struct sort_entry sort_type_offset = { 2271 + .se_header = "Data Type Offset", 2272 + .se_cmp = sort__type_cmp, 2273 + .se_collapse = sort__typeoff_sort, 2274 + .se_sort = sort__typeoff_sort, 2275 + .se_init = sort__type_init, 2276 + .se_snprintf = hist_entry__typeoff_snprintf, 2277 + .se_width_idx = HISTC_TYPE_OFFSET, 2179 2278 }; 2180 2279 2181 2280 ··· 2378 2185 DIM(SORT_ADDR, "addr", sort_addr), 2379 2186 DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), 2380 2187 DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), 2381 - DIM(SORT_SIMD, "simd", sort_simd) 2188 + DIM(SORT_SIMD, "simd", sort_simd), 2189 + DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), 2190 + DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), 2191 + DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), 2382 2192 }; 2383 2193 2384 2194 #undef DIM ··· 3401 3205 list->thread = 1; 3402 3206 } else if (sd->entry == &sort_comm) { 3403 3207 list->comm = 1; 3208 + } else if (sd->entry == &sort_type_offset) { 3209 + symbol_conf.annotate_data_member = true; 3404 3210 } 3405 3211 3406 3212 
return __sort_dimension__add(sd, list, level);
+7
tools/perf/util/sort.h
··· 15 15 16 16 struct option; 17 17 struct thread; 18 + struct annotated_data_type; 18 19 19 20 extern regex_t parent_regex; 20 21 extern const char *sort_order; ··· 35 34 extern struct sort_entry sort_sym_from; 36 35 extern struct sort_entry sort_sym_to; 37 36 extern struct sort_entry sort_srcline; 37 + extern struct sort_entry sort_type; 38 38 extern const char default_mem_sort_order[]; 39 39 extern bool chk_double_cl; 40 40 ··· 113 111 u64 p_stage_cyc; 114 112 u8 cpumode; 115 113 u8 depth; 114 + int mem_type_off; 116 115 struct simd_flags simd_flags; 117 116 118 117 /* We are added by hists__add_dummy_entry. */ ··· 157 154 struct perf_hpp_list *hpp_list; 158 155 struct hist_entry *parent_he; 159 156 struct hist_entry_ops *ops; 157 + struct annotated_data_type *mem_type; 160 158 union { 161 159 /* this is for hierarchical entry structure */ 162 160 struct { ··· 247 243 SORT_LOCAL_RETIRE_LAT, 248 244 SORT_GLOBAL_RETIRE_LAT, 249 245 SORT_SIMD, 246 + SORT_ANNOTATE_DATA_TYPE, 247 + SORT_ANNOTATE_DATA_TYPE_OFFSET, 248 + SORT_SYM_OFFSET, 250 249 251 250 /* branch stack specific sort keys */ 252 251 __SORT_BRANCH_STACK,
+1 -1
tools/perf/util/stat-display.c
··· 898 898 899 899 static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) 900 900 { 901 - if (config->no_merge || hybrid_uniquify(counter, config)) 901 + if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) 902 902 uniquify_event_name(counter); 903 903 } 904 904
+1 -1
tools/perf/util/stat-shadow.c
··· 264 264 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 265 265 266 266 print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, 267 - "of all L1-icache accesses"); 267 + "of all LL-cache accesses"); 268 268 } 269 269 270 270 static void print_dtlb_miss(struct perf_stat_config *config,
+2 -2
tools/perf/util/stat.c
··· 315 315 if (!counter->per_pkg) 316 316 return 0; 317 317 318 - if (perf_cpu_map__empty(cpus)) 318 + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 319 319 return 0; 320 320 321 321 if (!mask) { ··· 592 592 { 593 593 struct evsel *evsel; 594 594 595 - if (config->no_merge) 595 + if (config->aggr_mode == AGGR_NONE) 596 596 return; 597 597 598 598 evlist__for_each_entry(evlist, evsel)
-1
tools/perf/util/stat.h
··· 76 76 bool null_run; 77 77 bool ru_display; 78 78 bool big_num; 79 - bool no_merge; 80 79 bool hybrid_merge; 81 80 bool walltime_run_table; 82 81 bool all_kernel;
+2 -4
tools/perf/util/symbol-elf.c
··· 1392 1392 map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); 1393 1393 map__set_end(map, map__start(map) + shdr->sh_size); 1394 1394 map__set_pgoff(map, shdr->sh_offset); 1395 - map__set_map_ip(map, map__dso_map_ip); 1396 - map__set_unmap_ip(map, map__dso_unmap_ip); 1395 + map__set_mapping_type(map, MAPPING_TYPE__DSO); 1397 1396 /* Ensure maps are correctly ordered */ 1398 1397 if (kmaps) { 1399 1398 int err; ··· 1454 1455 map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); 1455 1456 map__set_pgoff(curr_map, shdr->sh_offset); 1456 1457 } else { 1457 - map__set_map_ip(curr_map, identity__map_ip); 1458 - map__set_unmap_ip(curr_map, identity__map_ip); 1458 + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); 1459 1459 } 1460 1460 curr_dso->symtab_type = dso->symtab_type; 1461 1461 if (maps__insert(kmaps, curr_map))
+6 -4
tools/perf/util/symbol-minimal.c
··· 159 159 goto out_free; 160 160 161 161 ret = read_build_id(buf, buf_size, bid, need_swap); 162 - if (ret == 0) 162 + if (ret == 0) { 163 163 ret = bid->size; 164 - break; 164 + break; 165 + } 165 166 } 166 167 } else { 167 168 Elf64_Ehdr ehdr; ··· 211 210 goto out_free; 212 211 213 212 ret = read_build_id(buf, buf_size, bid, need_swap); 214 - if (ret == 0) 213 + if (ret == 0) { 215 214 ret = bid->size; 216 - break; 215 + break; 216 + } 217 217 } 218 218 } 219 219 out_free:
+29 -277
tools/perf/util/symbol.c
··· 48 48 int vmlinux_path__nr_entries; 49 49 char **vmlinux_path; 50 50 51 - struct map_list_node { 52 - struct list_head node; 53 - struct map *map; 54 - }; 55 - 56 51 struct symbol_conf symbol_conf = { 57 52 .nanosecs = false, 58 53 .use_modules = true, ··· 84 89 }; 85 90 86 91 #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) 87 - 88 - static struct map_list_node *map_list_node__new(void) 89 - { 90 - return malloc(sizeof(struct map_list_node)); 91 - } 92 92 93 93 static bool symbol_type__filter(char symbol_type) 94 94 { ··· 258 268 /* Last entry */ 259 269 if (curr->end == curr->start) 260 270 curr->end = roundup(curr->start, 4096) + 4096; 261 - } 262 - 263 - void maps__fixup_end(struct maps *maps) 264 - { 265 - struct map_rb_node *prev = NULL, *curr; 266 - 267 - down_write(maps__lock(maps)); 268 - 269 - maps__for_each_entry(maps, curr) { 270 - if (prev != NULL && !map__end(prev->map)) 271 - map__set_end(prev->map, map__start(curr->map)); 272 - 273 - prev = curr; 274 - } 275 - 276 - /* 277 - * We still haven't the actual symbols, so guess the 278 - * last map final address. 
279 - */ 280 - if (curr && !map__end(curr->map)) 281 - map__set_end(curr->map, ~0ULL); 282 - 283 - up_write(maps__lock(maps)); 284 271 } 285 272 286 273 struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) ··· 923 956 return -1; 924 957 } 925 958 926 - map__set_map_ip(curr_map, identity__map_ip); 927 - map__set_unmap_ip(curr_map, identity__map_ip); 959 + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); 928 960 if (maps__insert(kmaps, curr_map)) { 929 961 dso__put(ndso); 930 962 return -1; ··· 1114 1148 return ret; 1115 1149 } 1116 1150 1151 + static int do_validate_kcore_modules_cb(struct map *old_map, void *data) 1152 + { 1153 + struct rb_root *modules = data; 1154 + struct module_info *mi; 1155 + struct dso *dso; 1156 + 1157 + if (!__map__is_kmodule(old_map)) 1158 + return 0; 1159 + 1160 + dso = map__dso(old_map); 1161 + /* Module must be in memory at the same address */ 1162 + mi = find_module(dso->short_name, modules); 1163 + if (!mi || mi->start != map__start(old_map)) 1164 + return -EINVAL; 1165 + 1166 + return 0; 1167 + } 1168 + 1117 1169 static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) 1118 1170 { 1119 1171 struct rb_root modules = RB_ROOT; 1120 - struct map_rb_node *old_node; 1121 1172 int err; 1122 1173 1123 1174 err = read_proc_modules(filename, &modules); 1124 1175 if (err) 1125 1176 return err; 1126 1177 1127 - maps__for_each_entry(kmaps, old_node) { 1128 - struct map *old_map = old_node->map; 1129 - struct module_info *mi; 1130 - struct dso *dso; 1178 + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); 1131 1179 1132 - if (!__map__is_kmodule(old_map)) { 1133 - continue; 1134 - } 1135 - dso = map__dso(old_map); 1136 - /* Module must be in memory at the same address */ 1137 - mi = find_module(dso->short_name, &modules); 1138 - if (!mi || mi->start != map__start(old_map)) { 1139 - err = -EINVAL; 1140 - goto out; 1141 - } 1142 - } 1143 - out: 1144 1180 
delete_modules(&modules); 1145 1181 return err; 1146 1182 } ··· 1239 1271 return 0; 1240 1272 } 1241 1273 1242 - /* 1243 - * Merges map into maps by splitting the new map within the existing map 1244 - * regions. 1245 - */ 1246 - int maps__merge_in(struct maps *kmaps, struct map *new_map) 1274 + static bool remove_old_maps(struct map *map, void *data) 1247 1275 { 1248 - struct map_rb_node *rb_node; 1249 - LIST_HEAD(merged); 1250 - int err = 0; 1276 + const struct map *map_to_save = data; 1251 1277 1252 - maps__for_each_entry(kmaps, rb_node) { 1253 - struct map *old_map = rb_node->map; 1254 - 1255 - /* no overload with this one */ 1256 - if (map__end(new_map) < map__start(old_map) || 1257 - map__start(new_map) >= map__end(old_map)) 1258 - continue; 1259 - 1260 - if (map__start(new_map) < map__start(old_map)) { 1261 - /* 1262 - * |new...... 1263 - * |old.... 1264 - */ 1265 - if (map__end(new_map) < map__end(old_map)) { 1266 - /* 1267 - * |new......| -> |new..| 1268 - * |old....| -> |old....| 1269 - */ 1270 - map__set_end(new_map, map__start(old_map)); 1271 - } else { 1272 - /* 1273 - * |new.............| -> |new..| |new..| 1274 - * |old....| -> |old....| 1275 - */ 1276 - struct map_list_node *m = map_list_node__new(); 1277 - 1278 - if (!m) { 1279 - err = -ENOMEM; 1280 - goto out; 1281 - } 1282 - 1283 - m->map = map__clone(new_map); 1284 - if (!m->map) { 1285 - free(m); 1286 - err = -ENOMEM; 1287 - goto out; 1288 - } 1289 - 1290 - map__set_end(m->map, map__start(old_map)); 1291 - list_add_tail(&m->node, &merged); 1292 - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); 1293 - map__set_start(new_map, map__end(old_map)); 1294 - } 1295 - } else { 1296 - /* 1297 - * |new...... 1298 - * |old.... 
1299 - */ 1300 - if (map__end(new_map) < map__end(old_map)) { 1301 - /* 1302 - * |new..| -> x 1303 - * |old.........| -> |old.........| 1304 - */ 1305 - map__put(new_map); 1306 - new_map = NULL; 1307 - break; 1308 - } else { 1309 - /* 1310 - * |new......| -> |new...| 1311 - * |old....| -> |old....| 1312 - */ 1313 - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); 1314 - map__set_start(new_map, map__end(old_map)); 1315 - } 1316 - } 1317 - } 1318 - 1319 - out: 1320 - while (!list_empty(&merged)) { 1321 - struct map_list_node *old_node; 1322 - 1323 - old_node = list_entry(merged.next, struct map_list_node, node); 1324 - list_del_init(&old_node->node); 1325 - if (!err) 1326 - err = maps__insert(kmaps, old_node->map); 1327 - map__put(old_node->map); 1328 - free(old_node); 1329 - } 1330 - 1331 - if (new_map) { 1332 - if (!err) 1333 - err = maps__insert(kmaps, new_map); 1334 - map__put(new_map); 1335 - } 1336 - return err; 1278 + /* 1279 + * We need to preserve eBPF maps even if they are covered by kcore, 1280 + * because we need to access eBPF dso for source data. 1281 + */ 1282 + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); 1337 1283 } 1338 1284 1339 1285 static int dso__load_kcore(struct dso *dso, struct map *map, ··· 1256 1374 struct maps *kmaps = map__kmaps(map); 1257 1375 struct kcore_mapfn_data md; 1258 1376 struct map *replacement_map = NULL; 1259 - struct map_rb_node *old_node, *next; 1260 1377 struct machine *machine; 1261 1378 bool is_64_bit; 1262 1379 int err, fd; ··· 1302 1421 } 1303 1422 1304 1423 /* Remove old maps */ 1305 - maps__for_each_entry_safe(kmaps, old_node, next) { 1306 - struct map *old_map = old_node->map; 1307 - 1308 - /* 1309 - * We need to preserve eBPF maps even if they are 1310 - * covered by kcore, because we need to access 1311 - * eBPF dso for source data. 
1312 - */ 1313 - if (old_map != map && !__map__is_bpf_prog(old_map)) 1314 - maps__remove(kmaps, old_map); 1315 - } 1424 + maps__remove_maps(kmaps, remove_old_maps, map); 1316 1425 machine->trampolines_mapped = false; 1317 1426 1318 1427 /* Find the kernel map using the '_stext' symbol */ ··· 1346 1475 map__set_start(map, map__start(new_map)); 1347 1476 map__set_end(map, map__end(new_map)); 1348 1477 map__set_pgoff(map, map__pgoff(new_map)); 1349 - map__set_map_ip(map, map__map_ip_ptr(new_map)); 1350 - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); 1478 + map__set_mapping_type(map, map__mapping_type(new_map)); 1351 1479 /* Ensure maps are correctly ordered */ 1352 1480 map_ref = map__get(map); 1353 1481 maps__remove(kmaps, map_ref); ··· 1935 2065 nsinfo__mountns_exit(&nsc); 1936 2066 1937 2067 return ret; 1938 - } 1939 - 1940 - static int map__strcmp(const void *a, const void *b) 1941 - { 1942 - const struct map *map_a = *(const struct map **)a; 1943 - const struct map *map_b = *(const struct map **)b; 1944 - const struct dso *dso_a = map__dso(map_a); 1945 - const struct dso *dso_b = map__dso(map_b); 1946 - int ret = strcmp(dso_a->short_name, dso_b->short_name); 1947 - 1948 - if (ret == 0 && map_a != map_b) { 1949 - /* 1950 - * Ensure distinct but name equal maps have an order in part to 1951 - * aid reference counting. 
1952 - */ 1953 - ret = (int)map__start(map_a) - (int)map__start(map_b); 1954 - if (ret == 0) 1955 - ret = (int)((intptr_t)map_a - (intptr_t)map_b); 1956 - } 1957 - 1958 - return ret; 1959 - } 1960 - 1961 - static int map__strcmp_name(const void *name, const void *b) 1962 - { 1963 - const struct dso *dso = map__dso(*(const struct map **)b); 1964 - 1965 - return strcmp(name, dso->short_name); 1966 - } 1967 - 1968 - void __maps__sort_by_name(struct maps *maps) 1969 - { 1970 - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); 1971 - } 1972 - 1973 - static int map__groups__sort_by_name_from_rbtree(struct maps *maps) 1974 - { 1975 - struct map_rb_node *rb_node; 1976 - struct map **maps_by_name = realloc(maps__maps_by_name(maps), 1977 - maps__nr_maps(maps) * sizeof(struct map *)); 1978 - int i = 0; 1979 - 1980 - if (maps_by_name == NULL) 1981 - return -1; 1982 - 1983 - up_read(maps__lock(maps)); 1984 - down_write(maps__lock(maps)); 1985 - 1986 - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; 1987 - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); 1988 - 1989 - maps__for_each_entry(maps, rb_node) 1990 - maps_by_name[i++] = map__get(rb_node->map); 1991 - 1992 - __maps__sort_by_name(maps); 1993 - 1994 - up_write(maps__lock(maps)); 1995 - down_read(maps__lock(maps)); 1996 - 1997 - return 0; 1998 - } 1999 - 2000 - static struct map *__maps__find_by_name(struct maps *maps, const char *name) 2001 - { 2002 - struct map **mapp; 2003 - 2004 - if (maps__maps_by_name(maps) == NULL && 2005 - map__groups__sort_by_name_from_rbtree(maps)) 2006 - return NULL; 2007 - 2008 - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), 2009 - sizeof(*mapp), map__strcmp_name); 2010 - if (mapp) 2011 - return *mapp; 2012 - return NULL; 2013 - } 2014 - 2015 - struct map *maps__find_by_name(struct maps *maps, const char *name) 2016 - { 2017 - struct map_rb_node *rb_node; 2018 - struct map *map; 2019 - 2020 - 
down_read(maps__lock(maps)); 2021 - 2022 - 2023 - if (RC_CHK_ACCESS(maps)->last_search_by_name) { 2024 - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); 2025 - 2026 - if (strcmp(dso->short_name, name) == 0) { 2027 - map = RC_CHK_ACCESS(maps)->last_search_by_name; 2028 - goto out_unlock; 2029 - } 2030 - } 2031 - /* 2032 - * If we have maps->maps_by_name, then the name isn't in the rbtree, 2033 - * as maps->maps_by_name mirrors the rbtree when lookups by name are 2034 - * made. 2035 - */ 2036 - map = __maps__find_by_name(maps, name); 2037 - if (map || maps__maps_by_name(maps) != NULL) 2038 - goto out_unlock; 2039 - 2040 - /* Fallback to traversing the rbtree... */ 2041 - maps__for_each_entry(maps, rb_node) { 2042 - struct dso *dso; 2043 - 2044 - map = rb_node->map; 2045 - dso = map__dso(map); 2046 - if (strcmp(dso->short_name, name) == 0) { 2047 - RC_CHK_ACCESS(maps)->last_search_by_name = map; 2048 - goto out_unlock; 2049 - } 2050 - } 2051 - map = NULL; 2052 - 2053 - out_unlock: 2054 - up_read(maps__lock(maps)); 2055 - return map; 2056 2068 } 2057 2069 2058 2070 int dso__load_vmlinux(struct dso *dso, struct map *map,
-1
tools/perf/util/symbol.h
··· 189 189 void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); 190 190 void symbols__fixup_duplicate(struct rb_root_cached *symbols); 191 191 void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); 192 - void maps__fixup_end(struct maps *maps); 193 192 194 193 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); 195 194 int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+5 -1
tools/perf/util/symbol_conf.h
··· 42 42 inline_name, 43 43 disable_add2line_warn, 44 44 buildid_mmap2, 45 - guest_code; 45 + guest_code, 46 + lazy_load_kernel_maps, 47 + keep_exited_threads, 48 + annotate_data_member, 49 + annotate_data_sample; 46 50 const char *vmlinux_name, 47 51 *kallsyms_name, 48 52 *source_prefix,
+67 -51
tools/perf/util/synthetic-events.c
··· 665 665 } 666 666 #endif 667 667 668 + struct perf_event__synthesize_modules_maps_cb_args { 669 + struct perf_tool *tool; 670 + perf_event__handler_t process; 671 + struct machine *machine; 672 + union perf_event *event; 673 + }; 674 + 675 + static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) 676 + { 677 + struct perf_event__synthesize_modules_maps_cb_args *args = data; 678 + union perf_event *event = args->event; 679 + struct dso *dso; 680 + size_t size; 681 + 682 + if (!__map__is_kmodule(map)) 683 + return 0; 684 + 685 + dso = map__dso(map); 686 + if (symbol_conf.buildid_mmap2) { 687 + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); 688 + event->mmap2.header.type = PERF_RECORD_MMAP2; 689 + event->mmap2.header.size = (sizeof(event->mmap2) - 690 + (sizeof(event->mmap2.filename) - size)); 691 + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); 692 + event->mmap2.header.size += args->machine->id_hdr_size; 693 + event->mmap2.start = map__start(map); 694 + event->mmap2.len = map__size(map); 695 + event->mmap2.pid = args->machine->pid; 696 + 697 + memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); 698 + 699 + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); 700 + } else { 701 + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); 702 + event->mmap.header.type = PERF_RECORD_MMAP; 703 + event->mmap.header.size = (sizeof(event->mmap) - 704 + (sizeof(event->mmap.filename) - size)); 705 + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); 706 + event->mmap.header.size += args->machine->id_hdr_size; 707 + event->mmap.start = map__start(map); 708 + event->mmap.len = map__size(map); 709 + event->mmap.pid = args->machine->pid; 710 + 711 + memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); 712 + } 713 + 714 + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) 715 + return -1; 716 + 717 + return 0; 718 + } 
719 + 668 720 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, 669 721 struct machine *machine) 670 722 { 671 - int rc = 0; 672 - struct map_rb_node *pos; 723 + int rc; 673 724 struct maps *maps = machine__kernel_maps(machine); 674 - union perf_event *event; 675 - size_t size = symbol_conf.buildid_mmap2 ? 676 - sizeof(event->mmap2) : sizeof(event->mmap); 725 + struct perf_event__synthesize_modules_maps_cb_args args = { 726 + .tool = tool, 727 + .process = process, 728 + .machine = machine, 729 + }; 730 + size_t size = symbol_conf.buildid_mmap2 731 + ? sizeof(args.event->mmap2) 732 + : sizeof(args.event->mmap); 677 733 678 - event = zalloc(size + machine->id_hdr_size); 679 - if (event == NULL) { 734 + args.event = zalloc(size + machine->id_hdr_size); 735 + if (args.event == NULL) { 680 736 pr_debug("Not enough memory synthesizing mmap event " 681 737 "for kernel modules\n"); 682 738 return -1; ··· 743 687 * __perf_event_mmap 744 688 */ 745 689 if (machine__is_host(machine)) 746 - event->header.misc = PERF_RECORD_MISC_KERNEL; 690 + args.event->header.misc = PERF_RECORD_MISC_KERNEL; 747 691 else 748 - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 692 + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 749 693 750 - maps__for_each_entry(maps, pos) { 751 - struct map *map = pos->map; 752 - struct dso *dso; 694 + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); 753 695 754 - if (!__map__is_kmodule(map)) 755 - continue; 756 - 757 - dso = map__dso(map); 758 - if (symbol_conf.buildid_mmap2) { 759 - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); 760 - event->mmap2.header.type = PERF_RECORD_MMAP2; 761 - event->mmap2.header.size = (sizeof(event->mmap2) - 762 - (sizeof(event->mmap2.filename) - size)); 763 - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); 764 - event->mmap2.header.size += machine->id_hdr_size; 765 - event->mmap2.start = map__start(map); 766 - 
event->mmap2.len = map__size(map); 767 - event->mmap2.pid = machine->pid; 768 - 769 - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); 770 - 771 - perf_record_mmap2__read_build_id(&event->mmap2, machine, false); 772 - } else { 773 - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); 774 - event->mmap.header.type = PERF_RECORD_MMAP; 775 - event->mmap.header.size = (sizeof(event->mmap) - 776 - (sizeof(event->mmap.filename) - size)); 777 - memset(event->mmap.filename + size, 0, machine->id_hdr_size); 778 - event->mmap.header.size += machine->id_hdr_size; 779 - event->mmap.start = map__start(map); 780 - event->mmap.len = map__size(map); 781 - event->mmap.pid = machine->pid; 782 - 783 - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); 784 - } 785 - 786 - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { 787 - rc = -1; 788 - break; 789 - } 790 - } 791 - 792 - free(event); 696 + free(args.event); 793 697 return rc; 794 698 } 795 699
+20 -22
tools/perf/util/thread.c
··· 345 345 if (ret) 346 346 return ret; 347 347 348 - maps__fixup_overlappings(thread__maps(thread), map, stderr); 349 - return maps__insert(thread__maps(thread), map); 348 + return maps__fixup_overlap_and_insert(thread__maps(thread), map); 350 349 } 351 350 352 - static int __thread__prepare_access(struct thread *thread) 351 + struct thread__prepare_access_maps_cb_args { 352 + int err; 353 + struct maps *maps; 354 + }; 355 + 356 + static int thread__prepare_access_maps_cb(struct map *map, void *data) 353 357 { 354 358 bool initialized = false; 355 - int err = 0; 356 - struct maps *maps = thread__maps(thread); 357 - struct map_rb_node *rb_node; 359 + struct thread__prepare_access_maps_cb_args *args = data; 358 360 359 - down_read(maps__lock(maps)); 361 + args->err = unwind__prepare_access(args->maps, map, &initialized); 360 362 361 - maps__for_each_entry(maps, rb_node) { 362 - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); 363 - if (err || initialized) 364 - break; 365 - } 366 - 367 - up_read(maps__lock(maps)); 368 - 369 - return err; 363 + return (args->err || initialized) ? 
1 : 0; 370 364 } 371 365 372 366 static int thread__prepare_access(struct thread *thread) 373 367 { 374 - int err = 0; 368 + struct thread__prepare_access_maps_cb_args args = { 369 + .err = 0, 370 + }; 375 371 376 - if (dwarf_callchain_users) 377 - err = __thread__prepare_access(thread); 372 + if (dwarf_callchain_users) { 373 + args.maps = thread__maps(thread); 374 + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); 375 + } 378 376 379 - return err; 377 + return args.err; 380 378 } 381 379 382 380 static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) ··· 383 385 if (thread__pid(thread) == thread__pid(parent)) 384 386 return thread__prepare_access(thread); 385 387 386 - if (thread__maps(thread) == thread__maps(parent)) { 388 + if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) { 387 389 pr_debug("broken map groups on thread %d/%d parent %d/%d\n", 388 390 thread__pid(thread), thread__tid(thread), 389 391 thread__pid(parent), thread__tid(parent)); 390 392 return 0; 391 393 } 392 394 /* But this one is new process, copy maps. */ 393 - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; 395 + return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; 394 396 } 395 397 396 398 int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
+14
tools/perf/util/thread.h
··· 36 36 }; 37 37 38 38 DECLARE_RC_STRUCT(thread) { 39 + /** @maps: mmaps associated with this thread. */ 39 40 struct maps *maps; 40 41 pid_t pid_; /* Not all tools update this */ 42 + /** @tid: thread ID number unique to a machine. */ 41 43 pid_t tid; 44 + /** @ppid: parent process of the process this thread belongs to. */ 42 45 pid_t ppid; 43 46 int cpu; 44 47 int guest_cpu; /* For QEMU thread */ 45 48 refcount_t refcnt; 49 + /** 50 + * @exited: Has the thread had an exit event. Such threads are usually 51 + * removed from the machine's threads but some events/tools require 52 + * access to dead threads. 53 + */ 54 + bool exited; 46 55 bool comm_set; 47 56 int comm_len; 48 57 struct list_head namespaces_list; ··· 196 187 static inline refcount_t *thread__refcnt(struct thread *thread) 197 188 { 198 189 return &RC_CHK_ACCESS(thread)->refcnt; 190 + } 191 + 192 + static inline void thread__set_exited(struct thread *thread, bool exited) 193 + { 194 + RC_CHK_ACCESS(thread)->exited = exited; 199 195 } 200 196 201 197 static inline bool thread__comm_set(const struct thread *thread)
+4 -5
tools/perf/util/top.c
··· 28 28 struct record_opts *opts = &top->record_opts; 29 29 struct target *target = &opts->target; 30 30 size_t ret = 0; 31 + int nr_cpus; 31 32 32 33 if (top->samples) { 33 34 samples_per_sec = top->samples / top->delay_secs; ··· 94 93 else 95 94 ret += SNPRINTF(bf + ret, size - ret, " (all"); 96 95 96 + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); 97 97 if (target->cpu_list) 98 98 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", 99 - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 100 - ? "s" : "", 99 + nr_cpus > 1 ? "s" : "", 101 100 target->cpu_list); 102 101 else { 103 102 if (target->tid) 104 103 ret += SNPRINTF(bf + ret, size - ret, ")"); 105 104 else 106 105 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", 107 - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), 108 - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 109 - ? "s" : ""); 106 + nr_cpus, nr_cpus > 1 ? "s" : ""); 110 107 } 111 108 112 109 perf_top__reset_sample_counters(top);
-1
tools/perf/util/top.h
··· 21 21 struct perf_tool tool; 22 22 struct evlist *evlist, *sb_evlist; 23 23 struct record_opts record_opts; 24 - struct annotation_options annotation_opts; 25 24 struct evswitch evswitch; 26 25 /* 27 26 * Symbols will be added here in perf_event__process_sample and will
+17 -4
tools/perf/util/unwind-libdw.c
··· 46 46 { 47 47 Dwfl_Module *mod; 48 48 struct dso *dso = NULL; 49 + Dwarf_Addr base; 49 50 /* 50 51 * Some callers will use al->sym, so we can't just use the 51 52 * cheaper thread__find_map() here. ··· 59 58 if (!dso) 60 59 return 0; 61 60 61 + /* 62 + * The generated JIT DSO files only map the code segment without 63 + * ELF headers. Since JIT codes used to be packed in a memory 64 + * segment, calculating the base address using pgoff falls into 65 + * a different code in another DSO. So just use the map->start 66 + * directly to pick the correct one. 67 + */ 68 + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) 69 + base = map__start(al->map); 70 + else 71 + base = map__start(al->map) - map__pgoff(al->map); 72 + 62 73 mod = dwfl_addrmodule(ui->dwfl, ip); 63 74 if (mod) { 64 75 Dwarf_Addr s; 65 76 66 77 dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); 67 - if (s != map__start(al->map) - map__pgoff(al->map)) 68 - mod = 0; 78 + if (s != base) 79 + mod = NULL; 69 80 } 70 81 71 82 if (!mod) { ··· 85 72 86 73 __symbol__join_symfs(filename, sizeof(filename), dso->long_name); 87 74 mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, 88 - map__start(al->map) - map__pgoff(al->map), false); 75 + base, false); 89 76 } 90 77 if (!mod) { 91 78 char filename[PATH_MAX]; 92 79 93 80 if (dso__build_id_filename(dso, filename, sizeof(filename), false)) 94 81 mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, 95 - map__start(al->map) - map__pgoff(al->map), false); 82 + base, false); 96 83 } 97 84 98 85 if (mod) {
+24 -10
tools/perf/util/unwind-libunwind-local.c
··· 302 302 return 0; 303 303 } 304 304 305 + struct read_unwind_spec_eh_frame_maps_cb_args { 306 + struct dso *dso; 307 + u64 base_addr; 308 + }; 309 + 310 + static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data) 311 + { 312 + 313 + struct read_unwind_spec_eh_frame_maps_cb_args *args = data; 314 + 315 + if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr) 316 + args->base_addr = map__start(map) - map__pgoff(map); 317 + 318 + return 0; 319 + } 320 + 321 + 305 322 static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, 306 323 u64 *table_data, u64 *segbase, 307 324 u64 *fde_count) 308 325 { 309 - struct map_rb_node *map_node; 310 - u64 base_addr = UINT64_MAX; 326 + struct read_unwind_spec_eh_frame_maps_cb_args args = { 327 + .dso = dso, 328 + .base_addr = UINT64_MAX, 329 + }; 311 330 int ret, fd; 312 331 313 332 if (dso->data.eh_frame_hdr_offset == 0) { ··· 344 325 return -EINVAL; 345 326 } 346 327 347 - maps__for_each_entry(thread__maps(ui->thread), map_node) { 348 - struct map *map = map_node->map; 349 - u64 start = map__start(map); 328 + maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args); 350 329 351 - if (map__dso(map) == dso && start < base_addr) 352 - base_addr = start; 353 - } 354 - base_addr -= dso->data.elf_base_addr; 330 + args.base_addr -= dso->data.elf_base_addr; 355 331 /* Address of .eh_frame_hdr */ 356 - *segbase = base_addr + dso->data.eh_frame_hdr_addr; 332 + *segbase = args.base_addr + dso->data.eh_frame_hdr_addr; 357 333 ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, 358 334 table_data, fde_count); 359 335 if (ret)
+23 -12
tools/perf/util/vdso.c
··· 140 140 return dso; 141 141 } 142 142 143 + struct machine__thread_dso_type_maps_cb_args { 144 + struct machine *machine; 145 + enum dso_type dso_type; 146 + }; 147 + 148 + static int machine__thread_dso_type_maps_cb(struct map *map, void *data) 149 + { 150 + struct machine__thread_dso_type_maps_cb_args *args = data; 151 + struct dso *dso = map__dso(map); 152 + 153 + if (!dso || dso->long_name[0] != '/') 154 + return 0; 155 + 156 + args->dso_type = dso__type(dso, args->machine); 157 + return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0; 158 + } 159 + 143 160 static enum dso_type machine__thread_dso_type(struct machine *machine, 144 161 struct thread *thread) 145 162 { 146 - enum dso_type dso_type = DSO__TYPE_UNKNOWN; 147 - struct map_rb_node *rb_node; 163 + struct machine__thread_dso_type_maps_cb_args args = { 164 + .machine = machine, 165 + .dso_type = DSO__TYPE_UNKNOWN, 166 + }; 148 167 149 - maps__for_each_entry(thread__maps(thread), rb_node) { 150 - struct dso *dso = map__dso(rb_node->map); 168 + maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args); 151 169 152 - if (!dso || dso->long_name[0] != '/') 153 - continue; 154 - dso_type = dso__type(dso, machine); 155 - if (dso_type != DSO__TYPE_UNKNOWN) 156 - break; 157 - } 158 - 159 - return dso_type; 170 + return args.dso_type; 160 171 } 161 172 162 173 #if BITS_PER_LONG == 64
+33 -30
tools/perf/util/zstd.c
··· 7 7 8 8 int zstd_init(struct zstd_data *data, int level) 9 9 { 10 - size_t ret; 11 - 12 - data->dstream = ZSTD_createDStream(); 13 - if (data->dstream == NULL) { 14 - pr_err("Couldn't create decompression stream.\n"); 15 - return -1; 16 - } 17 - 18 - ret = ZSTD_initDStream(data->dstream); 19 - if (ZSTD_isError(ret)) { 20 - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); 21 - return -1; 22 - } 23 - 24 - if (!level) 25 - return 0; 26 - 27 - data->cstream = ZSTD_createCStream(); 28 - if (data->cstream == NULL) { 29 - pr_err("Couldn't create compression stream.\n"); 30 - return -1; 31 - } 32 - 33 - ret = ZSTD_initCStream(data->cstream, level); 34 - if (ZSTD_isError(ret)) { 35 - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); 36 - return -1; 37 - } 38 - 10 + data->comp_level = level; 11 + data->dstream = NULL; 12 + data->cstream = NULL; 39 13 return 0; 40 14 } 41 15 ··· 28 54 return 0; 29 55 } 30 56 31 - size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 57 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 32 58 void *src, size_t src_size, size_t max_record_size, 33 59 size_t process_header(void *record, size_t increment)) 34 60 { ··· 36 62 ZSTD_inBuffer input = { src, src_size, 0 }; 37 63 ZSTD_outBuffer output; 38 64 void *record; 65 + 66 + if (!data->cstream) { 67 + data->cstream = ZSTD_createCStream(); 68 + if (data->cstream == NULL) { 69 + pr_err("Couldn't create compression stream.\n"); 70 + return -1; 71 + } 72 + 73 + ret = ZSTD_initCStream(data->cstream, data->comp_level); 74 + if (ZSTD_isError(ret)) { 75 + pr_err("Failed to initialize compression stream: %s\n", 76 + ZSTD_getErrorName(ret)); 77 + return -1; 78 + } 79 + } 39 80 40 81 while (input.pos < input.size) { 41 82 record = dst; ··· 85 96 ZSTD_inBuffer input = { src, src_size, 0 }; 86 97 ZSTD_outBuffer output = { dst, dst_size, 0 }; 87 98 99 + if 
(!data->dstream) { 100 + data->dstream = ZSTD_createDStream(); 101 + if (data->dstream == NULL) { 102 + pr_err("Couldn't create decompression stream.\n"); 103 + return 0; 104 + } 105 + 106 + ret = ZSTD_initDStream(data->dstream); 107 + if (ZSTD_isError(ret)) { 108 + pr_err("Failed to initialize decompression stream: %s\n", 109 + ZSTD_getErrorName(ret)); 110 + return 0; 111 + } 112 + } 88 113 while (input.pos < input.size) { 89 114 ret = ZSTD_decompressStream(data->dstream, &output, &input); 90 115 if (ZSTD_isError(ret)) {