Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-tools-2020-06-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tooling updates from Arnaldo Carvalho de Melo:
"These are additional changes to the perf tools, on top of what Ingo
already submitted.

- Further Intel PT call-trace fixes

- Improve SELinux docs and tool warnings

- Fix race at exit in 'perf record' using eventfd.

- Add missing build tests to the default set of 'make -C tools/perf
build-test'

- Sync msr-index.h getting new AMD MSRs to decode and filter in 'perf
trace'.

- Fix fallback to libaudit in 'perf trace' for arches not using
per-arch *.tbl files.

- Fixes for 'perf ftrace'.

- Fixes and improvements for the 'perf stat' metrics.

- Use dummy event to get PERF_RECORD_{FORK,MMAP,etc} while
synthesizing those metadata events for pre-existing threads.

- Fix leaks detected using clang tooling.

- Improvements to PMU event metric testing.

- Report summary for 'perf stat' interval mode at the end, summing up
all the intervals.

- Improve pipe mode, i.e. this now works as expected, continuously
dumping samples:

# perf record -g -e raw_syscalls:sys_enter | perf --no-pager script

- Fixes for event grouping, detecting incompatible groups such as:

# perf stat -e '{cycles,power/energy-cores/}' -v
WARNING: group events cpu maps do not match, disabling group:
anon group { power/energy-cores/, cycles }
power/energy-cores/: 0
cycles: 0-7

- Fixes for 'perf probe': blacklist address checking, number of
kretprobe instances, etc.

- JIT processing improvements and fixes plus the addition of a 'perf
test' entry for the java demangler.

 - Add support for synthesizing first/last level cache, TLB and remote
   access events from HW tracing in the auxtrace code, first to use is
   ARM SPE.

- Vendor events updates and fixes, including for POWER9 and Intel.

- Allow using ~/.perfconfig for removing the ',' separators in 'perf
stat' output.

- Opt-in support for libpfm4"

* tag 'perf-tools-2020-06-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (120 commits)
perf tools: Remove some duplicated includes
perf symbols: Fix kernel maps for kcore and eBPF
tools arch x86: Sync the msr-index.h copy with the kernel sources
perf stat: Ensure group is defined on top of the same cpu mask
perf libdw: Fix off-by 1 relative directory includes
perf arm-spe: Support synthetic events
perf auxtrace: Add four itrace options
perf tools: Move arm-spe-pkt-decoder.h/c to the new dir
perf test: Initialize memory in dwarf-unwind
perf tests: Don't tail call optimize in unwind test
tools compiler.h: Add attribute to disable tail calls
perf build: Add a LIBPFM4=1 build test entry
perf tools: Add optional support for libpfm4
perf tools: Correct license on jsmn JSON parser
perf jit: Fix inaccurate DWARF line table
perf jvmti: Remove redundant jitdump line table entries
perf build: Add NO_SDT=1 to the default set of build tests
perf build: Add NO_LIBCRYPTO=1 to the default set of build tests
perf build: Add NO_SYSCALL_TABLE=1 to the build tests
perf build: Remove libaudit from the default feature checks
...

+4309 -980
+3
tools/arch/x86/include/asm/msr-index.h
··· 301 301 #define MSR_PP1_ENERGY_STATUS 0x00000641 302 302 #define MSR_PP1_POLICY 0x00000642 303 303 304 + #define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b 305 + #define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 306 + 304 307 /* Config TDP MSRs */ 305 308 #define MSR_CONFIG_TDP_NOMINAL 0x00000648 306 309 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649
-2
tools/build/Makefile.feature
··· 40 40 glibc \ 41 41 gtk2 \ 42 42 gtk2-infobar \ 43 - libaudit \ 44 43 libbfd \ 45 44 libcap \ 46 45 libelf \ ··· 111 112 dwarf_getlocations \ 112 113 glibc \ 113 114 gtk2 \ 114 - libaudit \ 115 115 libbfd \ 116 116 libcap \ 117 117 libelf \
+1 -1
tools/build/feature/Makefile
··· 91 91 ############################### 92 92 93 93 $(OUTPUT)test-all.bin: 94 - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma 94 + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma 95 95 96 96 $(OUTPUT)test-hello.bin: 97 97 $(BUILD)
-5
tools/build/feature/test-all.c
··· 74 74 # include "test-libunwind.c" 75 75 #undef main 76 76 77 - #define main main_test_libaudit 78 - # include "test-libaudit.c" 79 - #undef main 80 - 81 77 #define main main_test_libslang 82 78 # include "test-libslang.c" 83 79 #undef main ··· 204 208 main_test_libelf_gelf_getnote(); 205 209 main_test_libelf_getshdrstrndx(); 206 210 main_test_libunwind(); 207 - main_test_libaudit(); 208 211 main_test_libslang(); 209 212 main_test_gtk2(argc, argv); 210 213 main_test_gtk2_infobar(argc, argv);
+12
tools/include/linux/compiler-gcc.h
··· 27 27 #define __pure __attribute__((pure)) 28 28 #endif 29 29 #define noinline __attribute__((noinline)) 30 + #ifdef __has_attribute 31 + #if __has_attribute(disable_tail_calls) 32 + #define __no_tail_call __attribute__((disable_tail_calls)) 33 + #endif 34 + #endif 35 + #ifndef __no_tail_call 36 + #if GCC_VERSION > 40201 37 + #define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) 38 + #else 39 + #define __no_tail_call 40 + #endif 41 + #endif 30 42 #ifndef __packed 31 43 #define __packed __attribute__((packed)) 32 44 #endif
+3
tools/include/linux/compiler.h
··· 47 47 #ifndef noinline 48 48 #define noinline 49 49 #endif 50 + #ifndef __no_tail_call 51 + #define __no_tail_call 52 + #endif 50 53 51 54 /* Are two types/vars the same type (ignoring qualifiers)? */ 52 55 #ifndef __same_type
+5 -1
tools/perf/Documentation/itrace.txt
··· 1 1 i synthesize instructions events 2 - b synthesize branches events 2 + b synthesize branches events (branch misses for Arm SPE) 3 3 c synthesize branches events (calls only) 4 4 r synthesize branches events (returns only) 5 5 x synthesize transactions events ··· 9 9 of aux-output (refer to perf record) 10 10 e synthesize error events 11 11 d create a debug log 12 + f synthesize first level cache events 13 + m synthesize last level cache events 14 + t synthesize TLB events 15 + a synthesize remote access events 12 16 g synthesize a call chain (use with i or x) 13 17 G synthesize a call chain on existing event records 14 18 l synthesize last branch entries (use with i or x)
+1 -1
tools/perf/Documentation/perf-c2c.txt
··· 40 40 -------------- 41 41 -e:: 42 42 --event=:: 43 - Select the PMU event. Use 'perf mem record -e list' 43 + Select the PMU event. Use 'perf c2c record -e list' 44 44 to list available events. 45 45 46 46 -v::
+5
tools/perf/Documentation/perf-config.txt
··· 667 667 Limit the size of ordered_events queue, so we could control 668 668 allocation size of perf data files without proper finished 669 669 round events. 670 + stat.*:: 671 + 672 + stat.big-num:: 673 + (boolean) Change the default for "--big-num". To make 674 + "--no-big-num" the default, set "stat.big-num=false". 670 675 671 676 intel-pt.*:: 672 677
+1 -1
tools/perf/Documentation/perf-intel-pt.txt
··· 687 687 PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes 688 688 are scheduled out and in, just not by whom, which is left for the 689 689 PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, 690 - which in turn requires CAP_SYS_ADMIN. 690 + which in turn requires CAP_PERFMON or CAP_SYS_ADMIN. 691 691 692 692 Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context 693 693 switches") commit, that introduces these metadata events for further info.
+14 -1
tools/perf/Documentation/perf-record.txt
··· 458 458 459 459 --switch-events:: 460 460 Record context switch events i.e. events of type PERF_RECORD_SWITCH or 461 - PERF_RECORD_SWITCH_CPU_WIDE. 461 + PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) 462 + switch events will be enabled automatically, which can be suppressed by 463 + by the option --no-switch-events. 462 464 463 465 --clang-path=PATH:: 464 466 Path to clang binary to use for compiling BPF scriptlets. ··· 614 612 --num-thread-synthesize:: 615 613 The number of threads to run when synthesizing events for existing processes. 616 614 By default, the number of threads equals 1. 615 + 616 + ifdef::HAVE_LIBPFM[] 617 + --pfm-events events:: 618 + Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) 619 + including support for event filters. For example '--pfm-events 620 + inst_retired:any_p:u:c=1:i'. More than one event can be passed to the 621 + option using the comma separator. Hardware events and generic hardware 622 + events cannot be mixed together. The latter must be used with the -e 623 + option. The -e option and this one can be mixed and matched. Events 624 + can be grouped using the {} notation. 625 + endif::HAVE_LIBPFM[] 617 626 618 627 SEE ALSO 619 628 --------
+32 -1
tools/perf/Documentation/perf-stat.txt
··· 71 71 --tid=<tid>:: 72 72 stat events on existing thread id (comma separated list) 73 73 74 + ifdef::HAVE_LIBPFM[] 75 + --pfm-events events:: 76 + Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) 77 + including support for event filters. For example '--pfm-events 78 + inst_retired:any_p:u:c=1:i'. More than one event can be passed to the 79 + option using the comma separator. Hardware events and generic hardware 80 + events cannot be mixed together. The latter must be used with the -e 81 + option. The -e option and this one can be mixed and matched. Events 82 + can be grouped using the {} notation. 83 + endif::HAVE_LIBPFM[] 74 84 75 85 -a:: 76 86 --all-cpus:: ··· 103 93 104 94 -B:: 105 95 --big-num:: 106 - print large numbers with thousands' separators according to locale 96 + print large numbers with thousands' separators according to locale. 97 + Enabled by default. Use "--no-big-num" to disable. 98 + Default setting can be changed with "perf config stat.big-num=false". 107 99 108 100 -C:: 109 101 --cpu=:: ··· 245 233 --transaction:: 246 234 247 235 Print statistics of transactional execution if supported. 236 + 237 + --metric-no-group:: 238 + By default, events to compute a metric are placed in weak groups. The 239 + group tries to enforce scheduling all or none of the events. The 240 + --metric-no-group option places events outside of groups and may 241 + increase the chance of the event being scheduled - leading to more 242 + accuracy. However, as events may not be scheduled together accuracy 243 + for metrics like instructions per cycle can be lower - as both metrics 244 + may no longer be being measured at the same time. 245 + 246 + --metric-no-merge:: 247 + By default metric events in different weak groups can be shared if one 248 + group contains all the events needed by another. In such cases one 249 + group will be eliminated reducing event multiplexing and making it so 250 + that certain groups of metrics sum to 100%. 
A downside to sharing a 251 + group is that the group may require multiplexing and so accuracy for a 252 + small group that need not have multiplexing is lowered. This option 253 + forbids the event merging logic from sharing events between groups and 254 + may be used to increase accuracy in this case. 248 255 249 256 STAT RECORD 250 257 -----------
+11
tools/perf/Documentation/perf-top.txt
··· 329 329 The known limitations include exception handing such as 330 330 setjmp/longjmp will have calls/returns not match. 331 331 332 + ifdef::HAVE_LIBPFM[] 333 + --pfm-events events:: 334 + Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) 335 + including support for event filters. For example '--pfm-events 336 + inst_retired:any_p:u:c=1:i'. More than one event can be passed to the 337 + option using the comma separator. Hardware events and generic hardware 338 + events cannot be mixed together. The latter must be used with the -e 339 + option. The -e option and this one can be mixed and matched. Events 340 + can be grouped using the {} notation. 341 + endif::HAVE_LIBPFM[] 342 + 332 343 INTERACTIVE PROMPTING KEYS 333 344 -------------------------- 334 345
+237
tools/perf/Documentation/security.txt
··· 1 + Overview 2 + ======== 3 + 4 + For general security related questions of perf_event_open() syscall usage, 5 + performance monitoring and observability operations by Perf see here: 6 + https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html 7 + 8 + Enabling LSM based mandatory access control (MAC) to perf_event_open() syscall 9 + ============================================================================== 10 + 11 + LSM hooks for mandatory access control for perf_event_open() syscall can be 12 + used starting from Linux v5.3. Below are the steps to extend Fedora (v31) with 13 + Targeted policy with perf_event_open() access control capabilities: 14 + 15 + 1. Download selinux-policy SRPM package (e.g. selinux-policy-3.14.4-48.fc31.src.rpm on FC31) 16 + and install it so rpmbuild directory would exist in the current working directory: 17 + 18 + # rpm -Uhv selinux-policy-3.14.4-48.fc31.src.rpm 19 + 20 + 2. Get into rpmbuild/SPECS directory and unpack the source code: 21 + 22 + # rpmbuild -bp selinux-policy.spec 23 + 24 + 3. 
Place patch below at rpmbuild/BUILD/selinux-policy-b86eaaf4dbcf2d51dd4432df7185c0eaf3cbcc02 25 + directory and apply it: 26 + 27 + # patch -p1 < selinux-policy-perf-events-perfmon.patch 28 + patching file policy/flask/access_vectors 29 + patching file policy/flask/security_classes 30 + # cat selinux-policy-perf-events-perfmon.patch 31 + diff -Nura a/policy/flask/access_vectors b/policy/flask/access_vectors 32 + --- a/policy/flask/access_vectors 2020-02-04 18:19:53.000000000 +0300 33 + +++ b/policy/flask/access_vectors 2020-02-28 23:37:25.000000000 +0300 34 + @@ -174,6 +174,7 @@ 35 + wake_alarm 36 + block_suspend 37 + audit_read 38 + + perfmon 39 + } 40 + 41 + # 42 + @@ -1099,3 +1100,15 @@ 43 + 44 + class xdp_socket 45 + inherits socket 46 + + 47 + +class perf_event 48 + +{ 49 + + open 50 + + cpu 51 + + kernel 52 + + tracepoint 53 + + read 54 + + write 55 + +} 56 + + 57 + + 58 + diff -Nura a/policy/flask/security_classes b/policy/flask/security_classes 59 + --- a/policy/flask/security_classes 2020-02-04 18:19:53.000000000 +0300 60 + +++ b/policy/flask/security_classes 2020-02-28 21:35:17.000000000 +0300 61 + @@ -200,4 +200,6 @@ 62 + 63 + class xdp_socket 64 + 65 + +class perf_event 66 + + 67 + # FLASK 68 + 69 + 4. Get into rpmbuild/SPECS directory and build policy packages from patched sources: 70 + 71 + # rpmbuild --noclean --noprep -ba selinux-policy.spec 72 + 73 + so you have this: 74 + 75 + # ls -alh rpmbuild/RPMS/noarch/ 76 + total 33M 77 + drwxr-xr-x. 2 root root 4.0K Mar 20 12:16 . 78 + drwxr-xr-x. 3 root root 4.0K Mar 20 12:16 .. 79 + -rw-r--r--. 1 root root 112K Mar 20 12:16 selinux-policy-3.14.4-48.fc31.noarch.rpm 80 + -rw-r--r--. 1 root root 1.2M Mar 20 12:17 selinux-policy-devel-3.14.4-48.fc31.noarch.rpm 81 + -rw-r--r--. 1 root root 2.3M Mar 20 12:17 selinux-policy-doc-3.14.4-48.fc31.noarch.rpm 82 + -rw-r--r--. 1 root root 12M Mar 20 12:17 selinux-policy-minimum-3.14.4-48.fc31.noarch.rpm 83 + -rw-r--r--. 
1 root root 4.5M Mar 20 12:16 selinux-policy-mls-3.14.4-48.fc31.noarch.rpm 84 + -rw-r--r--. 1 root root 111K Mar 20 12:16 selinux-policy-sandbox-3.14.4-48.fc31.noarch.rpm 85 + -rw-r--r--. 1 root root 14M Mar 20 12:17 selinux-policy-targeted-3.14.4-48.fc31.noarch.rpm 86 + 87 + 5. Install SELinux packages from Fedora repo, if not already done so, and 88 + update with the patched rpms above: 89 + 90 + # rpm -Uhv rpmbuild/RPMS/noarch/selinux-policy-* 91 + 92 + 6. Enable SELinux Permissive mode for Targeted policy, if not already done so: 93 + 94 + # cat /etc/selinux/config 95 + 96 + # This file controls the state of SELinux on the system. 97 + # SELINUX= can take one of these three values: 98 + # enforcing - SELinux security policy is enforced. 99 + # permissive - SELinux prints warnings instead of enforcing. 100 + # disabled - No SELinux policy is loaded. 101 + SELINUX=permissive 102 + # SELINUXTYPE= can take one of these three values: 103 + # targeted - Targeted processes are protected, 104 + # minimum - Modification of targeted policy. Only selected processes are protected. 105 + # mls - Multi Level Security protection. 106 + SELINUXTYPE=targeted 107 + 108 + 7. Enable filesystem SELinux labeling at the next reboot: 109 + 110 + # touch /.autorelabel 111 + 112 + 8. Reboot machine and it will label filesystems and load Targeted policy into the kernel; 113 + 114 + 9. Login and check that dmesg output doesn't mention that perf_event class is unknown to SELinux subsystem; 115 + 116 + 10. Check that SELinux is enabled and in Permissive mode 117 + 118 + # getenforce 119 + Permissive 120 + 121 + 11. 
Turn SELinux into Enforcing mode: 122 + 123 + # setenforce 1 124 + # getenforce 125 + Enforcing 126 + 127 + Opening access to perf_event_open() syscall on Fedora with SELinux 128 + ================================================================== 129 + 130 + Access to performance monitoring and observability operations by Perf 131 + can be limited for superuser or CAP_PERFMON or CAP_SYS_ADMIN privileged 132 + processes. MAC policy settings (e.g. SELinux) can be loaded into the kernel 133 + and prevent unauthorized access to perf_event_open() syscall. In such case 134 + Perf tool provides a message similar to the one below: 135 + 136 + # perf stat 137 + Error: 138 + Access to performance monitoring and observability operations is limited. 139 + Enforced MAC policy settings (SELinux) can limit access to performance 140 + monitoring and observability operations. Inspect system audit records for 141 + more perf_event access control information and adjusting the policy. 142 + Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open 143 + access to performance monitoring and observability operations for users 144 + without CAP_PERFMON or CAP_SYS_ADMIN Linux capability. 145 + perf_event_paranoid setting is -1: 146 + -1: Allow use of (almost) all events by all users 147 + Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK 148 + >= 0: Disallow raw and ftrace function tracepoint access 149 + >= 1: Disallow CPU event access 150 + >= 2: Disallow kernel profiling 151 + To make the adjusted perf_event_paranoid setting permanent preserve it 152 + in /etc/sysctl.conf (e.g. 
kernel.perf_event_paranoid = <setting>) 153 + 154 + To make sure that access is limited by MAC policy settings inspect system 155 + audit records using journalctl command or /var/log/audit/audit.log so the 156 + output would contain AVC denied records related to perf_event: 157 + 158 + # journalctl --reverse --no-pager | grep perf_event 159 + 160 + python3[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. 161 + If you believe that perf should be allowed open access on perf_event labeled unconfined_t by default. 162 + setroubleshoot[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. For complete SELinux messages run: sealert -l 4595ce5b-e58f-462c-9d86-3bc2074935de 163 + audit[1318098]: AVC avc: denied { open } for pid=1318098 comm="perf" scontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=perf_event permissive=0 164 + 165 + In order to open access to perf_event_open() syscall MAC policy settings can 166 + require to be extended. On SELinux system this can be done by loading a special 167 + policy module extending base policy settings. Perf related policy module can 168 + be generated using the system audit records about blocking perf_event access. 
169 + Run the command below to generate my-perf.te policy extension file with 170 + perf_event related rules: 171 + 172 + # ausearch -c 'perf' --raw | audit2allow -M my-perf && cat my-perf.te 173 + 174 + module my-perf 1.0; 175 + 176 + require { 177 + type unconfined_t; 178 + class perf_event { cpu kernel open read tracepoint write }; 179 + } 180 + 181 + #============= unconfined_t ============== 182 + allow unconfined_t self:perf_event { cpu kernel open read tracepoint write }; 183 + 184 + Now compile, pack and load my-perf.pp extension module into the kernel: 185 + 186 + # checkmodule -M -m -o my-perf.mod my-perf.te 187 + # semodule_package -o my-perf.pp -m my-perf.mod 188 + # semodule -X 300 -i my-perf.pp 189 + 190 + After all those taken steps above access to perf_event_open() syscall should 191 + now be allowed by the policy settings. Check access running Perf like this: 192 + 193 + # perf stat 194 + ^C 195 + Performance counter stats for 'system wide': 196 + 197 + 36,387.41 msec cpu-clock # 7.999 CPUs utilized 198 + 2,629 context-switches # 0.072 K/sec 199 + 57 cpu-migrations # 0.002 K/sec 200 + 1 page-faults # 0.000 K/sec 201 + 263,721,559 cycles # 0.007 GHz 202 + 175,746,713 instructions # 0.67 insn per cycle 203 + 19,628,798 branches # 0.539 M/sec 204 + 1,259,201 branch-misses # 6.42% of all branches 205 + 206 + 4.549061439 seconds time elapsed 207 + 208 + The generated perf-event.pp related policy extension module can be removed 209 + from the kernel using this command: 210 + 211 + # semodule -X 300 -r my-perf 212 + 213 + Alternatively the module can be temporarily disabled and enabled back using 214 + these two commands: 215 + 216 + # semodule -d my-perf 217 + # semodule -e my-perf 218 + 219 + If something went wrong 220 + ======================= 221 + 222 + To turn SELinux into Permissive mode: 223 + # setenforce 0 224 + 225 + To fully disable SELinux during kernel boot [3] set kernel command line parameter selinux=0 226 + 227 + To remove SELinux 
labeling from local filesystems: 228 + # find / -mount -print0 | xargs -0 setfattr -h -x security.selinux 229 + 230 + To fully turn SELinux off a machine set SELINUX=disabled at /etc/selinux/config file and reboot; 231 + 232 + Links 233 + ===== 234 + 235 + [1] https://download-ib01.fedoraproject.org/pub/fedora/linux/updates/31/Everything/SRPMS/Packages/s/selinux-policy-3.14.4-49.fc31.src.rpm 236 + [2] https://docs.fedoraproject.org/en-US/Fedora/11/html/Security-Enhanced_Linux/sect-Security-Enhanced_Linux-Working_with_SELinux-Enabling_and_Disabling_SELinux.html 237 + [3] https://danwalsh.livejournal.com/10972.html
+33 -10
tools/perf/Makefile.config
··· 23 23 $(call detected_var,SRCARCH) 24 24 25 25 NO_PERF_REGS := 1 26 - NO_SYSCALL_TABLE := 1 26 + 27 + ifneq ($(NO_SYSCALL_TABLE),1) 28 + NO_SYSCALL_TABLE := 1 29 + 30 + ifeq ($(SRCARCH),x86) 31 + ifeq (${IS_64_BIT}, 1) 32 + NO_SYSCALL_TABLE := 0 33 + endif 34 + else 35 + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) 36 + NO_SYSCALL_TABLE := 0 37 + endif 38 + endif 39 + 40 + ifneq ($(NO_SYSCALL_TABLE),1) 41 + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT 42 + endif 43 + endif 27 44 28 45 # Additional ARCH settings for ppc 29 46 ifeq ($(SRCARCH),powerpc) 30 47 NO_PERF_REGS := 0 31 - NO_SYSCALL_TABLE := 0 32 48 CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated 33 49 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 34 50 endif ··· 53 37 ifeq ($(SRCARCH),x86) 54 38 $(call detected,CONFIG_X86) 55 39 ifeq (${IS_64_BIT}, 1) 56 - NO_SYSCALL_TABLE := 0 57 40 CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated 58 41 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S 59 42 LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma ··· 70 55 71 56 ifeq ($(SRCARCH),arm64) 72 57 NO_PERF_REGS := 0 73 - NO_SYSCALL_TABLE := 0 74 58 CFLAGS += -I$(OUTPUT)arch/arm64/include/generated 75 59 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 76 60 endif ··· 84 70 85 71 ifeq ($(ARCH),s390) 86 72 NO_PERF_REGS := 0 87 - NO_SYSCALL_TABLE := 0 88 73 CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated 89 74 endif 90 75 91 76 ifeq ($(NO_PERF_REGS),0) 92 77 $(call detected,CONFIG_PERF_REGS) 93 - endif 94 - 95 - ifneq ($(NO_SYSCALL_TABLE),1) 96 - CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT 97 78 endif 98 79 99 80 # So far there's only x86 and arm libdw unwind support merged in perf. 
··· 355 346 endif 356 347 357 348 ifeq ($(feature-eventfd), 1) 358 - CFLAGS += -DHAVE_EVENTFD 349 + CFLAGS += -DHAVE_EVENTFD_SUPPORT 359 350 endif 360 351 361 352 ifeq ($(feature-get_current_dir_name), 1) ··· 660 651 $(call detected,CONFIG_TRACE) 661 652 else 662 653 ifndef NO_LIBAUDIT 654 + $(call feature_check,libaudit) 663 655 ifneq ($(feature-libaudit), 1) 664 656 msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); 665 657 NO_LIBAUDIT := 1 ··· 1019 1009 endif 1020 1010 endif 1021 1011 endif 1012 + endif 1013 + endif 1014 + 1015 + ifdef LIBPFM4 1016 + $(call feature_check,libpfm4) 1017 + ifeq ($(feature-libpfm4), 1) 1018 + CFLAGS += -DHAVE_LIBPFM 1019 + EXTLIBS += -lpfm 1020 + ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1 1021 + $(call detected,CONFIG_LIBPFM4) 1022 + else 1023 + msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev); 1024 + NO_LIBPFM4 := 1 1022 1025 endif 1023 1026 endif 1024 1027
+11 -3
tools/perf/Makefile.perf
··· 118 118 # 119 119 # Define LIBBPF_DYNAMIC to enable libbpf dynamic linking. 120 120 # 121 + # Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables 122 + # generated from the kernel .tbl or unistd.h files and use, if available, libaudit 123 + # for doing the conversions to/from strings/id. 124 + # 125 + # Define LIBPFM4 to enable libpfm4 events extension. 126 + # 121 127 122 128 # As per kernel Makefile, avoid funny character set dependencies 123 129 unexport LC_ALL ··· 284 278 285 279 ifneq ($(OUTPUT),) 286 280 TE_PATH=$(OUTPUT) 281 + PLUGINS_PATH=$(OUTPUT) 287 282 BPF_PATH=$(OUTPUT) 288 283 SUBCMD_PATH=$(OUTPUT) 289 284 LIBPERF_PATH=$(OUTPUT) ··· 295 288 endif 296 289 else 297 290 TE_PATH=$(TRACE_EVENT_DIR) 291 + PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ 298 292 API_PATH=$(LIB_DIR) 299 293 BPF_PATH=$(BPF_DIR) 300 294 SUBCMD_PATH=$(SUBCMD_DIR) ··· 305 297 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a 306 298 export LIBTRACEEVENT 307 299 308 - LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list 300 + LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list 309 301 310 302 # 311 303 # The static build has no dynsym table, so this does not work for ··· 764 756 $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a 765 757 766 758 libtraceevent_plugins: FORCE 767 - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins 759 + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins 768 760 769 761 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins 770 - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list 762 + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list 771 763 772 764 $(LIBTRACEEVENT)-clean: 773 765 $(call QUIET_CLEAN, libtraceevent)
+4 -3
tools/perf/arch/arm/util/cs-etm.c
··· 216 216 struct evsel *evsel) 217 217 { 218 218 char msg[BUFSIZ], path[PATH_MAX], *sink; 219 - struct perf_evsel_config_term *term; 219 + struct evsel_config_term *term; 220 220 int ret = -EINVAL; 221 221 u32 hash; 222 222 ··· 224 224 return 0; 225 225 226 226 list_for_each_entry(term, &evsel->config_terms, list) { 227 - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) 227 + if (term->type != EVSEL__CONFIG_TERM_DRV_CFG) 228 228 continue; 229 229 230 230 sink = term->val.str; ··· 265 265 ptr->evlist = evlist; 266 266 ptr->snapshot_mode = opts->auxtrace_snapshot_mode; 267 267 268 - if (perf_can_record_switch_events()) 268 + if (!record_opts__no_switch_events(opts) && 269 + perf_can_record_switch_events()) 269 270 opts->record_switch_events = true; 270 271 271 272 evlist__for_each_entry(evlist, evsel) {
+3 -3
tools/perf/arch/arm64/util/unwind-libdw.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <elfutils/libdwfl.h> 3 - #include "../../util/unwind-libdw.h" 4 - #include "../../util/perf_regs.h" 5 - #include "../../util/event.h" 3 + #include "../../../util/unwind-libdw.h" 4 + #include "../../../util/perf_regs.h" 5 + #include "../../../util/event.h" 6 6 7 7 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) 8 8 {
+1
tools/perf/arch/powerpc/util/Build
··· 2 2 perf-y += kvm-stat.o 3 3 perf-y += perf_regs.o 4 4 perf-y += mem-events.o 5 + perf-y += sym-handling.o 5 6 6 7 perf-$(CONFIG_DWARF) += dwarf-regs.o 7 8 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
+3 -3
tools/perf/arch/powerpc/util/unwind-libdw.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <elfutils/libdwfl.h> 3 3 #include <linux/kernel.h> 4 - #include "../../util/unwind-libdw.h" 5 - #include "../../util/perf_regs.h" 6 - #include "../../util/event.h" 4 + #include "../../../util/unwind-libdw.h" 5 + #include "../../../util/perf_regs.h" 6 + #include "../../../util/event.h" 7 7 8 8 /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ 9 9 static const int special_regs[3][2] = {
+8
tools/perf/arch/x86/tests/dwarf-unwind.c
··· 55 55 return -1; 56 56 } 57 57 58 + #ifdef MEMORY_SANITIZER 59 + /* 60 + * Assignments to buf in the assembly function perf_regs_load aren't 61 + * seen by memory sanitizer. Zero the memory to convince memory 62 + * sanitizer the memory is initialized. 63 + */ 64 + memset(buf, 0, sizeof(u64) * PERF_REGS_MAX); 65 + #endif 58 66 perf_regs_load(buf); 59 67 regs->abi = PERF_SAMPLE_REGS_ABI; 60 68 regs->regs = buf;
+25 -16
tools/perf/arch/x86/util/intel-pt.c
··· 59 59 size_t priv_size; 60 60 }; 61 61 62 - static int intel_pt_parse_terms_with_default(struct list_head *formats, 62 + static int intel_pt_parse_terms_with_default(const char *pmu_name, 63 + struct list_head *formats, 63 64 const char *str, 64 65 u64 *config) 65 66 { ··· 79 78 goto out_free; 80 79 81 80 attr.config = *config; 82 - err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); 81 + err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true, 82 + NULL); 83 83 if (err) 84 84 goto out_free; 85 85 ··· 90 88 return err; 91 89 } 92 90 93 - static int intel_pt_parse_terms(struct list_head *formats, const char *str, 94 - u64 *config) 91 + static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats, 92 + const char *str, u64 *config) 95 93 { 96 94 *config = 0; 97 - return intel_pt_parse_terms_with_default(formats, str, config); 95 + return intel_pt_parse_terms_with_default(pmu_name, formats, str, 96 + config); 98 97 } 99 98 100 99 static u64 intel_pt_masked_bits(u64 mask, u64 bits) ··· 232 229 233 230 pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); 234 231 235 - intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); 232 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf, 233 + &config); 236 234 237 235 return config; 238 236 } ··· 341 337 if (priv_size != ptr->priv_size) 342 338 return -EINVAL; 343 339 344 - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); 345 - intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", 346 - &noretcomp_bit); 347 - intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); 340 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, 341 + "tsc", &tsc_bit); 342 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, 343 + "noretcomp", &noretcomp_bit); 344 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, 345 + "mtc", &mtc_bit); 348 346 mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, 349 347 
"mtc_period"); 350 - intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); 348 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, 349 + "cyc", &cyc_bit); 351 350 352 351 intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); 353 352 ··· 563 556 static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, 564 557 struct evsel *evsel) 565 558 { 566 - struct perf_evsel_config_term *term; 567 559 u64 user_bits = 0, bits; 560 + struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG); 568 561 569 - term = perf_evsel__get_config_term(evsel, CFG_CHG); 570 562 if (term) 571 563 user_bits = term->val.cfg_chg; 572 564 ··· 775 769 } 776 770 } 777 771 778 - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); 772 + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, 773 + "tsc", &tsc_bit); 779 774 780 775 if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit)) 781 776 have_timing_info = true; ··· 787 780 * Per-cpu recording needs sched_switch events to distinguish different 788 781 * threads. 789 782 */ 790 - if (have_timing_info && !perf_cpu_map__empty(cpus)) { 783 + if (have_timing_info && !perf_cpu_map__empty(cpus) && 784 + !record_opts__no_switch_events(opts)) { 791 785 if (perf_can_record_switch_events()) { 792 786 bool cpu_wide = !target__none(&opts->target) && 793 787 !target__has_task(&opts->target); ··· 883 875 * per-cpu with no sched_switch (except workload-only). 884 876 */ 885 877 if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && 886 - !target__none(&opts->target)) 878 + !target__none(&opts->target) && 879 + !intel_pt_evsel->core.attr.exclude_user) 887 880 ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); 888 881 889 882 return 0;
+3 -3
tools/perf/arch/x86/util/unwind-libdw.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <elfutils/libdwfl.h> 3 - #include "../../util/unwind-libdw.h" 4 - #include "../../util/perf_regs.h" 5 - #include "../../util/event.h" 3 + #include "../../../util/unwind-libdw.h" 4 + #include "../../../util/perf_regs.h" 5 + #include "../../../util/event.h" 6 6 7 7 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) 8 8 {
+2 -2
tools/perf/bench/epoll-ctl.c
··· 5 5 * Benchmark the various operations allowed for epoll_ctl(2). 6 6 * The idea is to concurrently stress a single epoll instance 7 7 */ 8 - #ifdef HAVE_EVENTFD 8 + #ifdef HAVE_EVENTFD_SUPPORT 9 9 /* For the CLR_() macros */ 10 10 #include <string.h> 11 11 #include <pthread.h> ··· 412 412 errmem: 413 413 err(EXIT_FAILURE, "calloc"); 414 414 } 415 - #endif // HAVE_EVENTFD 415 + #endif // HAVE_EVENTFD_SUPPORT
+2 -2
tools/perf/bench/epoll-wait.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - #ifdef HAVE_EVENTFD 2 + #ifdef HAVE_EVENTFD_SUPPORT 3 3 /* 4 4 * Copyright (C) 2018 Davidlohr Bueso. 5 5 * ··· 540 540 errmem: 541 541 err(EXIT_FAILURE, "calloc"); 542 542 } 543 - #endif // HAVE_EVENTFD 543 + #endif // HAVE_EVENTFD_SUPPORT
+1 -1
tools/perf/bench/sched-messaging.c
··· 40 40 unsigned int num_fds; 41 41 int ready_out; 42 42 int wakefd; 43 - int out_fds[0]; 43 + int out_fds[]; 44 44 }; 45 45 46 46 struct receiver_context {
+1 -1
tools/perf/builtin-annotate.c
··· 432 432 hists__collapse_resort(hists, NULL); 433 433 /* Don't sort callchain */ 434 434 evsel__reset_sample_bit(pos, CALLCHAIN); 435 - perf_evsel__output_resort(pos, NULL); 435 + evsel__output_resort(pos, NULL); 436 436 437 437 if (symbol_conf.event_group && !evsel__is_group_leader(pos)) 438 438 continue;
+3 -3
tools/perf/builtin-bench.c
··· 67 67 { NULL, NULL, NULL } 68 68 }; 69 69 70 - #ifdef HAVE_EVENTFD 70 + #ifdef HAVE_EVENTFD_SUPPORT 71 71 static struct bench epoll_benchmarks[] = { 72 72 { "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait }, 73 73 { "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl }, 74 74 { "all", "Run all futex benchmarks", NULL }, 75 75 { NULL, NULL, NULL } 76 76 }; 77 - #endif // HAVE_EVENTFD 77 + #endif // HAVE_EVENTFD_SUPPORT 78 78 79 79 static struct bench internals_benchmarks[] = { 80 80 { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, ··· 95 95 { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, 96 96 #endif 97 97 {"futex", "Futex stressing benchmarks", futex_benchmarks }, 98 - #ifdef HAVE_EVENTFD 98 + #ifdef HAVE_EVENTFD_SUPPORT 99 99 {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, 100 100 #endif 101 101 { "internals", "Perf-internals benchmarks", internals_benchmarks },
+8 -1
tools/perf/builtin-c2c.c
··· 2887 2887 { 2888 2888 bool *event_set = (bool *) opt->value; 2889 2889 2890 + if (!strcmp(str, "list")) { 2891 + perf_mem_events__list(); 2892 + exit(0); 2893 + } 2894 + if (perf_mem_events__parse(str)) 2895 + exit(-1); 2896 + 2890 2897 *event_set = true; 2891 - return perf_mem_events__parse(str); 2898 + return 0; 2892 2899 } 2893 2900 2894 2901
+1 -1
tools/perf/builtin-evlist.c
··· 34 34 return PTR_ERR(session); 35 35 36 36 evlist__for_each_entry(session->evlist, pos) { 37 - perf_evsel__fprintf(pos, details, stdout); 37 + evsel__fprintf(pos, details, stdout); 38 38 39 39 if (pos->core.attr.type == PERF_TYPE_TRACEPOINT) 40 40 has_tracepoint = true;
+12 -3
tools/perf/builtin-ftrace.c
··· 45 45 char name[]; 46 46 }; 47 47 48 + static volatile int workload_exec_errno; 48 49 static bool done; 49 50 50 51 static void sig_handler(int sig __maybe_unused) ··· 64 63 siginfo_t *info __maybe_unused, 65 64 void *ucontext __maybe_unused) 66 65 { 67 - /* workload_exec_errno = info->si_value.sival_int; */ 66 + workload_exec_errno = info->si_value.sival_int; 68 67 done = true; 69 68 } 70 69 ··· 384 383 385 384 write_tracing_file("tracing_on", "0"); 386 385 386 + if (workload_exec_errno) { 387 + const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); 388 + /* flush stdout first so below error msg appears at the end. */ 389 + fflush(stdout); 390 + pr_err("workload failed: %s\n", emsg); 391 + goto out_close_fd; 392 + } 393 + 387 394 /* read remaining buffer contents */ 388 395 while (true) { 389 396 int n = read(trace_fd, buf, sizeof(buf)); ··· 406 397 out_reset: 407 398 reset_tracing_files(ftrace); 408 399 out: 409 - return done ? 0 : -1; 400 + return (done && !workload_exec_errno) ? 0 : -1; 410 401 } 411 402 412 403 static int perf_ftrace_config(const char *var, const char *value, void *cb) ··· 503 494 argc = parse_options(argc, argv, ftrace_options, ftrace_usage, 504 495 PARSE_OPT_STOP_AT_NON_OPTION); 505 496 if (!argc && target__none(&ftrace.target)) 506 - usage_with_options(ftrace_usage, ftrace_options); 497 + ftrace.target.system_wide = true; 507 498 508 499 ret = target__validate(&ftrace.target); 509 500 if (ret) {
+1 -1
tools/perf/builtin-inject.c
··· 51 51 struct event_entry { 52 52 struct list_head node; 53 53 u32 tid; 54 - union perf_event event[0]; 54 + union perf_event event[]; 55 55 }; 56 56 57 57 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
+1 -1
tools/perf/builtin-list.c
··· 42 42 OPT_END() 43 43 }; 44 44 const char * const list_usage[] = { 45 - "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]", 45 + "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]", 46 46 NULL 47 47 }; 48 48
+7 -17
tools/perf/builtin-mem.c
··· 38 38 const char *str, int unset __maybe_unused) 39 39 { 40 40 struct perf_mem *mem = *(struct perf_mem **)opt->value; 41 - int j; 42 41 43 - if (strcmp(str, "list")) { 44 - if (!perf_mem_events__parse(str)) { 45 - mem->operation = 0; 46 - return 0; 47 - } 42 + if (!strcmp(str, "list")) { 43 + perf_mem_events__list(); 44 + exit(0); 45 + } 46 + if (perf_mem_events__parse(str)) 48 47 exit(-1); 49 - } 50 48 51 - for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 52 - struct perf_mem_event *e = &perf_mem_events[j]; 53 - 54 - fprintf(stderr, "%-13s%-*s%s\n", 55 - e->tag, 56 - verbose > 0 ? 25 : 0, 57 - verbose > 0 ? perf_mem_events__name(j) : "", 58 - e->supported ? ": available" : ""); 59 - } 60 - exit(0); 49 + mem->operation = 0; 50 + return 0; 61 51 } 62 52 63 53 static const char * const __usage[] = {
+3
tools/perf/builtin-probe.c
··· 364 364 365 365 for (k = 0; k < pev->ntevs; k++) { 366 366 struct probe_trace_event *tev = &pev->tevs[k]; 367 + /* Skipped events have no event name */ 368 + if (!tev->event) 369 + continue; 367 370 368 371 /* We use tev's name for showing new events */ 369 372 show_perf_probe_event(tev->group, tev->event, pev,
+62 -7
tools/perf/builtin-record.c
··· 45 45 #include "util/units.h" 46 46 #include "util/bpf-event.h" 47 47 #include "util/util.h" 48 + #include "util/pfm.h" 48 49 #include "asm/bug.h" 49 50 #include "perf.h" 50 51 ··· 57 56 #include <unistd.h> 58 57 #include <sched.h> 59 58 #include <signal.h> 59 + #ifdef HAVE_EVENTFD_SUPPORT 60 + #include <sys/eventfd.h> 61 + #endif 60 62 #include <sys/mman.h> 61 63 #include <sys/wait.h> 62 64 #include <sys/types.h> ··· 542 538 543 539 static volatile int signr = -1; 544 540 static volatile int child_finished; 541 + #ifdef HAVE_EVENTFD_SUPPORT 542 + static int done_fd = -1; 543 + #endif 545 544 546 545 static void sig_handler(int sig) 547 546 { ··· 554 547 signr = sig; 555 548 556 549 done = 1; 550 + #ifdef HAVE_EVENTFD_SUPPORT 551 + { 552 + u64 tmp = 1; 553 + /* 554 + * It is possible for this signal handler to run after done is checked 555 + * in the main loop, but before the perf counter fds are polled. If this 556 + * happens, the poll() will continue to wait even though done is set, 557 + * and will only break out if either another signal is received, or the 558 + * counters are ready for read. To ensure the poll() doesn't sleep when 559 + * done is set, use an eventfd (done_fd) to wake up the poll(). 560 + */ 561 + if (write(done_fd, &tmp, sizeof(tmp)) < 0) 562 + pr_err("failed to signal wakeup fd, error: %m\n"); 563 + } 564 + #endif // HAVE_EVENTFD_SUPPORT 557 565 } 558 566 559 567 static void sigsegv_handler(int sig) ··· 847 825 int rc = 0; 848 826 849 827 /* 850 - * For initial_delay we need to add a dummy event so that we can track 851 - * PERF_RECORD_MMAP while we wait for the initial delay to enable the 852 - * real events, the ones asked by the user. 828 + * For initial_delay or system wide, we need to add a dummy event so 829 + * that we can track PERF_RECORD_MMAP to cover the delay of waiting or 830 + * event synthesis. 
853 831 */ 854 - if (opts->initial_delay) { 832 + if (opts->initial_delay || target__has_cpu(&opts->target)) { 855 833 if (perf_evlist__add_dummy(evlist)) 856 834 return -ENOMEM; 857 835 836 + /* Disable tracking of mmaps on lead event. */ 858 837 pos = evlist__first(evlist); 859 838 pos->tracking = 0; 839 + /* Set up dummy event. */ 860 840 pos = evlist__last(evlist); 861 841 pos->tracking = 1; 862 - pos->core.attr.enable_on_exec = 1; 842 + /* 843 + * Enable the dummy event when the process is forked for 844 + * initial_delay, immediately for system wide. 845 + */ 846 + if (opts->initial_delay) 847 + pos->core.attr.enable_on_exec = 1; 848 + else 849 + pos->immediate = 1; 863 850 } 864 851 865 852 perf_evlist__config(evlist, opts, &callchain_param); ··· 1569 1538 pr_err("Compression initialization failed.\n"); 1570 1539 return -1; 1571 1540 } 1541 + #ifdef HAVE_EVENTFD_SUPPORT 1542 + done_fd = eventfd(0, EFD_NONBLOCK); 1543 + if (done_fd < 0) { 1544 + pr_err("Failed to create wakeup eventfd, error: %m\n"); 1545 + status = -1; 1546 + goto out_delete_session; 1547 + } 1548 + err = evlist__add_pollfd(rec->evlist, done_fd); 1549 + if (err < 0) { 1550 + pr_err("Failed to add wakeup eventfd to poll list\n"); 1551 + status = err; 1552 + goto out_delete_session; 1553 + } 1554 + #endif // HAVE_EVENTFD_SUPPORT 1572 1555 1573 1556 session->header.env.comp_type = PERF_COMP_ZSTD; 1574 1557 session->header.env.comp_level = rec->opts.comp_level; ··· 1941 1896 } 1942 1897 1943 1898 out_delete_session: 1899 + #ifdef HAVE_EVENTFD_SUPPORT 1900 + if (done_fd >= 0) 1901 + close(done_fd); 1902 + #endif 1944 1903 zstd_fini(&session->zstd_data); 1945 1904 perf_session__delete(session); 1946 1905 ··· 2502 2453 "Record namespaces events"), 2503 2454 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 2504 2455 "Record cgroup events"), 2505 - OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2506 - "Record context switch events"), 2456 + OPT_BOOLEAN_SET(0, 
"switch-events", &record.opts.record_switch_events, 2457 + &record.opts.record_switch_events_set, 2458 + "Record context switch events"), 2507 2459 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2508 2460 "Configure all used events to run in kernel space.", 2509 2461 PARSE_OPT_EXCLUSIVE), ··· 2556 2506 OPT_UINTEGER(0, "num-thread-synthesize", 2557 2507 &record.opts.nr_threads_synthesize, 2558 2508 "number of threads to run for event synthesis"), 2509 + #ifdef HAVE_LIBPFM 2510 + OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 2511 + "libpfm4 event selector. use 'perf list' to list available events", 2512 + parse_libpfm_events_option), 2513 + #endif 2559 2514 OPT_END() 2560 2515 }; 2561 2516
+23 -14
tools/perf/builtin-report.c
··· 47 47 #include "util/time-utils.h" 48 48 #include "util/auxtrace.h" 49 49 #include "util/units.h" 50 - #include "util/branch.h" 51 50 #include "util/util.h" // perf_tip() 52 51 #include "ui/ui.h" 53 52 #include "ui/progress.h" ··· 401 402 } 402 403 } 403 404 404 - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { 405 - if ((sample_type & PERF_SAMPLE_REGS_USER) && 406 - (sample_type & PERF_SAMPLE_STACK_USER)) { 407 - callchain_param.record_mode = CALLCHAIN_DWARF; 408 - dwarf_callchain_users = true; 409 - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) 410 - callchain_param.record_mode = CALLCHAIN_LBR; 411 - else 412 - callchain_param.record_mode = CALLCHAIN_FP; 413 - } 405 + callchain_param_setup(sample_type); 414 406 415 407 if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { 416 408 ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" ··· 706 716 ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); 707 717 708 718 evlist__for_each_entry(rep->session->evlist, pos) { 709 - perf_evsel__output_resort_cb(pos, &prog, 710 - hists__resort_cb, rep); 719 + evsel__output_resort_cb(pos, &prog, hists__resort_cb, rep); 711 720 } 712 721 713 722 ui_progress__finish(); ··· 1079 1090 return 0; 1080 1091 } 1081 1092 1093 + static int process_attr(struct perf_tool *tool __maybe_unused, 1094 + union perf_event *event, 1095 + struct evlist **pevlist) 1096 + { 1097 + u64 sample_type; 1098 + int err; 1099 + 1100 + err = perf_event__process_attr(tool, event, pevlist); 1101 + if (err) 1102 + return err; 1103 + 1104 + /* 1105 + * Check if we need to enable callchains based 1106 + * on events sample_type. 
1107 + */ 1108 + sample_type = perf_evlist__combined_sample_type(*pevlist); 1109 + callchain_param_setup(sample_type); 1110 + return 0; 1111 + } 1112 + 1082 1113 int cmd_report(int argc, const char **argv) 1083 1114 { 1084 1115 struct perf_session *session; ··· 1129 1120 .fork = perf_event__process_fork, 1130 1121 .lost = perf_event__process_lost, 1131 1122 .read = process_read_event, 1132 - .attr = perf_event__process_attr, 1123 + .attr = process_attr, 1133 1124 .tracing_data = perf_event__process_tracing_data, 1134 1125 .build_id = perf_event__process_build_id, 1135 1126 .id_index = perf_event__process_id_index,
+35 -6
tools/perf/builtin-script.c
··· 167 167 u64 fields; 168 168 u64 invalid_fields; 169 169 u64 user_set_fields; 170 + u64 user_unset_fields; 170 171 } output[OUTPUT_TYPE_MAX] = { 171 172 172 173 [PERF_TYPE_HARDWARE] = { ··· 2086 2085 struct perf_script *scr = container_of(tool, struct perf_script, tool); 2087 2086 struct evlist *evlist; 2088 2087 struct evsel *evsel, *pos; 2088 + u64 sample_type; 2089 2089 int err; 2090 2090 static struct evsel_script *es; 2091 2091 ··· 2119 2117 return 0; 2120 2118 } 2121 2119 2122 - set_print_ip_opts(&evsel->core.attr); 2123 - 2124 - if (evsel->core.attr.sample_type) 2120 + if (evsel->core.attr.sample_type) { 2125 2121 err = perf_evsel__check_attr(evsel, scr->session); 2122 + if (err) 2123 + return err; 2124 + } 2126 2125 2127 - return err; 2126 + /* 2127 + * Check if we need to enable callchains based 2128 + * on events sample_type. 2129 + */ 2130 + sample_type = perf_evlist__combined_sample_type(evlist); 2131 + callchain_param_setup(sample_type); 2132 + 2133 + /* Enable fields for callchain entries */ 2134 + if (symbol_conf.use_callchain && 2135 + (sample_type & PERF_SAMPLE_CALLCHAIN || 2136 + sample_type & PERF_SAMPLE_BRANCH_STACK || 2137 + (sample_type & PERF_SAMPLE_REGS_USER && 2138 + sample_type & PERF_SAMPLE_STACK_USER))) { 2139 + int type = output_type(evsel->core.attr.type); 2140 + 2141 + if (!(output[type].user_unset_fields & PERF_OUTPUT_IP)) 2142 + output[type].fields |= PERF_OUTPUT_IP; 2143 + if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM)) 2144 + output[type].fields |= PERF_OUTPUT_SYM; 2145 + } 2146 + set_print_ip_opts(&evsel->core.attr); 2147 + return 0; 2128 2148 } 2129 2149 2130 2150 static int print_event_with_time(struct perf_tool *tool, ··· 2458 2434 struct script_spec { 2459 2435 struct list_head node; 2460 2436 struct scripting_ops *ops; 2461 - char spec[0]; 2437 + char spec[]; 2462 2438 }; 2463 2439 2464 2440 static LIST_HEAD(script_specs); ··· 2696 2672 if (change == REMOVE) { 2697 2673 output[j].fields &= 
~all_output_options[i].field; 2698 2674 output[j].user_set_fields &= ~all_output_options[i].field; 2675 + output[j].user_unset_fields |= all_output_options[i].field; 2699 2676 } else { 2700 2677 output[j].fields |= all_output_options[i].field; 2701 2678 output[j].user_set_fields |= all_output_options[i].field; 2679 + output[j].user_unset_fields &= ~all_output_options[i].field; 2702 2680 } 2703 2681 output[j].user_set = true; 2704 2682 output[j].wildcard_set = true; ··· 3312 3286 const char *str __maybe_unused, 3313 3287 int unset __maybe_unused) 3314 3288 { 3315 - force_pager("xed -F insn: -A -64 | less"); 3289 + if (isatty(1)) 3290 + force_pager("xed -F insn: -A -64 | less"); 3291 + else 3292 + force_pager("xed -F insn: -A -64"); 3316 3293 return 0; 3317 3294 } 3318 3295
+146 -35
tools/perf/builtin-stat.c
··· 66 66 #include "util/time-utils.h" 67 67 #include "util/top.h" 68 68 #include "util/affinity.h" 69 + #include "util/pfm.h" 69 70 #include "asm/bug.h" 70 71 71 72 #include <linux/time64.h> ··· 189 188 .walltime_nsecs_stats = &walltime_nsecs_stats, 190 189 .big_num = true, 191 190 }; 191 + 192 + static bool cpus_map_matched(struct evsel *a, struct evsel *b) 193 + { 194 + if (!a->core.cpus && !b->core.cpus) 195 + return true; 196 + 197 + if (!a->core.cpus || !b->core.cpus) 198 + return false; 199 + 200 + if (a->core.cpus->nr != b->core.cpus->nr) 201 + return false; 202 + 203 + for (int i = 0; i < a->core.cpus->nr; i++) { 204 + if (a->core.cpus->map[i] != b->core.cpus->map[i]) 205 + return false; 206 + } 207 + 208 + return true; 209 + } 210 + 211 + static void evlist__check_cpu_maps(struct evlist *evlist) 212 + { 213 + struct evsel *evsel, *pos, *leader; 214 + char buf[1024]; 215 + 216 + evlist__for_each_entry(evlist, evsel) { 217 + leader = evsel->leader; 218 + 219 + /* Check that leader matches cpus with each member. */ 220 + if (leader == evsel) 221 + continue; 222 + if (cpus_map_matched(leader, evsel)) 223 + continue; 224 + 225 + /* If there's mismatch disable the group and warn user. 
*/ 226 + WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n"); 227 + evsel__group_desc(leader, buf, sizeof(buf)); 228 + pr_warning(" %s\n", buf); 229 + 230 + if (verbose) { 231 + cpu_map__snprint(leader->core.cpus, buf, sizeof(buf)); 232 + pr_warning(" %s: %s\n", leader->name, buf); 233 + cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf)); 234 + pr_warning(" %s: %s\n", evsel->name, buf); 235 + } 236 + 237 + for_each_group_evsel(pos, leader) { 238 + pos->leader = pos; 239 + pos->core.nr_members = 0; 240 + } 241 + evsel->leader->core.nr_members = 0; 242 + } 243 + } 192 244 193 245 static inline void diff_timespec(struct timespec *r, struct timespec *a, 194 246 struct timespec *b) ··· 368 314 return 0; 369 315 } 370 316 371 - static void read_counters(struct timespec *rs) 317 + static int read_affinity_counters(struct timespec *rs) 372 318 { 373 319 struct evsel *counter; 374 320 struct affinity affinity; 375 321 int i, ncpus, cpu; 376 322 377 323 if (affinity__setup(&affinity) < 0) 378 - return; 324 + return -1; 379 325 380 326 ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); 381 327 if (!target__has_cpu(&target) || target__has_per_thread(&target)) ··· 395 341 } 396 342 } 397 343 affinity__cleanup(&affinity); 344 + return 0; 345 + } 346 + 347 + static void read_counters(struct timespec *rs) 348 + { 349 + struct evsel *counter; 350 + 351 + if (!stat_config.summary && (read_affinity_counters(rs) < 0)) 352 + return; 398 353 399 354 evlist__for_each_entry(evsel_list, counter) { 400 355 if (counter->err) ··· 414 351 } 415 352 } 416 353 354 + static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 355 + { 356 + int i; 357 + 358 + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 359 + if (!config->stats) 360 + return -1; 361 + 362 + config->stats_num = nthreads; 363 + 364 + for (i = 0; i < nthreads; i++) 365 + runtime_stat__init(&config->stats[i]); 366 + 367 + return 0; 368 + } 369 + 370 + static void 
runtime_stat_delete(struct perf_stat_config *config) 371 + { 372 + int i; 373 + 374 + if (!config->stats) 375 + return; 376 + 377 + for (i = 0; i < config->stats_num; i++) 378 + runtime_stat__exit(&config->stats[i]); 379 + 380 + zfree(&config->stats); 381 + } 382 + 383 + static void runtime_stat_reset(struct perf_stat_config *config) 384 + { 385 + int i; 386 + 387 + if (!config->stats) 388 + return; 389 + 390 + for (i = 0; i < config->stats_num; i++) 391 + perf_stat__reset_shadow_per_stat(&config->stats[i]); 392 + } 393 + 417 394 static void process_interval(void) 418 395 { 419 396 struct timespec ts, rs; ··· 462 359 diff_timespec(&rs, &ts, &ref_time); 463 360 464 361 perf_stat__reset_shadow_per_stat(&rt_stat); 362 + runtime_stat_reset(&stat_config); 465 363 read_counters(&rs); 466 364 467 365 if (STAT_RECORD) { ··· 471 367 } 472 368 473 369 init_stats(&walltime_nsecs_stats); 474 - update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); 370 + update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL); 475 371 print_counters(&rs, 0, NULL); 476 372 } 477 373 ··· 826 722 if (stat_config.walltime_run_table) 827 723 stat_config.walltime_run[run_idx] = t1 - t0; 828 724 829 - update_stats(&walltime_nsecs_stats, t1 - t0); 725 + if (interval) { 726 + stat_config.interval = 0; 727 + stat_config.summary = true; 728 + init_stats(&walltime_nsecs_stats); 729 + update_stats(&walltime_nsecs_stats, t1 - t0); 730 + 731 + if (stat_config.aggr_mode == AGGR_GLOBAL) 732 + perf_evlist__save_aggr_prev_raw_counts(evsel_list); 733 + 734 + perf_evlist__copy_prev_raw_counts(evsel_list); 735 + perf_evlist__reset_prev_raw_counts(evsel_list); 736 + runtime_stat_reset(&stat_config); 737 + perf_stat__reset_shadow_per_stat(&rt_stat); 738 + } else 739 + update_stats(&walltime_nsecs_stats, t1 - t0); 830 740 831 741 /* 832 742 * Closing a group leader splits the group, and as we only disable ··· 939 821 kill(getpid(), signr); 940 822 } 941 823 824 + void 
perf_stat__set_big_num(int set) 825 + { 826 + stat_config.big_num = (set != 0); 827 + } 828 + 942 829 static int stat__set_big_num(const struct option *opt __maybe_unused, 943 830 const char *s __maybe_unused, int unset) 944 831 { 945 832 big_num_opt = unset ? 0 : 1; 833 + perf_stat__set_big_num(!unset); 946 834 return 0; 947 835 } 948 836 ··· 964 840 const char *str, 965 841 int unset __maybe_unused) 966 842 { 967 - return metricgroup__parse_groups(opt, str, &stat_config.metric_events); 843 + return metricgroup__parse_groups(opt, str, 844 + stat_config.metric_no_group, 845 + stat_config.metric_no_merge, 846 + &stat_config.metric_events); 968 847 } 969 848 970 849 static struct option stat_options[] = { ··· 1045 918 "ms to wait before starting measurement after program start"), 1046 919 OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, 1047 920 "Only print computed metrics. No raw values", enable_metric_only), 921 + OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group, 922 + "don't group metric events, impacts multiplexing"), 923 + OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, 924 + "don't try to share events between metrics in a group"), 1048 925 OPT_BOOLEAN(0, "topdown", &topdown_run, 1049 926 "measure topdown level 1 statistics"), 1050 927 OPT_BOOLEAN(0, "smi-cost", &smi_cost, ··· 1066 935 "Use with 'percore' event qualifier to show the event " 1067 936 "counts of one hardware thread by sum up total hardware " 1068 937 "threads of same physical core"), 938 + #ifdef HAVE_LIBPFM 939 + OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", 940 + "libpfm4 event selector. 
use 'perf list' to list available events", 941 + parse_libpfm_events_option), 942 + #endif 1069 943 OPT_END() 1070 944 }; 1071 945 ··· 1578 1442 struct option opt = { .value = &evsel_list }; 1579 1443 1580 1444 return metricgroup__parse_groups(&opt, "transaction", 1445 + stat_config.metric_no_group, 1446 + stat_config.metric_no_merge, 1581 1447 &stat_config.metric_events); 1582 1448 } 1583 1449 ··· 1875 1737 return set_maps(st); 1876 1738 } 1877 1739 1878 - static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 1879 - { 1880 - int i; 1881 - 1882 - config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 1883 - if (!config->stats) 1884 - return -1; 1885 - 1886 - config->stats_num = nthreads; 1887 - 1888 - for (i = 0; i < nthreads; i++) 1889 - runtime_stat__init(&config->stats[i]); 1890 - 1891 - return 0; 1892 - } 1893 - 1894 - static void runtime_stat_delete(struct perf_stat_config *config) 1895 - { 1896 - int i; 1897 - 1898 - if (!config->stats) 1899 - return; 1900 - 1901 - for (i = 0; i < config->stats_num; i++) 1902 - runtime_stat__exit(&config->stats[i]); 1903 - 1904 - zfree(&config->stats); 1905 - } 1906 - 1907 1740 static const char * const stat_report_usage[] = { 1908 1741 "perf stat report [<options>]", 1909 1742 NULL, ··· 2166 2057 goto out; 2167 2058 } 2168 2059 2060 + evlist__check_cpu_maps(evsel_list); 2061 + 2169 2062 /* 2170 2063 * Initialize thread_map with comm names, 2171 2064 * so we could print it out on output. ··· 2258 2147 } 2259 2148 } 2260 2149 2261 - if (!forever && status != -1 && !interval) 2150 + if (!forever && status != -1 && (!interval || stat_config.summary)) 2262 2151 print_counters(NULL, argc, argv); 2263 2152 2264 2153 if (STAT_RECORD) {
+1 -1
tools/perf/builtin-timechart.c
··· 128 128 struct sample_wrapper *next; 129 129 130 130 u64 timestamp; 131 - unsigned char data[0]; 131 + unsigned char data[]; 132 132 }; 133 133 134 134 #define TYPE_NONE 0
+9 -3
tools/perf/builtin-top.c
··· 53 53 54 54 #include "util/debug.h" 55 55 #include "util/ordered-events.h" 56 + #include "util/pfm.h" 56 57 57 58 #include <assert.h> 58 59 #include <elf.h> ··· 308 307 } 309 308 310 309 evlist__for_each_entry(evlist, pos) { 311 - perf_evsel__output_resort(pos, NULL); 310 + evsel__output_resort(pos, NULL); 312 311 } 313 312 } 314 313 ··· 950 949 { 951 950 struct record_opts *opts = &top->record_opts; 952 951 struct evlist *evlist = top->evlist; 953 - struct perf_evsel_config_term *term; 952 + struct evsel_config_term *term; 954 953 struct list_head *config_terms; 955 954 struct evsel *evsel; 956 955 int set, overwrite = -1; ··· 959 958 set = -1; 960 959 config_terms = &evsel->config_terms; 961 960 list_for_each_entry(term, config_terms, list) { 962 - if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE) 961 + if (term->type == EVSEL__CONFIG_TERM_OVERWRITE) 963 962 set = term->val.overwrite ? 1 : 0; 964 963 } 965 964 ··· 1576 1575 "WARNING: should be used on grouped events."), 1577 1576 OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr, 1578 1577 "Enable LBR callgraph stitching approach"), 1578 + #ifdef HAVE_LIBPFM 1579 + OPT_CALLBACK(0, "pfm-events", &top.evlist, "event", 1580 + "libpfm4 event selector. use 'perf list' to list available events", 1581 + parse_libpfm_events_option), 1582 + #endif 1579 1583 OPTS_EVSWITCH(&top.evswitch), 1580 1584 OPT_END() 1581 1585 };
+66 -26
tools/perf/builtin-trace.c
··· 461 461 462 462 static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler) 463 463 { 464 - struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 464 + struct evsel *evsel = evsel__newtp("raw_syscalls", direction); 465 465 466 466 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 467 467 if (IS_ERR(evsel)) 468 - evsel = perf_evsel__newtp("syscalls", direction); 468 + evsel = evsel__newtp("syscalls", direction); 469 469 470 470 if (IS_ERR(evsel)) 471 471 return NULL; ··· 1748 1748 struct syscall *sc; 1749 1749 const char *name = syscalltbl__name(trace->sctbl, id); 1750 1750 1751 + #ifdef HAVE_SYSCALL_TABLE_SUPPORT 1751 1752 if (trace->syscalls.table == NULL) { 1752 1753 trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); 1753 1754 if (trace->syscalls.table == NULL) 1754 1755 return -ENOMEM; 1755 1756 } 1757 + #else 1758 + if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) { 1759 + // When using libaudit we don't know beforehand what is the max syscall id 1760 + struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1756 1761 1762 + if (table == NULL) 1763 + return -ENOMEM; 1764 + 1765 + memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); 1766 + 1767 + trace->syscalls.table = table; 1768 + trace->sctbl->syscalls.max_id = id; 1769 + } 1770 + #endif 1757 1771 sc = trace->syscalls.table + id; 1758 1772 if (sc->nonexistent) 1759 1773 return 0; ··· 2091 2077 2092 2078 err = -EINVAL; 2093 2079 2094 - if (id > trace->sctbl->syscalls.max_id) 2080 + #ifdef HAVE_SYSCALL_TABLE_SUPPORT 2081 + if (id > trace->sctbl->syscalls.max_id) { 2082 + #else 2083 + if (id >= trace->sctbl->syscalls.max_id) { 2084 + /* 2085 + * With libaudit we don't know beforehand what is the max_id, 2086 + * so we let trace__read_syscall_info() figure that out as we 2087 + * go on reading syscalls. 
2088 + */ 2089 + err = trace__read_syscall_info(trace, id); 2090 + if (err) 2091 + #endif 2095 2092 goto out_cant_read; 2093 + } 2096 2094 2097 2095 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && 2098 2096 (err = trace__read_syscall_info(trace, id)) != 0) ··· 3071 3045 return found; 3072 3046 } 3073 3047 3074 - static struct evsel *perf_evsel__new_pgfault(u64 config) 3048 + static struct evsel *evsel__new_pgfault(u64 config) 3075 3049 { 3076 3050 struct evsel *evsel; 3077 3051 struct perf_event_attr attr = { ··· 3200 3174 } 3201 3175 3202 3176 #ifdef HAVE_LIBBPF_SUPPORT 3177 + static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) 3178 + { 3179 + if (trace->bpf_obj == NULL) 3180 + return NULL; 3181 + 3182 + return bpf_object__find_map_by_name(trace->bpf_obj, name); 3183 + } 3184 + 3185 + static void trace__set_bpf_map_filtered_pids(struct trace *trace) 3186 + { 3187 + trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); 3188 + } 3189 + 3190 + static void trace__set_bpf_map_syscalls(struct trace *trace) 3191 + { 3192 + trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); 3193 + trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); 3194 + trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); 3195 + } 3196 + 3203 3197 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) 3204 3198 { 3205 3199 if (trace->bpf_obj == NULL) ··· 3558 3512 trace->bpf_obj = NULL; 3559 3513 } 3560 3514 #else // HAVE_LIBBPF_SUPPORT 3515 + static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused, 3516 + const char *name __maybe_unused) 3517 + { 3518 + return NULL; 3519 + } 3520 + 3521 + static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused) 3522 + { 3523 + } 3524 + 3525 + static void 
trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused) 3526 + { 3527 + } 3528 + 3561 3529 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) 3562 3530 { 3563 3531 return 0; ··· 3901 3841 } 3902 3842 3903 3843 if ((trace->trace_pgfaults & TRACE_PFMAJ)) { 3904 - pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 3844 + pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 3905 3845 if (pgfault_maj == NULL) 3906 3846 goto out_error_mem; 3907 3847 evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); ··· 3909 3849 } 3910 3850 3911 3851 if ((trace->trace_pgfaults & TRACE_PFMIN)) { 3912 - pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 3852 + pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 3913 3853 if (pgfault_min == NULL) 3914 3854 goto out_error_mem; 3915 3855 evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); ··· 4658 4598 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); 4659 4599 4660 4600 return 0; 4661 - } 4662 - 4663 - static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) 4664 - { 4665 - if (trace->bpf_obj == NULL) 4666 - return NULL; 4667 - 4668 - return bpf_object__find_map_by_name(trace->bpf_obj, name); 4669 - } 4670 - 4671 - static void trace__set_bpf_map_filtered_pids(struct trace *trace) 4672 - { 4673 - trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); 4674 - } 4675 - 4676 - static void trace__set_bpf_map_syscalls(struct trace *trace) 4677 - { 4678 - trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); 4679 - trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); 4680 - trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); 4681 4601 } 4682 4602 4683 4603 static int trace__config(const char *var, const char *value, void *arg)
+4
tools/perf/check-headers.sh
··· 128 128 # diff non-symmetric files 129 129 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl 130 130 131 + # check duplicated library files 132 + check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h 133 + check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c 134 + 131 135 cd tools/perf
+39 -53
tools/perf/jvmti/libjvmti.c
··· 32 32 33 33 #ifdef HAVE_JVMTI_CMLR 34 34 static jvmtiError 35 - do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, 36 - jvmti_line_info_t *tab, jint *nr) 35 + do_get_line_number(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, 36 + jvmti_line_info_t *tab) 37 37 { 38 - jint i, lines = 0; 39 - jint nr_lines = 0; 38 + jint i, nr_lines = 0; 40 39 jvmtiLineNumberEntry *loc_tab = NULL; 41 40 jvmtiError ret; 41 + jint src_line = -1; 42 42 43 43 ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); 44 - if (ret != JVMTI_ERROR_NONE) { 44 + if (ret == JVMTI_ERROR_ABSENT_INFORMATION || ret == JVMTI_ERROR_NATIVE_METHOD) { 45 + /* No debug information for this method */ 46 + return ret; 47 + } else if (ret != JVMTI_ERROR_NONE) { 45 48 print_error(jvmti, "GetLineNumberTable", ret); 46 49 return ret; 47 50 } 48 51 49 - for (i = 0; i < nr_lines; i++) { 50 - if (loc_tab[i].start_location < bci) { 51 - tab[lines].pc = (unsigned long)pc; 52 - tab[lines].line_number = loc_tab[i].line_number; 53 - tab[lines].discrim = 0; /* not yet used */ 54 - tab[lines].methodID = m; 55 - lines++; 56 - } else { 57 - break; 58 - } 52 + for (i = 0; i < nr_lines && loc_tab[i].start_location <= bci; i++) { 53 + src_line = i; 59 54 } 55 + 56 + if (src_line != -1) { 57 + tab->pc = (unsigned long)pc; 58 + tab->line_number = loc_tab[src_line].line_number; 59 + tab->discrim = 0; /* not yet used */ 60 + tab->methodID = m; 61 + 62 + ret = JVMTI_ERROR_NONE; 63 + } else { 64 + ret = JVMTI_ERROR_ABSENT_INFORMATION; 65 + } 66 + 60 67 (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); 61 - *nr = lines; 62 - return JVMTI_ERROR_NONE; 68 + 69 + return ret; 63 70 } 64 71 65 72 static jvmtiError ··· 74 67 { 75 68 const jvmtiCompiledMethodLoadRecordHeader *hdr; 76 69 jvmtiCompiledMethodLoadInlineRecord *rec; 77 - jvmtiLineNumberEntry *lne = NULL; 78 70 PCStackInfo *c; 79 - jint nr, ret; 71 + jint ret; 80 72 int nr_total = 0; 81 73 int i, lines_total = 0; 82 74 ··· 88 82 for (hdr = 
compile_info; hdr != NULL; hdr = hdr->next) { 89 83 if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { 90 84 rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; 91 - for (i = 0; i < rec->numpcs; i++) { 92 - c = rec->pcinfo + i; 93 - nr = 0; 94 - /* 95 - * unfortunately, need a tab to get the number of lines! 96 - */ 97 - ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); 98 - if (ret == JVMTI_ERROR_NONE) { 99 - /* free what was allocated for nothing */ 100 - (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); 101 - nr_total += (int)nr; 102 - } else { 103 - print_error(jvmti, "GetLineNumberTable", ret); 104 - } 105 - } 85 + nr_total += rec->numpcs; 106 86 } 107 87 } 108 88 ··· 107 115 rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; 108 116 for (i = 0; i < rec->numpcs; i++) { 109 117 c = rec->pcinfo + i; 110 - nr = 0; 111 - ret = do_get_line_numbers(jvmti, c->pc, 112 - c->methods[0], 113 - c->bcis[0], 114 - *tab + lines_total, 115 - &nr); 118 + /* 119 + * c->methods is the stack of inlined method calls 120 + * at c->pc. [0] is the leaf method. Caller frames 121 + * are ignored at the moment. 
122 + */ 123 + ret = do_get_line_number(jvmti, c->pc, 124 + c->methods[0], 125 + c->bcis[0], 126 + *tab + lines_total); 116 127 if (ret == JVMTI_ERROR_NONE) 117 - lines_total += nr; 128 + lines_total++; 118 129 } 119 130 } 120 131 } ··· 241 246 char *class_sign = NULL; 242 247 char *func_name = NULL; 243 248 char *func_sign = NULL; 244 - char *file_name = NULL; 245 - char fn[PATH_MAX]; 246 249 uint64_t addr = (uint64_t)(uintptr_t)code_addr; 247 250 jvmtiError ret; 248 251 int nr_lines = 0; /* in line_tab[] */ ··· 257 264 if (has_line_numbers && map && map_length) { 258 265 ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); 259 266 if (ret != JVMTI_ERROR_NONE) { 260 - warnx("jvmti: cannot get line table for method"); 267 + if (ret != JVMTI_ERROR_NOT_FOUND) { 268 + warnx("jvmti: cannot get line table for method"); 269 + } 261 270 nr_lines = 0; 262 271 } else if (nr_lines > 0) { 263 272 line_file_names = malloc(sizeof(char*) * nr_lines); ··· 277 282 } 278 283 } 279 284 280 - ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); 281 - if (ret != JVMTI_ERROR_NONE) { 282 - print_error(jvmti, "GetSourceFileName", ret); 283 - goto error; 284 - } 285 - 286 285 ret = (*jvmti)->GetClassSignature(jvmti, decl_class, 287 286 &class_sign, NULL); 288 287 if (ret != JVMTI_ERROR_NONE) { ··· 290 301 print_error(jvmti, "GetMethodName", ret); 291 302 goto error; 292 303 } 293 - 294 - copy_class_filename(class_sign, file_name, fn, PATH_MAX); 295 304 296 305 /* 297 306 * write source line info record if we have it ··· 310 323 (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); 311 324 (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); 312 325 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); 313 - (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); 314 326 free(line_tab); 315 327 while (line_file_names && (nr_lines > 0)) { 316 328 if (line_file_names[nr_lines - 1]) {
+1 -1
tools/perf/pmu-events/arch/powerpc/power8/metrics.json
··· 169 169 }, 170 170 { 171 171 "BriefDescription": "Cycles GCT empty where dispatch was held", 172 - "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)", 172 + "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL", 173 173 "MetricGroup": "cpi_breakdown", 174 174 "MetricName": "gct_empty_disp_held_cpi" 175 175 },
+81 -68
tools/perf/pmu-events/arch/powerpc/power9/metrics.json
··· 208 208 "MetricName": "fxu_stall_cpi" 209 209 }, 210 210 { 211 + "BriefDescription": "Instruction Completion Table empty for this thread due to branch mispred", 212 + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", 213 + "MetricGroup": "cpi_breakdown", 214 + "MetricName": "ict_noslot_br_mpred_cpi" 215 + }, 216 + { 217 + "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss and branch mispred", 218 + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", 219 + "MetricGroup": "cpi_breakdown", 220 + "MetricName": "ict_noslot_br_mpred_icmiss_cpi" 221 + }, 222 + { 223 + "BriefDescription": "Instruction Completion Table other stalls", 224 + "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", 225 + "MetricGroup": "cpi_breakdown", 226 + "MetricName": "ict_noslot_cyc_other_cpi" 227 + }, 228 + { 229 + "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", 230 + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", 231 + "MetricGroup": "cpi_breakdown", 232 + "MetricName": "ict_noslot_disp_held_cpi" 233 + }, 234 + { 235 + "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF", 236 + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", 237 + "MetricGroup": "cpi_breakdown", 238 + "MetricName": "ict_noslot_disp_held_hb_full_cpi" 239 + }, 240 + { 241 + "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", 242 + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", 243 + "MetricGroup": "cpi_breakdown", 244 + "MetricName": "ict_noslot_disp_held_issq_cpi" 245 + }, 246 + { 247 + "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", 248 + "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", 249 + "MetricGroup": "cpi_breakdown", 250 + "MetricName": "ict_noslot_disp_held_other_cpi" 251 + }, 252 + { 253 + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", 254 + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", 255 + "MetricGroup": "cpi_breakdown", 256 + "MetricName": "ict_noslot_disp_held_sync_cpi" 257 + }, 258 + { 259 + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", 260 + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", 261 + "MetricGroup": "cpi_breakdown", 262 + "MetricName": "ict_noslot_disp_held_tbegin_cpi" 263 + }, 264 + { 265 + "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", 266 + "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", 267 + "MetricGroup": "cpi_breakdown", 268 + "MetricName": "ict_noslot_ic_l2_cpi" 269 + }, 270 + { 271 + "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3", 
272 + "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", 273 + "MetricGroup": "cpi_breakdown", 274 + "MetricName": "ict_noslot_ic_l3_cpi" 275 + }, 276 + { 277 + "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", 278 + "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", 279 + "MetricGroup": "cpi_breakdown", 280 + "MetricName": "ict_noslot_ic_l3miss_cpi" 281 + }, 282 + { 283 + "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss", 284 + "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", 285 + "MetricGroup": "cpi_breakdown", 286 + "MetricName": "ict_noslot_ic_miss_cpi" 287 + }, 288 + { 211 289 "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", 212 290 "MetricGroup": "cpi_breakdown", 213 291 "MetricName": "issue_hold_cpi" ··· 391 313 "MetricName": "nested_tend_stall_cpi" 392 314 }, 393 315 { 394 - "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", 316 + "BriefDescription": "Number of cycles the Instruction Completion Table has no itags assigned to this thread", 395 317 "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL", 396 318 "MetricGroup": "cpi_breakdown", 397 319 "MetricName": "nothing_dispatched_cpi" ··· 440 362 }, 441 363 { 442 364 "BriefDescription": "Completion stall for other reasons", 443 - "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", 365 + "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", 444 366 "MetricGroup": "cpi_breakdown", 445 367 "MetricName": "other_stall_cpi" 446 368 }, ··· 503 425 
"MetricName": "st_fwd_stall_cpi" 504 426 }, 505 427 { 506 - "BriefDescription": "Nothing completed and ICT not empty", 428 + "BriefDescription": "Nothing completed and Instruction Completion Table not empty", 507 429 "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL", 508 430 "MetricGroup": "cpi_breakdown", 509 431 "MetricName": "stall_cpi" ··· 1896 1818 "BriefDescription": "All FXU Idle", 1897 1819 "MetricExpr": "PM_FXU_IDLE / PM_CYC", 1898 1820 "MetricName": "fxu_all_idle" 1899 - }, 1900 - { 1901 - "BriefDescription": "Ict empty for this thread due to branch mispred", 1902 - "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", 1903 - "MetricName": "ict_noslot_br_mpred_cpi" 1904 - }, 1905 - { 1906 - "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", 1907 - "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", 1908 - "MetricName": "ict_noslot_br_mpred_icmiss_cpi" 1909 - }, 1910 - { 1911 - "BriefDescription": "ICT other stalls", 1912 - "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", 1913 - "MetricName": "ict_noslot_cyc_other_cpi" 1914 - }, 1915 - { 1916 - "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", 1917 - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", 1918 - "MetricName": "ict_noslot_disp_held_cpi" 1919 - }, 1920 - { 1921 - "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF", 1922 - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", 1923 - "MetricName": "ict_noslot_disp_held_hb_full_cpi" 1924 - }, 1925 - { 1926 - "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", 1927 - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", 1928 - "MetricName": "ict_noslot_disp_held_issq_cpi" 1929 - }, 1930 - { 1931 - "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", 1932 - "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", 1933 - "MetricName": "ict_noslot_disp_held_other_cpi" 1934 - }, 1935 - { 1936 - "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", 1937 - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", 1938 - "MetricName": "ict_noslot_disp_held_sync_cpi" 1939 - }, 1940 - { 1941 - "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", 1942 - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", 1943 - "MetricName": "ict_noslot_disp_held_tbegin_cpi" 1944 - }, 1945 - { 1946 - "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", 1947 - "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", 1948 - "MetricName": "ict_noslot_ic_l2_cpi" 1949 - }, 1950 - { 1951 - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", 1952 - "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", 1953 - "MetricName": "ict_noslot_ic_l3_cpi" 1954 - }, 1955 - { 1956 - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. 
The source could be local/remote/distant memory or another core's cache", 1957 - "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", 1958 - "MetricName": "ict_noslot_ic_l3miss_cpi" 1959 - }, 1960 - { 1961 - "BriefDescription": "Ict empty for this thread due to Icache Miss", 1962 - "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", 1963 - "MetricName": "ict_noslot_ic_miss_cpi" 1964 1821 }, 1965 1822 { 1966 1823 "BriefDescription": "Rate of IERAT reloads from L2",
+5 -5
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
··· 328 328 }, 329 329 { 330 330 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 331 - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 331 + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 332 332 "MetricGroup": "Memory_Lat", 333 333 "MetricName": "DRAM_Read_Latency" 334 334 }, 335 335 { 336 336 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 337 - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 337 + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", 338 338 "MetricGroup": "Memory_BW", 339 339 "MetricName": "DRAM_Parallel_Reads" 340 340 }, 341 341 { 342 342 "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. 
Accounts for demand loads and L1/L2 data-read prefetches", 343 - "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", 343 + "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )", 344 344 "MetricGroup": "Memory_Lat", 345 345 "MetricName": "MEM_PMM_Read_Latency" 346 346 }, 347 347 { 348 348 "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", 349 - "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", 349 + "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )", 350 350 "MetricGroup": "Memory_BW", 351 351 "MetricName": "PMM_Read_BW" 352 352 }, 353 353 { 354 354 "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", 355 - "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", 355 + "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )", 356 356 "MetricGroup": "Memory_BW", 357 357 "MetricName": "PMM_Write_BW" 358 358 },
+2 -2
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
··· 328 328 }, 329 329 { 330 330 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 331 - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 331 + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 332 332 "MetricGroup": "Memory_Lat", 333 333 "MetricName": "DRAM_Read_Latency" 334 334 }, 335 335 { 336 336 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 337 - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 337 + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", 338 338 "MetricGroup": "Memory_BW", 339 339 "MetricName": "DRAM_Parallel_Reads" 340 340 },
+1 -1
tools/perf/pmu-events/jsmn.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 1 + /* SPDX-License-Identifier: MIT */ 2 2 #ifndef __JSMN_H_ 3 3 #define __JSMN_H_ 4 4
+2
tools/perf/tests/Build
··· 57 57 perf-y += time-utils-test.o 58 58 perf-y += genelf.o 59 59 perf-y += api-io.o 60 + perf-y += demangle-java-test.o 61 + perf-y += pfm.o 60 62 61 63 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build 62 64 $(call rule_mkdir)
+50
tools/perf/tests/attr/system-wide-dummy
··· 1 + # Event added by system-wide or CPU perf-record to handle the race of 2 + # processes starting while /proc is processed. 3 + [event] 4 + fd=1 5 + group_fd=-1 6 + cpu=* 7 + pid=-1 8 + flags=8 9 + type=1 10 + size=120 11 + config=9 12 + sample_period=4000 13 + sample_type=455 14 + read_format=4 15 + # Event will be enabled right away. 16 + disabled=0 17 + inherit=1 18 + pinned=0 19 + exclusive=0 20 + exclude_user=0 21 + exclude_kernel=0 22 + exclude_hv=0 23 + exclude_idle=0 24 + mmap=1 25 + comm=1 26 + freq=1 27 + inherit_stat=0 28 + enable_on_exec=0 29 + task=1 30 + watermark=0 31 + precise_ip=0 32 + mmap_data=0 33 + sample_id_all=1 34 + exclude_host=0 35 + exclude_guest=0 36 + exclude_callchain_kernel=0 37 + exclude_callchain_user=0 38 + mmap2=1 39 + comm_exec=1 40 + context_switch=0 41 + write_backward=0 42 + namespaces=0 43 + use_clockid=0 44 + wakeup_events=0 45 + bp_type=0 46 + config1=0 47 + config2=0 48 + branch_sample_type=0 49 + sample_regs_user=0 50 + sample_stack_user=0
+10 -2
tools/perf/tests/attr/test-record-C0
··· 9 9 # no enable on exec for CPU attached 10 10 enable_on_exec=0 11 11 12 - # PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD 12 + # PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | 13 + # PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD 13 14 # + PERF_SAMPLE_CPU added by -C 0 14 - sample_type=391 15 + sample_type=455 16 + 17 + # Dummy event handles mmaps, comm and task. 18 + mmap=0 19 + comm=0 20 + task=0 21 + 22 + [event:system-wide-dummy]
+55 -10
tools/perf/tests/builtin-test.c
··· 75 75 { 76 76 .desc = "PMU events", 77 77 .func = test__pmu_events, 78 + .subtest = { 79 + .skip_if_fail = false, 80 + .get_nr = test__pmu_events_subtest_get_nr, 81 + .get_desc = test__pmu_events_subtest_get_desc, 82 + .skip_reason = test__pmu_events_subtest_skip_reason, 83 + }, 84 + 78 85 }, 79 86 { 80 87 .desc = "DSO data read", ··· 317 310 .func = test__jit_write_elf, 318 311 }, 319 312 { 313 + .desc = "Test libpfm4 support", 314 + .func = test__pfm, 315 + .subtest = { 316 + .skip_if_fail = true, 317 + .get_nr = test__pfm_subtest_get_nr, 318 + .get_desc = test__pfm_subtest_get_desc, 319 + } 320 + }, 321 + { 320 322 .desc = "Test api io", 321 323 .func = test__api_io, 322 324 }, 323 325 { 324 326 .desc = "maps__merge_in", 325 327 .func = test__maps__merge_in, 328 + }, 329 + { 330 + .desc = "Demangle Java", 331 + .func = test__demangle_java, 326 332 }, 327 333 { 328 334 .func = NULL, ··· 347 327 arch_tests, 348 328 }; 349 329 350 - static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[]) 330 + static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) 351 331 { 352 332 int i; 353 333 ··· 364 344 continue; 365 345 } 366 346 367 - if (strcasestr(test->desc, argv[i])) 347 + if (strcasestr(desc, argv[i])) 368 348 return true; 369 349 } 370 350 ··· 449 429 case TEST_OK: 450 430 pr_info(" Ok\n"); 451 431 break; 452 - case TEST_SKIP: 453 - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); 432 + case TEST_SKIP: { 433 + const char *skip_reason = NULL; 434 + if (t->subtest.skip_reason) 435 + skip_reason = t->subtest.skip_reason(subtest); 436 + if (skip_reason) 437 + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason); 438 + else 439 + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); 440 + } 454 441 break; 455 442 case TEST_FAIL: 456 443 default: ··· 593 566 .priv = &st, 594 567 }; 595 568 596 - if (!perf_test__matches(&test, curr, argc, argv)) 569 + if (!perf_test__matches(test.desc, 
curr, argc, argv)) 597 570 continue; 598 571 599 572 st.file = ent->d_name; ··· 621 594 622 595 for_each_test(j, t) { 623 596 int curr = i++, err; 597 + int subi; 624 598 625 - if (!perf_test__matches(t, curr, argc, argv)) 626 - continue; 599 + if (!perf_test__matches(t->desc, curr, argc, argv)) { 600 + bool skip = true; 601 + int subn; 602 + 603 + if (!t->subtest.get_nr) 604 + continue; 605 + 606 + subn = t->subtest.get_nr(); 607 + 608 + for (subi = 0; subi < subn; subi++) { 609 + if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) 610 + skip = false; 611 + } 612 + 613 + if (skip) 614 + continue; 615 + } 627 616 628 617 if (t->is_supported && !t->is_supported()) { 629 618 pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); ··· 667 624 */ 668 625 int subw = width > 2 ? width - 2 : width; 669 626 bool skip = false; 670 - int subi; 671 627 672 628 if (subn <= 0) { 673 629 color_fprintf(stderr, PERF_COLOR_YELLOW, ··· 683 641 } 684 642 685 643 for (subi = 0; subi < subn; subi++) { 644 + if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) 645 + continue; 646 + 686 647 pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, 687 648 t->subtest.get_desc(subi)); 688 649 err = test_and_print(t, skip, subi); ··· 719 674 .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name), 720 675 }; 721 676 722 - if (!perf_test__matches(&t, curr, argc, argv)) 677 + if (!perf_test__matches(t.desc, curr, argc, argv)) 723 678 continue; 724 679 725 680 pr_info("%2d: %s\n", i, t.desc); ··· 738 693 for_each_test(j, t) { 739 694 int curr = i++; 740 695 741 - if (!perf_test__matches(t, curr, argc, argv) || 696 + if (!perf_test__matches(t->desc, curr, argc, argv) || 742 697 (t->is_supported && !t->is_supported())) 743 698 continue; 744 699
+42
tools/perf/tests/demangle-java-test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <string.h> 3 + #include <stdlib.h> 4 + #include <stdio.h> 5 + #include "tests.h" 6 + #include "session.h" 7 + #include "debug.h" 8 + #include "demangle-java.h" 9 + 10 + int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused) 11 + { 12 + int ret = TEST_OK; 13 + char *buf = NULL; 14 + size_t i; 15 + 16 + struct { 17 + const char *mangled, *demangled; 18 + } test_cases[] = { 19 + { "Ljava/lang/StringLatin1;equals([B[B)Z", 20 + "boolean java.lang.StringLatin1.equals(byte[], byte[])" }, 21 + { "Ljava/util/zip/ZipUtils;CENSIZ([BI)J", 22 + "long java.util.zip.ZipUtils.CENSIZ(byte[], int)" }, 23 + { "Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z", 24 + "boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)" }, 25 + { "Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V", 26 + "void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)" }, 27 + { "Ljava/lang/Object;<init>()V", 28 + "void java.lang.Object<init>()" }, 29 + }; 30 + 31 + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { 32 + buf = java_demangle_sym(test_cases[i].mangled, 0); 33 + if (strcmp(buf, test_cases[i].demangled)) { 34 + pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, 35 + buf, test_cases[i].demangled); 36 + ret = TEST_FAIL; 37 + } 38 + free(buf); 39 + } 40 + 41 + return ret; 42 + }
+6 -5
tools/perf/tests/dwarf-unwind.c
··· 37 37 union perf_event event; 38 38 pid_t pid = getpid(); 39 39 40 + memset(&event, 0, sizeof(event)); 40 41 return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, 41 42 mmap_handler, machine, true); 42 43 } ··· 95 94 return strcmp((const char *) symbol, funcs[idx]); 96 95 } 97 96 98 - noinline int test_dwarf_unwind__thread(struct thread *thread) 97 + __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) 99 98 { 100 99 struct perf_sample sample; 101 100 unsigned long cnt = 0; ··· 126 125 127 126 static int global_unwind_retval = -INT_MAX; 128 127 129 - noinline int test_dwarf_unwind__compare(void *p1, void *p2) 128 + __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) 130 129 { 131 130 /* Any possible value should be 'thread' */ 132 131 struct thread *thread = *(struct thread **)p1; ··· 145 144 return p1 - p2; 146 145 } 147 146 148 - noinline int test_dwarf_unwind__krava_3(struct thread *thread) 147 + __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) 149 148 { 150 149 struct thread *array[2] = {thread, thread}; 151 150 void *fp = &bsearch; ··· 164 163 return global_unwind_retval; 165 164 } 166 165 167 - noinline int test_dwarf_unwind__krava_2(struct thread *thread) 166 + __no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) 168 167 { 169 168 return test_dwarf_unwind__krava_3(thread); 170 169 } 171 170 172 - noinline int test_dwarf_unwind__krava_1(struct thread *thread) 171 + __no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) 173 172 { 174 173 return test_dwarf_unwind__krava_2(thread); 175 174 }
+2 -3
tools/perf/tests/evsel-roundtrip-name.c
··· 100 100 { 101 101 int err = 0, ret = 0; 102 102 103 - err = perf_evsel__name_array_test(perf_evsel__hw_names); 103 + err = perf_evsel__name_array_test(evsel__hw_names); 104 104 if (err) 105 105 ret = err; 106 106 107 - err = __perf_evsel__name_array_test(perf_evsel__sw_names, 108 - PERF_COUNT_SW_DUMMY + 1); 107 + err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1); 109 108 if (err) 110 109 ret = err; 111 110
+4 -4
tools/perf/tests/evsel-tp-sched.c
··· 35 35 36 36 int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) 37 37 { 38 - struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); 38 + struct evsel *evsel = evsel__newtp("sched", "sched_switch"); 39 39 int ret = 0; 40 40 41 41 if (IS_ERR(evsel)) { 42 - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); 42 + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); 43 43 return -1; 44 44 } 45 45 ··· 66 66 67 67 evsel__delete(evsel); 68 68 69 - evsel = perf_evsel__newtp("sched", "sched_wakeup"); 69 + evsel = evsel__newtp("sched", "sched_wakeup"); 70 70 71 71 if (IS_ERR(evsel)) { 72 - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); 72 + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); 73 73 return -1; 74 74 } 75 75
+25 -21
tools/perf/tests/expr.c
··· 19 19 int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) 20 20 { 21 21 const char *p; 22 - const char **other; 23 - double val; 24 - int i, ret; 22 + double val, *val_ptr; 23 + int ret; 25 24 struct expr_parse_ctx ctx; 26 - int num_other; 27 25 28 26 expr__ctx_init(&ctx); 29 - expr__add_id(&ctx, "FOO", 1); 30 - expr__add_id(&ctx, "BAR", 2); 27 + expr__add_id(&ctx, strdup("FOO"), 1); 28 + expr__add_id(&ctx, strdup("BAR"), 2); 31 29 32 30 ret = test(&ctx, "1+1", 2); 33 31 ret |= test(&ctx, "FOO+BAR", 3); ··· 37 39 ret |= test(&ctx, "min(1,2) + 1", 2); 38 40 ret |= test(&ctx, "max(1,2) + 1", 3); 39 41 ret |= test(&ctx, "1+1 if 3*4 else 0", 2); 42 + ret |= test(&ctx, "1.1 + 2.1", 3.2); 43 + ret |= test(&ctx, ".1 + 2.", 2.1); 40 44 41 45 if (ret) 42 46 return ret; ··· 51 51 ret = expr__parse(&val, &ctx, p, 1); 52 52 TEST_ASSERT_VAL("missing operand", ret == -1); 53 53 54 + expr__ctx_clear(&ctx); 54 55 TEST_ASSERT_VAL("find other", 55 - expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0); 56 - TEST_ASSERT_VAL("find other", num_other == 3); 57 - TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); 58 - TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); 59 - TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); 60 - TEST_ASSERT_VAL("find other", other[3] == NULL); 56 + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", 57 + &ctx, 1) == 0); 58 + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3); 59 + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR", 60 + (void **)&val_ptr)); 61 + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ", 62 + (void **)&val_ptr)); 63 + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO", 64 + (void **)&val_ptr)); 61 65 66 + expr__ctx_clear(&ctx); 62 67 TEST_ASSERT_VAL("find other", 63 - expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL, 64 - &other, &num_other, 3) == 0); 65 - TEST_ASSERT_VAL("find other", num_other == 2); 66 - 
TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/")); 67 - TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/")); 68 - TEST_ASSERT_VAL("find other", other[2] == NULL); 68 + expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", 69 + NULL, &ctx, 3) == 0); 70 + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2); 71 + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/", 72 + (void **)&val_ptr)); 73 + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/", 74 + (void **)&val_ptr)); 69 75 70 - for (i = 0; i < num_other; i++) 71 - zfree(&other[i]); 72 - free((void *)other); 76 + expr__ctx_clear(&ctx); 73 77 74 78 return 0; 75 79 }
+1 -1
tools/perf/tests/hists_cumulate.c
··· 190 190 * function since TEST_ASSERT_VAL() returns in case of failure. 191 191 */ 192 192 hists__collapse_resort(hists, NULL); 193 - perf_evsel__output_resort(hists_to_evsel(hists), NULL); 193 + evsel__output_resort(hists_to_evsel(hists), NULL); 194 194 195 195 if (verbose > 2) { 196 196 pr_info("use callchain: %d, cumulate callchain: %d\n",
+1 -1
tools/perf/tests/hists_filter.c
··· 142 142 struct hists *hists = evsel__hists(evsel); 143 143 144 144 hists__collapse_resort(hists, NULL); 145 - perf_evsel__output_resort(evsel, NULL); 145 + evsel__output_resort(evsel, NULL); 146 146 147 147 if (verbose > 2) { 148 148 pr_info("Normal histogram\n");
+5 -5
tools/perf/tests/hists_output.c
··· 155 155 goto out; 156 156 157 157 hists__collapse_resort(hists, NULL); 158 - perf_evsel__output_resort(evsel, NULL); 158 + evsel__output_resort(evsel, NULL); 159 159 160 160 if (verbose > 2) { 161 161 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); ··· 255 255 goto out; 256 256 257 257 hists__collapse_resort(hists, NULL); 258 - perf_evsel__output_resort(evsel, NULL); 258 + evsel__output_resort(evsel, NULL); 259 259 260 260 if (verbose > 2) { 261 261 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); ··· 309 309 goto out; 310 310 311 311 hists__collapse_resort(hists, NULL); 312 - perf_evsel__output_resort(evsel, NULL); 312 + evsel__output_resort(evsel, NULL); 313 313 314 314 if (verbose > 2) { 315 315 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); ··· 387 387 goto out; 388 388 389 389 hists__collapse_resort(hists, NULL); 390 - perf_evsel__output_resort(evsel, NULL); 390 + evsel__output_resort(evsel, NULL); 391 391 392 392 if (verbose > 2) { 393 393 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); ··· 490 490 goto out; 491 491 492 492 hists__collapse_resort(hists, NULL); 493 - perf_evsel__output_resort(evsel, NULL); 493 + evsel__output_resort(evsel, NULL); 494 494 495 495 if (verbose > 2) { 496 496 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+9 -1
tools/perf/tests/make
··· 84 84 make_no_libbionic := NO_LIBBIONIC=1 85 85 make_no_auxtrace := NO_AUXTRACE=1 86 86 make_no_libbpf := NO_LIBBPF=1 87 + make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1 87 88 make_no_libcrypto := NO_LIBCRYPTO=1 88 89 make_with_babeltrace:= LIBBABELTRACE=1 89 90 make_no_sdt := NO_SDT=1 91 + make_no_syscall_tbl := NO_SYSCALL_TABLE=1 90 92 make_with_clangllvm := LIBCLANGLLVM=1 93 + make_with_libpfm4 := LIBPFM4=1 91 94 make_tags := tags 92 95 make_cscope := cscope 93 96 make_help := help ··· 115 112 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 116 113 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 117 114 make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 118 - make_minimal += NO_LIBCAP=1 115 + make_minimal += NO_LIBCAP=1 NO_SYSCALL_TABLE=1 119 116 120 117 # $(run) contains all available tests 121 118 run := make_pure ··· 147 144 run += make_no_libbionic 148 145 run += make_no_auxtrace 149 146 run += make_no_libbpf 147 + run += make_no_libbpf_DEBUG 148 + run += make_no_libcrypto 149 + run += make_no_sdt 150 + run += make_no_syscall_tbl 150 151 run += make_with_babeltrace 151 152 run += make_with_clangllvm 153 + run += make_with_libpfm4 152 154 run += make_help 153 155 run += make_doc 154 156 run += make_perf_o
+2 -2
tools/perf/tests/mmap-basic.c
··· 79 79 char name[64]; 80 80 81 81 snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); 82 - evsels[i] = perf_evsel__newtp("syscalls", name); 82 + evsels[i] = evsel__newtp("syscalls", name); 83 83 if (IS_ERR(evsels[i])) { 84 - pr_debug("perf_evsel__new(%s)\n", name); 84 + pr_debug("evsel__new(%s)\n", name); 85 85 goto out_delete_evlist; 86 86 } 87 87
+4 -4
tools/perf/tests/openat-syscall-all-cpus.c
··· 44 44 45 45 CPU_ZERO(&cpu_set); 46 46 47 - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); 47 + evsel = evsel__newtp("syscalls", "sys_enter_openat"); 48 48 if (IS_ERR(evsel)) { 49 49 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); 50 50 pr_debug("%s\n", errbuf); ··· 90 90 * we use the auto allocation it will allocate just for 1 cpu, 91 91 * as we start by cpu 0. 92 92 */ 93 - if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { 94 - pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); 93 + if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { 94 + pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); 95 95 goto out_close_fd; 96 96 } 97 97 ··· 117 117 } 118 118 } 119 119 120 - perf_evsel__free_counts(evsel); 120 + evsel__free_counts(evsel); 121 121 out_close_fd: 122 122 perf_evsel__close_fd(&evsel->core); 123 123 out_evsel_delete:
+2 -2
tools/perf/tests/openat-syscall-tp-fields.c
··· 46 46 goto out; 47 47 } 48 48 49 - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); 49 + evsel = evsel__newtp("syscalls", "sys_enter_openat"); 50 50 if (IS_ERR(evsel)) { 51 - pr_debug("%s: perf_evsel__newtp\n", __func__); 51 + pr_debug("%s: evsel__newtp\n", __func__); 52 52 goto out_delete_evlist; 53 53 } 54 54
+1 -1
tools/perf/tests/openat-syscall.c
··· 27 27 return -1; 28 28 } 29 29 30 - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); 30 + evsel = evsel__newtp("syscalls", "sys_enter_openat"); 31 31 if (IS_ERR(evsel)) { 32 32 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); 33 33 pr_debug("%s\n", errbuf);
+203
tools/perf/tests/pfm.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test support for libpfm4 event encodings. 4 + * 5 + * Copyright 2020 Google LLC. 6 + */ 7 + #include "tests.h" 8 + #include "util/debug.h" 9 + #include "util/evlist.h" 10 + #include "util/pfm.h" 11 + 12 + #include <linux/kernel.h> 13 + 14 + #ifdef HAVE_LIBPFM 15 + static int test__pfm_events(void); 16 + static int test__pfm_group(void); 17 + #endif 18 + 19 + static const struct { 20 + int (*func)(void); 21 + const char *desc; 22 + } pfm_testcase_table[] = { 23 + #ifdef HAVE_LIBPFM 24 + { 25 + .func = test__pfm_events, 26 + .desc = "test of individual --pfm-events", 27 + }, 28 + { 29 + .func = test__pfm_group, 30 + .desc = "test groups of --pfm-events", 31 + }, 32 + #endif 33 + }; 34 + 35 + #ifdef HAVE_LIBPFM 36 + static int count_pfm_events(struct perf_evlist *evlist) 37 + { 38 + struct perf_evsel *evsel; 39 + int count = 0; 40 + 41 + perf_evlist__for_each_entry(evlist, evsel) { 42 + count++; 43 + } 44 + return count; 45 + } 46 + 47 + static int test__pfm_events(void) 48 + { 49 + struct evlist *evlist; 50 + struct option opt; 51 + size_t i; 52 + const struct { 53 + const char *events; 54 + int nr_events; 55 + } table[] = { 56 + { 57 + .events = "", 58 + .nr_events = 0, 59 + }, 60 + { 61 + .events = "instructions", 62 + .nr_events = 1, 63 + }, 64 + { 65 + .events = "instructions,cycles", 66 + .nr_events = 2, 67 + }, 68 + { 69 + .events = "stereolab", 70 + .nr_events = 0, 71 + }, 72 + { 73 + .events = "instructions,instructions", 74 + .nr_events = 2, 75 + }, 76 + { 77 + .events = "stereolab,instructions", 78 + .nr_events = 0, 79 + }, 80 + { 81 + .events = "instructions,stereolab", 82 + .nr_events = 1, 83 + }, 84 + }; 85 + 86 + for (i = 0; i < ARRAY_SIZE(table); i++) { 87 + evlist = evlist__new(); 88 + if (evlist == NULL) 89 + return -ENOMEM; 90 + 91 + opt.value = evlist; 92 + parse_libpfm_events_option(&opt, 93 + table[i].events, 94 + 0); 95 + TEST_ASSERT_EQUAL(table[i].events, 96 + 
count_pfm_events(&evlist->core), 97 + table[i].nr_events); 98 + TEST_ASSERT_EQUAL(table[i].events, 99 + evlist->nr_groups, 100 + 0); 101 + 102 + evlist__delete(evlist); 103 + } 104 + return 0; 105 + } 106 + 107 + static int test__pfm_group(void) 108 + { 109 + struct evlist *evlist; 110 + struct option opt; 111 + size_t i; 112 + const struct { 113 + const char *events; 114 + int nr_events; 115 + int nr_groups; 116 + } table[] = { 117 + { 118 + .events = "{},", 119 + .nr_events = 0, 120 + .nr_groups = 0, 121 + }, 122 + { 123 + .events = "{instructions}", 124 + .nr_events = 1, 125 + .nr_groups = 1, 126 + }, 127 + { 128 + .events = "{instructions},{}", 129 + .nr_events = 1, 130 + .nr_groups = 1, 131 + }, 132 + { 133 + .events = "{},{instructions}", 134 + .nr_events = 0, 135 + .nr_groups = 0, 136 + }, 137 + { 138 + .events = "{instructions},{instructions}", 139 + .nr_events = 2, 140 + .nr_groups = 2, 141 + }, 142 + { 143 + .events = "{instructions,cycles},{instructions,cycles}", 144 + .nr_events = 4, 145 + .nr_groups = 2, 146 + }, 147 + { 148 + .events = "{stereolab}", 149 + .nr_events = 0, 150 + .nr_groups = 0, 151 + }, 152 + { 153 + .events = 154 + "{instructions,cycles},{instructions,stereolab}", 155 + .nr_events = 3, 156 + .nr_groups = 1, 157 + }, 158 + }; 159 + 160 + for (i = 0; i < ARRAY_SIZE(table); i++) { 161 + evlist = evlist__new(); 162 + if (evlist == NULL) 163 + return -ENOMEM; 164 + 165 + opt.value = evlist; 166 + parse_libpfm_events_option(&opt, 167 + table[i].events, 168 + 0); 169 + TEST_ASSERT_EQUAL(table[i].events, 170 + count_pfm_events(&evlist->core), 171 + table[i].nr_events); 172 + TEST_ASSERT_EQUAL(table[i].events, 173 + evlist->nr_groups, 174 + table[i].nr_groups); 175 + 176 + evlist__delete(evlist); 177 + } 178 + return 0; 179 + } 180 + #endif 181 + 182 + const char *test__pfm_subtest_get_desc(int i) 183 + { 184 + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) 185 + return NULL; 186 + return pfm_testcase_table[i].desc; 187 + } 188 + 189 + 
int test__pfm_subtest_get_nr(void) 190 + { 191 + return (int)ARRAY_SIZE(pfm_testcase_table); 192 + } 193 + 194 + int test__pfm(struct test *test __maybe_unused, int i __maybe_unused) 195 + { 196 + #ifdef HAVE_LIBPFM 197 + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) 198 + return TEST_FAIL; 199 + return pfm_testcase_table[i].func(); 200 + #else 201 + return TEST_SKIP; 202 + #endif 203 + }
+167 -6
tools/perf/tests/pmu-events.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 + #include "math.h" 2 3 #include "parse-events.h" 3 4 #include "pmu.h" 4 5 #include "tests.h" ··· 9 8 #include <linux/zalloc.h> 10 9 #include "debug.h" 11 10 #include "../pmu-events/pmu-events.h" 11 + #include "util/evlist.h" 12 + #include "util/expr.h" 13 + #include "util/parse-events.h" 12 14 13 15 struct perf_pmu_test_event { 14 16 struct pmu_event event; ··· 148 144 } 149 145 150 146 /* Verify generated events from pmu-events.c is as expected */ 151 - static int __test_pmu_event_table(void) 147 + static int test_pmu_event_table(void) 152 148 { 153 149 struct pmu_events_map *map = __test_pmu_get_events_map(); 154 150 struct pmu_event *table; ··· 351 347 return res; 352 348 } 353 349 354 - int test__pmu_events(struct test *test __maybe_unused, 355 - int subtest __maybe_unused) 350 + 351 + static int test_aliases(void) 356 352 { 357 353 struct perf_pmu *pmu = NULL; 358 - 359 - if (__test_pmu_event_table()) 360 - return -1; 361 354 362 355 while ((pmu = perf_pmu__scan(pmu)) != NULL) { 363 356 int count = 0; ··· 377 376 } 378 377 379 378 return 0; 379 + } 380 + 381 + static bool is_number(const char *str) 382 + { 383 + char *end_ptr; 384 + double v; 385 + 386 + errno = 0; 387 + v = strtod(str, &end_ptr); 388 + (void)v; // We're not interested in this value, only if it is valid 389 + return errno == 0 && end_ptr != str; 390 + } 391 + 392 + static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) 393 + { 394 + struct parse_events_error error; 395 + struct evlist *evlist; 396 + int ret; 397 + 398 + /* Numbers are always valid. 
*/ 399 + if (is_number(id)) 400 + return 0; 401 + 402 + evlist = evlist__new(); 403 + memset(&error, 0, sizeof(error)); 404 + ret = parse_events(evlist, id, &error); 405 + if (ret && same_cpu) { 406 + pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", 407 + pe->metric_name, id, pe->metric_expr); 408 + pr_warning("Error string '%s' help '%s'\n", error.str, 409 + error.help); 410 + } else if (ret) { 411 + pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n", 412 + id, pe->metric_name, pe->metric_expr); 413 + ret = 0; 414 + } 415 + evlist__delete(evlist); 416 + free(error.str); 417 + free(error.help); 418 + free(error.first_str); 419 + free(error.first_help); 420 + return ret; 421 + } 422 + 423 + static void expr_failure(const char *msg, 424 + const struct pmu_events_map *map, 425 + const struct pmu_event *pe) 426 + { 427 + pr_debug("%s for map %s %s %s\n", 428 + msg, map->cpuid, map->version, map->type); 429 + pr_debug("On metric %s\n", pe->metric_name); 430 + pr_debug("On expression %s\n", pe->metric_expr); 431 + } 432 + 433 + static int test_parsing(void) 434 + { 435 + struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); 436 + struct pmu_events_map *map; 437 + struct pmu_event *pe; 438 + int i, j, k; 439 + int ret = 0; 440 + struct expr_parse_ctx ctx; 441 + double result; 442 + 443 + i = 0; 444 + for (;;) { 445 + map = &pmu_events_map[i++]; 446 + if (!map->table) 447 + break; 448 + j = 0; 449 + for (;;) { 450 + struct hashmap_entry *cur; 451 + size_t bkt; 452 + 453 + pe = &map->table[j++]; 454 + if (!pe->name && !pe->metric_group && !pe->metric_name) 455 + break; 456 + if (!pe->metric_expr) 457 + continue; 458 + expr__ctx_init(&ctx); 459 + if (expr__find_other(pe->metric_expr, NULL, &ctx, 0) 460 + < 0) { 461 + expr_failure("Parse other failed", map, pe); 462 + ret++; 463 + continue; 464 + } 465 + 466 + /* 467 + * Add all ids with a made up value. 
The value may 468 + * trigger divide by zero when subtracted and so try to 469 + * make them unique. 470 + */ 471 + k = 1; 472 + hashmap__for_each_entry((&ctx.ids), cur, bkt) 473 + expr__add_id(&ctx, strdup(cur->key), k++); 474 + 475 + hashmap__for_each_entry((&ctx.ids), cur, bkt) { 476 + if (check_parse_id(cur->key, map == cpus_map, 477 + pe)) 478 + ret++; 479 + } 480 + 481 + if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { 482 + expr_failure("Parse failed", map, pe); 483 + ret++; 484 + } 485 + expr__ctx_clear(&ctx); 486 + } 487 + } 488 + /* TODO: fail when not ok */ 489 + return ret == 0 ? TEST_OK : TEST_SKIP; 490 + } 491 + 492 + static const struct { 493 + int (*func)(void); 494 + const char *desc; 495 + } pmu_events_testcase_table[] = { 496 + { 497 + .func = test_pmu_event_table, 498 + .desc = "PMU event table sanity", 499 + }, 500 + { 501 + .func = test_aliases, 502 + .desc = "PMU event map aliases", 503 + }, 504 + { 505 + .func = test_parsing, 506 + .desc = "Parsing of PMU event table metrics", 507 + }, 508 + }; 509 + 510 + const char *test__pmu_events_subtest_get_desc(int subtest) 511 + { 512 + if (subtest < 0 || 513 + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 514 + return NULL; 515 + return pmu_events_testcase_table[subtest].desc; 516 + } 517 + 518 + const char *test__pmu_events_subtest_skip_reason(int subtest) 519 + { 520 + if (subtest < 0 || 521 + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 522 + return NULL; 523 + if (pmu_events_testcase_table[subtest].func != test_parsing) 524 + return NULL; 525 + return "some metrics failed"; 526 + } 527 + 528 + int test__pmu_events_subtest_get_nr(void) 529 + { 530 + return (int)ARRAY_SIZE(pmu_events_testcase_table); 531 + } 532 + 533 + int test__pmu_events(struct test *test __maybe_unused, int subtest) 534 + { 535 + if (subtest < 0 || 536 + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 537 + return TEST_FAIL; 538 + return pmu_events_testcase_table[subtest].func(); 380 539 }
+2 -2
tools/perf/tests/pmu.c
··· 156 156 if (ret) 157 157 break; 158 158 159 - ret = perf_pmu__config_terms(&formats, &attr, terms, 160 - false, NULL); 159 + ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr, 160 + terms, false, NULL); 161 161 if (ret) 162 162 break; 163 163
+1 -1
tools/perf/tests/sw-clock.c
··· 56 56 57 57 evsel = evsel__new(&attr); 58 58 if (evsel == NULL) { 59 - pr_debug("perf_evsel__new\n"); 59 + pr_debug("evsel__new\n"); 60 60 goto out_delete_evlist; 61 61 } 62 62 evlist__add(evlist, evsel);
+8
tools/perf/tests/tests.h
··· 34 34 bool skip_if_fail; 35 35 int (*get_nr)(void); 36 36 const char *(*get_desc)(int subtest); 37 + const char *(*skip_reason)(int subtest); 37 38 } subtest; 38 39 bool (*is_supported)(void); 39 40 void *priv; ··· 51 50 int test__syscall_openat_tp_fields(struct test *test, int subtest); 52 51 int test__pmu(struct test *test, int subtest); 53 52 int test__pmu_events(struct test *test, int subtest); 53 + const char *test__pmu_events_subtest_get_desc(int subtest); 54 + const char *test__pmu_events_subtest_skip_reason(int subtest); 55 + int test__pmu_events_subtest_get_nr(void); 54 56 int test__attr(struct test *test, int subtest); 55 57 int test__dso_data(struct test *test, int subtest); 56 58 int test__dso_data_cache(struct test *test, int subtest); ··· 117 113 int test__time_utils(struct test *t, int subtest); 118 114 int test__jit_write_elf(struct test *test, int subtest); 119 115 int test__api_io(struct test *test, int subtest); 116 + int test__demangle_java(struct test *test, int subtest); 117 + int test__pfm(struct test *test, int subtest); 118 + const char *test__pfm_subtest_get_desc(int subtest); 119 + int test__pfm_subtest_get_nr(void); 120 120 121 121 bool test__bp_signal_is_supported(void); 122 122 bool test__bp_account_is_supported(void);
+2 -2
tools/perf/trace/beauty/arch_errno_names.sh
··· 57 57 local arch="$1" 58 58 local asm_errno=$(asm_errno_file "$arch") 59 59 60 - $gcc $include_path -E -dM -x c $asm_errno \ 60 + $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \ 61 61 |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \ 62 62 |awk '{ print $2","$3; }' \ 63 63 |sort -t, -k2 -nu \ ··· 91 91 # in tools/perf/arch 92 92 archlist="" 93 93 for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do 94 - test -d arch/$arch && archlist="$archlist $arch" 94 + test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch" 95 95 done 96 96 97 97 for arch in x86 $archlist generic; do
+7 -1
tools/perf/util/Build
··· 106 106 perf-$(CONFIG_AUXTRACE) += intel-pt.o 107 107 perf-$(CONFIG_AUXTRACE) += intel-bts.o 108 108 perf-$(CONFIG_AUXTRACE) += arm-spe.o 109 - perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o 109 + perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/ 110 110 perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o 111 111 112 112 ifdef CONFIG_LIBOPENCSD ··· 135 135 perf-$(CONFIG_LIBELF) += symbol-elf.o 136 136 perf-$(CONFIG_LIBELF) += probe-file.o 137 137 perf-$(CONFIG_LIBELF) += probe-event.o 138 + 139 + ifndef CONFIG_LIBBPF 140 + perf-y += hashmap.o 141 + endif 138 142 139 143 ifndef CONFIG_LIBELF 140 144 perf-y += symbol-minimal.o ··· 182 178 perf-$(CONFIG_LIBBPF) += bpf-event.o 183 179 184 180 perf-$(CONFIG_CXX) += c++/ 181 + 182 + perf-$(CONFIG_LIBPFM4) += pfm.o 185 183 186 184 CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 187 185 CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
-1
tools/perf/util/annotate.c
··· 41 41 #include <linux/bitops.h> 42 42 #include <linux/kernel.h> 43 43 #include <linux/string.h> 44 - #include <bpf/libbpf.h> 45 44 #include <subcmd/parse-options.h> 46 45 #include <subcmd/run-command.h> 47 46
+2 -2
tools/perf/util/annotate.h
··· 144 144 u32 idx; 145 145 int idx_asm; 146 146 int data_nr; 147 - struct annotation_data data[0]; 147 + struct annotation_data data[]; 148 148 }; 149 149 150 150 struct disasm_line { ··· 227 227 struct sym_hist { 228 228 u64 nr_samples; 229 229 u64 period; 230 - struct sym_hist_entry addr[0]; 230 + struct sym_hist_entry addr[]; 231 231 }; 232 232 233 233 struct cyc_hist {
+1
tools/perf/util/arm-spe-decoder/Build
··· 1 + perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
+219
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * arm_spe_decoder.c: ARM SPE support 4 + */ 5 + 6 + #ifndef _GNU_SOURCE 7 + #define _GNU_SOURCE 8 + #endif 9 + #include <errno.h> 10 + #include <inttypes.h> 11 + #include <stdbool.h> 12 + #include <string.h> 13 + #include <stdint.h> 14 + #include <stdlib.h> 15 + #include <linux/compiler.h> 16 + #include <linux/zalloc.h> 17 + 18 + #include "../auxtrace.h" 19 + #include "../debug.h" 20 + #include "../util.h" 21 + 22 + #include "arm-spe-decoder.h" 23 + 24 + #ifndef BIT 25 + #define BIT(n) (1UL << (n)) 26 + #endif 27 + 28 + static u64 arm_spe_calc_ip(int index, u64 payload) 29 + { 30 + u8 *addr = (u8 *)&payload; 31 + int ns, el; 32 + 33 + /* Instruction virtual address or Branch target address */ 34 + if (index == SPE_ADDR_PKT_HDR_INDEX_INS || 35 + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { 36 + ns = addr[7] & SPE_ADDR_PKT_NS; 37 + el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET; 38 + 39 + /* Fill highest byte for EL1 or EL2 (VHE) mode */ 40 + if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) 41 + addr[7] = 0xff; 42 + /* Clean highest byte for other cases */ 43 + else 44 + addr[7] = 0x0; 45 + 46 + /* Data access virtual address */ 47 + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { 48 + 49 + /* Fill highest byte if bits [48..55] is 0xff */ 50 + if (addr[6] == 0xff) 51 + addr[7] = 0xff; 52 + /* Otherwise, cleanup tags */ 53 + else 54 + addr[7] = 0x0; 55 + 56 + /* Data access physical address */ 57 + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { 58 + /* Cleanup byte 7 */ 59 + addr[7] = 0x0; 60 + } else { 61 + pr_err("unsupported address packet index: 0x%x\n", index); 62 + } 63 + 64 + return payload; 65 + } 66 + 67 + struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params) 68 + { 69 + struct arm_spe_decoder *decoder; 70 + 71 + if (!params->get_trace) 72 + return NULL; 73 + 74 + decoder = zalloc(sizeof(struct arm_spe_decoder)); 75 + if (!decoder) 76 + 
return NULL; 77 + 78 + decoder->get_trace = params->get_trace; 79 + decoder->data = params->data; 80 + 81 + return decoder; 82 + } 83 + 84 + void arm_spe_decoder_free(struct arm_spe_decoder *decoder) 85 + { 86 + free(decoder); 87 + } 88 + 89 + static int arm_spe_get_data(struct arm_spe_decoder *decoder) 90 + { 91 + struct arm_spe_buffer buffer = { .buf = 0, }; 92 + int ret; 93 + 94 + pr_debug("Getting more data\n"); 95 + ret = decoder->get_trace(&buffer, decoder->data); 96 + if (ret < 0) 97 + return ret; 98 + 99 + decoder->buf = buffer.buf; 100 + decoder->len = buffer.len; 101 + 102 + if (!decoder->len) 103 + pr_debug("No more data\n"); 104 + 105 + return decoder->len; 106 + } 107 + 108 + static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder) 109 + { 110 + int ret; 111 + 112 + do { 113 + if (!decoder->len) { 114 + ret = arm_spe_get_data(decoder); 115 + 116 + /* Failed to read out trace data */ 117 + if (ret <= 0) 118 + return ret; 119 + } 120 + 121 + ret = arm_spe_get_packet(decoder->buf, decoder->len, 122 + &decoder->packet); 123 + if (ret <= 0) { 124 + /* Move forward for 1 byte */ 125 + decoder->buf += 1; 126 + decoder->len -= 1; 127 + return -EBADMSG; 128 + } 129 + 130 + decoder->buf += ret; 131 + decoder->len -= ret; 132 + } while (decoder->packet.type == ARM_SPE_PAD); 133 + 134 + return 1; 135 + } 136 + 137 + static int arm_spe_read_record(struct arm_spe_decoder *decoder) 138 + { 139 + int err; 140 + int idx; 141 + u64 payload, ip; 142 + 143 + memset(&decoder->record, 0x0, sizeof(decoder->record)); 144 + 145 + while (1) { 146 + err = arm_spe_get_next_packet(decoder); 147 + if (err <= 0) 148 + return err; 149 + 150 + idx = decoder->packet.index; 151 + payload = decoder->packet.payload; 152 + 153 + switch (decoder->packet.type) { 154 + case ARM_SPE_TIMESTAMP: 155 + decoder->record.timestamp = payload; 156 + return 1; 157 + case ARM_SPE_END: 158 + return 1; 159 + case ARM_SPE_ADDRESS: 160 + ip = arm_spe_calc_ip(idx, payload); 161 + if (idx == 
SPE_ADDR_PKT_HDR_INDEX_INS) 162 + decoder->record.from_ip = ip; 163 + else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) 164 + decoder->record.to_ip = ip; 165 + break; 166 + case ARM_SPE_COUNTER: 167 + break; 168 + case ARM_SPE_CONTEXT: 169 + break; 170 + case ARM_SPE_OP_TYPE: 171 + break; 172 + case ARM_SPE_EVENTS: 173 + if (payload & BIT(EV_L1D_REFILL)) 174 + decoder->record.type |= ARM_SPE_L1D_MISS; 175 + 176 + if (payload & BIT(EV_L1D_ACCESS)) 177 + decoder->record.type |= ARM_SPE_L1D_ACCESS; 178 + 179 + if (payload & BIT(EV_TLB_WALK)) 180 + decoder->record.type |= ARM_SPE_TLB_MISS; 181 + 182 + if (payload & BIT(EV_TLB_ACCESS)) 183 + decoder->record.type |= ARM_SPE_TLB_ACCESS; 184 + 185 + if ((idx == 1 || idx == 2 || idx == 3) && 186 + (payload & BIT(EV_LLC_MISS))) 187 + decoder->record.type |= ARM_SPE_LLC_MISS; 188 + 189 + if ((idx == 1 || idx == 2 || idx == 3) && 190 + (payload & BIT(EV_LLC_ACCESS))) 191 + decoder->record.type |= ARM_SPE_LLC_ACCESS; 192 + 193 + if ((idx == 1 || idx == 2 || idx == 3) && 194 + (payload & BIT(EV_REMOTE_ACCESS))) 195 + decoder->record.type |= ARM_SPE_REMOTE_ACCESS; 196 + 197 + if (payload & BIT(EV_MISPRED)) 198 + decoder->record.type |= ARM_SPE_BRANCH_MISS; 199 + 200 + break; 201 + case ARM_SPE_DATA_SOURCE: 202 + break; 203 + case ARM_SPE_BAD: 204 + break; 205 + case ARM_SPE_PAD: 206 + break; 207 + default: 208 + pr_err("Get packet error!\n"); 209 + return -1; 210 + } 211 + } 212 + 213 + return 0; 214 + } 215 + 216 + int arm_spe_decode(struct arm_spe_decoder *decoder) 217 + { 218 + return arm_spe_read_record(decoder); 219 + }
+82
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * arm_spe_decoder.h: Arm Statistical Profiling Extensions support 4 + * Copyright (c) 2019-2020, Arm Ltd. 5 + */ 6 + 7 + #ifndef INCLUDE__ARM_SPE_DECODER_H__ 8 + #define INCLUDE__ARM_SPE_DECODER_H__ 9 + 10 + #include <stdbool.h> 11 + #include <stddef.h> 12 + #include <stdint.h> 13 + 14 + #include "arm-spe-pkt-decoder.h" 15 + 16 + enum arm_spe_events { 17 + EV_EXCEPTION_GEN = 0, 18 + EV_RETIRED = 1, 19 + EV_L1D_ACCESS = 2, 20 + EV_L1D_REFILL = 3, 21 + EV_TLB_ACCESS = 4, 22 + EV_TLB_WALK = 5, 23 + EV_NOT_TAKEN = 6, 24 + EV_MISPRED = 7, 25 + EV_LLC_ACCESS = 8, 26 + EV_LLC_MISS = 9, 27 + EV_REMOTE_ACCESS = 10, 28 + EV_ALIGNMENT = 11, 29 + EV_PARTIAL_PREDICATE = 17, 30 + EV_EMPTY_PREDICATE = 18, 31 + }; 32 + 33 + enum arm_spe_sample_type { 34 + ARM_SPE_L1D_ACCESS = 1 << 0, 35 + ARM_SPE_L1D_MISS = 1 << 1, 36 + ARM_SPE_LLC_ACCESS = 1 << 2, 37 + ARM_SPE_LLC_MISS = 1 << 3, 38 + ARM_SPE_TLB_ACCESS = 1 << 4, 39 + ARM_SPE_TLB_MISS = 1 << 5, 40 + ARM_SPE_BRANCH_MISS = 1 << 6, 41 + ARM_SPE_REMOTE_ACCESS = 1 << 7, 42 + }; 43 + 44 + struct arm_spe_record { 45 + enum arm_spe_sample_type type; 46 + int err; 47 + u64 from_ip; 48 + u64 to_ip; 49 + u64 timestamp; 50 + }; 51 + 52 + struct arm_spe_insn; 53 + 54 + struct arm_spe_buffer { 55 + const unsigned char *buf; 56 + size_t len; 57 + u64 offset; 58 + u64 trace_nr; 59 + }; 60 + 61 + struct arm_spe_params { 62 + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); 63 + void *data; 64 + }; 65 + 66 + struct arm_spe_decoder { 67 + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); 68 + void *data; 69 + struct arm_spe_record record; 70 + 71 + const unsigned char *buf; 72 + size_t len; 73 + 74 + struct arm_spe_pkt packet; 75 + }; 76 + 77 + struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params); 78 + void arm_spe_decoder_free(struct arm_spe_decoder *decoder); 79 + 80 + int arm_spe_decode(struct arm_spe_decoder *decoder); 81 + 82 + #endif
tools/perf/util/arm-spe-pkt-decoder.c tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+16
tools/perf/util/arm-spe-pkt-decoder.h tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
··· 15 15 #define ARM_SPE_NEED_MORE_BYTES -1 16 16 #define ARM_SPE_BAD_PACKET -2 17 17 18 + #define ARM_SPE_PKT_MAX_SZ 16 19 + 18 20 enum arm_spe_pkt_type { 19 21 ARM_SPE_BAD, 20 22 ARM_SPE_PAD, ··· 35 33 unsigned char index; 36 34 uint64_t payload; 37 35 }; 36 + 37 + #define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) 38 + #define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) 39 + #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) 40 + #define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) 41 + 42 + #define SPE_ADDR_PKT_NS BIT(7) 43 + #define SPE_ADDR_PKT_CH BIT(6) 44 + #define SPE_ADDR_PKT_EL_OFFSET (5) 45 + #define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET) 46 + #define SPE_ADDR_PKT_EL0 (0) 47 + #define SPE_ADDR_PKT_EL1 (1) 48 + #define SPE_ADDR_PKT_EL2 (2) 49 + #define SPE_ADDR_PKT_EL3 (3) 38 50 39 51 const char *arm_spe_pkt_name(enum arm_spe_pkt_type); 40 52
+780 -43
tools/perf/util/arm-spe.c
··· 4 4 * Copyright (c) 2017-2018, Arm Ltd. 5 5 */ 6 6 7 + #include <byteswap.h> 7 8 #include <endian.h> 8 9 #include <errno.h> 9 - #include <byteswap.h> 10 10 #include <inttypes.h> 11 - #include <unistd.h> 12 - #include <stdlib.h> 13 - #include <linux/kernel.h> 14 - #include <linux/types.h> 15 11 #include <linux/bitops.h> 12 + #include <linux/kernel.h> 16 13 #include <linux/log2.h> 14 + #include <linux/types.h> 17 15 #include <linux/zalloc.h> 16 + #include <stdlib.h> 17 + #include <unistd.h> 18 18 19 + #include "auxtrace.h" 19 20 #include "color.h" 21 + #include "debug.h" 22 + #include "evlist.h" 20 23 #include "evsel.h" 21 24 #include "machine.h" 22 25 #include "session.h" 23 - #include "debug.h" 24 - #include "auxtrace.h" 26 + #include "symbol.h" 27 + #include "thread.h" 28 + #include "thread-stack.h" 29 + #include "tool.h" 30 + #include "util/synthetic-events.h" 31 + 25 32 #include "arm-spe.h" 26 - #include "arm-spe-pkt-decoder.h" 33 + #include "arm-spe-decoder/arm-spe-decoder.h" 34 + #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 35 + 36 + #define MAX_TIMESTAMP (~0ULL) 27 37 28 38 struct arm_spe { 29 39 struct auxtrace auxtrace; 30 40 struct auxtrace_queues queues; 31 41 struct auxtrace_heap heap; 42 + struct itrace_synth_opts synth_opts; 32 43 u32 auxtrace_type; 33 44 struct perf_session *session; 34 45 struct machine *machine; 35 46 u32 pmu_type; 47 + 48 + u8 timeless_decoding; 49 + u8 data_queued; 50 + 51 + u8 sample_flc; 52 + u8 sample_llc; 53 + u8 sample_tlb; 54 + u8 sample_branch; 55 + u8 sample_remote_access; 56 + 57 + u64 l1d_miss_id; 58 + u64 l1d_access_id; 59 + u64 llc_miss_id; 60 + u64 llc_access_id; 61 + u64 tlb_miss_id; 62 + u64 tlb_access_id; 63 + u64 branch_miss_id; 64 + u64 remote_access_id; 65 + 66 + u64 kernel_start; 67 + 68 + unsigned long num_events; 36 69 }; 37 70 38 71 struct arm_spe_queue { 39 - struct arm_spe *spe; 40 - unsigned int queue_nr; 41 - struct auxtrace_buffer *buffer; 42 - bool on_heap; 43 - bool done; 44 - pid_t pid; 45 - 
pid_t tid; 46 - int cpu; 72 + struct arm_spe *spe; 73 + unsigned int queue_nr; 74 + struct auxtrace_buffer *buffer; 75 + struct auxtrace_buffer *old_buffer; 76 + union perf_event *event_buf; 77 + bool on_heap; 78 + bool done; 79 + pid_t pid; 80 + pid_t tid; 81 + int cpu; 82 + struct arm_spe_decoder *decoder; 83 + u64 time; 84 + u64 timestamp; 85 + struct thread *thread; 47 86 }; 48 87 49 88 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, ··· 131 92 arm_spe_dump(spe, buf, len); 132 93 } 133 94 134 - static int arm_spe_process_event(struct perf_session *session __maybe_unused, 135 - union perf_event *event __maybe_unused, 136 - struct perf_sample *sample __maybe_unused, 137 - struct perf_tool *tool __maybe_unused) 95 + static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 138 96 { 97 + struct arm_spe_queue *speq = data; 98 + struct auxtrace_buffer *buffer = speq->buffer; 99 + struct auxtrace_buffer *old_buffer = speq->old_buffer; 100 + struct auxtrace_queue *queue; 101 + 102 + queue = &speq->spe->queues.queue_array[speq->queue_nr]; 103 + 104 + buffer = auxtrace_buffer__next(queue, buffer); 105 + /* If no more data, drop the previous auxtrace_buffer and return */ 106 + if (!buffer) { 107 + if (old_buffer) 108 + auxtrace_buffer__drop_data(old_buffer); 109 + b->len = 0; 110 + return 0; 111 + } 112 + 113 + speq->buffer = buffer; 114 + 115 + /* If the aux_buffer doesn't have data associated, try to load it */ 116 + if (!buffer->data) { 117 + /* get the file desc associated with the perf data file */ 118 + int fd = perf_data__fd(speq->spe->session->data); 119 + 120 + buffer->data = auxtrace_buffer__get_data(buffer, fd); 121 + if (!buffer->data) 122 + return -ENOMEM; 123 + } 124 + 125 + b->len = buffer->size; 126 + b->buf = buffer->data; 127 + 128 + if (b->len) { 129 + if (old_buffer) 130 + auxtrace_buffer__drop_data(old_buffer); 131 + speq->old_buffer = buffer; 132 + } else { 133 + auxtrace_buffer__drop_data(buffer); 134 + return 
arm_spe_get_trace(b, data); 135 + } 136 + 139 137 return 0; 138 + } 139 + 140 + static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, 141 + unsigned int queue_nr) 142 + { 143 + struct arm_spe_params params = { .get_trace = 0, }; 144 + struct arm_spe_queue *speq; 145 + 146 + speq = zalloc(sizeof(*speq)); 147 + if (!speq) 148 + return NULL; 149 + 150 + speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 151 + if (!speq->event_buf) 152 + goto out_free; 153 + 154 + speq->spe = spe; 155 + speq->queue_nr = queue_nr; 156 + speq->pid = -1; 157 + speq->tid = -1; 158 + speq->cpu = -1; 159 + 160 + /* params set */ 161 + params.get_trace = arm_spe_get_trace; 162 + params.data = speq; 163 + 164 + /* create new decoder */ 165 + speq->decoder = arm_spe_decoder_new(&params); 166 + if (!speq->decoder) 167 + goto out_free; 168 + 169 + return speq; 170 + 171 + out_free: 172 + zfree(&speq->event_buf); 173 + free(speq); 174 + 175 + return NULL; 176 + } 177 + 178 + static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) 179 + { 180 + return ip >= spe->kernel_start ? 
181 + PERF_RECORD_MISC_KERNEL : 182 + PERF_RECORD_MISC_USER; 183 + } 184 + 185 + static void arm_spe_prep_sample(struct arm_spe *spe, 186 + struct arm_spe_queue *speq, 187 + union perf_event *event, 188 + struct perf_sample *sample) 189 + { 190 + struct arm_spe_record *record = &speq->decoder->record; 191 + 192 + if (!spe->timeless_decoding) 193 + sample->time = speq->timestamp; 194 + 195 + sample->ip = record->from_ip; 196 + sample->cpumode = arm_spe_cpumode(spe, sample->ip); 197 + sample->pid = speq->pid; 198 + sample->tid = speq->tid; 199 + sample->addr = record->to_ip; 200 + sample->period = 1; 201 + sample->cpu = speq->cpu; 202 + 203 + event->sample.header.type = PERF_RECORD_SAMPLE; 204 + event->sample.header.misc = sample->cpumode; 205 + event->sample.header.size = sizeof(struct perf_event_header); 206 + } 207 + 208 + static inline int 209 + arm_spe_deliver_synth_event(struct arm_spe *spe, 210 + struct arm_spe_queue *speq __maybe_unused, 211 + union perf_event *event, 212 + struct perf_sample *sample) 213 + { 214 + int ret; 215 + 216 + ret = perf_session__deliver_synth_event(spe->session, event, sample); 217 + if (ret) 218 + pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 219 + 220 + return ret; 221 + } 222 + 223 + static int 224 + arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, 225 + u64 spe_events_id) 226 + { 227 + struct arm_spe *spe = speq->spe; 228 + union perf_event *event = speq->event_buf; 229 + struct perf_sample sample = { .ip = 0, }; 230 + 231 + arm_spe_prep_sample(spe, speq, event, &sample); 232 + 233 + sample.id = spe_events_id; 234 + sample.stream_id = spe_events_id; 235 + 236 + return arm_spe_deliver_synth_event(spe, speq, event, &sample); 237 + } 238 + 239 + static int arm_spe_sample(struct arm_spe_queue *speq) 240 + { 241 + const struct arm_spe_record *record = &speq->decoder->record; 242 + struct arm_spe *spe = speq->spe; 243 + int err; 244 + 245 + if (spe->sample_flc) { 246 + if (record->type & ARM_SPE_L1D_MISS) { 
247 + err = arm_spe_synth_spe_events_sample( 248 + speq, spe->l1d_miss_id); 249 + if (err) 250 + return err; 251 + } 252 + 253 + if (record->type & ARM_SPE_L1D_ACCESS) { 254 + err = arm_spe_synth_spe_events_sample( 255 + speq, spe->l1d_access_id); 256 + if (err) 257 + return err; 258 + } 259 + } 260 + 261 + if (spe->sample_llc) { 262 + if (record->type & ARM_SPE_LLC_MISS) { 263 + err = arm_spe_synth_spe_events_sample( 264 + speq, spe->llc_miss_id); 265 + if (err) 266 + return err; 267 + } 268 + 269 + if (record->type & ARM_SPE_LLC_ACCESS) { 270 + err = arm_spe_synth_spe_events_sample( 271 + speq, spe->llc_access_id); 272 + if (err) 273 + return err; 274 + } 275 + } 276 + 277 + if (spe->sample_tlb) { 278 + if (record->type & ARM_SPE_TLB_MISS) { 279 + err = arm_spe_synth_spe_events_sample( 280 + speq, spe->tlb_miss_id); 281 + if (err) 282 + return err; 283 + } 284 + 285 + if (record->type & ARM_SPE_TLB_ACCESS) { 286 + err = arm_spe_synth_spe_events_sample( 287 + speq, spe->tlb_access_id); 288 + if (err) 289 + return err; 290 + } 291 + } 292 + 293 + if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { 294 + err = arm_spe_synth_spe_events_sample(speq, 295 + spe->branch_miss_id); 296 + if (err) 297 + return err; 298 + } 299 + 300 + if (spe->sample_remote_access && 301 + (record->type & ARM_SPE_REMOTE_ACCESS)) { 302 + err = arm_spe_synth_spe_events_sample(speq, 303 + spe->remote_access_id); 304 + if (err) 305 + return err; 306 + } 307 + 308 + return 0; 309 + } 310 + 311 + static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) 312 + { 313 + struct arm_spe *spe = speq->spe; 314 + int ret; 315 + 316 + if (!spe->kernel_start) 317 + spe->kernel_start = machine__kernel_start(spe->machine); 318 + 319 + while (1) { 320 + ret = arm_spe_decode(speq->decoder); 321 + if (!ret) { 322 + pr_debug("No data or all data has been processed.\n"); 323 + return 1; 324 + } 325 + 326 + /* 327 + * Error is detected when decode SPE trace data, continue to 328 + * 
the next trace data and find out more records. 329 + */ 330 + if (ret < 0) 331 + continue; 332 + 333 + ret = arm_spe_sample(speq); 334 + if (ret) 335 + return ret; 336 + 337 + if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { 338 + *timestamp = speq->timestamp; 339 + return 0; 340 + } 341 + } 342 + 343 + return 0; 344 + } 345 + 346 + static int arm_spe__setup_queue(struct arm_spe *spe, 347 + struct auxtrace_queue *queue, 348 + unsigned int queue_nr) 349 + { 350 + struct arm_spe_queue *speq = queue->priv; 351 + struct arm_spe_record *record; 352 + 353 + if (list_empty(&queue->head) || speq) 354 + return 0; 355 + 356 + speq = arm_spe__alloc_queue(spe, queue_nr); 357 + 358 + if (!speq) 359 + return -ENOMEM; 360 + 361 + queue->priv = speq; 362 + 363 + if (queue->cpu != -1) 364 + speq->cpu = queue->cpu; 365 + 366 + if (!speq->on_heap) { 367 + int ret; 368 + 369 + if (spe->timeless_decoding) 370 + return 0; 371 + 372 + retry: 373 + ret = arm_spe_decode(speq->decoder); 374 + 375 + if (!ret) 376 + return 0; 377 + 378 + if (ret < 0) 379 + goto retry; 380 + 381 + record = &speq->decoder->record; 382 + 383 + speq->timestamp = record->timestamp; 384 + ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); 385 + if (ret) 386 + return ret; 387 + speq->on_heap = true; 388 + } 389 + 390 + return 0; 391 + } 392 + 393 + static int arm_spe__setup_queues(struct arm_spe *spe) 394 + { 395 + unsigned int i; 396 + int ret; 397 + 398 + for (i = 0; i < spe->queues.nr_queues; i++) { 399 + ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); 400 + if (ret) 401 + return ret; 402 + } 403 + 404 + return 0; 405 + } 406 + 407 + static int arm_spe__update_queues(struct arm_spe *spe) 408 + { 409 + if (spe->queues.new_data) { 410 + spe->queues.new_data = false; 411 + return arm_spe__setup_queues(spe); 412 + } 413 + 414 + return 0; 415 + } 416 + 417 + static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) 418 + { 419 + struct evsel *evsel; 420 + struct 
evlist *evlist = spe->session->evlist; 421 + bool timeless_decoding = true; 422 + 423 + /* 424 + * Circle through the list of event and complain if we find one 425 + * with the time bit set. 426 + */ 427 + evlist__for_each_entry(evlist, evsel) { 428 + if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 429 + timeless_decoding = false; 430 + } 431 + 432 + return timeless_decoding; 433 + } 434 + 435 + static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 436 + struct auxtrace_queue *queue) 437 + { 438 + struct arm_spe_queue *speq = queue->priv; 439 + pid_t tid; 440 + 441 + tid = machine__get_current_tid(spe->machine, speq->cpu); 442 + if (tid != -1) { 443 + speq->tid = tid; 444 + thread__zput(speq->thread); 445 + } else 446 + speq->tid = queue->tid; 447 + 448 + if ((!speq->thread) && (speq->tid != -1)) { 449 + speq->thread = machine__find_thread(spe->machine, -1, 450 + speq->tid); 451 + } 452 + 453 + if (speq->thread) { 454 + speq->pid = speq->thread->pid_; 455 + if (queue->cpu == -1) 456 + speq->cpu = speq->thread->cpu; 457 + } 458 + } 459 + 460 + static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 461 + { 462 + unsigned int queue_nr; 463 + u64 ts; 464 + int ret; 465 + 466 + while (1) { 467 + struct auxtrace_queue *queue; 468 + struct arm_spe_queue *speq; 469 + 470 + if (!spe->heap.heap_cnt) 471 + return 0; 472 + 473 + if (spe->heap.heap_array[0].ordinal >= timestamp) 474 + return 0; 475 + 476 + queue_nr = spe->heap.heap_array[0].queue_nr; 477 + queue = &spe->queues.queue_array[queue_nr]; 478 + speq = queue->priv; 479 + 480 + auxtrace_heap__pop(&spe->heap); 481 + 482 + if (spe->heap.heap_cnt) { 483 + ts = spe->heap.heap_array[0].ordinal + 1; 484 + if (ts > timestamp) 485 + ts = timestamp; 486 + } else { 487 + ts = timestamp; 488 + } 489 + 490 + arm_spe_set_pid_tid_cpu(spe, queue); 491 + 492 + ret = arm_spe_run_decoder(speq, &ts); 493 + if (ret < 0) { 494 + auxtrace_heap__add(&spe->heap, queue_nr, ts); 495 + return ret; 496 + } 497 + 498 + if 
(!ret) { 499 + ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 500 + if (ret < 0) 501 + return ret; 502 + } else { 503 + speq->on_heap = false; 504 + } 505 + } 506 + 507 + return 0; 508 + } 509 + 510 + static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 511 + u64 time_) 512 + { 513 + struct auxtrace_queues *queues = &spe->queues; 514 + unsigned int i; 515 + u64 ts = 0; 516 + 517 + for (i = 0; i < queues->nr_queues; i++) { 518 + struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 519 + struct arm_spe_queue *speq = queue->priv; 520 + 521 + if (speq && (tid == -1 || speq->tid == tid)) { 522 + speq->time = time_; 523 + arm_spe_set_pid_tid_cpu(spe, queue); 524 + arm_spe_run_decoder(speq, &ts); 525 + } 526 + } 527 + return 0; 528 + } 529 + 530 + static int arm_spe_process_event(struct perf_session *session, 531 + union perf_event *event, 532 + struct perf_sample *sample, 533 + struct perf_tool *tool) 534 + { 535 + int err = 0; 536 + u64 timestamp; 537 + struct arm_spe *spe = container_of(session->auxtrace, 538 + struct arm_spe, auxtrace); 539 + 540 + if (dump_trace) 541 + return 0; 542 + 543 + if (!tool->ordered_events) { 544 + pr_err("SPE trace requires ordered events\n"); 545 + return -EINVAL; 546 + } 547 + 548 + if (sample->time && (sample->time != (u64) -1)) 549 + timestamp = sample->time; 550 + else 551 + timestamp = 0; 552 + 553 + if (timestamp || spe->timeless_decoding) { 554 + err = arm_spe__update_queues(spe); 555 + if (err) 556 + return err; 557 + } 558 + 559 + if (spe->timeless_decoding) { 560 + if (event->header.type == PERF_RECORD_EXIT) { 561 + err = arm_spe_process_timeless_queues(spe, 562 + event->fork.tid, 563 + sample->time); 564 + } 565 + } else if (timestamp) { 566 + if (event->header.type == PERF_RECORD_EXIT) { 567 + err = arm_spe_process_queues(spe, timestamp); 568 + if (err) 569 + return err; 570 + } 571 + } 572 + 573 + return err; 140 574 } 141 575 142 576 static int arm_spe_process_auxtrace_event(struct 
perf_session *session, ··· 618 106 { 619 107 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 620 108 auxtrace); 621 - struct auxtrace_buffer *buffer; 622 - off_t data_offset; 623 - int fd = perf_data__fd(session->data); 624 - int err; 625 109 626 - if (perf_data__is_pipe(session->data)) { 627 - data_offset = 0; 628 - } else { 629 - data_offset = lseek(fd, 0, SEEK_CUR); 630 - if (data_offset == -1) 631 - return -errno; 632 - } 110 + if (!spe->data_queued) { 111 + struct auxtrace_buffer *buffer; 112 + off_t data_offset; 113 + int fd = perf_data__fd(session->data); 114 + int err; 633 115 634 - err = auxtrace_queues__add_event(&spe->queues, session, event, 635 - data_offset, &buffer); 636 - if (err) 637 - return err; 116 + if (perf_data__is_pipe(session->data)) { 117 + data_offset = 0; 118 + } else { 119 + data_offset = lseek(fd, 0, SEEK_CUR); 120 + if (data_offset == -1) 121 + return -errno; 122 + } 638 123 639 - /* Dump here now we have copied a piped trace out of the pipe */ 640 - if (dump_trace) { 641 - if (auxtrace_buffer__get_data(buffer, fd)) { 642 - arm_spe_dump_event(spe, buffer->data, 643 - buffer->size); 644 - auxtrace_buffer__put_data(buffer); 124 + err = auxtrace_queues__add_event(&spe->queues, session, event, 125 + data_offset, &buffer); 126 + if (err) 127 + return err; 128 + 129 + /* Dump here now we have copied a piped trace out of the pipe */ 130 + if (dump_trace) { 131 + if (auxtrace_buffer__get_data(buffer, fd)) { 132 + arm_spe_dump_event(spe, buffer->data, 133 + buffer->size); 134 + auxtrace_buffer__put_data(buffer); 135 + } 645 136 } 646 137 } 647 138 ··· 654 139 static int arm_spe_flush(struct perf_session *session __maybe_unused, 655 140 struct perf_tool *tool __maybe_unused) 656 141 { 657 - return 0; 142 + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 143 + auxtrace); 144 + int ret; 145 + 146 + if (dump_trace) 147 + return 0; 148 + 149 + if (!tool->ordered_events) 150 + return -EINVAL; 151 + 152 + 
ret = arm_spe__update_queues(spe); 153 + if (ret < 0) 154 + return ret; 155 + 156 + if (spe->timeless_decoding) 157 + return arm_spe_process_timeless_queues(spe, -1, 158 + MAX_TIMESTAMP - 1); 159 + 160 + return arm_spe_process_queues(spe, MAX_TIMESTAMP); 658 161 } 659 162 660 163 static void arm_spe_free_queue(void *priv) ··· 681 148 682 149 if (!speq) 683 150 return; 151 + thread__zput(speq->thread); 152 + arm_spe_decoder_free(speq->decoder); 153 + zfree(&speq->event_buf); 684 154 free(speq); 685 155 } 686 156 ··· 732 196 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); 733 197 } 734 198 199 + struct arm_spe_synth { 200 + struct perf_tool dummy_tool; 201 + struct perf_session *session; 202 + }; 203 + 204 + static int arm_spe_event_synth(struct perf_tool *tool, 205 + union perf_event *event, 206 + struct perf_sample *sample __maybe_unused, 207 + struct machine *machine __maybe_unused) 208 + { 209 + struct arm_spe_synth *arm_spe_synth = 210 + container_of(tool, struct arm_spe_synth, dummy_tool); 211 + 212 + return perf_session__deliver_synth_event(arm_spe_synth->session, 213 + event, NULL); 214 + } 215 + 216 + static int arm_spe_synth_event(struct perf_session *session, 217 + struct perf_event_attr *attr, u64 id) 218 + { 219 + struct arm_spe_synth arm_spe_synth; 220 + 221 + memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); 222 + arm_spe_synth.session = session; 223 + 224 + return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, 225 + &id, arm_spe_event_synth); 226 + } 227 + 228 + static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 229 + const char *name) 230 + { 231 + struct evsel *evsel; 232 + 233 + evlist__for_each_entry(evlist, evsel) { 234 + if (evsel->core.id && evsel->core.id[0] == id) { 235 + if (evsel->name) 236 + zfree(&evsel->name); 237 + evsel->name = strdup(name); 238 + break; 239 + } 240 + } 241 + } 242 + 243 + static int 244 + arm_spe_synth_events(struct arm_spe *spe, struct perf_session 
*session) 245 + { 246 + struct evlist *evlist = session->evlist; 247 + struct evsel *evsel; 248 + struct perf_event_attr attr; 249 + bool found = false; 250 + u64 id; 251 + int err; 252 + 253 + evlist__for_each_entry(evlist, evsel) { 254 + if (evsel->core.attr.type == spe->pmu_type) { 255 + found = true; 256 + break; 257 + } 258 + } 259 + 260 + if (!found) { 261 + pr_debug("No selected events with SPE trace data\n"); 262 + return 0; 263 + } 264 + 265 + memset(&attr, 0, sizeof(struct perf_event_attr)); 266 + attr.size = sizeof(struct perf_event_attr); 267 + attr.type = PERF_TYPE_HARDWARE; 268 + attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 269 + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 270 + PERF_SAMPLE_PERIOD; 271 + if (spe->timeless_decoding) 272 + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 273 + else 274 + attr.sample_type |= PERF_SAMPLE_TIME; 275 + 276 + attr.exclude_user = evsel->core.attr.exclude_user; 277 + attr.exclude_kernel = evsel->core.attr.exclude_kernel; 278 + attr.exclude_hv = evsel->core.attr.exclude_hv; 279 + attr.exclude_host = evsel->core.attr.exclude_host; 280 + attr.exclude_guest = evsel->core.attr.exclude_guest; 281 + attr.sample_id_all = evsel->core.attr.sample_id_all; 282 + attr.read_format = evsel->core.attr.read_format; 283 + 284 + /* create new id val to be a fixed offset from evsel id */ 285 + id = evsel->core.id[0] + 1000000000; 286 + 287 + if (!id) 288 + id = 1; 289 + 290 + if (spe->synth_opts.flc) { 291 + spe->sample_flc = true; 292 + 293 + /* Level 1 data cache miss */ 294 + err = arm_spe_synth_event(session, &attr, id); 295 + if (err) 296 + return err; 297 + spe->l1d_miss_id = id; 298 + arm_spe_set_event_name(evlist, id, "l1d-miss"); 299 + id += 1; 300 + 301 + /* Level 1 data cache access */ 302 + err = arm_spe_synth_event(session, &attr, id); 303 + if (err) 304 + return err; 305 + spe->l1d_access_id = id; 306 + arm_spe_set_event_name(evlist, id, "l1d-access"); 307 + id += 1; 308 + } 309 + 310 + 
if (spe->synth_opts.llc) { 311 + spe->sample_llc = true; 312 + 313 + /* Last level cache miss */ 314 + err = arm_spe_synth_event(session, &attr, id); 315 + if (err) 316 + return err; 317 + spe->llc_miss_id = id; 318 + arm_spe_set_event_name(evlist, id, "llc-miss"); 319 + id += 1; 320 + 321 + /* Last level cache access */ 322 + err = arm_spe_synth_event(session, &attr, id); 323 + if (err) 324 + return err; 325 + spe->llc_access_id = id; 326 + arm_spe_set_event_name(evlist, id, "llc-access"); 327 + id += 1; 328 + } 329 + 330 + if (spe->synth_opts.tlb) { 331 + spe->sample_tlb = true; 332 + 333 + /* TLB miss */ 334 + err = arm_spe_synth_event(session, &attr, id); 335 + if (err) 336 + return err; 337 + spe->tlb_miss_id = id; 338 + arm_spe_set_event_name(evlist, id, "tlb-miss"); 339 + id += 1; 340 + 341 + /* TLB access */ 342 + err = arm_spe_synth_event(session, &attr, id); 343 + if (err) 344 + return err; 345 + spe->tlb_access_id = id; 346 + arm_spe_set_event_name(evlist, id, "tlb-access"); 347 + id += 1; 348 + } 349 + 350 + if (spe->synth_opts.branches) { 351 + spe->sample_branch = true; 352 + 353 + /* Branch miss */ 354 + err = arm_spe_synth_event(session, &attr, id); 355 + if (err) 356 + return err; 357 + spe->branch_miss_id = id; 358 + arm_spe_set_event_name(evlist, id, "branch-miss"); 359 + id += 1; 360 + } 361 + 362 + if (spe->synth_opts.remote_access) { 363 + spe->sample_remote_access = true; 364 + 365 + /* Remote access */ 366 + err = arm_spe_synth_event(session, &attr, id); 367 + if (err) 368 + return err; 369 + spe->remote_access_id = id; 370 + arm_spe_set_event_name(evlist, id, "remote-access"); 371 + id += 1; 372 + } 373 + 374 + return 0; 375 + } 376 + 735 377 int arm_spe_process_auxtrace_info(union perf_event *event, 736 378 struct perf_session *session) 737 379 { 738 380 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 739 - size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; 381 + size_t min_sz = sizeof(u64) * 
ARM_SPE_AUXTRACE_PRIV_MAX; 740 382 struct arm_spe *spe; 741 383 int err; 742 384 ··· 935 221 spe->auxtrace_type = auxtrace_info->type; 936 222 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; 937 223 224 + spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); 938 225 spe->auxtrace.process_event = arm_spe_process_event; 939 226 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; 940 227 spe->auxtrace.flush_events = arm_spe_flush; ··· 946 231 947 232 arm_spe_print_info(&auxtrace_info->priv[0]); 948 233 234 + if (dump_trace) 235 + return 0; 236 + 237 + if (session->itrace_synth_opts && session->itrace_synth_opts->set) 238 + spe->synth_opts = *session->itrace_synth_opts; 239 + else 240 + itrace_synth_opts__set_default(&spe->synth_opts, false); 241 + 242 + err = arm_spe_synth_events(spe, session); 243 + if (err) 244 + goto err_free_queues; 245 + 246 + err = auxtrace_queues__process_index(&spe->queues, session); 247 + if (err) 248 + goto err_free_queues; 249 + 250 + if (spe->queues.populated) 251 + spe->data_queued = true; 252 + 949 253 return 0; 950 254 255 + err_free_queues: 256 + auxtrace_queues__free(&spe->queues); 257 + session->auxtrace = NULL; 951 258 err_free: 952 259 free(spe); 953 260 return err;
+19 -3
tools/perf/util/auxtrace.c
··· 55 55 #include "util/mmap.h" 56 56 57 57 #include <linux/ctype.h> 58 - #include <linux/kernel.h> 59 58 #include "symbol/kallsyms.h" 60 59 #include <internal/lib.h> 61 60 ··· 728 729 struct evlist *evlist, 729 730 struct record_opts *opts, const char *str) 730 731 { 731 - struct perf_evsel_config_term *term; 732 + struct evsel_config_term *term; 732 733 struct evsel *aux_evsel; 733 734 bool has_aux_sample_size = false; 734 735 bool has_aux_leader = false; ··· 770 771 evlist__for_each_entry(evlist, evsel) { 771 772 if (evsel__is_aux_event(evsel)) 772 773 aux_evsel = evsel; 773 - term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); 774 + term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); 774 775 if (term) { 775 776 has_aux_sample_size = true; 776 777 evsel->core.attr.aux_sample_size = term->val.aux_sample_size; ··· 1330 1331 synth_opts->pwr_events = true; 1331 1332 synth_opts->other_events = true; 1332 1333 synth_opts->errors = true; 1334 + synth_opts->flc = true; 1335 + synth_opts->llc = true; 1336 + synth_opts->tlb = true; 1337 + synth_opts->remote_access = true; 1338 + 1333 1339 if (no_sample) { 1334 1340 synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; 1335 1341 synth_opts->period = 1; ··· 1494 1490 if (p == endptr) 1495 1491 goto out_err; 1496 1492 p = endptr; 1493 + break; 1494 + case 'f': 1495 + synth_opts->flc = true; 1496 + break; 1497 + case 'm': 1498 + synth_opts->llc = true; 1499 + break; 1500 + case 't': 1501 + synth_opts->tlb = true; 1502 + break; 1503 + case 'a': 1504 + synth_opts->remote_access = true; 1497 1505 break; 1498 1506 case ' ': 1499 1507 case ',':
+14 -1
tools/perf/util/auxtrace.h
··· 63 63 * because 'perf inject' will write it out 64 64 * @instructions: whether to synthesize 'instructions' events 65 65 * @branches: whether to synthesize 'branches' events 66 + * (branch misses only for Arm SPE) 66 67 * @transactions: whether to synthesize events for transactions 67 68 * @ptwrites: whether to synthesize events for ptwrites 68 69 * @pwr_events: whether to synthesize power events ··· 79 78 * @thread_stack: feed branches to the thread_stack 80 79 * @last_branch: add branch context to 'instruction' events 81 80 * @add_last_branch: add branch context to existing event records 81 + * @flc: whether to synthesize first level cache events 82 + * @llc: whether to synthesize last level cache events 83 + * @tlb: whether to synthesize TLB events 84 + * @remote_access: whether to synthesize remote access events 82 85 * @callchain_sz: maximum callchain size 83 86 * @last_branch_sz: branch context size 84 87 * @period: 'instructions' events period ··· 112 107 bool thread_stack; 113 108 bool last_branch; 114 109 bool add_last_branch; 110 + bool flc; 111 + bool llc; 112 + bool tlb; 113 + bool remote_access; 115 114 unsigned int callchain_sz; 116 115 unsigned int last_branch_sz; 117 116 unsigned long long period; ··· 605 596 606 597 #define ITRACE_HELP \ 607 598 " i: synthesize instructions events\n" \ 608 - " b: synthesize branches events\n" \ 599 + " b: synthesize branches events (branch misses for Arm SPE)\n" \ 609 600 " c: synthesize branches events (calls only)\n" \ 610 601 " r: synthesize branches events (returns only)\n" \ 611 602 " x: synthesize transactions events\n" \ ··· 613 604 " p: synthesize power events\n" \ 614 605 " e: synthesize error events\n" \ 615 606 " d: create a debug log\n" \ 607 + " f: synthesize first level cache events\n" \ 608 + " m: synthesize last level cache events\n" \ 609 + " t: synthesize TLB events\n" \ 610 + " a: synthesize remote access events\n" \ 616 611 " g[len]: synthesize a call chain (use with i or x)\n" \ 617 612 " 
l[len]: synthesize last branch entries (use with i or x)\n" \ 618 613 " sNUMBER: skip initial number of events\n" \
+1 -1
tools/perf/util/bpf-loader.c
··· 1225 1225 out: 1226 1226 free(map_name); 1227 1227 if (!err) 1228 - key_scan_pos += strlen(map_opt); 1228 + *key_scan_pos += strlen(map_opt); 1229 1229 return err; 1230 1230 } 1231 1231
+1 -1
tools/perf/util/branch.h
··· 46 46 struct branch_stack { 47 47 u64 nr; 48 48 u64 hw_idx; 49 - struct branch_entry entries[0]; 49 + struct branch_entry entries[]; 50 50 }; 51 51 52 52 /*
+14
tools/perf/util/callchain.c
··· 1599 1599 for (node = cursor->first; node != NULL; node = node->next) 1600 1600 map__zput(node->ms.map); 1601 1601 } 1602 + 1603 + void callchain_param_setup(u64 sample_type) 1604 + { 1605 + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { 1606 + if ((sample_type & PERF_SAMPLE_REGS_USER) && 1607 + (sample_type & PERF_SAMPLE_STACK_USER)) { 1608 + callchain_param.record_mode = CALLCHAIN_DWARF; 1609 + dwarf_callchain_users = true; 1610 + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) 1611 + callchain_param.record_mode = CALLCHAIN_LBR; 1612 + else 1613 + callchain_param.record_mode = CALLCHAIN_FP; 1614 + } 1615 + }
+1
tools/perf/util/callchain.h
··· 297 297 u64 *branch_count, u64 *predicted_count, 298 298 u64 *abort_count, u64 *cycles_count); 299 299 300 + void callchain_param_setup(u64 sample_type); 300 301 #endif /* __PERF_CALLCHAIN_H */
+2 -2
tools/perf/util/cloexec.c
··· 65 65 return 1; 66 66 } 67 67 68 - WARN_ONCE(err != EINVAL && err != EBUSY, 68 + WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES, 69 69 "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n", 70 70 err, str_error_r(err, sbuf, sizeof(sbuf))); 71 71 ··· 83 83 if (fd >= 0) 84 84 close(fd); 85 85 86 - if (WARN_ONCE(fd < 0 && err != EBUSY, 86 + if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES, 87 87 "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n", 88 88 err, str_error_r(err, sbuf, sizeof(sbuf)))) 89 89 return -1;
+13 -1
tools/perf/util/config.c
··· 17 17 #include "util/event.h" /* proc_map_timeout */ 18 18 #include "util/hist.h" /* perf_hist_config */ 19 19 #include "util/llvm-utils.h" /* perf_llvm_config */ 20 + #include "util/stat.h" /* perf_stat__set_big_num */ 20 21 #include "build-id.h" 21 22 #include "debug.h" 22 23 #include "config.h" 23 - #include "debug.h" 24 24 #include <sys/types.h> 25 25 #include <sys/stat.h> 26 26 #include <stdlib.h> ··· 452 452 return 0; 453 453 } 454 454 455 + static int perf_stat_config(const char *var, const char *value) 456 + { 457 + if (!strcmp(var, "stat.big-num")) 458 + perf_stat__set_big_num(perf_config_bool(var, value)); 459 + 460 + /* Add other config variables here. */ 461 + return 0; 462 + } 463 + 455 464 int perf_default_config(const char *var, const char *value, 456 465 void *dummy __maybe_unused) 457 466 { ··· 481 472 482 473 if (strstarts(var, "buildid.")) 483 474 return perf_buildid_config(var, value); 475 + 476 + if (strstarts(var, "stat.")) 477 + return perf_stat_config(var, value); 484 478 485 479 /* Add other config variables here. */ 486 480 return 0;
+6 -4
tools/perf/util/counts.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <errno.h> 3 3 #include <stdlib.h> 4 + #include <string.h> 4 5 #include "evsel.h" 5 6 #include "counts.h" 6 7 #include <linux/zalloc.h> ··· 43 42 } 44 43 } 45 44 46 - static void perf_counts__reset(struct perf_counts *counts) 45 + void perf_counts__reset(struct perf_counts *counts) 47 46 { 48 47 xyarray__reset(counts->loaded); 49 48 xyarray__reset(counts->values); 49 + memset(&counts->aggr, 0, sizeof(struct perf_counts_values)); 50 50 } 51 51 52 - void perf_evsel__reset_counts(struct evsel *evsel) 52 + void evsel__reset_counts(struct evsel *evsel) 53 53 { 54 54 perf_counts__reset(evsel->counts); 55 55 } 56 56 57 - int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) 57 + int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) 58 58 { 59 59 evsel->counts = perf_counts__new(ncpus, nthreads); 60 60 return evsel->counts != NULL ? 0 : -ENOMEM; 61 61 } 62 62 63 - void perf_evsel__free_counts(struct evsel *evsel) 63 + void evsel__free_counts(struct evsel *evsel) 64 64 { 65 65 perf_counts__delete(evsel->counts); 66 66 evsel->counts = NULL;
+4 -3
tools/perf/util/counts.h
··· 37 37 38 38 struct perf_counts *perf_counts__new(int ncpus, int nthreads); 39 39 void perf_counts__delete(struct perf_counts *counts); 40 + void perf_counts__reset(struct perf_counts *counts); 40 41 41 - void perf_evsel__reset_counts(struct evsel *evsel); 42 - int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); 43 - void perf_evsel__free_counts(struct evsel *evsel); 42 + void evsel__reset_counts(struct evsel *evsel); 43 + int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); 44 + void evsel__free_counts(struct evsel *evsel); 44 45 45 46 #endif /* __PERF_COUNTS_H */
+1 -1
tools/perf/util/cputopo.h
··· 22 22 23 23 struct numa_topology { 24 24 u32 nr; 25 - struct numa_topology_node nodes[0]; 25 + struct numa_topology_node nodes[]; 26 26 }; 27 27 28 28 struct cpu_topology *cpu_topology__new(void);
+7 -6
tools/perf/util/demangle-java.c
··· 15 15 MODE_CLASS = 1, 16 16 MODE_FUNC = 2, 17 17 MODE_TYPE = 3, 18 - MODE_CTYPE = 3, /* class arg */ 18 + MODE_CTYPE = 4, /* class arg */ 19 19 }; 20 20 21 21 #define BASE_ENT(c, n) [c - 'A']=n ··· 27 27 BASE_ENT('I', "int" ), 28 28 BASE_ENT('J', "long" ), 29 29 BASE_ENT('S', "short" ), 30 - BASE_ENT('Z', "bool" ), 30 + BASE_ENT('Z', "boolean" ), 31 31 }; 32 32 33 33 /* ··· 59 59 60 60 switch (*q) { 61 61 case 'L': 62 - if (mode == MODE_PREFIX || mode == MODE_CTYPE) { 63 - if (mode == MODE_CTYPE) { 62 + if (mode == MODE_PREFIX || mode == MODE_TYPE) { 63 + if (mode == MODE_TYPE) { 64 64 if (narg) 65 65 rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); 66 66 narg++; 67 67 } 68 - rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); 69 68 if (mode == MODE_PREFIX) 70 69 mode = MODE_CLASS; 70 + else 71 + mode = MODE_CTYPE; 71 72 } else 72 73 buf[rlen++] = *q; 73 74 break; ··· 121 120 if (mode != MODE_CLASS && mode != MODE_CTYPE) 122 121 goto error; 123 122 /* safe because at least one other char to process */ 124 - if (isalpha(*(q + 1))) 123 + if (isalpha(*(q + 1)) && mode == MODE_CLASS) 125 124 rlen += scnprintf(buf + rlen, maxlen - rlen, "."); 126 125 if (mode == MODE_CLASS) 127 126 mode = MODE_FUNC;
+16
tools/perf/util/dso.c
··· 47 47 [DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D', 48 48 [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', 49 49 [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', 50 + [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x', 50 51 [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o', 51 52 [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', 52 53 [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', ··· 128 127 case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: 129 128 len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); 130 129 snprintf(filename + len, size - len, "%s", dso->long_name); 130 + break; 131 + 132 + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: 133 + /* 134 + * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in 135 + * /usr/lib/debug/lib when it is expected to be in 136 + * /usr/lib/debug/usr/lib 137 + */ 138 + if (strlen(dso->long_name) < 9 || 139 + strncmp(dso->long_name, "/usr/lib/", 9)) { 140 + ret = -1; 141 + break; 142 + } 143 + len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); 144 + snprintf(filename + len, size - len, "%s", dso->long_name + 4); 131 145 break; 132 146 133 147 case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+3 -2
tools/perf/util/dso.h
··· 30 30 DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, 31 31 DSO_BINARY_TYPE__FEDORA_DEBUGINFO, 32 32 DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, 33 + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, 33 34 DSO_BINARY_TYPE__BUILDID_DEBUGINFO, 34 35 DSO_BINARY_TYPE__SYSTEM_PATH_DSO, 35 36 DSO_BINARY_TYPE__GUEST_KMODULE, ··· 138 137 struct rb_node rb_node; 139 138 u64 offset; 140 139 u64 size; 141 - char data[0]; 140 + char data[]; 142 141 }; 143 142 144 143 struct auxtrace_cache; ··· 210 209 struct nsinfo *nsinfo; 211 210 struct dso_id id; 212 211 refcount_t refcnt; 213 - char name[0]; 212 + char name[]; 214 213 }; 215 214 216 215 /* dso__for_each_symbol - iterate over the symbols of given type
+1 -1
tools/perf/util/event.h
··· 79 79 80 80 struct ip_callchain { 81 81 u64 nr; 82 - u64 ips[0]; 82 + u64 ips[]; 83 83 }; 84 84 85 85 struct branch_stack;
+24 -13
tools/perf/util/evlist.c
··· 233 233 234 234 int __perf_evlist__add_default(struct evlist *evlist, bool precise) 235 235 { 236 - struct evsel *evsel = perf_evsel__new_cycles(precise); 236 + struct evsel *evsel = evsel__new_cycles(precise); 237 237 238 238 if (evsel == NULL) 239 239 return -ENOMEM; ··· 249 249 .config = PERF_COUNT_SW_DUMMY, 250 250 .size = sizeof(attr), /* to capture ABI version */ 251 251 }; 252 - struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries); 252 + struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); 253 253 254 254 if (evsel == NULL) 255 255 return -ENOMEM; ··· 266 266 size_t i; 267 267 268 268 for (i = 0; i < nr_attrs; i++) { 269 - evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i); 269 + evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); 270 270 if (evsel == NULL) 271 271 goto out_delete_partial_list; 272 272 list_add_tail(&evsel->core.node, &head); ··· 325 325 int perf_evlist__add_newtp(struct evlist *evlist, 326 326 const char *sys, const char *name, void *handler) 327 327 { 328 - struct evsel *evsel = perf_evsel__newtp(sys, name); 328 + struct evsel *evsel = evsel__newtp(sys, name); 329 329 330 330 if (IS_ERR(evsel)) 331 331 return -1; ··· 380 380 { 381 381 struct evsel *pos; 382 382 struct affinity affinity; 383 - int cpu, i; 383 + int cpu, i, imm = 0; 384 + bool has_imm = false; 384 385 385 386 if (affinity__setup(&affinity) < 0) 386 387 return; 387 388 388 - evlist__for_each_cpu(evlist, i, cpu) { 389 - affinity__set(&affinity, cpu); 389 + /* Disable 'immediate' events last */ 390 + for (imm = 0; imm <= 1; imm++) { 391 + evlist__for_each_cpu(evlist, i, cpu) { 392 + affinity__set(&affinity, cpu); 390 393 391 - evlist__for_each_entry(evlist, pos) { 392 - if (evsel__cpu_iter_skip(pos, cpu)) 393 - continue; 394 - if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) 395 - continue; 396 - evsel__disable_cpu(pos, pos->cpu_iter - 1); 394 + evlist__for_each_entry(evlist, pos) { 395 + 
if (evsel__cpu_iter_skip(pos, cpu)) 396 + continue; 397 + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) 398 + continue; 399 + if (pos->immediate) 400 + has_imm = true; 401 + if (pos->immediate != imm) 402 + continue; 403 + evsel__disable_cpu(pos, pos->cpu_iter - 1); 404 + } 397 405 } 406 + if (!has_imm) 407 + break; 398 408 } 409 + 399 410 affinity__cleanup(&affinity); 400 411 evlist__for_each_entry(evlist, pos) { 401 412 if (!evsel__is_group_leader(pos) || !pos->core.fd)
+83 -73
tools/perf/util/evsel.c
··· 56 56 57 57 static clockid_t clockid; 58 58 59 - static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused) 59 + static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) 60 60 { 61 61 return 0; 62 62 } 63 63 64 64 void __weak test_attr__ready(void) { } 65 65 66 - static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused) 66 + static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) 67 67 { 68 68 } 69 69 ··· 73 73 void (*fini)(struct evsel *evsel); 74 74 } perf_evsel__object = { 75 75 .size = sizeof(struct evsel), 76 - .init = perf_evsel__no_extra_init, 77 - .fini = perf_evsel__no_extra_fini, 76 + .init = evsel__no_extra_init, 77 + .fini = evsel__no_extra_fini, 78 78 }; 79 79 80 - int perf_evsel__object_config(size_t object_size, 81 - int (*init)(struct evsel *evsel), 82 - void (*fini)(struct evsel *evsel)) 80 + int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), 81 + void (*fini)(struct evsel *evsel)) 83 82 { 84 83 85 84 if (object_size == 0) ··· 254 255 evsel->metric_expr = NULL; 255 256 evsel->metric_name = NULL; 256 257 evsel->metric_events = NULL; 258 + evsel->per_pkg_mask = NULL; 257 259 evsel->collect_stat = false; 258 260 evsel->pmu_name = NULL; 259 261 } 260 262 261 - struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) 263 + struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) 262 264 { 263 265 struct evsel *evsel = zalloc(perf_evsel__object.size); 264 266 ··· 292 292 return perf_event_paranoid_check(1); 293 293 } 294 294 295 - struct evsel *perf_evsel__new_cycles(bool precise) 295 + struct evsel *evsel__new_cycles(bool precise) 296 296 { 297 297 struct perf_event_attr attr = { 298 298 .type = PERF_TYPE_HARDWARE, ··· 334 334 /* 335 335 * Returns pointer with encoded error via <linux/err.h> interface. 
336 336 */ 337 - struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) 337 + struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) 338 338 { 339 339 struct evsel *evsel = zalloc(perf_evsel__object.size); 340 340 int err = -ENOMEM; ··· 372 372 return ERR_PTR(err); 373 373 } 374 374 375 - const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { 375 + const char *evsel__hw_names[PERF_COUNT_HW_MAX] = { 376 376 "cycles", 377 377 "instructions", 378 378 "cache-references", ··· 387 387 388 388 static const char *__evsel__hw_name(u64 config) 389 389 { 390 - if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config]) 391 - return perf_evsel__hw_names[config]; 390 + if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config]) 391 + return evsel__hw_names[config]; 392 392 393 393 return "unknown-hardware"; 394 394 } ··· 435 435 return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); 436 436 } 437 437 438 - const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { 438 + const char *evsel__sw_names[PERF_COUNT_SW_MAX] = { 439 439 "cpu-clock", 440 440 "task-clock", 441 441 "page-faults", ··· 450 450 451 451 static const char *__evsel__sw_name(u64 config) 452 452 { 453 - if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) 454 - return perf_evsel__sw_names[config]; 453 + if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config]) 454 + return evsel__sw_names[config]; 455 455 return "unknown-software"; 456 456 } 457 457 ··· 486 486 return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); 487 487 } 488 488 489 - const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] 490 - [PERF_EVSEL__MAX_ALIASES] = { 489 + const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { 491 490 { "L1-dcache", "l1-d", "l1d", "L1-data", }, 492 491 { "L1-icache", "l1-i", "l1i", "L1-instruction", }, 493 492 { "LLC", "L2", }, ··· 496 497 { "node", }, 497 498 }; 498 499 499 - const char 
*perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] 500 - [PERF_EVSEL__MAX_ALIASES] = { 500 + const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = { 501 501 { "load", "loads", "read", }, 502 502 { "store", "stores", "write", }, 503 503 { "prefetch", "prefetches", "speculative-read", "speculative-load", }, 504 504 }; 505 505 506 - const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] 507 - [PERF_EVSEL__MAX_ALIASES] = { 506 + const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = { 508 507 { "refs", "Reference", "ops", "access", }, 509 508 { "misses", "miss", }, 510 509 }; ··· 518 521 * L1I : Read and prefetch only 519 522 * ITLB and BPU : Read-only 520 523 */ 521 - static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { 524 + static unsigned long evsel__hw_cache_stat[C(MAX)] = { 522 525 [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), 523 526 [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), 524 527 [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), ··· 530 533 531 534 bool evsel__is_cache_op_valid(u8 type, u8 op) 532 535 { 533 - if (perf_evsel__hw_cache_stat[type] & COP(op)) 536 + if (evsel__hw_cache_stat[type] & COP(op)) 534 537 return true; /* valid */ 535 538 else 536 539 return false; /* invalid */ ··· 539 542 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size) 540 543 { 541 544 if (result) { 542 - return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], 543 - perf_evsel__hw_cache_op[op][0], 544 - perf_evsel__hw_cache_result[result][0]); 545 + return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0], 546 + evsel__hw_cache_op[op][0], 547 + evsel__hw_cache_result[result][0]); 545 548 } 546 549 547 - return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0], 548 - perf_evsel__hw_cache_op[op][1]); 550 + return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0], 551 + evsel__hw_cache_op[op][1]); 549 552 } 550 553 
551 554 static int __evsel__hw_cache_name(u64 config, char *bf, size_t size) ··· 765 768 } 766 769 } 767 770 768 - static void apply_config_terms(struct evsel *evsel, 769 - struct record_opts *opts, bool track) 771 + static void evsel__apply_config_terms(struct evsel *evsel, 772 + struct record_opts *opts, bool track) 770 773 { 771 - struct perf_evsel_config_term *term; 774 + struct evsel_config_term *term; 772 775 struct list_head *config_terms = &evsel->config_terms; 773 776 struct perf_event_attr *attr = &evsel->core.attr; 774 777 /* callgraph default */ ··· 781 784 782 785 list_for_each_entry(term, config_terms, list) { 783 786 switch (term->type) { 784 - case PERF_EVSEL__CONFIG_TERM_PERIOD: 787 + case EVSEL__CONFIG_TERM_PERIOD: 785 788 if (!(term->weak && opts->user_interval != ULLONG_MAX)) { 786 789 attr->sample_period = term->val.period; 787 790 attr->freq = 0; 788 791 evsel__reset_sample_bit(evsel, PERIOD); 789 792 } 790 793 break; 791 - case PERF_EVSEL__CONFIG_TERM_FREQ: 794 + case EVSEL__CONFIG_TERM_FREQ: 792 795 if (!(term->weak && opts->user_freq != UINT_MAX)) { 793 796 attr->sample_freq = term->val.freq; 794 797 attr->freq = 1; 795 798 evsel__set_sample_bit(evsel, PERIOD); 796 799 } 797 800 break; 798 - case PERF_EVSEL__CONFIG_TERM_TIME: 801 + case EVSEL__CONFIG_TERM_TIME: 799 802 if (term->val.time) 800 803 evsel__set_sample_bit(evsel, TIME); 801 804 else 802 805 evsel__reset_sample_bit(evsel, TIME); 803 806 break; 804 - case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: 807 + case EVSEL__CONFIG_TERM_CALLGRAPH: 805 808 callgraph_buf = term->val.str; 806 809 break; 807 - case PERF_EVSEL__CONFIG_TERM_BRANCH: 810 + case EVSEL__CONFIG_TERM_BRANCH: 808 811 if (term->val.str && strcmp(term->val.str, "no")) { 809 812 evsel__set_sample_bit(evsel, BRANCH_STACK); 810 813 parse_branch_str(term->val.str, ··· 812 815 } else 813 816 evsel__reset_sample_bit(evsel, BRANCH_STACK); 814 817 break; 815 - case PERF_EVSEL__CONFIG_TERM_STACK_USER: 818 + case 
EVSEL__CONFIG_TERM_STACK_USER: 816 819 dump_size = term->val.stack_user; 817 820 break; 818 - case PERF_EVSEL__CONFIG_TERM_MAX_STACK: 821 + case EVSEL__CONFIG_TERM_MAX_STACK: 819 822 max_stack = term->val.max_stack; 820 823 break; 821 - case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: 824 + case EVSEL__CONFIG_TERM_MAX_EVENTS: 822 825 evsel->max_events = term->val.max_events; 823 826 break; 824 - case PERF_EVSEL__CONFIG_TERM_INHERIT: 827 + case EVSEL__CONFIG_TERM_INHERIT: 825 828 /* 826 829 * attr->inherit should has already been set by 827 830 * evsel__config. If user explicitly set ··· 830 833 */ 831 834 attr->inherit = term->val.inherit ? 1 : 0; 832 835 break; 833 - case PERF_EVSEL__CONFIG_TERM_OVERWRITE: 836 + case EVSEL__CONFIG_TERM_OVERWRITE: 834 837 attr->write_backward = term->val.overwrite ? 1 : 0; 835 838 break; 836 - case PERF_EVSEL__CONFIG_TERM_DRV_CFG: 839 + case EVSEL__CONFIG_TERM_DRV_CFG: 837 840 break; 838 - case PERF_EVSEL__CONFIG_TERM_PERCORE: 841 + case EVSEL__CONFIG_TERM_PERCORE: 839 842 break; 840 - case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: 843 + case EVSEL__CONFIG_TERM_AUX_OUTPUT: 841 844 attr->aux_output = term->val.aux_output ? 
1 : 0; 842 845 break; 843 - case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: 846 + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: 844 847 /* Already applied by auxtrace */ 845 848 break; 846 - case PERF_EVSEL__CONFIG_TERM_CFG_CHG: 849 + case EVSEL__CONFIG_TERM_CFG_CHG: 847 850 break; 848 851 default: 849 852 break; ··· 904 907 (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); 905 908 } 906 909 907 - struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, 908 - enum evsel_term_type type) 910 + struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) 909 911 { 910 - struct perf_evsel_config_term *term, *found_term = NULL; 912 + struct evsel_config_term *term, *found_term = NULL; 911 913 912 914 list_for_each_entry(term, &evsel->config_terms, list) { 913 915 if (term->type == type) ··· 1141 1145 * Apply event specific term settings, 1142 1146 * it overloads any global configuration. 1143 1147 */ 1144 - apply_config_terms(evsel, opts, track); 1148 + evsel__apply_config_terms(evsel, opts, track); 1145 1149 1146 1150 evsel->ignore_missing_thread = opts->ignore_missing_thread; 1147 1151 ··· 1154 1158 } 1155 1159 1156 1160 /* 1161 + * A dummy event never triggers any actual counter and therefore 1162 + * cannot be used with branch_stack. 1163 + * 1157 1164 * For initial_delay, a dummy event is added implicitly. 1158 1165 * The software event will trigger -EOPNOTSUPP error out, 1159 1166 * if BRANCH_STACK bit is set. 
1160 1167 */ 1161 - if (opts->initial_delay && is_dummy_event(evsel)) 1168 + if (is_dummy_event(evsel)) 1162 1169 evsel__reset_sample_bit(evsel, BRANCH_STACK); 1163 1170 } 1164 1171 ··· 1240 1241 return err; 1241 1242 } 1242 1243 1243 - static void perf_evsel__free_config_terms(struct evsel *evsel) 1244 + static void evsel__free_config_terms(struct evsel *evsel) 1244 1245 { 1245 - struct perf_evsel_config_term *term, *h; 1246 + struct evsel_config_term *term, *h; 1246 1247 1247 1248 list_for_each_entry_safe(term, h, &evsel->config_terms, list) { 1248 1249 list_del_init(&term->list); ··· 1256 1257 { 1257 1258 assert(list_empty(&evsel->core.node)); 1258 1259 assert(evsel->evlist == NULL); 1259 - perf_evsel__free_counts(evsel); 1260 + evsel__free_counts(evsel); 1260 1261 perf_evsel__free_fd(&evsel->core); 1261 1262 perf_evsel__free_id(&evsel->core); 1262 - perf_evsel__free_config_terms(evsel); 1263 + evsel__free_config_terms(evsel); 1263 1264 cgroup__put(evsel->cgrp); 1264 1265 perf_cpu_map__put(evsel->core.cpus); 1265 1266 perf_cpu_map__put(evsel->core.own_cpus); ··· 1267 1268 zfree(&evsel->group_name); 1268 1269 zfree(&evsel->name); 1269 1270 zfree(&evsel->pmu_name); 1271 + zfree(&evsel->per_pkg_mask); 1272 + zfree(&evsel->metric_events); 1270 1273 perf_evsel__object.fini(evsel); 1271 1274 } 1272 1275 ··· 1426 1425 if (FD(evsel, cpu, thread) < 0) 1427 1426 return -EINVAL; 1428 1427 1429 - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) 1428 + if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) 1430 1429 return -ENOMEM; 1431 1430 1432 1431 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) ··· 2417 2416 2418 2417 /* Is there already the separator in the name. 
*/ 2419 2418 if (strchr(name, '/') || 2420 - strchr(name, ':')) 2419 + (strchr(name, ':') && !evsel->is_libpfm_event)) 2421 2420 sep = ""; 2422 2421 2423 2422 if (asprintf(&new_name, "%s%su", name, sep) < 0) ··· 2478 2477 int err, char *msg, size_t size) 2479 2478 { 2480 2479 char sbuf[STRERR_BUFSIZE]; 2481 - int printed = 0; 2480 + int printed = 0, enforced = 0; 2482 2481 2483 2482 switch (err) { 2484 2483 case EPERM: 2485 2484 case EACCES: 2485 + printed += scnprintf(msg + printed, size - printed, 2486 + "Access to performance monitoring and observability operations is limited.\n"); 2487 + 2488 + if (!sysfs__read_int("fs/selinux/enforce", &enforced)) { 2489 + if (enforced) { 2490 + printed += scnprintf(msg + printed, size - printed, 2491 + "Enforced MAC policy settings (SELinux) can limit access to performance\n" 2492 + "monitoring and observability operations. Inspect system audit records for\n" 2493 + "more perf_event access control information and adjusting the policy.\n"); 2494 + } 2495 + } 2496 + 2486 2497 if (err == EPERM) 2487 - printed = scnprintf(msg, size, 2498 + printed += scnprintf(msg, size, 2488 2499 "No permission to enable %s event.\n\n", evsel__name(evsel)); 2489 2500 2490 2501 return scnprintf(msg + printed, size - printed, 2491 - "You may not have permission to collect %sstats.\n\n" 2492 - "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" 2493 - "which controls use of the performance events system by\n" 2494 - "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n" 2495 - "The current value is %d:\n\n" 2502 + "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" 2503 + "access to performance monitoring and observability operations for users\n" 2504 + "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n" 2505 + "perf_event_paranoid setting is %d:\n" 2496 2506 " -1: Allow use of (almost) all events by all users\n" 2497 2507 " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" 2498 - 
">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n" 2499 - " Disallow raw tracepoint access by users without CAP_SYS_PERFMON or CAP_SYS_ADMIN\n" 2500 - ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n" 2501 - ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n" 2502 - "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" 2503 - " kernel.perf_event_paranoid = -1\n" , 2504 - target->system_wide ? "system-wide " : "", 2505 - perf_event_paranoid()); 2508 + ">= 0: Disallow raw and ftrace function tracepoint access\n" 2509 + ">= 1: Disallow CPU event access\n" 2510 + ">= 2: Disallow kernel profiling\n" 2511 + "To make the adjusted perf_event_paranoid setting permanent preserve it\n" 2512 + "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)", 2513 + perf_event_paranoid()); 2506 2514 case ENOENT: 2507 2515 return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel)); 2508 2516 case EMFILE:
+16 -18
tools/perf/util/evsel.h
··· 76 76 bool ignore_missing_thread; 77 77 bool forced_leader; 78 78 bool use_uncore_alias; 79 + bool is_libpfm_event; 79 80 /* parse modifier helper */ 80 81 int exclude_GH; 81 82 int sample_read; ··· 155 154 void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, 156 155 struct perf_counts_values *count); 157 156 158 - int perf_evsel__object_config(size_t object_size, 159 - int (*init)(struct evsel *evsel), 160 - void (*fini)(struct evsel *evsel)); 157 + int evsel__object_config(size_t object_size, 158 + int (*init)(struct evsel *evsel), 159 + void (*fini)(struct evsel *evsel)); 161 160 162 161 struct perf_pmu *evsel__find_pmu(struct evsel *evsel); 163 162 bool evsel__is_aux_event(struct evsel *evsel); 164 163 165 - struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); 164 + struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); 166 165 167 166 static inline struct evsel *evsel__new(struct perf_event_attr *attr) 168 167 { 169 - return perf_evsel__new_idx(attr, 0); 168 + return evsel__new_idx(attr, 0); 170 169 } 171 170 172 - struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx); 171 + struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); 173 172 174 173 /* 175 174 * Returns pointer with encoded error via <linux/err.h> interface. 
176 175 */ 177 - static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name) 176 + static inline struct evsel *evsel__newtp(const char *sys, const char *name) 178 177 { 179 - return perf_evsel__newtp_idx(sys, name, 0); 178 + return evsel__newtp_idx(sys, name, 0); 180 179 } 181 180 182 - struct evsel *perf_evsel__new_cycles(bool precise); 181 + struct evsel *evsel__new_cycles(bool precise); 183 182 184 183 struct tep_event *event_format__new(const char *sys, const char *name); 185 184 ··· 199 198 200 199 bool evsel__is_cache_op_valid(u8 type, u8 op); 201 200 202 - #define PERF_EVSEL__MAX_ALIASES 8 201 + #define EVSEL__MAX_ALIASES 8 203 202 204 - extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] 205 - [PERF_EVSEL__MAX_ALIASES]; 206 - extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] 207 - [PERF_EVSEL__MAX_ALIASES]; 208 - extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] 209 - [PERF_EVSEL__MAX_ALIASES]; 210 - extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX]; 211 - extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; 203 + extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; 204 + extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES]; 205 + extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; 206 + extern const char *evsel__hw_names[PERF_COUNT_HW_MAX]; 207 + extern const char *evsel__sw_names[PERF_COUNT_SW_MAX]; 212 208 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); 213 209 const char *evsel__name(struct evsel *evsel); 214 210
+21 -22
tools/perf/util/evsel_config.h
··· 6 6 #include <stdbool.h> 7 7 8 8 /* 9 - * The 'struct perf_evsel_config_term' is used to pass event 9 + * The 'struct evsel_config_term' is used to pass event 10 10 * specific configuration data to evsel__config routine. 11 11 * It is allocated within event parsing and attached to 12 - * perf_evsel::config_terms list head. 12 + * evsel::config_terms list head. 13 13 */ 14 14 enum evsel_term_type { 15 - PERF_EVSEL__CONFIG_TERM_PERIOD, 16 - PERF_EVSEL__CONFIG_TERM_FREQ, 17 - PERF_EVSEL__CONFIG_TERM_TIME, 18 - PERF_EVSEL__CONFIG_TERM_CALLGRAPH, 19 - PERF_EVSEL__CONFIG_TERM_STACK_USER, 20 - PERF_EVSEL__CONFIG_TERM_INHERIT, 21 - PERF_EVSEL__CONFIG_TERM_MAX_STACK, 22 - PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, 23 - PERF_EVSEL__CONFIG_TERM_OVERWRITE, 24 - PERF_EVSEL__CONFIG_TERM_DRV_CFG, 25 - PERF_EVSEL__CONFIG_TERM_BRANCH, 26 - PERF_EVSEL__CONFIG_TERM_PERCORE, 27 - PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, 28 - PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, 29 - PERF_EVSEL__CONFIG_TERM_CFG_CHG, 15 + EVSEL__CONFIG_TERM_PERIOD, 16 + EVSEL__CONFIG_TERM_FREQ, 17 + EVSEL__CONFIG_TERM_TIME, 18 + EVSEL__CONFIG_TERM_CALLGRAPH, 19 + EVSEL__CONFIG_TERM_STACK_USER, 20 + EVSEL__CONFIG_TERM_INHERIT, 21 + EVSEL__CONFIG_TERM_MAX_STACK, 22 + EVSEL__CONFIG_TERM_MAX_EVENTS, 23 + EVSEL__CONFIG_TERM_OVERWRITE, 24 + EVSEL__CONFIG_TERM_DRV_CFG, 25 + EVSEL__CONFIG_TERM_BRANCH, 26 + EVSEL__CONFIG_TERM_PERCORE, 27 + EVSEL__CONFIG_TERM_AUX_OUTPUT, 28 + EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, 29 + EVSEL__CONFIG_TERM_CFG_CHG, 30 30 }; 31 31 32 - struct perf_evsel_config_term { 32 + struct evsel_config_term { 33 33 struct list_head list; 34 34 enum evsel_term_type type; 35 35 bool free_str; ··· 53 53 54 54 struct evsel; 55 55 56 - struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, 57 - enum evsel_term_type type); 56 + struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type); 58 57 59 - #define perf_evsel__get_config_term(evsel, type) \ 60 - 
__perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type) 58 + #define evsel__get_config_term(evsel, type) \ 59 + __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type) 61 60 62 61 #endif // __PERF_EVSEL_CONFIG_H
+1 -2
tools/perf/util/evsel_fprintf.c
··· 35 35 return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); 36 36 } 37 37 38 - int perf_evsel__fprintf(struct evsel *evsel, 39 - struct perf_attr_details *details, FILE *fp) 38 + int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp) 40 39 { 41 40 bool first = true; 42 41 int printed = 0;
+1 -2
tools/perf/util/evsel_fprintf.h
··· 15 15 bool trace_fields; 16 16 }; 17 17 18 - int perf_evsel__fprintf(struct evsel *evsel, 19 - struct perf_attr_details *details, FILE *fp); 18 + int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp); 20 19 21 20 #define EVSEL__PRINT_IP (1<<0) 22 21 #define EVSEL__PRINT_SYM (1<<1)
+73 -57
tools/perf/util/expr.c
··· 4 4 #include "expr.h" 5 5 #include "expr-bison.h" 6 6 #include "expr-flex.h" 7 + #include <linux/kernel.h> 7 8 8 9 #ifdef PARSER_DEBUG 9 10 extern int expr_debug; 10 11 #endif 11 12 12 - /* Caller must make sure id is allocated */ 13 - void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) 13 + static size_t key_hash(const void *key, void *ctx __maybe_unused) 14 14 { 15 - int idx; 15 + const char *str = (const char *)key; 16 + size_t hash = 0; 16 17 17 - assert(ctx->num_ids < MAX_PARSE_ID); 18 - idx = ctx->num_ids++; 19 - ctx->ids[idx].name = name; 20 - ctx->ids[idx].val = val; 18 + while (*str != '\0') { 19 + hash *= 31; 20 + hash += *str; 21 + str++; 22 + } 23 + return hash; 24 + } 25 + 26 + static bool key_equal(const void *key1, const void *key2, 27 + void *ctx __maybe_unused) 28 + { 29 + return !strcmp((const char *)key1, (const char *)key2); 30 + } 31 + 32 + /* Caller must make sure id is allocated */ 33 + int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) 34 + { 35 + double *val_ptr = NULL, *old_val = NULL; 36 + char *old_key = NULL; 37 + int ret; 38 + 39 + if (val != 0.0) { 40 + val_ptr = malloc(sizeof(double)); 41 + if (!val_ptr) 42 + return -ENOMEM; 43 + *val_ptr = val; 44 + } 45 + ret = hashmap__set(&ctx->ids, name, val_ptr, 46 + (const void **)&old_key, (void **)&old_val); 47 + free(old_key); 48 + free(old_val); 49 + return ret; 50 + } 51 + 52 + int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr) 53 + { 54 + double *data; 55 + 56 + if (!hashmap__find(&ctx->ids, id, (void **)&data)) 57 + return -1; 58 + *val_ptr = (data == NULL) ? 
0.0 : *data; 59 + return 0; 21 60 } 22 61 23 62 void expr__ctx_init(struct expr_parse_ctx *ctx) 24 63 { 25 - ctx->num_ids = 0; 64 + hashmap__init(&ctx->ids, key_hash, key_equal, NULL); 65 + } 66 + 67 + void expr__ctx_clear(struct expr_parse_ctx *ctx) 68 + { 69 + struct hashmap_entry *cur; 70 + size_t bkt; 71 + 72 + hashmap__for_each_entry((&ctx->ids), cur, bkt) { 73 + free((char *)cur->key); 74 + free(cur->value); 75 + } 76 + hashmap__clear(&ctx->ids); 26 77 } 27 78 28 79 static int ··· 96 45 97 46 #ifdef PARSER_DEBUG 98 47 expr_debug = 1; 48 + expr_set_debug(1, scanner); 99 49 #endif 100 50 101 51 ret = expr_parse(val, ctx, scanner); ··· 107 55 return ret; 108 56 } 109 57 110 - int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime) 58 + int expr__parse(double *final_val, struct expr_parse_ctx *ctx, 59 + const char *expr, int runtime) 111 60 { 112 61 return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; 113 62 } 114 63 115 - static bool 116 - already_seen(const char *val, const char *one, const char **other, 117 - int num_other) 64 + int expr__find_other(const char *expr, const char *one, 65 + struct expr_parse_ctx *ctx, int runtime) 118 66 { 119 - int i; 67 + double *old_val = NULL; 68 + char *old_key = NULL; 69 + int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); 120 70 121 - if (one && !strcasecmp(one, val)) 122 - return true; 123 - for (i = 0; i < num_other; i++) 124 - if (!strcasecmp(other[i], val)) 125 - return true; 126 - return false; 127 - } 128 - 129 - int expr__find_other(const char *expr, const char *one, const char ***other, 130 - int *num_other, int runtime) 131 - { 132 - int err, i = 0, j = 0; 133 - struct expr_parse_ctx ctx; 134 - 135 - expr__ctx_init(&ctx); 136 - err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime); 137 - if (err) 138 - return -1; 139 - 140 - *other = malloc((ctx.num_ids + 1) * sizeof(char *)); 141 - if (!*other) 142 - return -ENOMEM; 143 - 144 - for (i = 
0, j = 0; i < ctx.num_ids; i++) { 145 - const char *str = ctx.ids[i].name; 146 - 147 - if (already_seen(str, one, *other, j)) 148 - continue; 149 - 150 - str = strdup(str); 151 - if (!str) 152 - goto out; 153 - (*other)[j++] = str; 154 - } 155 - (*other)[j] = NULL; 156 - 157 - out: 158 - if (i != ctx.num_ids) { 159 - while (--j) 160 - free((char *) (*other)[i]); 161 - free(*other); 162 - err = -1; 71 + if (one) { 72 + hashmap__delete(&ctx->ids, one, 73 + (const void **)&old_key, (void **)&old_val); 74 + free(old_key); 75 + free(old_val); 163 76 } 164 77 165 - *num_other = j; 166 - return err; 78 + return ret; 167 79 }
+16 -13
tools/perf/util/expr.h
··· 2 2 #ifndef PARSE_CTX_H 3 3 #define PARSE_CTX_H 1 4 4 5 - #define EXPR_MAX_OTHER 20 6 - #define MAX_PARSE_ID EXPR_MAX_OTHER 7 - 8 - struct expr_parse_id { 9 - const char *name; 10 - double val; 11 - }; 5 + // There are fixes that need to land upstream before we can use libbpf's headers, 6 + // for now use our copy uncoditionally, since the data structures at this point 7 + // are exactly the same, no problem. 8 + //#ifdef HAVE_LIBBPF_SUPPORT 9 + //#include <bpf/hashmap.h> 10 + //#else 11 + #include "util/hashmap.h" 12 + //#endif 12 13 13 14 struct expr_parse_ctx { 14 - int num_ids; 15 - struct expr_parse_id ids[MAX_PARSE_ID]; 15 + struct hashmap ids; 16 16 }; 17 17 18 18 struct expr_scanner_ctx { ··· 21 21 }; 22 22 23 23 void expr__ctx_init(struct expr_parse_ctx *ctx); 24 - void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); 25 - int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); 26 - int expr__find_other(const char *expr, const char *one, const char ***other, 27 - int *num_other, int runtime); 24 + void expr__ctx_clear(struct expr_parse_ctx *ctx); 25 + int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); 26 + int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr); 27 + int expr__parse(double *final_val, struct expr_parse_ctx *ctx, 28 + const char *expr, int runtime); 29 + int expr__find_other(const char *expr, const char *one, 30 + struct expr_parse_ctx *ids, int runtime); 28 31 29 32 #endif
+8 -8
tools/perf/util/expr.l
··· 10 10 char *expr_get_text(yyscan_t yyscanner); 11 11 YYSTYPE *expr_get_lval(yyscan_t yyscanner); 12 12 13 - static int __value(YYSTYPE *yylval, char *str, int base, int token) 13 + static double __value(YYSTYPE *yylval, char *str, int token) 14 14 { 15 - u64 num; 15 + double num; 16 16 17 17 errno = 0; 18 - num = strtoull(str, NULL, base); 18 + num = strtod(str, NULL); 19 19 if (errno) 20 20 return EXPR_ERROR; 21 21 ··· 23 23 return token; 24 24 } 25 25 26 - static int value(yyscan_t scanner, int base) 26 + static int value(yyscan_t scanner) 27 27 { 28 28 YYSTYPE *yylval = expr_get_lval(scanner); 29 29 char *text = expr_get_text(scanner); 30 30 31 - return __value(yylval, text, base, NUMBER); 31 + return __value(yylval, text, NUMBER); 32 32 } 33 33 34 34 /* ··· 81 81 } 82 82 %} 83 83 84 - number [0-9]+ 84 + number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) 85 85 86 86 sch [-,=] 87 87 spec \\{sch} 88 88 sym [0-9a-zA-Z_\.:@?]+ 89 - symbol {spec}*{sym}*{spec}*{sym}*{spec}*{sym} 89 + symbol ({spec}|{sym})+ 90 90 91 91 %% 92 92 struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); ··· 105 105 if { return IF; } 106 106 else { return ELSE; } 107 107 #smt_on { return SMT_ON; } 108 - {number} { return value(yyscanner, 10); } 108 + {number} { return value(yyscanner); } 109 109 {symbol} { return str(yyscanner, ID, sctx->runtime); } 110 110 "|" { return '|'; } 111 111 "^" { return '^'; }
+18 -23
tools/perf/util/expr.y
··· 27 27 %token EXPR_PARSE EXPR_OTHER EXPR_ERROR 28 28 %token <num> NUMBER 29 29 %token <str> ID 30 + %destructor { free ($$); } <str> 30 31 %token MIN MAX IF ELSE SMT_ON 31 32 %left MIN MAX IF 32 33 %left '|' ··· 47 46 pr_debug("%s\n", s); 48 47 } 49 48 50 - static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val) 51 - { 52 - int i; 53 - 54 - for (i = 0; i < ctx->num_ids; i++) { 55 - if (!strcasecmp(ctx->ids[i].name, id)) { 56 - *val = ctx->ids[i].val; 57 - return 0; 58 - } 59 - } 60 - return -1; 61 - } 62 - 63 49 %} 64 50 %% 65 51 ··· 60 72 61 73 other: ID 62 74 { 63 - if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { 64 - pr_err("failed: way too many variables"); 65 - YYABORT; 66 - } 67 - 68 - ctx->ids[ctx->num_ids++].name = $1; 75 + expr__add_id(ctx, $1, 0.0); 69 76 } 70 77 | 71 - MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' 78 + MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' 72 79 73 80 74 81 all_expr: if_expr { *final_val = $1; } ··· 75 92 ; 76 93 77 94 expr: NUMBER 78 - | ID { if (lookup_id(ctx, $1, &$$) < 0) { 95 + | ID { if (expr__get_id(ctx, $1, &$$)) { 79 96 pr_debug("%s not found\n", $1); 97 + free($1); 80 98 YYABORT; 81 99 } 100 + free($1); 82 101 } 83 102 | expr '|' expr { $$ = (long)$1 | (long)$3; } 84 103 | expr '&' expr { $$ = (long)$1 & (long)$3; } ··· 88 103 | expr '+' expr { $$ = $1 + $3; } 89 104 | expr '-' expr { $$ = $1 - $3; } 90 105 | expr '*' expr { $$ = $1 * $3; } 91 - | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } 92 - | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } 106 + | expr '/' expr { if ($3 == 0) { 107 + pr_debug("division by zero\n"); 108 + YYABORT; 109 + } 110 + $$ = $1 / $3; 111 + } 112 + | expr '%' expr { if ((long)$3 == 0) { 113 + pr_debug("division by zero\n"); 114 + YYABORT; 115 + } 116 + $$ = (long)$1 % (long)$3; 117 + } 93 118 | '-' expr %prec NEG { $$ = -$2; } 94 119 
| '(' if_expr ')' { $$ = $2; } 95 120 | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
+2 -2
tools/perf/util/genelf_debug.c
··· 342 342 */ 343 343 344 344 /* start state of the state machine we take care of */ 345 - unsigned long last_vma = code_addr; 345 + unsigned long last_vma = 0; 346 346 char const *cur_filename = NULL; 347 347 unsigned long cur_file_idx = 0; 348 348 int last_line = 1; ··· 473 473 ent = debug_entry_next(ent); 474 474 } 475 475 add_compilation_unit(di, buffer_ext_size(dl)); 476 - add_debug_line(dl, debug, nr_debug_entries, 0); 476 + add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET); 477 477 add_debug_abbrev(da); 478 478 if (0) buffer_ext_dump(da, "abbrev"); 479 479
+238
tools/perf/util/hashmap.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + 3 + /* 4 + * Generic non-thread safe hash map implementation. 5 + * 6 + * Copyright (c) 2019 Facebook 7 + */ 8 + #include <stdint.h> 9 + #include <stdlib.h> 10 + #include <stdio.h> 11 + #include <errno.h> 12 + #include <linux/err.h> 13 + #include "hashmap.h" 14 + 15 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 16 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 17 + 18 + /* start with 4 buckets */ 19 + #define HASHMAP_MIN_CAP_BITS 2 20 + 21 + static void hashmap_add_entry(struct hashmap_entry **pprev, 22 + struct hashmap_entry *entry) 23 + { 24 + entry->next = *pprev; 25 + *pprev = entry; 26 + } 27 + 28 + static void hashmap_del_entry(struct hashmap_entry **pprev, 29 + struct hashmap_entry *entry) 30 + { 31 + *pprev = entry->next; 32 + entry->next = NULL; 33 + } 34 + 35 + void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, 36 + hashmap_equal_fn equal_fn, void *ctx) 37 + { 38 + map->hash_fn = hash_fn; 39 + map->equal_fn = equal_fn; 40 + map->ctx = ctx; 41 + 42 + map->buckets = NULL; 43 + map->cap = 0; 44 + map->cap_bits = 0; 45 + map->sz = 0; 46 + } 47 + 48 + struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, 49 + hashmap_equal_fn equal_fn, 50 + void *ctx) 51 + { 52 + struct hashmap *map = malloc(sizeof(struct hashmap)); 53 + 54 + if (!map) 55 + return ERR_PTR(-ENOMEM); 56 + hashmap__init(map, hash_fn, equal_fn, ctx); 57 + return map; 58 + } 59 + 60 + void hashmap__clear(struct hashmap *map) 61 + { 62 + struct hashmap_entry *cur, *tmp; 63 + size_t bkt; 64 + 65 + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { 66 + free(cur); 67 + } 68 + free(map->buckets); 69 + map->buckets = NULL; 70 + map->cap = map->cap_bits = map->sz = 0; 71 + } 72 + 73 + void hashmap__free(struct hashmap *map) 74 + { 75 + if (!map) 76 + return; 77 + 78 + hashmap__clear(map); 79 + free(map); 80 + } 81 + 82 + size_t hashmap__size(const struct hashmap *map) 83 + { 84 + return map->sz; 85 + 
} 86 + 87 + size_t hashmap__capacity(const struct hashmap *map) 88 + { 89 + return map->cap; 90 + } 91 + 92 + static bool hashmap_needs_to_grow(struct hashmap *map) 93 + { 94 + /* grow if empty or more than 75% filled */ 95 + return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); 96 + } 97 + 98 + static int hashmap_grow(struct hashmap *map) 99 + { 100 + struct hashmap_entry **new_buckets; 101 + struct hashmap_entry *cur, *tmp; 102 + size_t new_cap_bits, new_cap; 103 + size_t h, bkt; 104 + 105 + new_cap_bits = map->cap_bits + 1; 106 + if (new_cap_bits < HASHMAP_MIN_CAP_BITS) 107 + new_cap_bits = HASHMAP_MIN_CAP_BITS; 108 + 109 + new_cap = 1UL << new_cap_bits; 110 + new_buckets = calloc(new_cap, sizeof(new_buckets[0])); 111 + if (!new_buckets) 112 + return -ENOMEM; 113 + 114 + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { 115 + h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); 116 + hashmap_add_entry(&new_buckets[h], cur); 117 + } 118 + 119 + map->cap = new_cap; 120 + map->cap_bits = new_cap_bits; 121 + free(map->buckets); 122 + map->buckets = new_buckets; 123 + 124 + return 0; 125 + } 126 + 127 + static bool hashmap_find_entry(const struct hashmap *map, 128 + const void *key, size_t hash, 129 + struct hashmap_entry ***pprev, 130 + struct hashmap_entry **entry) 131 + { 132 + struct hashmap_entry *cur, **prev_ptr; 133 + 134 + if (!map->buckets) 135 + return false; 136 + 137 + for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; 138 + cur; 139 + prev_ptr = &cur->next, cur = cur->next) { 140 + if (map->equal_fn(cur->key, key, map->ctx)) { 141 + if (pprev) 142 + *pprev = prev_ptr; 143 + *entry = cur; 144 + return true; 145 + } 146 + } 147 + 148 + return false; 149 + } 150 + 151 + int hashmap__insert(struct hashmap *map, const void *key, void *value, 152 + enum hashmap_insert_strategy strategy, 153 + const void **old_key, void **old_value) 154 + { 155 + struct hashmap_entry *entry; 156 + size_t h; 157 + int err; 158 + 159 + if (old_key) 160 + 
*old_key = NULL; 161 + if (old_value) 162 + *old_value = NULL; 163 + 164 + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); 165 + if (strategy != HASHMAP_APPEND && 166 + hashmap_find_entry(map, key, h, NULL, &entry)) { 167 + if (old_key) 168 + *old_key = entry->key; 169 + if (old_value) 170 + *old_value = entry->value; 171 + 172 + if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) { 173 + entry->key = key; 174 + entry->value = value; 175 + return 0; 176 + } else if (strategy == HASHMAP_ADD) { 177 + return -EEXIST; 178 + } 179 + } 180 + 181 + if (strategy == HASHMAP_UPDATE) 182 + return -ENOENT; 183 + 184 + if (hashmap_needs_to_grow(map)) { 185 + err = hashmap_grow(map); 186 + if (err) 187 + return err; 188 + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); 189 + } 190 + 191 + entry = malloc(sizeof(struct hashmap_entry)); 192 + if (!entry) 193 + return -ENOMEM; 194 + 195 + entry->key = key; 196 + entry->value = value; 197 + hashmap_add_entry(&map->buckets[h], entry); 198 + map->sz++; 199 + 200 + return 0; 201 + } 202 + 203 + bool hashmap__find(const struct hashmap *map, const void *key, void **value) 204 + { 205 + struct hashmap_entry *entry; 206 + size_t h; 207 + 208 + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); 209 + if (!hashmap_find_entry(map, key, h, NULL, &entry)) 210 + return false; 211 + 212 + if (value) 213 + *value = entry->value; 214 + return true; 215 + } 216 + 217 + bool hashmap__delete(struct hashmap *map, const void *key, 218 + const void **old_key, void **old_value) 219 + { 220 + struct hashmap_entry **pprev, *entry; 221 + size_t h; 222 + 223 + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); 224 + if (!hashmap_find_entry(map, key, h, &pprev, &entry)) 225 + return false; 226 + 227 + if (old_key) 228 + *old_key = entry->key; 229 + if (old_value) 230 + *old_value = entry->value; 231 + 232 + hashmap_del_entry(pprev, entry); 233 + free(entry); 234 + map->sz--; 235 + 236 + return true; 237 + } 238 +
+176
tools/perf/util/hashmap.h
··· 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 + 3 + /* 4 + * Generic non-thread safe hash map implementation. 5 + * 6 + * Copyright (c) 2019 Facebook 7 + */ 8 + #ifndef __LIBBPF_HASHMAP_H 9 + #define __LIBBPF_HASHMAP_H 10 + 11 + #include <stdbool.h> 12 + #include <stddef.h> 13 + #include <limits.h> 14 + #ifndef __WORDSIZE 15 + #define __WORDSIZE (__SIZEOF_LONG__ * 8) 16 + #endif 17 + 18 + static inline size_t hash_bits(size_t h, int bits) 19 + { 20 + /* shuffle bits and return requested number of upper bits */ 21 + return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); 22 + } 23 + 24 + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); 25 + typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); 26 + 27 + struct hashmap_entry { 28 + const void *key; 29 + void *value; 30 + struct hashmap_entry *next; 31 + }; 32 + 33 + struct hashmap { 34 + hashmap_hash_fn hash_fn; 35 + hashmap_equal_fn equal_fn; 36 + void *ctx; 37 + 38 + struct hashmap_entry **buckets; 39 + size_t cap; 40 + size_t cap_bits; 41 + size_t sz; 42 + }; 43 + 44 + #define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ 45 + .hash_fn = (hash_fn), \ 46 + .equal_fn = (equal_fn), \ 47 + .ctx = (ctx), \ 48 + .buckets = NULL, \ 49 + .cap = 0, \ 50 + .cap_bits = 0, \ 51 + .sz = 0, \ 52 + } 53 + 54 + void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, 55 + hashmap_equal_fn equal_fn, void *ctx); 56 + struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, 57 + hashmap_equal_fn equal_fn, 58 + void *ctx); 59 + void hashmap__clear(struct hashmap *map); 60 + void hashmap__free(struct hashmap *map); 61 + 62 + size_t hashmap__size(const struct hashmap *map); 63 + size_t hashmap__capacity(const struct hashmap *map); 64 + 65 + /* 66 + * Hashmap insertion strategy: 67 + * - HASHMAP_ADD - only add key/value if key doesn't exist yet; 68 + * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise, 69 + * update value; 70 + * - HASHMAP_UPDATE - 
update value, if key already exists; otherwise, do 71 + * nothing and return -ENOENT; 72 + * - HASHMAP_APPEND - always add key/value pair, even if key already exists. 73 + * This turns hashmap into a multimap by allowing multiple values to be 74 + * associated with the same key. Most useful read API for such hashmap is 75 + * hashmap__for_each_key_entry() iteration. If hashmap__find() is still 76 + * used, it will return last inserted key/value entry (first in a bucket 77 + * chain). 78 + */ 79 + enum hashmap_insert_strategy { 80 + HASHMAP_ADD, 81 + HASHMAP_SET, 82 + HASHMAP_UPDATE, 83 + HASHMAP_APPEND, 84 + }; 85 + 86 + /* 87 + * hashmap__insert() adds key/value entry w/ various semantics, depending on 88 + * provided strategy value. If a given key/value pair replaced already 89 + * existing key/value pair, both old key and old value will be returned 90 + * through old_key and old_value to allow calling code do proper memory 91 + * management. 92 + */ 93 + int hashmap__insert(struct hashmap *map, const void *key, void *value, 94 + enum hashmap_insert_strategy strategy, 95 + const void **old_key, void **old_value); 96 + 97 + static inline int hashmap__add(struct hashmap *map, 98 + const void *key, void *value) 99 + { 100 + return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); 101 + } 102 + 103 + static inline int hashmap__set(struct hashmap *map, 104 + const void *key, void *value, 105 + const void **old_key, void **old_value) 106 + { 107 + return hashmap__insert(map, key, value, HASHMAP_SET, 108 + old_key, old_value); 109 + } 110 + 111 + static inline int hashmap__update(struct hashmap *map, 112 + const void *key, void *value, 113 + const void **old_key, void **old_value) 114 + { 115 + return hashmap__insert(map, key, value, HASHMAP_UPDATE, 116 + old_key, old_value); 117 + } 118 + 119 + static inline int hashmap__append(struct hashmap *map, 120 + const void *key, void *value) 121 + { 122 + return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, 
NULL); 123 + } 124 + 125 + bool hashmap__delete(struct hashmap *map, const void *key, 126 + const void **old_key, void **old_value); 127 + 128 + bool hashmap__find(const struct hashmap *map, const void *key, void **value); 129 + 130 + /* 131 + * hashmap__for_each_entry - iterate over all entries in hashmap 132 + * @map: hashmap to iterate 133 + * @cur: struct hashmap_entry * used as a loop cursor 134 + * @bkt: integer used as a bucket loop cursor 135 + */ 136 + #define hashmap__for_each_entry(map, cur, bkt) \ 137 + for (bkt = 0; bkt < map->cap; bkt++) \ 138 + for (cur = map->buckets[bkt]; cur; cur = cur->next) 139 + 140 + /* 141 + * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe 142 + * against removals 143 + * @map: hashmap to iterate 144 + * @cur: struct hashmap_entry * used as a loop cursor 145 + * @tmp: struct hashmap_entry * used as a temporary next cursor storage 146 + * @bkt: integer used as a bucket loop cursor 147 + */ 148 + #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ 149 + for (bkt = 0; bkt < map->cap; bkt++) \ 150 + for (cur = map->buckets[bkt]; \ 151 + cur && ({tmp = cur->next; true; }); \ 152 + cur = tmp) 153 + 154 + /* 155 + * hashmap__for_each_key_entry - iterate over entries associated with given key 156 + * @map: hashmap to iterate 157 + * @cur: struct hashmap_entry * used as a loop cursor 158 + * @key: key to iterate entries for 159 + */ 160 + #define hashmap__for_each_key_entry(map, cur, _key) \ 161 + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ 162 + map->cap_bits); \ 163 + map->buckets ? map->buckets[bkt] : NULL; }); \ 164 + cur; \ 165 + cur = cur->next) \ 166 + if (map->equal_fn(cur->key, (_key), map->ctx)) 167 + 168 + #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ 169 + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ 170 + map->cap_bits); \ 171 + cur = map->buckets ? 
map->buckets[bkt] : NULL; }); \ 172 + cur && ({ tmp = cur->next; true; }); \ 173 + cur = tmp) \ 174 + if (map->equal_fn(cur->key, (_key), map->ctx)) 175 + 176 + #endif /* __LIBBPF_HASHMAP_H */
+26 -8
tools/perf/util/header.c
··· 3574 3574 return -EINVAL; 3575 3575 } 3576 3576 3577 - return 0; 3577 + return f_header.size == sizeof(f_header) ? 0 : -1; 3578 3578 } 3579 3579 3580 3580 static int read_attr(int fd, struct perf_header *ph, ··· 3676 3676 struct perf_file_header f_header; 3677 3677 struct perf_file_attr f_attr; 3678 3678 u64 f_id; 3679 - int nr_attrs, nr_ids, i, j; 3679 + int nr_attrs, nr_ids, i, j, err; 3680 3680 int fd = perf_data__fd(data); 3681 3681 3682 3682 session->evlist = evlist__new(); ··· 3685 3685 3686 3686 session->evlist->env = &header->env; 3687 3687 session->machines.host.env = &header->env; 3688 - if (perf_data__is_pipe(data)) 3689 - return perf_header__read_pipe(session); 3688 + 3689 + /* 3690 + * We can read 'pipe' data event from regular file, 3691 + * check for the pipe header regardless of source. 3692 + */ 3693 + err = perf_header__read_pipe(session); 3694 + if (!err || (err && perf_data__is_pipe(data))) { 3695 + data->is_pipe = true; 3696 + return err; 3697 + } 3690 3698 3691 3699 if (perf_file_header__read(&f_header, header, fd) < 0) 3692 3700 return -EINVAL; ··· 3955 3947 { 3956 3948 ssize_t size_read, padding, size = event->tracing_data.size; 3957 3949 int fd = perf_data__fd(session->data); 3958 - off_t offset = lseek(fd, 0, SEEK_CUR); 3959 3950 char buf[BUFSIZ]; 3960 3951 3961 - /* setup for reading amidst mmap */ 3962 - lseek(fd, offset + sizeof(struct perf_record_header_tracing_data), 3963 - SEEK_SET); 3952 + /* 3953 + * The pipe fd is already in proper place and in any case 3954 + * we can't move it, and we'd screw the case where we read 3955 + * 'pipe' data from regular file. The trace_report reads 3956 + * data from 'fd' so we need to set it directly behind the 3957 + * event, where the tracing data starts. 
3958 + */ 3959 + if (!perf_data__is_pipe(session->data)) { 3960 + off_t offset = lseek(fd, 0, SEEK_CUR); 3961 + 3962 + /* setup for reading amidst mmap */ 3963 + lseek(fd, offset + sizeof(struct perf_record_header_tracing_data), 3964 + SEEK_SET); 3965 + } 3964 3966 3965 3967 size_read = trace_report(fd, &session->tevent, 3966 3968 session->repipe);
+6 -7
tools/perf/util/hist.c
··· 1930 1930 } 1931 1931 } 1932 1932 1933 - void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, 1934 - hists__resort_cb_t cb, void *cb_arg) 1933 + void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, 1934 + hists__resort_cb_t cb, void *cb_arg) 1935 1935 { 1936 1936 bool use_callchain; 1937 1937 ··· 1945 1945 output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); 1946 1946 } 1947 1947 1948 - void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog) 1948 + void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog) 1949 1949 { 1950 - return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); 1950 + return evsel__output_resort_cb(evsel, prog, NULL, NULL); 1951 1951 } 1952 1952 1953 1953 void hists__output_resort(struct hists *hists, struct ui_progress *prog) ··· 2845 2845 2846 2846 int hists__init(void) 2847 2847 { 2848 - int err = perf_evsel__object_config(sizeof(struct hists_evsel), 2849 - hists_evsel__init, 2850 - hists_evsel__exit); 2848 + int err = evsel__object_config(sizeof(struct hists_evsel), 2849 + hists_evsel__init, hists_evsel__exit); 2851 2850 if (err) 2852 2851 fputs("FATAL ERROR: Couldn't setup hists class\n", stderr); 2853 2852
+3 -3
tools/perf/util/hist.h
··· 173 173 174 174 typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); 175 175 176 - void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, 177 - hists__resort_cb_t cb, void *cb_arg); 178 - void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog); 176 + void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, 177 + hists__resort_cb_t cb, void *cb_arg); 178 + void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog); 179 179 void hists__output_resort(struct hists *hists, struct ui_progress *prog); 180 180 void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, 181 181 hists__resort_cb_t cb);
+13 -18
tools/perf/util/intel-pt.c
··· 913 913 sample->callchain = pt->chain; 914 914 } 915 915 916 - static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt) 916 + static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) 917 917 { 918 918 size_t sz = sizeof(struct branch_stack); 919 919 920 - sz += pt->br_stack_sz * sizeof(struct branch_entry); 920 + sz += entry_cnt * sizeof(struct branch_entry); 921 921 return zalloc(sz); 922 922 } 923 923 ··· 930 930 evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; 931 931 } 932 932 933 - pt->br_stack = intel_pt_alloc_br_stack(pt); 933 + pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz); 934 934 if (!pt->br_stack) 935 935 return -ENOMEM; 936 936 ··· 951 951 sample->branch_stack = pt->br_stack; 952 952 } 953 953 954 + /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ 955 + #define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U) 956 + 954 957 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, 955 958 unsigned int queue_nr) 956 959 { ··· 971 968 goto out_free; 972 969 } 973 970 974 - if (pt->synth_opts.last_branch) { 975 - ptq->last_branch = intel_pt_alloc_br_stack(pt); 971 + if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { 972 + unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); 973 + 974 + ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); 976 975 if (!ptq->last_branch) 977 976 goto out_free; 978 977 } ··· 1725 1720 } 1726 1721 } 1727 1722 1728 - /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ 1729 - #define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) 1730 - 1731 1723 static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) 1732 1724 { 1733 1725 const struct intel_pt_blk_items *items = &ptq->state->items; ··· 1800 1798 } 1801 1799 1802 1800 if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 1803 - struct { 1804 - struct branch_stack br_stack; 1805 - struct branch_entry entries[LBRS_MAX]; 1806 - } br; 1807 - 1808 1801 if (items->mask[INTEL_PT_LBR_0_POS] || 1809 1802 
items->mask[INTEL_PT_LBR_1_POS] || 1810 1803 items->mask[INTEL_PT_LBR_2_POS]) { 1811 - intel_pt_add_lbrs(&br.br_stack, items); 1812 - sample.branch_stack = &br.br_stack; 1804 + intel_pt_add_lbrs(ptq->last_branch, items); 1813 1805 } else if (pt->synth_opts.last_branch) { 1814 1806 thread_stack__br_sample(ptq->thread, ptq->cpu, 1815 1807 ptq->last_branch, 1816 1808 pt->br_stack_sz); 1817 - sample.branch_stack = ptq->last_branch; 1818 1809 } else { 1819 - br.br_stack.nr = 0; 1820 - sample.branch_stack = &br.br_stack; 1810 + ptq->last_branch->nr = 0; 1821 1811 } 1812 + sample.branch_stack = ptq->last_branch; 1822 1813 } 1823 1814 1824 1815 if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+1 -1
tools/perf/util/jitdump.c
··· 57 57 unsigned long vma; 58 58 unsigned int lineno; 59 59 /* The filename format is unspecified, absolute path, relative etc. */ 60 - char const filename[0]; 60 + char const filename[]; 61 61 }; 62 62 63 63 struct jit_tool {
+3 -3
tools/perf/util/jitdump.h
··· 93 93 uint64_t addr; 94 94 int lineno; /* source line number starting at 1 */ 95 95 int discrim; /* column discriminator, 0 is default */ 96 - const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ 96 + const char name[]; /* null terminated filename, \xff\0 if same as previous entry */ 97 97 }; 98 98 99 99 struct jr_code_debug_info { ··· 101 101 102 102 uint64_t code_addr; 103 103 uint64_t nr_entry; 104 - struct debug_entry entries[0]; 104 + struct debug_entry entries[]; 105 105 }; 106 106 107 107 struct jr_code_unwinding_info { ··· 110 110 uint64_t unwinding_size; 111 111 uint64_t eh_frame_hdr_size; 112 112 uint64_t mapped_size; 113 - const char unwinding_data[0]; 113 + const char unwinding_data[]; 114 114 }; 115 115 116 116 union jr_entry {
+2 -2
tools/perf/util/machine.c
··· 738 738 739 739 static int is_bpf_image(const char *name) 740 740 { 741 - return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) || 742 - strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1); 741 + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || 742 + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; 743 743 } 744 744 745 745 static int machine__process_ksymbol_register(struct machine *machine,
+15
tools/perf/util/mem-events.c
··· 103 103 return found ? 0 : -ENOENT; 104 104 } 105 105 106 + void perf_mem_events__list(void) 107 + { 108 + int j; 109 + 110 + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 111 + struct perf_mem_event *e = &perf_mem_events[j]; 112 + 113 + fprintf(stderr, "%-13s%-*s%s\n", 114 + e->tag, 115 + verbose > 0 ? 25 : 0, 116 + verbose > 0 ? perf_mem_events__name(j) : "", 117 + e->supported ? ": available" : ""); 118 + } 119 + } 120 + 106 121 static const char * const tlb_access[] = { 107 122 "N/A", 108 123 "HIT",
+2
tools/perf/util/mem-events.h
··· 39 39 40 40 char *perf_mem_events__name(int i); 41 41 42 + void perf_mem_events__list(void); 43 + 42 44 struct mem_info; 43 45 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 44 46 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+206 -108
tools/perf/util/metricgroup.c
··· 85 85 86 86 struct egroup { 87 87 struct list_head nd; 88 - int idnum; 89 - const char **ids; 88 + struct expr_parse_ctx pctx; 90 89 const char *metric_name; 91 90 const char *metric_expr; 92 91 const char *metric_unit; 93 92 int runtime; 93 + bool has_constraint; 94 94 }; 95 95 96 + /** 97 + * Find a group of events in perf_evlist that correpond to those from a parsed 98 + * metric expression. Note, as find_evsel_group is called in the same order as 99 + * perf_evlist was constructed, metric_no_merge doesn't need to test for 100 + * underfilling a group. 101 + * @perf_evlist: a list of events something like: {metric1 leader, metric1 102 + * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, 103 + * metric2 sibling}:W,duration_time 104 + * @pctx: the parse context for the metric expression. 105 + * @metric_no_merge: don't attempt to share events for the metric with other 106 + * metrics. 107 + * @has_constraint: is there a contraint on the group of events? In which case 108 + * the events won't be grouped. 109 + * @metric_events: out argument, null terminated array of evsel's associated 110 + * with the metric. 111 + * @evlist_used: in/out argument, bitmap tracking which evlist events are used. 112 + * @return the first metric event or NULL on failure. 113 + */ 96 114 static struct evsel *find_evsel_group(struct evlist *perf_evlist, 97 - const char **ids, 98 - int idnum, 115 + struct expr_parse_ctx *pctx, 116 + bool metric_no_merge, 117 + bool has_constraint, 99 118 struct evsel **metric_events, 100 - bool *evlist_used) 119 + unsigned long *evlist_used) 101 120 { 102 - struct evsel *ev; 103 - int i = 0, j = 0; 104 - bool leader_found; 121 + struct evsel *ev, *current_leader = NULL; 122 + double *val_ptr; 123 + int i = 0, matched_events = 0, events_to_match; 124 + const int idnum = (int)hashmap__size(&pctx->ids); 125 + 126 + /* duration_time is grouped separately. 
*/ 127 + if (!has_constraint && 128 + hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) 129 + events_to_match = idnum - 1; 130 + else 131 + events_to_match = idnum; 105 132 106 133 evlist__for_each_entry (perf_evlist, ev) { 107 - if (evlist_used[j++]) 134 + /* 135 + * Events with a constraint aren't grouped and match the first 136 + * events available. 137 + */ 138 + if (has_constraint && ev->weak_group) 108 139 continue; 109 - if (!strcmp(ev->name, ids[i])) { 110 - if (!metric_events[i]) 111 - metric_events[i] = ev; 112 - i++; 113 - if (i == idnum) 114 - break; 115 - } else { 116 - /* Discard the whole match and start again */ 117 - i = 0; 140 + /* Ignore event if already used and merging is disabled. */ 141 + if (metric_no_merge && test_bit(ev->idx, evlist_used)) 142 + continue; 143 + if (!has_constraint && ev->leader != current_leader) { 144 + /* 145 + * Start of a new group, discard the whole match and 146 + * start again. 147 + */ 148 + matched_events = 0; 118 149 memset(metric_events, 0, 119 150 sizeof(struct evsel *) * idnum); 151 + current_leader = ev->leader; 152 + } 153 + if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) { 154 + if (has_constraint) { 155 + /* 156 + * Events aren't grouped, ensure the same event 157 + * isn't matched from two groups. 158 + */ 159 + for (i = 0; i < matched_events; i++) { 160 + if (!strcmp(ev->name, 161 + metric_events[i]->name)) { 162 + break; 163 + } 164 + } 165 + if (i != matched_events) 166 + continue; 167 + } 168 + metric_events[matched_events++] = ev; 169 + } 170 + if (matched_events == events_to_match) 171 + break; 172 + } 120 173 121 - if (!strcmp(ev->name, ids[i])) { 122 - if (!metric_events[i]) 123 - metric_events[i] = ev; 124 - i++; 125 - if (i == idnum) 126 - break; 174 + if (events_to_match != idnum) { 175 + /* Add the first duration_time. 
*/ 176 + evlist__for_each_entry(perf_evlist, ev) { 177 + if (!strcmp(ev->name, "duration_time")) { 178 + metric_events[matched_events++] = ev; 179 + break; 127 180 } 128 181 } 129 182 } 130 183 131 - if (i != idnum) { 184 + if (matched_events != idnum) { 132 185 /* Not whole match */ 133 186 return NULL; 134 187 } ··· 189 136 metric_events[idnum] = NULL; 190 137 191 138 for (i = 0; i < idnum; i++) { 192 - leader_found = false; 193 - evlist__for_each_entry(perf_evlist, ev) { 194 - if (!leader_found && (ev == metric_events[i])) 195 - leader_found = true; 196 - 197 - if (leader_found && 198 - !strcmp(ev->name, metric_events[i]->name)) { 199 - ev->metric_leader = metric_events[i]; 200 - } 201 - j++; 202 - } 203 139 ev = metric_events[i]; 204 - evlist_used[ev->idx] = true; 140 + ev->metric_leader = ev; 141 + set_bit(ev->idx, evlist_used); 205 142 } 206 143 207 144 return metric_events[0]; 208 145 } 209 146 210 147 static int metricgroup__setup_events(struct list_head *groups, 148 + bool metric_no_merge, 211 149 struct evlist *perf_evlist, 212 150 struct rblist *metric_events_list) 213 151 { ··· 207 163 int i = 0; 208 164 int ret = 0; 209 165 struct egroup *eg; 210 - struct evsel *evsel; 211 - bool *evlist_used; 166 + struct evsel *evsel, *tmp; 167 + unsigned long *evlist_used; 212 168 213 - evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); 214 - if (!evlist_used) { 215 - ret = -ENOMEM; 216 - return ret; 217 - } 169 + evlist_used = bitmap_alloc(perf_evlist->core.nr_entries); 170 + if (!evlist_used) 171 + return -ENOMEM; 218 172 219 173 list_for_each_entry (eg, groups, nd) { 220 174 struct evsel **metric_events; 221 175 222 - metric_events = calloc(sizeof(void *), eg->idnum + 1); 176 + metric_events = calloc(sizeof(void *), 177 + hashmap__size(&eg->pctx.ids) + 1); 223 178 if (!metric_events) { 224 179 ret = -ENOMEM; 225 180 break; 226 181 } 227 - evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, 228 - metric_events, evlist_used); 182 + evsel = 
find_evsel_group(perf_evlist, &eg->pctx, 183 + metric_no_merge, 184 + eg->has_constraint, metric_events, 185 + evlist_used); 229 186 if (!evsel) { 230 187 pr_debug("Cannot resolve %s: %s\n", 231 188 eg->metric_name, eg->metric_expr); 189 + free(metric_events); 232 190 continue; 233 191 } 234 - for (i = 0; i < eg->idnum; i++) 192 + for (i = 0; metric_events[i]; i++) 235 193 metric_events[i]->collect_stat = true; 236 194 me = metricgroup__lookup(metric_events_list, evsel, true); 237 195 if (!me) { 238 196 ret = -ENOMEM; 197 + free(metric_events); 239 198 break; 240 199 } 241 200 expr = malloc(sizeof(struct metric_expr)); 242 201 if (!expr) { 243 202 ret = -ENOMEM; 203 + free(metric_events); 244 204 break; 245 205 } 246 206 expr->metric_expr = eg->metric_expr; ··· 255 207 list_add(&expr->nd, &me->head); 256 208 } 257 209 258 - free(evlist_used); 210 + evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { 211 + if (!test_bit(evsel->idx, evlist_used)) { 212 + evlist__remove(perf_evlist, evsel); 213 + evsel__delete(evsel); 214 + } 215 + } 216 + bitmap_free(evlist_used); 259 217 260 218 return ret; 261 219 } ··· 469 415 } 470 416 471 417 static void metricgroup__add_metric_weak_group(struct strbuf *events, 472 - const char **ids, 473 - int idnum) 418 + struct expr_parse_ctx *ctx) 474 419 { 475 - bool no_group = false; 476 - int i; 420 + struct hashmap_entry *cur; 421 + size_t bkt; 422 + bool no_group = true, has_duration = false; 477 423 478 - for (i = 0; i < idnum; i++) { 479 - pr_debug("found event %s\n", ids[i]); 424 + hashmap__for_each_entry((&ctx->ids), cur, bkt) { 425 + pr_debug("found event %s\n", (const char *)cur->key); 480 426 /* 481 427 * Duration time maps to a software event and can make 482 428 * groups not count. Always use it outside a 483 429 * group. 
484 430 */ 485 - if (!strcmp(ids[i], "duration_time")) { 486 - if (i > 0) 487 - strbuf_addf(events, "}:W,"); 488 - strbuf_addf(events, "duration_time"); 489 - no_group = true; 431 + if (!strcmp(cur->key, "duration_time")) { 432 + has_duration = true; 490 433 continue; 491 434 } 492 435 strbuf_addf(events, "%s%s", 493 - i == 0 || no_group ? "{" : ",", 494 - ids[i]); 436 + no_group ? "{" : ",", 437 + (const char *)cur->key); 495 438 no_group = false; 496 439 } 497 - if (!no_group) 440 + if (!no_group) { 498 441 strbuf_addf(events, "}:W"); 442 + if (has_duration) 443 + strbuf_addf(events, ",duration_time"); 444 + } else if (has_duration) 445 + strbuf_addf(events, "duration_time"); 499 446 } 500 447 501 448 static void metricgroup__add_metric_non_group(struct strbuf *events, 502 - const char **ids, 503 - int idnum) 449 + struct expr_parse_ctx *ctx) 504 450 { 505 - int i; 451 + struct hashmap_entry *cur; 452 + size_t bkt; 453 + bool first = true; 506 454 507 - for (i = 0; i < idnum; i++) 508 - strbuf_addf(events, ",%s", ids[i]); 455 + hashmap__for_each_entry((&ctx->ids), cur, bkt) { 456 + if (!first) 457 + strbuf_addf(events, ","); 458 + strbuf_addf(events, "%s", (const char *)cur->key); 459 + first = false; 460 + } 509 461 } 510 462 511 463 static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) ··· 552 492 return 1; 553 493 } 554 494 555 - static int __metricgroup__add_metric(struct strbuf *events, 556 - struct list_head *group_list, struct pmu_event *pe, int runtime) 495 + static int __metricgroup__add_metric(struct list_head *group_list, 496 + struct pmu_event *pe, 497 + bool metric_no_group, 498 + int runtime) 557 499 { 558 - 559 - const char **ids; 560 - int idnum; 561 500 struct egroup *eg; 562 - 563 - if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0) 564 - return -EINVAL; 565 - 566 - if (events->len > 0) 567 - strbuf_addf(events, ","); 568 - 569 - if (metricgroup__has_constraint(pe)) 570 - 
metricgroup__add_metric_non_group(events, ids, idnum); 571 - else 572 - metricgroup__add_metric_weak_group(events, ids, idnum); 573 501 574 502 eg = malloc(sizeof(*eg)); 575 503 if (!eg) 576 504 return -ENOMEM; 577 505 578 - eg->ids = ids; 579 - eg->idnum = idnum; 506 + expr__ctx_init(&eg->pctx); 580 507 eg->metric_name = pe->metric_name; 581 508 eg->metric_expr = pe->metric_expr; 582 509 eg->metric_unit = pe->unit; 583 510 eg->runtime = runtime; 584 - list_add_tail(&eg->nd, group_list); 511 + eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe); 512 + 513 + if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) { 514 + expr__ctx_clear(&eg->pctx); 515 + free(eg); 516 + return -EINVAL; 517 + } 518 + 519 + if (list_empty(group_list)) 520 + list_add(&eg->nd, group_list); 521 + else { 522 + struct list_head *pos; 523 + 524 + /* Place the largest groups at the front. */ 525 + list_for_each_prev(pos, group_list) { 526 + struct egroup *old = list_entry(pos, struct egroup, nd); 527 + 528 + if (hashmap__size(&eg->pctx.ids) <= 529 + hashmap__size(&old->pctx.ids)) 530 + break; 531 + } 532 + list_add(&eg->nd, pos); 533 + } 585 534 586 535 return 0; 587 536 } 588 537 589 - static int metricgroup__add_metric(const char *metric, struct strbuf *events, 538 + static int metricgroup__add_metric(const char *metric, bool metric_no_group, 539 + struct strbuf *events, 590 540 struct list_head *group_list) 591 541 { 592 542 struct pmu_events_map *map = perf_pmu__find_map(NULL); 593 543 struct pmu_event *pe; 594 - int i, ret = -EINVAL; 544 + struct egroup *eg; 545 + int i, ret; 546 + bool has_match = false; 595 547 596 548 if (!map) 597 549 return 0; ··· 611 539 for (i = 0; ; i++) { 612 540 pe = &map->table[i]; 613 541 614 - if (!pe->name && !pe->metric_group && !pe->metric_name) 542 + if (!pe->name && !pe->metric_group && !pe->metric_name) { 543 + /* End of pmu events. 
*/ 544 + if (!has_match) 545 + return -EINVAL; 615 546 break; 547 + } 616 548 if (!pe->metric_expr) 617 549 continue; 618 550 if (match_metric(pe->metric_group, metric) || 619 551 match_metric(pe->metric_name, metric)) { 620 - 552 + has_match = true; 621 553 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); 622 554 623 555 if (!strstr(pe->metric_expr, "?")) { 624 - ret = __metricgroup__add_metric(events, group_list, pe, 1); 556 + ret = __metricgroup__add_metric(group_list, 557 + pe, 558 + metric_no_group, 559 + 1); 560 + if (ret) 561 + return ret; 625 562 } else { 626 563 int j, count; 627 564 ··· 641 560 * those events to group_list. 642 561 */ 643 562 644 - for (j = 0; j < count; j++) 645 - ret = __metricgroup__add_metric(events, group_list, pe, j); 563 + for (j = 0; j < count; j++) { 564 + ret = __metricgroup__add_metric( 565 + group_list, pe, 566 + metric_no_group, j); 567 + if (ret) 568 + return ret; 569 + } 646 570 } 647 - if (ret == -ENOMEM) 648 - break; 649 571 } 650 572 } 651 - return ret; 573 + list_for_each_entry(eg, group_list, nd) { 574 + if (events->len > 0) 575 + strbuf_addf(events, ","); 576 + 577 + if (eg->has_constraint) { 578 + metricgroup__add_metric_non_group(events, 579 + &eg->pctx); 580 + } else { 581 + metricgroup__add_metric_weak_group(events, 582 + &eg->pctx); 583 + } 584 + } 585 + return 0; 652 586 } 653 587 654 - static int metricgroup__add_metric_list(const char *list, struct strbuf *events, 588 + static int metricgroup__add_metric_list(const char *list, bool metric_no_group, 589 + struct strbuf *events, 655 590 struct list_head *group_list) 656 591 { 657 592 char *llist, *nlist, *p; ··· 682 585 strbuf_addf(events, "%s", ""); 683 586 684 587 while ((p = strsep(&llist, ",")) != NULL) { 685 - ret = metricgroup__add_metric(p, events, group_list); 588 + ret = metricgroup__add_metric(p, metric_no_group, events, 589 + group_list); 686 590 if (ret == -EINVAL) { 687 591 fprintf(stderr, "Cannot find metric or group `%s'\n", 
688 592 p); ··· 701 603 static void metricgroup__free_egroups(struct list_head *group_list) 702 604 { 703 605 struct egroup *eg, *egtmp; 704 - int i; 705 606 706 607 list_for_each_entry_safe (eg, egtmp, group_list, nd) { 707 - for (i = 0; i < eg->idnum; i++) 708 - zfree(&eg->ids[i]); 709 - zfree(&eg->ids); 608 + expr__ctx_clear(&eg->pctx); 710 609 list_del_init(&eg->nd); 711 610 free(eg); 712 611 } 713 612 } 714 613 715 614 int metricgroup__parse_groups(const struct option *opt, 716 - const char *str, 717 - struct rblist *metric_events) 615 + const char *str, 616 + bool metric_no_group, 617 + bool metric_no_merge, 618 + struct rblist *metric_events) 718 619 { 719 620 struct parse_events_error parse_error; 720 621 struct evlist *perf_evlist = *(struct evlist **)opt->value; ··· 723 626 724 627 if (metric_events->nr_entries == 0) 725 628 metricgroup__rblist_init(metric_events); 726 - ret = metricgroup__add_metric_list(str, &extra_events, &group_list); 629 + ret = metricgroup__add_metric_list(str, metric_no_group, 630 + &extra_events, &group_list); 727 631 if (ret) 728 632 return ret; 729 633 pr_debug("adding %s\n", extra_events.buf); ··· 735 637 goto out; 736 638 } 737 639 strbuf_release(&extra_events); 738 - ret = metricgroup__setup_events(&group_list, perf_evlist, 739 - metric_events); 640 + ret = metricgroup__setup_events(&group_list, metric_no_merge, 641 + perf_evlist, metric_events); 740 642 out: 741 643 metricgroup__free_egroups(&group_list); 742 644 return ret;
+4 -2
tools/perf/util/metricgroup.h
··· 29 29 struct evsel *evsel, 30 30 bool create); 31 31 int metricgroup__parse_groups(const struct option *opt, 32 - const char *str, 33 - struct rblist *metric_events); 32 + const char *str, 33 + bool metric_no_group, 34 + bool metric_no_merge, 35 + struct rblist *metric_events); 34 36 35 37 void metricgroup__print(bool metrics, bool groups, char *filter, 36 38 bool raw, bool details);
+1 -1
tools/perf/util/ordered-events.h
··· 29 29 30 30 struct ordered_events_buffer { 31 31 struct list_head list; 32 - struct ordered_event event[0]; 32 + struct ordered_event event[]; 33 33 }; 34 34 35 35 struct ordered_events {
+78 -33
tools/perf/util/parse-events.c
··· 26 26 #include <api/fs/tracing_path.h> 27 27 #include <perf/cpumap.h> 28 28 #include "parse-events-bison.h" 29 - #define YY_EXTRA_TYPE int 29 + #define YY_EXTRA_TYPE void* 30 30 #include "parse-events-flex.h" 31 31 #include "pmu.h" 32 32 #include "thread_map.h" ··· 36 36 #include "metricgroup.h" 37 37 #include "util/evsel_config.h" 38 38 #include "util/event.h" 39 + #include "util/pfm.h" 39 40 40 41 #define MAX_NAME_LEN 100 41 42 ··· 205 204 err->help = help; 206 205 break; 207 206 default: 208 - WARN_ONCE(1, "WARNING: multiple event parsing errors\n"); 207 + pr_debug("Multiple errors dropping message: %s (%s)\n", 208 + err->str, err->help); 209 209 free(err->str); 210 210 err->str = str; 211 211 free(err->help); ··· 346 344 static struct evsel * 347 345 __add_event(struct list_head *list, int *idx, 348 346 struct perf_event_attr *attr, 347 + bool init_attr, 349 348 char *name, struct perf_pmu *pmu, 350 349 struct list_head *config_terms, bool auto_merge_stats, 351 350 const char *cpu_list) ··· 355 352 struct perf_cpu_map *cpus = pmu ? pmu->cpus : 356 353 cpu_list ? 
perf_cpu_map__new(cpu_list) : NULL; 357 354 358 - event_attr_init(attr); 355 + if (init_attr) 356 + event_attr_init(attr); 359 357 360 - evsel = perf_evsel__new_idx(attr, *idx); 358 + evsel = evsel__new_idx(attr, *idx); 361 359 if (!evsel) 362 360 return NULL; 363 361 ··· 374 370 if (config_terms) 375 371 list_splice(config_terms, &evsel->config_terms); 376 372 377 - list_add_tail(&evsel->core.node, list); 373 + if (list) 374 + list_add_tail(&evsel->core.node, list); 375 + 378 376 return evsel; 377 + } 378 + 379 + struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, 380 + char *name, struct perf_pmu *pmu) 381 + { 382 + return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, 383 + NULL); 379 384 } 380 385 381 386 static int add_event(struct list_head *list, int *idx, 382 387 struct perf_event_attr *attr, char *name, 383 388 struct list_head *config_terms) 384 389 { 385 - return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 0 : -ENOMEM; 390 + return __add_event(list, idx, attr, true, name, NULL, config_terms, 391 + false, NULL) ? 
0 : -ENOMEM; 386 392 } 387 393 388 394 static int add_event_tool(struct list_head *list, int *idx, ··· 404 390 .config = PERF_COUNT_SW_DUMMY, 405 391 }; 406 392 407 - evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0"); 393 + evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, 394 + "0"); 408 395 if (!evsel) 409 396 return -ENOMEM; 410 397 evsel->tool_event = tool_event; ··· 414 399 return 0; 415 400 } 416 401 417 - static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) 402 + static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size) 418 403 { 419 404 int i, j; 420 405 int n, longest = -1; 421 406 422 407 for (i = 0; i < size; i++) { 423 - for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) { 408 + for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { 424 409 n = strlen(names[i][j]); 425 410 if (n > longest && !strncasecmp(str, names[i][j], n)) 426 411 longest = n; ··· 459 444 * No fallback - if we cannot get a clear cache type 460 445 * then bail out: 461 446 */ 462 - cache_type = parse_aliases(type, perf_evsel__hw_cache, 463 - PERF_COUNT_HW_CACHE_MAX); 447 + cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX); 464 448 if (cache_type == -1) 465 449 return -EINVAL; 466 450 ··· 472 458 n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); 473 459 474 460 if (cache_op == -1) { 475 - cache_op = parse_aliases(str, perf_evsel__hw_cache_op, 461 + cache_op = parse_aliases(str, evsel__hw_cache_op, 476 462 PERF_COUNT_HW_CACHE_OP_MAX); 477 463 if (cache_op >= 0) { 478 464 if (!evsel__is_cache_op_valid(cache_type, cache_op)) ··· 482 468 } 483 469 484 470 if (cache_result == -1) { 485 - cache_result = parse_aliases(str, perf_evsel__hw_cache_result, 471 + cache_result = parse_aliases(str, evsel__hw_cache_result, 486 472 PERF_COUNT_HW_CACHE_RESULT_MAX); 487 473 if (cache_result >= 0) 488 474 continue; ··· 552 538 struct parse_events_error *err, 
553 539 struct list_head *head_config) 554 540 { 555 - struct evsel *evsel; 541 + struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++); 556 542 557 - evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++); 558 543 if (IS_ERR(evsel)) { 559 544 tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); 560 545 return PTR_ERR(evsel); ··· 1227 1214 struct list_head *head_terms __maybe_unused) 1228 1215 { 1229 1216 #define ADD_CONFIG_TERM(__type, __weak) \ 1230 - struct perf_evsel_config_term *__t; \ 1217 + struct evsel_config_term *__t; \ 1231 1218 \ 1232 1219 __t = zalloc(sizeof(*__t)); \ 1233 1220 if (!__t) \ 1234 1221 return -ENOMEM; \ 1235 1222 \ 1236 1223 INIT_LIST_HEAD(&__t->list); \ 1237 - __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ 1224 + __t->type = EVSEL__CONFIG_TERM_ ## __type; \ 1238 1225 __t->weak = __weak; \ 1239 1226 list_add_tail(&__t->list, head_terms) 1240 1227 ··· 1325 1312 } 1326 1313 1327 1314 /* 1328 - * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for 1315 + * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for 1329 1316 * each bit of attr->config that the user has changed. 
1330 1317 */ 1331 1318 static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, ··· 1413 1400 1414 1401 static bool config_term_percore(struct list_head *config_terms) 1415 1402 { 1416 - struct perf_evsel_config_term *term; 1403 + struct evsel_config_term *term; 1417 1404 1418 1405 list_for_each_entry(term, config_terms, list) { 1419 - if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE) 1406 + if (term->type == EVSEL__CONFIG_TERM_PERCORE) 1420 1407 return term->val.percore; 1421 1408 } 1422 1409 ··· 1436 1423 struct parse_events_error *err = parse_state->error; 1437 1424 bool use_uncore_alias; 1438 1425 LIST_HEAD(config_terms); 1426 + 1427 + if (verbose > 1) { 1428 + fprintf(stderr, "Attempting to add event pmu '%s' with '", 1429 + name); 1430 + if (head_config) { 1431 + struct parse_events_term *term; 1432 + 1433 + list_for_each_entry(term, head_config, list) { 1434 + fprintf(stderr, "%s,", term->config); 1435 + } 1436 + } 1437 + fprintf(stderr, "' that may result in non-fatal errors\n"); 1438 + } 1439 1439 1440 1440 pmu = perf_pmu__find(name); 1441 1441 if (!pmu) { ··· 1472 1446 1473 1447 if (!head_config) { 1474 1448 attr.type = pmu->type; 1475 - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, 1476 - auto_merge_stats, NULL); 1449 + evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, 1450 + pmu, NULL, auto_merge_stats, NULL); 1477 1451 if (evsel) { 1478 1452 evsel->pmu_name = name ? 
strdup(name) : NULL; 1479 1453 evsel->use_uncore_alias = use_uncore_alias; ··· 1485 1459 1486 1460 if (perf_pmu__check_alias(pmu, head_config, &info)) 1487 1461 return -EINVAL; 1462 + 1463 + if (verbose > 1) { 1464 + fprintf(stderr, "After aliases, add event pmu '%s' with '", 1465 + name); 1466 + if (head_config) { 1467 + struct parse_events_term *term; 1468 + 1469 + list_for_each_entry(term, head_config, list) { 1470 + fprintf(stderr, "%s,", term->config); 1471 + } 1472 + } 1473 + fprintf(stderr, "' that may result in non-fatal errors\n"); 1474 + } 1488 1475 1489 1476 /* 1490 1477 * Configure hardcoded terms first, no need to check ··· 1517 1478 return -ENOMEM; 1518 1479 1519 1480 if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { 1520 - struct perf_evsel_config_term *pos, *tmp; 1481 + struct evsel_config_term *pos, *tmp; 1521 1482 1522 1483 list_for_each_entry_safe(pos, tmp, &config_terms, list) { 1523 1484 list_del_init(&pos->list); 1524 - zfree(&pos->val.str); 1485 + if (pos->free_str) 1486 + zfree(&pos->val.str); 1525 1487 free(pos); 1526 1488 } 1527 1489 return -EINVAL; 1528 1490 } 1529 1491 1530 - evsel = __add_event(list, &parse_state->idx, &attr, 1492 + evsel = __add_event(list, &parse_state->idx, &attr, true, 1531 1493 get_config_name(head_config), pmu, 1532 1494 &config_terms, auto_merge_stats, NULL); 1533 1495 if (evsel) { ··· 1670 1630 * event. That can be used to distinguish the leader from 1671 1631 * other members, even they have the same event name. 1672 1632 */ 1673 - if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) { 1633 + if ((leader != evsel) && 1634 + !strcmp(leader->pmu_name, evsel->pmu_name)) { 1674 1635 is_leader = false; 1675 1636 continue; 1676 1637 } 1677 - /* The name is always alias name */ 1678 - WARN_ON(strcmp(leader->name, evsel->name)); 1679 1638 1680 1639 /* Store the leader event for each PMU */ 1681 1640 leaders[nr_pmu++] = (uintptr_t) evsel; ··· 2041 2002 return r ? 
r->type : PMU_EVENT_SYMBOL_ERR; 2042 2003 } 2043 2004 2044 - static int parse_events__scanner(const char *str, void *parse_state, int start_token) 2005 + static int parse_events__scanner(const char *str, 2006 + struct parse_events_state *parse_state) 2045 2007 { 2046 2008 YY_BUFFER_STATE buffer; 2047 2009 void *scanner; 2048 2010 int ret; 2049 2011 2050 - ret = parse_events_lex_init_extra(start_token, &scanner); 2012 + ret = parse_events_lex_init_extra(parse_state, &scanner); 2051 2013 if (ret) 2052 2014 return ret; 2053 2015 ··· 2056 2016 2057 2017 #ifdef PARSER_DEBUG 2058 2018 parse_events_debug = 1; 2019 + parse_events_set_debug(1, scanner); 2059 2020 #endif 2060 2021 ret = parse_events_parse(parse_state, scanner); 2061 2022 ··· 2072 2031 int parse_events_terms(struct list_head *terms, const char *str) 2073 2032 { 2074 2033 struct parse_events_state parse_state = { 2075 - .terms = NULL, 2034 + .terms = NULL, 2035 + .stoken = PE_START_TERMS, 2076 2036 }; 2077 2037 int ret; 2078 2038 2079 - ret = parse_events__scanner(str, &parse_state, PE_START_TERMS); 2039 + ret = parse_events__scanner(str, &parse_state); 2080 2040 if (!ret) { 2081 2041 list_splice(parse_state.terms, terms); 2082 2042 zfree(&parse_state.terms); ··· 2096 2054 .idx = evlist->core.nr_entries, 2097 2055 .error = err, 2098 2056 .evlist = evlist, 2057 + .stoken = PE_START_EVENTS, 2099 2058 }; 2100 2059 int ret; 2101 2060 2102 - ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS); 2061 + ret = parse_events__scanner(str, &parse_state); 2103 2062 perf_pmu__parse_cleanup(); 2104 2063 2105 2064 if (!ret && list_empty(&parse_state.list)) { ··· 2860 2817 print_sdt_events(NULL, NULL, name_only); 2861 2818 2862 2819 metricgroup__print(true, true, NULL, name_only, details_flag); 2820 + 2821 + print_libpfm_events(name_only, long_desc); 2863 2822 } 2864 2823 2865 2824 int parse_events__is_hardcoded_term(struct parse_events_term *term)
+5
tools/perf/util/parse-events.h
··· 17 17 struct parse_events_error; 18 18 19 19 struct option; 20 + struct perf_pmu; 20 21 21 22 struct tracepoint_path { 22 23 char *system; ··· 129 128 struct parse_events_error *error; 130 129 struct evlist *evlist; 131 130 struct list_head *terms; 131 + int stoken; 132 132 }; 133 133 134 134 void parse_events__handle_error(struct parse_events_error *err, int idx, ··· 188 186 struct list_head *head_config, 189 187 bool auto_merge_stats, 190 188 bool use_alias); 189 + 190 + struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, 191 + char *name, struct perf_pmu *pmu); 191 192 192 193 int parse_events_multi_pmu_add(struct parse_events_state *parse_state, 193 194 char *str,
+7 -5
tools/perf/util/parse-events.l
··· 209 209 %% 210 210 211 211 %{ 212 - { 213 - int start_token; 212 + struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner); 214 213 215 - start_token = parse_events_get_extra(yyscanner); 214 + { 215 + int start_token = _parse_state->stoken; 216 216 217 217 if (start_token == PE_START_TERMS) 218 218 BEGIN(config); ··· 220 220 BEGIN(event); 221 221 222 222 if (start_token) { 223 - parse_events_set_extra(NULL, yyscanner); 223 + _parse_state->stoken = 0; 224 224 /* 225 225 * The flex parser does not init locations variable 226 226 * via the scan_string interface, so we need do the ··· 252 252 BEGIN(INITIAL); 253 253 REWIND(0); 254 254 } 255 - 255 + , { 256 + return ','; 257 + } 256 258 } 257 259 258 260 <array>{
+281
tools/perf/util/pfm.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Support for libpfm4 event encoding. 4 + * 5 + * Copyright 2020 Google LLC. 6 + */ 7 + #include "util/cpumap.h" 8 + #include "util/debug.h" 9 + #include "util/event.h" 10 + #include "util/evlist.h" 11 + #include "util/evsel.h" 12 + #include "util/parse-events.h" 13 + #include "util/pmu.h" 14 + #include "util/pfm.h" 15 + 16 + #include <string.h> 17 + #include <linux/kernel.h> 18 + #include <perfmon/pfmlib_perf_event.h> 19 + 20 + static void libpfm_initialize(void) 21 + { 22 + int ret; 23 + 24 + ret = pfm_initialize(); 25 + if (ret != PFM_SUCCESS) { 26 + ui__warning("libpfm failed to initialize: %s\n", 27 + pfm_strerror(ret)); 28 + } 29 + } 30 + 31 + int parse_libpfm_events_option(const struct option *opt, const char *str, 32 + int unset __maybe_unused) 33 + { 34 + struct evlist *evlist = *(struct evlist **)opt->value; 35 + struct perf_event_attr attr; 36 + struct perf_pmu *pmu; 37 + struct evsel *evsel, *grp_leader = NULL; 38 + char *p, *q, *p_orig; 39 + const char *sep; 40 + int grp_evt = -1; 41 + int ret; 42 + 43 + libpfm_initialize(); 44 + 45 + p_orig = p = strdup(str); 46 + if (!p) 47 + return -1; 48 + /* 49 + * force loading of the PMU list 50 + */ 51 + perf_pmu__scan(NULL); 52 + 53 + for (q = p; strsep(&p, ",{}"); q = p) { 54 + sep = p ? 
str + (p - p_orig - 1) : ""; 55 + if (*sep == '{') { 56 + if (grp_evt > -1) { 57 + ui__error( 58 + "nested event groups not supported\n"); 59 + goto error; 60 + } 61 + grp_evt++; 62 + } 63 + 64 + /* no event */ 65 + if (*q == '\0') 66 + continue; 67 + 68 + memset(&attr, 0, sizeof(attr)); 69 + event_attr_init(&attr); 70 + 71 + ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3, 72 + &attr, NULL, NULL); 73 + 74 + if (ret != PFM_SUCCESS) { 75 + ui__error("failed to parse event %s : %s\n", str, 76 + pfm_strerror(ret)); 77 + goto error; 78 + } 79 + 80 + pmu = perf_pmu__find_by_type((unsigned int)attr.type); 81 + evsel = parse_events__add_event(evlist->core.nr_entries, 82 + &attr, q, pmu); 83 + if (evsel == NULL) 84 + goto error; 85 + 86 + evsel->is_libpfm_event = true; 87 + 88 + evlist__add(evlist, evsel); 89 + 90 + if (grp_evt == 0) 91 + grp_leader = evsel; 92 + 93 + if (grp_evt > -1) { 94 + evsel->leader = grp_leader; 95 + grp_leader->core.nr_members++; 96 + grp_evt++; 97 + } 98 + 99 + if (*sep == '}') { 100 + if (grp_evt < 0) { 101 + ui__error( 102 + "cannot close a non-existing event group\n"); 103 + goto error; 104 + } 105 + evlist->nr_groups++; 106 + grp_leader = NULL; 107 + grp_evt = -1; 108 + } 109 + } 110 + return 0; 111 + error: 112 + free(p_orig); 113 + return -1; 114 + } 115 + 116 + static const char *srcs[PFM_ATTR_CTRL_MAX] = { 117 + [PFM_ATTR_CTRL_UNKNOWN] = "???", 118 + [PFM_ATTR_CTRL_PMU] = "PMU", 119 + [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event", 120 + }; 121 + 122 + static void 123 + print_attr_flags(pfm_event_attr_info_t *info) 124 + { 125 + int n = 0; 126 + 127 + if (info->is_dfl) { 128 + printf("[default] "); 129 + n++; 130 + } 131 + 132 + if (info->is_precise) { 133 + printf("[precise] "); 134 + n++; 135 + } 136 + 137 + if (!n) 138 + printf("- "); 139 + } 140 + 141 + static void 142 + print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc) 143 + { 144 + pfm_event_attr_info_t ainfo; 145 + const char *src; 146 + int j, ret; 147 + 148 + 
ainfo.size = sizeof(ainfo); 149 + 150 + printf(" %s\n", info->name); 151 + printf(" [%s]\n", info->desc); 152 + if (long_desc) { 153 + if (info->equiv) 154 + printf(" Equiv: %s\n", info->equiv); 155 + 156 + printf(" Code : 0x%"PRIx64"\n", info->code); 157 + } 158 + pfm_for_each_event_attr(j, info) { 159 + ret = pfm_get_event_attr_info(info->idx, j, 160 + PFM_OS_PERF_EVENT_EXT, &ainfo); 161 + if (ret != PFM_SUCCESS) 162 + continue; 163 + 164 + if (ainfo.type == PFM_ATTR_UMASK) { 165 + printf(" %s:%s\n", info->name, ainfo.name); 166 + printf(" [%s]\n", ainfo.desc); 167 + } 168 + 169 + if (!long_desc) 170 + continue; 171 + 172 + if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) 173 + ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; 174 + 175 + src = srcs[ainfo.ctrl]; 176 + switch (ainfo.type) { 177 + case PFM_ATTR_UMASK: 178 + printf(" Umask : 0x%02"PRIx64" : %s: ", 179 + ainfo.code, src); 180 + print_attr_flags(&ainfo); 181 + putchar('\n'); 182 + break; 183 + case PFM_ATTR_MOD_BOOL: 184 + printf(" Modif : %s: [%s] : %s (boolean)\n", src, 185 + ainfo.name, ainfo.desc); 186 + break; 187 + case PFM_ATTR_MOD_INTEGER: 188 + printf(" Modif : %s: [%s] : %s (integer)\n", src, 189 + ainfo.name, ainfo.desc); 190 + break; 191 + case PFM_ATTR_NONE: 192 + case PFM_ATTR_RAW_UMASK: 193 + case PFM_ATTR_MAX: 194 + default: 195 + printf(" Attr : %s: [%s] : %s\n", src, 196 + ainfo.name, ainfo.desc); 197 + } 198 + } 199 + } 200 + 201 + /* 202 + * list all pmu::event:umask, pmu::event 203 + * printed events may not be all valid combinations of umask for an event 204 + */ 205 + static void 206 + print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info) 207 + { 208 + pfm_event_attr_info_t ainfo; 209 + int j, ret; 210 + bool has_umask = false; 211 + 212 + ainfo.size = sizeof(ainfo); 213 + 214 + pfm_for_each_event_attr(j, info) { 215 + ret = pfm_get_event_attr_info(info->idx, j, 216 + PFM_OS_PERF_EVENT_EXT, &ainfo); 217 + if (ret != PFM_SUCCESS) 218 + continue; 219 + 220 + if (ainfo.type != 
PFM_ATTR_UMASK) 221 + continue; 222 + 223 + printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name); 224 + has_umask = true; 225 + } 226 + if (!has_umask) 227 + printf("%s::%s\n", pinfo->name, info->name); 228 + } 229 + 230 + void print_libpfm_events(bool name_only, bool long_desc) 231 + { 232 + pfm_event_info_t info; 233 + pfm_pmu_info_t pinfo; 234 + int i, p, ret; 235 + 236 + libpfm_initialize(); 237 + 238 + /* initialize to zero to indicate ABI version */ 239 + info.size = sizeof(info); 240 + pinfo.size = sizeof(pinfo); 241 + 242 + if (!name_only) 243 + puts("\nList of pre-defined events (to be used in --pfm-events):\n"); 244 + 245 + pfm_for_all_pmus(p) { 246 + bool printed_pmu = false; 247 + 248 + ret = pfm_get_pmu_info(p, &pinfo); 249 + if (ret != PFM_SUCCESS) 250 + continue; 251 + 252 + /* only print events that are supported by host HW */ 253 + if (!pinfo.is_present) 254 + continue; 255 + 256 + /* handled by perf directly */ 257 + if (pinfo.pmu == PFM_PMU_PERF_EVENT) 258 + continue; 259 + 260 + for (i = pinfo.first_event; i != -1; 261 + i = pfm_get_event_next(i)) { 262 + 263 + ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, 264 + &info); 265 + if (ret != PFM_SUCCESS) 266 + continue; 267 + 268 + if (!name_only && !printed_pmu) { 269 + printf("%s:\n", pinfo.name); 270 + printed_pmu = true; 271 + } 272 + 273 + if (!name_only) 274 + print_libpfm_events_detailed(&info, long_desc); 275 + else 276 + print_libpfm_events_raw(&pinfo, &info); 277 + } 278 + if (!name_only && printed_pmu) 279 + putchar('\n'); 280 + } 281 + }
+37
tools/perf/util/pfm.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Support for libpfm4 event encoding. 4 + * 5 + * Copyright 2020 Google LLC. 6 + */ 7 + #ifndef __PERF_PFM_H 8 + #define __PERF_PFM_H 9 + 10 + #include <subcmd/parse-options.h> 11 + 12 + #ifdef HAVE_LIBPFM 13 + int parse_libpfm_events_option(const struct option *opt, const char *str, 14 + int unset); 15 + 16 + void print_libpfm_events(bool name_only, bool long_desc); 17 + 18 + #else 19 + #include <linux/compiler.h> 20 + 21 + static inline int parse_libpfm_events_option( 22 + const struct option *opt __maybe_unused, 23 + const char *str __maybe_unused, 24 + int unset __maybe_unused) 25 + { 26 + return 0; 27 + } 28 + 29 + static inline void print_libpfm_events(bool name_only __maybe_unused, 30 + bool long_desc __maybe_unused) 31 + { 32 + } 33 + 34 + #endif 35 + 36 + 37 + #endif /* __PERF_PFM_H */
+21 -12
tools/perf/util/pmu.c
··· 1056 1056 * Setup one of config[12] attr members based on the 1057 1057 * user input data - term parameter. 1058 1058 */ 1059 - static int pmu_config_term(struct list_head *formats, 1059 + static int pmu_config_term(const char *pmu_name, 1060 + struct list_head *formats, 1060 1061 struct perf_event_attr *attr, 1061 1062 struct parse_events_term *term, 1062 1063 struct list_head *head_terms, ··· 1083 1082 1084 1083 format = pmu_find_format(formats, term->config); 1085 1084 if (!format) { 1086 - if (verbose > 0) 1087 - printf("Invalid event/parameter '%s'\n", term->config); 1088 - if (err) { 1089 - char *pmu_term = pmu_formats_string(formats); 1085 + char *pmu_term = pmu_formats_string(formats); 1086 + char *unknown_term; 1087 + char *help_msg; 1090 1088 1089 + if (asprintf(&unknown_term, 1090 + "unknown term '%s' for pmu '%s'", 1091 + term->config, pmu_name) < 0) 1092 + unknown_term = NULL; 1093 + help_msg = parse_events_formats_error_string(pmu_term); 1094 + if (err) { 1091 1095 parse_events__handle_error(err, term->err_term, 1092 - strdup("unknown term"), 1093 - parse_events_formats_error_string(pmu_term)); 1094 - free(pmu_term); 1096 + unknown_term, 1097 + help_msg); 1098 + } else { 1099 + pr_debug("%s (%s)\n", unknown_term, help_msg); 1100 + free(unknown_term); 1095 1101 } 1102 + free(pmu_term); 1096 1103 return -EINVAL; 1097 1104 } 1098 1105 ··· 1177 1168 return 0; 1178 1169 } 1179 1170 1180 - int perf_pmu__config_terms(struct list_head *formats, 1171 + int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, 1181 1172 struct perf_event_attr *attr, 1182 1173 struct list_head *head_terms, 1183 1174 bool zero, struct parse_events_error *err) ··· 1185 1176 struct parse_events_term *term; 1186 1177 1187 1178 list_for_each_entry(term, head_terms, list) { 1188 - if (pmu_config_term(formats, attr, term, head_terms, 1179 + if (pmu_config_term(pmu_name, formats, attr, term, head_terms, 1189 1180 zero, err)) 1190 1181 return -EINVAL; 1191 1182 } 
··· 1205 1196 bool zero = !!pmu->default_config; 1206 1197 1207 1198 attr->type = pmu->type; 1208 - return perf_pmu__config_terms(&pmu->format, attr, head_terms, 1209 - zero, err); 1199 + return perf_pmu__config_terms(pmu->name, &pmu->format, attr, 1200 + head_terms, zero, err); 1210 1201 } 1211 1202 1212 1203 static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
+2 -2
tools/perf/util/pmu.h
··· 9 9 #include "parse-events.h" 10 10 #include "pmu-events/pmu-events.h" 11 11 12 - struct perf_evsel_config_term; 12 + struct evsel_config_term; 13 13 14 14 enum { 15 15 PERF_PMU_FORMAT_VALUE_CONFIG, ··· 76 76 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, 77 77 struct list_head *head_terms, 78 78 struct parse_events_error *error); 79 - int perf_pmu__config_terms(struct list_head *formats, 79 + int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, 80 80 struct perf_event_attr *attr, 81 81 struct list_head *head_terms, 82 82 bool zero, struct parse_events_error *error);
+29 -20
tools/perf/util/probe-event.c
··· 102 102 symbol__exit(); 103 103 } 104 104 105 - static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) 105 + static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap) 106 106 { 107 107 /* kmap->ref_reloc_sym should be set if host_machine is initialized */ 108 108 struct kmap *kmap; ··· 114 114 kmap = map__kmap(map); 115 115 if (!kmap) 116 116 return NULL; 117 + 118 + if (pmap) 119 + *pmap = map; 120 + 117 121 return kmap->ref_reloc_sym; 118 122 } 119 123 ··· 129 125 struct map *map; 130 126 131 127 /* ref_reloc_sym is just a label. Need a special fix*/ 132 - reloc_sym = kernel_get_ref_reloc_sym(); 128 + reloc_sym = kernel_get_ref_reloc_sym(NULL); 133 129 if (reloc_sym && strcmp(name, reloc_sym->name) == 0) 134 130 *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; 135 131 else { ··· 236 232 static bool kprobe_blacklist__listed(unsigned long address); 237 233 static bool kprobe_warn_out_range(const char *symbol, unsigned long address) 238 234 { 239 - u64 etext_addr = 0; 240 - int ret; 235 + struct map *map; 236 + bool ret = false; 241 237 242 - /* Get the address of _etext for checking non-probable text symbol */ 243 - ret = kernel_get_symbol_address_by_name("_etext", &etext_addr, 244 - false, false); 245 - 246 - if (ret == 0 && etext_addr < address) 247 - pr_warning("%s is out of .text, skip it.\n", symbol); 248 - else if (kprobe_blacklist__listed(address)) 238 + map = kernel_get_module_map(NULL); 239 + if (map) { 240 + ret = address <= map->start || map->end < address; 241 + if (ret) 242 + pr_warning("%s is out of .text, skip it.\n", symbol); 243 + map__put(map); 244 + } 245 + if (!ret && kprobe_blacklist__listed(address)) { 249 246 pr_warning("%s is blacklisted function, skip it.\n", symbol); 250 - else 251 - return false; 247 + ret = true; 248 + } 252 249 253 - return true; 250 + return ret; 254 251 } 255 252 256 253 /* ··· 750 745 int ntevs) 751 746 { 752 747 struct ref_reloc_sym *reloc_sym; 748 + struct map *map; 753 749 
char *tmp; 754 750 int i, skipped = 0; 755 751 ··· 759 753 return post_process_offline_probe_trace_events(tevs, ntevs, 760 754 symbol_conf.vmlinux_name); 761 755 762 - reloc_sym = kernel_get_ref_reloc_sym(); 756 + reloc_sym = kernel_get_ref_reloc_sym(&map); 763 757 if (!reloc_sym) { 764 758 pr_warning("Relocated base symbol is not found!\n"); 765 759 return -EINVAL; ··· 770 764 continue; 771 765 if (tevs[i].point.retprobe && !kretprobe_offset_is_supported()) 772 766 continue; 773 - /* If we found a wrong one, mark it by NULL symbol */ 767 + /* 768 + * If we found a wrong one, mark it by NULL symbol. 769 + * Since addresses in debuginfo is same as objdump, we need 770 + * to convert it to addresses on memory. 771 + */ 774 772 if (kprobe_warn_out_range(tevs[i].point.symbol, 775 - tevs[i].point.address)) { 773 + map__objdump_2mem(map, tevs[i].point.address))) { 776 774 tmp = NULL; 777 775 skipped++; 778 776 } else { ··· 1775 1765 fmt1_str = strtok_r(argv0_str, ":", &fmt); 1776 1766 fmt2_str = strtok_r(NULL, "/", &fmt); 1777 1767 fmt3_str = strtok_r(NULL, " \t", &fmt); 1778 - if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL 1779 - || fmt3_str == NULL) { 1768 + if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) { 1780 1769 semantic_error("Failed to parse event name: %s\n", argv[0]); 1781 1770 ret = -EINVAL; 1782 1771 goto out; ··· 2945 2936 /* Note that the symbols in the kmodule are not relocated */ 2946 2937 if (!pev->uprobes && !pev->target && 2947 2938 (!pp->retprobe || kretprobe_offset_is_supported())) { 2948 - reloc_sym = kernel_get_ref_reloc_sym(); 2939 + reloc_sym = kernel_get_ref_reloc_sym(NULL); 2949 2940 if (!reloc_sym) { 2950 2941 pr_warning("Relocated base symbol is not found!\n"); 2951 2942 ret = -EINVAL;
+1
tools/perf/util/probe-finder.c
··· 101 101 DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, 102 102 DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, 103 103 DSO_BINARY_TYPE__BUILDID_DEBUGINFO, 104 + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, 104 105 DSO_BINARY_TYPE__NOT_FOUND, 105 106 }; 106 107
+1 -1
tools/perf/util/pstack.c
··· 15 15 struct pstack { 16 16 unsigned short top; 17 17 unsigned short max_nr_entries; 18 - void *entries[0]; 18 + void *entries[]; 19 19 }; 20 20 21 21 struct pstack *pstack__new(unsigned short max_nr_entries)
+6
tools/perf/util/record.h
··· 36 36 bool record_namespaces; 37 37 bool record_cgroup; 38 38 bool record_switch_events; 39 + bool record_switch_events_set; 39 40 bool all_kernel; 40 41 bool all_user; 41 42 bool kernel_callchains; ··· 76 75 extern struct option *record_options; 77 76 78 77 int record__parse_freq(const struct option *opt, const char *str, int unset); 78 + 79 + static inline bool record_opts__no_switch_events(const struct record_opts *opts) 80 + { 81 + return opts->record_switch_events_set && !opts->record_switch_events; 82 + } 79 83 80 84 #endif // _PERF_RECORD_H
+8 -4
tools/perf/util/session.c
··· 33 33 #include "../perf.h" 34 34 #include "arch/common.h" 35 35 #include <internal/lib.h> 36 - #include <linux/err.h> 37 36 38 37 #ifdef HAVE_ZSTD_SUPPORT 39 38 static int perf_session__process_compressed_event(struct perf_session *session, ··· 1103 1104 for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { 1104 1105 u64 val = regs[i++]; 1105 1106 1106 - printf(".... %-5s 0x%" PRIx64 "\n", 1107 + printf(".... %-5s 0x%016" PRIx64 "\n", 1107 1108 perf_reg_name(rid), val); 1108 1109 } 1109 1110 } ··· 1541 1542 */ 1542 1543 return 0; 1543 1544 case PERF_RECORD_HEADER_TRACING_DATA: 1544 - /* setup for reading amidst mmap */ 1545 - lseek(fd, file_offset, SEEK_SET); 1545 + /* 1546 + * Setup for reading amidst mmap, but only when we 1547 + * are in 'file' mode. The 'pipe' fd is in proper 1548 + * place already. 1549 + */ 1550 + if (!perf_data__is_pipe(session->data)) 1551 + lseek(fd, file_offset, SEEK_SET); 1546 1552 return tool->tracing_data(session, event); 1547 1553 case PERF_RECORD_HEADER_BUILD_ID: 1548 1554 return tool->build_id(session, event);
+1 -1
tools/perf/util/sideband_evlist.c
··· 22 22 attr->sample_id_all = 1; 23 23 } 24 24 25 - evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); 25 + evsel = evsel__new_idx(attr, evlist->core.nr_entries); 26 26 if (!evsel) 27 27 return -1; 28 28
+1 -1
tools/perf/util/sort.c
··· 2817 2817 return str; 2818 2818 2819 2819 if (asprintf(&n, "%s,%s", pre, str) < 0) 2820 - return NULL; 2820 + n = NULL; 2821 2821 2822 2822 free(str); 2823 2823 return n;
+31 -22
tools/perf/util/stat-shadow.c
··· 323 323 { 324 324 struct evsel *counter, *leader, **metric_events, *oc; 325 325 bool found; 326 - const char **metric_names; 326 + struct expr_parse_ctx ctx; 327 + struct hashmap_entry *cur; 328 + size_t bkt; 327 329 int i; 328 - int num_metric_names; 329 330 331 + expr__ctx_init(&ctx); 330 332 evlist__for_each_entry(evsel_list, counter) { 331 333 bool invalid = false; 332 334 333 335 leader = counter->leader; 334 336 if (!counter->metric_expr) 335 337 continue; 338 + 339 + expr__ctx_clear(&ctx); 336 340 metric_events = counter->metric_events; 337 341 if (!metric_events) { 338 - if (expr__find_other(counter->metric_expr, counter->name, 339 - &metric_names, &num_metric_names, 1) < 0) 342 + if (expr__find_other(counter->metric_expr, 343 + counter->name, 344 + &ctx, 1) < 0) 340 345 continue; 341 346 342 347 metric_events = calloc(sizeof(struct evsel *), 343 - num_metric_names + 1); 344 - if (!metric_events) 348 + hashmap__size(&ctx.ids) + 1); 349 + if (!metric_events) { 350 + expr__ctx_clear(&ctx); 345 351 return; 352 + } 346 353 counter->metric_events = metric_events; 347 354 } 348 355 349 - for (i = 0; i < num_metric_names; i++) { 356 + i = 0; 357 + hashmap__for_each_entry((&ctx.ids), cur, bkt) { 358 + const char *metric_name = (const char *)cur->key; 359 + 350 360 found = false; 351 361 if (leader) { 352 362 /* Search in group */ 353 363 for_each_group_member (oc, leader) { 354 - if (!strcasecmp(oc->name, metric_names[i]) && 364 + if (!strcasecmp(oc->name, 365 + metric_name) && 355 366 !oc->collect_stat) { 356 367 found = true; 357 368 break; ··· 371 360 } 372 361 if (!found) { 373 362 /* Search ignoring groups */ 374 - oc = perf_stat__find_event(evsel_list, metric_names[i]); 363 + oc = perf_stat__find_event(evsel_list, 364 + metric_name); 375 365 } 376 366 if (!oc) { 377 367 /* Deduping one is good enough to handle duplicated PMUs. */ ··· 385 373 * of events. So we ask the user instead to add the missing 386 374 * events. 
387 375 */ 388 - if (!printed || strcasecmp(printed, metric_names[i])) { 376 + if (!printed || 377 + strcasecmp(printed, metric_name)) { 389 378 fprintf(stderr, 390 379 "Add %s event to groups to get metric expression for %s\n", 391 - metric_names[i], 380 + metric_name, 392 381 counter->name); 393 - printed = strdup(metric_names[i]); 382 + printed = strdup(metric_name); 394 383 } 395 384 invalid = true; 396 385 continue; 397 386 } 398 - metric_events[i] = oc; 387 + metric_events[i++] = oc; 399 388 oc->collect_stat = true; 400 389 } 401 390 metric_events[i] = NULL; 402 - free(metric_names); 403 391 if (invalid) { 404 392 free(metric_events); 405 393 counter->metric_events = NULL; 406 394 counter->metric_expr = NULL; 407 395 } 408 396 } 397 + expr__ctx_clear(&ctx); 409 398 } 410 399 411 400 static double runtime_stat_avg(struct runtime_stat *st, ··· 737 724 const char *metric_name, 738 725 const char *metric_unit, 739 726 int runtime, 740 - double avg, 741 727 int cpu, 742 728 struct perf_stat_output_ctx *out, 743 729 struct runtime_stat *st) ··· 749 737 char *n, *pn; 750 738 751 739 expr__ctx_init(&pctx); 752 - /* Must be first id entry */ 753 - expr__add_id(&pctx, name, avg); 754 740 for (i = 0; metric_events[i]; i++) { 755 741 struct saved_value *v; 756 742 struct stats *stats; ··· 807 797 print_metric(config, ctxp, NULL, "%8.1f", 808 798 metric_bf, ratio); 809 799 } else { 810 - print_metric(config, ctxp, NULL, "%8.1f", 800 + print_metric(config, ctxp, NULL, "%8.2f", 811 801 metric_name ? 812 802 metric_name : 813 803 out->force_header ? name : "", ··· 824 814 (metric_name ? 
metric_name : name) : "", 0); 825 815 } 826 816 827 - for (i = 1; i < pctx.num_ids; i++) 828 - zfree(&pctx.ids[i].name); 817 + expr__ctx_clear(&pctx); 829 818 } 830 819 831 820 void perf_stat__print_shadow_stats(struct perf_stat_config *config, ··· 1036 1027 print_metric(config, ctxp, NULL, NULL, name, 0); 1037 1028 } else if (evsel->metric_expr) { 1038 1029 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 1039 - evsel->metric_name, NULL, 1, avg, cpu, out, st); 1030 + evsel->metric_name, NULL, 1, cpu, out, st); 1040 1031 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 1041 1032 char unit = 'M'; 1042 1033 char unit_buf[10]; ··· 1065 1056 out->new_line(config, ctxp); 1066 1057 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1067 1058 evsel->name, mexp->metric_name, 1068 - mexp->metric_unit, mexp->runtime, avg, cpu, out, st); 1059 + mexp->metric_unit, mexp->runtime, cpu, out, st); 1069 1060 } 1070 1061 } 1071 1062 if (num == 0)
+65 -25
tools/perf/util/stat.c
··· 115 115 } 116 116 } 117 117 118 - static void perf_evsel__reset_stat_priv(struct evsel *evsel) 118 + static void evsel__reset_stat_priv(struct evsel *evsel) 119 119 { 120 120 int i; 121 121 struct perf_stat_evsel *ps = evsel->stats; ··· 126 126 perf_stat_evsel_id_init(evsel); 127 127 } 128 128 129 - static int perf_evsel__alloc_stat_priv(struct evsel *evsel) 129 + static int evsel__alloc_stat_priv(struct evsel *evsel) 130 130 { 131 131 evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); 132 132 if (evsel->stats == NULL) 133 133 return -ENOMEM; 134 - perf_evsel__reset_stat_priv(evsel); 134 + evsel__reset_stat_priv(evsel); 135 135 return 0; 136 136 } 137 137 138 - static void perf_evsel__free_stat_priv(struct evsel *evsel) 138 + static void evsel__free_stat_priv(struct evsel *evsel) 139 139 { 140 140 struct perf_stat_evsel *ps = evsel->stats; 141 141 ··· 144 144 zfree(&evsel->stats); 145 145 } 146 146 147 - static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel, 148 - int ncpus, int nthreads) 147 + static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) 149 148 { 150 149 struct perf_counts *counts; 151 150 ··· 155 156 return counts ? 
0 : -ENOMEM; 156 157 } 157 158 158 - static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) 159 + static void evsel__free_prev_raw_counts(struct evsel *evsel) 159 160 { 160 161 perf_counts__delete(evsel->prev_raw_counts); 161 162 evsel->prev_raw_counts = NULL; 162 163 } 163 164 164 - static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) 165 + static void evsel__reset_prev_raw_counts(struct evsel *evsel) 165 166 { 166 - if (evsel->prev_raw_counts) { 167 - evsel->prev_raw_counts->aggr.val = 0; 168 - evsel->prev_raw_counts->aggr.ena = 0; 169 - evsel->prev_raw_counts->aggr.run = 0; 170 - } 167 + if (evsel->prev_raw_counts) 168 + perf_counts__reset(evsel->prev_raw_counts); 171 169 } 172 170 173 - static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) 171 + static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) 174 172 { 175 173 int ncpus = evsel__nr_cpus(evsel); 176 174 int nthreads = perf_thread_map__nr(evsel->core.threads); 177 175 178 - if (perf_evsel__alloc_stat_priv(evsel) < 0 || 179 - perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || 180 - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) 176 + if (evsel__alloc_stat_priv(evsel) < 0 || 177 + evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || 178 + (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) 181 179 return -ENOMEM; 182 180 183 181 return 0; ··· 185 189 struct evsel *evsel; 186 190 187 191 evlist__for_each_entry(evlist, evsel) { 188 - if (perf_evsel__alloc_stats(evsel, alloc_raw)) 192 + if (evsel__alloc_stats(evsel, alloc_raw)) 189 193 goto out_free; 190 194 } 191 195 ··· 201 205 struct evsel *evsel; 202 206 203 207 evlist__for_each_entry(evlist, evsel) { 204 - perf_evsel__free_stat_priv(evsel); 205 - perf_evsel__free_counts(evsel); 206 - perf_evsel__free_prev_raw_counts(evsel); 208 + evsel__free_stat_priv(evsel); 209 + evsel__free_counts(evsel); 210 + evsel__free_prev_raw_counts(evsel); 207 211 } 208 
212 } 209 213 ··· 212 216 struct evsel *evsel; 213 217 214 218 evlist__for_each_entry(evlist, evsel) { 215 - perf_evsel__reset_stat_priv(evsel); 216 - perf_evsel__reset_counts(evsel); 219 + evsel__reset_stat_priv(evsel); 220 + evsel__reset_counts(evsel); 217 221 } 218 222 } 219 223 ··· 222 226 struct evsel *evsel; 223 227 224 228 evlist__for_each_entry(evlist, evsel) 225 - perf_evsel__reset_prev_raw_counts(evsel); 229 + evsel__reset_prev_raw_counts(evsel); 230 + } 231 + 232 + static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel) 233 + { 234 + int ncpus = evsel__nr_cpus(evsel); 235 + int nthreads = perf_thread_map__nr(evsel->core.threads); 236 + 237 + for (int thread = 0; thread < nthreads; thread++) { 238 + for (int cpu = 0; cpu < ncpus; cpu++) { 239 + *perf_counts(evsel->counts, cpu, thread) = 240 + *perf_counts(evsel->prev_raw_counts, cpu, 241 + thread); 242 + } 243 + } 244 + 245 + evsel->counts->aggr = evsel->prev_raw_counts->aggr; 246 + } 247 + 248 + void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) 249 + { 250 + struct evsel *evsel; 251 + 252 + evlist__for_each_entry(evlist, evsel) 253 + perf_evsel__copy_prev_raw_counts(evsel); 254 + } 255 + 256 + void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) 257 + { 258 + struct evsel *evsel; 259 + 260 + /* 261 + * To collect the overall statistics for interval mode, 262 + * we copy the counts from evsel->prev_raw_counts to 263 + * evsel->counts. The perf_stat_process_counter creates 264 + * aggr values from per cpu values, but the per cpu values 265 + * are 0 for AGGR_GLOBAL. So we use a trick that saves the 266 + * previous aggr value to the first member of perf_counts, 267 + * then aggr calculation in process_counter_values can work 268 + * correctly. 
269 + */ 270 + evlist__for_each_entry(evlist, evsel) { 271 + *perf_counts(evsel->prev_raw_counts, 0, 0) = 272 + evsel->prev_raw_counts->aggr; 273 + } 226 274 } 227 275 228 276 static void zero_per_pkg(struct evsel *counter) ··· 408 368 * interval mode, otherwise overall avg running 409 369 * averages will be shown for each interval. 410 370 */ 411 - if (config->interval) { 371 + if (config->interval || config->summary) { 412 372 for (i = 0; i < 3; i++) 413 373 init_stats(&ps->res_stats[i]); 414 374 }
+7
tools/perf/util/stat.h
··· 110 110 bool all_kernel; 111 111 bool all_user; 112 112 bool percore_show_thread; 113 + bool summary; 114 + bool metric_no_group; 115 + bool metric_no_merge; 113 116 FILE *output; 114 117 unsigned int interval; 115 118 unsigned int timeout; ··· 134 131 u64 *walltime_run; 135 132 struct rblist metric_events; 136 133 }; 134 + 135 + void perf_stat__set_big_num(int set); 137 136 138 137 void update_stats(struct stats *stats, u64 val); 139 138 double avg_stats(struct stats *stats); ··· 203 198 void perf_evlist__free_stats(struct evlist *evlist); 204 199 void perf_evlist__reset_stats(struct evlist *evlist); 205 200 void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); 201 + void perf_evlist__copy_prev_raw_counts(struct evlist *evlist); 202 + void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist); 206 203 207 204 int perf_stat_process_counter(struct perf_stat_config *config, 208 205 struct evsel *counter);
+7
tools/perf/util/symbol-elf.c
··· 1458 1458 u64 first_symbol; 1459 1459 u64 last_symbol; 1460 1460 u64 first_module; 1461 + u64 first_module_symbol; 1461 1462 u64 last_module_symbol; 1462 1463 size_t phnum; 1463 1464 struct list_head phdrs; ··· 1535 1534 return 0; 1536 1535 1537 1536 if (strchr(name, '[')) { 1537 + if (!kci->first_module_symbol || start < kci->first_module_symbol) 1538 + kci->first_module_symbol = start; 1538 1539 if (start > kci->last_module_symbol) 1539 1540 kci->last_module_symbol = start; 1540 1541 return 0; ··· 1733 1730 kci->etext = round_up(kci->last_symbol, page_size); 1734 1731 kci->etext += page_size; 1735 1732 } 1733 + 1734 + if (kci->first_module_symbol && 1735 + (!kci->first_module || kci->first_module_symbol < kci->first_module)) 1736 + kci->first_module = kci->first_module_symbol; 1736 1737 1737 1738 kci->first_module = round_down(kci->first_module, page_size); 1738 1739
+4
tools/perf/util/symbol.c
··· 79 79 DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, 80 80 DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, 81 81 DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, 82 + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, 82 83 DSO_BINARY_TYPE__NOT_FOUND, 83 84 }; 84 85 ··· 1224 1223 1225 1224 m->end = old_map->start; 1226 1225 list_add_tail(&m->node, &merged); 1226 + new_map->pgoff += old_map->end - new_map->start; 1227 1227 new_map->start = old_map->end; 1228 1228 } 1229 1229 } else { ··· 1245 1243 * |new......| -> |new...| 1246 1244 * |old....| -> |old....| 1247 1245 */ 1246 + new_map->pgoff += old_map->end - new_map->start; 1248 1247 new_map->start = old_map->end; 1249 1248 } 1250 1249 } ··· 1532 1529 case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: 1533 1530 case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: 1534 1531 case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: 1532 + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: 1535 1533 case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: 1536 1534 case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: 1537 1535 return !kmod && dso->kernel == DSO_TYPE_USER;
+1 -1
tools/perf/util/symbol.h
··· 55 55 u8 inlined:1; 56 56 u8 arch_sym; 57 57 bool annotate2; 58 - char name[0]; 58 + char name[]; 59 59 }; 60 60 61 61 void symbol__delete(struct symbol *sym);
+2 -2
tools/perf/util/syscalltbl.c
··· 8 8 #include "syscalltbl.h" 9 9 #include <stdlib.h> 10 10 #include <linux/compiler.h> 11 + #include <linux/zalloc.h> 11 12 12 13 #ifdef HAVE_SYSCALL_TABLE_SUPPORT 13 - #include <linux/zalloc.h> 14 14 #include <string.h> 15 15 #include "string2.h" 16 16 ··· 142 142 143 143 struct syscalltbl *syscalltbl__new(void) 144 144 { 145 - struct syscalltbl *tbl = malloc(sizeof(*tbl)); 145 + struct syscalltbl *tbl = zalloc(sizeof(*tbl)); 146 146 if (tbl) 147 147 tbl->audit_machine = audit_detect_machine(); 148 148 return tbl;
+6 -8
tools/perf/util/syscalltbl.h
··· 3 3 #define __PERF_SYSCALLTBL_H 4 4 5 5 struct syscalltbl { 6 - union { 7 - int audit_machine; 8 - struct { 9 - int max_id; 10 - int nr_entries; 11 - void *entries; 12 - } syscalls; 13 - }; 6 + int audit_machine; 7 + struct { 8 + int max_id; 9 + int nr_entries; 10 + void *entries; 11 + } syscalls; 14 12 }; 15 13 16 14 struct syscalltbl *syscalltbl__new(void);
+1 -1
tools/perf/util/trace-event-info.c
··· 428 428 if (!ppath->next) { 429 429 error: 430 430 pr_debug("No memory to alloc tracepoints list\n"); 431 - put_tracepoints_path(&path); 431 + put_tracepoints_path(path.next); 432 432 return NULL; 433 433 } 434 434 next:
+1 -1
tools/perf/util/unwind-libunwind-local.c
··· 243 243 * encoded_t fde_addr; 244 244 * } binary_search_table[fde_count]; 245 245 */ 246 - char data[0]; 246 + char data[]; 247 247 } __packed; 248 248 249 249 static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,