Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf tooling fixes from Thomas Gleixner:
"Core libraries:
- Fix max perf_event_attr.precise_ip detection.
- Fix parser error for uncore event alias
- Fix up the ordering of kernel maps after obtaining the main kernel map
  address.

Intel PT:
- Fix TSC slip, where a TSC packet can slip past MTC packets so that
  the timestamp appears to go backwards.
- Fixes for the exported-sql-viewer GUI conversion to python3.

ARM coresight:
- Fix the build by adding a missing case value for an enumeration value
  introduced in a newer version of the OpenCSD library, which is now the
  required one.

tool headers:
- Synchronize kernel headers with the kernel, getting the new io_uring and
  pidfd_send_signal syscalls so that 'perf trace' can handle them"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf pmu: Fix parser error for uncore event alias
perf scripts python: exported-sql-viewer.py: Fix python3 support
perf scripts python: exported-sql-viewer.py: Fix never-ending loop
perf machine: Update kernel map address and re-order properly
tools headers uapi: Sync powerpc's asm/kvm.h copy with the kernel sources
tools headers: Update x86's syscall_64.tbl and uapi/asm-generic/unistd
tools headers uapi: Update drm/i915_drm.h
tools arch x86: Sync asm/cpufeatures.h with the kernel sources
tools headers uapi: Sync linux/fcntl.h to get the F_SEAL_FUTURE_WRITE addition
tools headers uapi: Sync asm-generic/mman-common.h and linux/mman.h
perf evsel: Fix max perf_event_attr.precise_ip detection
perf intel-pt: Fix TSC slip
perf cs-etm: Add missing case value

Changed files
+288 -103
-2
tools/arch/alpha/include/uapi/asm/mman.h
···
 #define MAP_NONBLOCK 0x40000
 #define MAP_NORESERVE 0x10000
 #define MAP_POPULATE 0x20000
-#define MAP_PRIVATE 0x02
-#define MAP_SHARED 0x01
 #define MAP_STACK 0x80000
 #define PROT_EXEC 0x4
 #define PROT_GROWSDOWN 0x01000000
-2
tools/arch/mips/include/uapi/asm/mman.h
···
 #define MAP_NONBLOCK 0x20000
 #define MAP_NORESERVE 0x0400
 #define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x002
-#define MAP_SHARED 0x001
 #define MAP_STACK 0x40000
 #define PROT_EXEC 0x04
 #define PROT_GROWSDOWN 0x01000000
-2
tools/arch/parisc/include/uapi/asm/mman.h
···
 #define MAP_NONBLOCK 0x20000
 #define MAP_NORESERVE 0x4000
 #define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x02
-#define MAP_SHARED 0x01
 #define MAP_STACK 0x40000
 #define PROT_EXEC 0x4
 #define PROT_GROWSDOWN 0x01000000
+2
tools/arch/powerpc/include/uapi/asm/kvm.h
···
 #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
 #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
 #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54)
 
 #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
 #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
 #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58)
 
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+1
tools/arch/x86/include/asm/cpufeatures.h
···
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-2
tools/arch/xtensa/include/uapi/asm/mman.h
···
 #define MAP_NONBLOCK 0x20000
 #define MAP_NORESERVE 0x0400
 #define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x002
-#define MAP_SHARED 0x001
 #define MAP_STACK 0x40000
 #define PROT_EXEC 0x4
 #define PROT_GROWSDOWN 0x01000000
+2 -2
tools/build/feature/test-libopencsd.c
···
 /*
  * Check OpenCSD library version is sufficient to provide required features
  */
-#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0))
 #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.10.0 is required"
+#error "OpenCSD >= 0.11.0 is required"
 #endif
 
 int main(void)
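The feature test packs (major, minor, patch) into a single integer, so bumping the minimum to 0.11.0 only raises the packed value that OCSD_VER_NUM is compared against. A minimal sketch of that comparison; the OCSD_VER() helper below is invented for illustration and is not part of the OpenCSD API:

#include <stdio.h>

/* Same packing as OCSD_MIN_VER in the feature test above (illustration only). */
#define OCSD_VER(major, minor, patch) (((major) << 16) | ((minor) << 8) | (patch))

int main(void)
{
    unsigned int min_ver = OCSD_VER(0, 11, 0);  /* new minimum: 0x000b00 */
    unsigned int lib_ver = OCSD_VER(0, 10, 0);  /* e.g. an older installed library */

    printf("lib 0x%06x vs min 0x%06x: %s\n", lib_ver, min_ver,
           lib_ver < min_ver ? "too old, feature test fails" : "ok");
    return 0;
}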
+23
tools/include/uapi/asm-generic/mman-common-tools.h
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
+#define __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
+
+#include <asm-generic/mman-common.h>
+
+/* We need this because we need to have tools/include/uapi/ included in the tools
+ * header search path to get access to stuff that is not yet in the system's
+ * copy of the files in that directory, but since this cset:
+ *
+ *     746c9398f5ac ("arch: move common mmap flags to linux/mman.h")
+ *
+ * We end up making sys/mman.h, that is in the system headers, to not find the
+ * MAP_SHARED and MAP_PRIVATE defines because they are not anymore in our copy
+ * of asm-generic/mman-common.h. So we define them here and include this header
+ * from each of the per arch mman.h headers.
+ */
+#ifndef MAP_SHARED
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#endif
+#endif // __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
+1 -3
tools/include/uapi/asm-generic/mman-common.h
···
 #define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
 #define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
 
-#define MAP_SHARED 0x01 /* Share changes */
-#define MAP_PRIVATE 0x02 /* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE 0x0f /* Mask for type of mapping */
 #define MAP_FIXED 0x10 /* Interpret addr exactly */
 #define MAP_ANONYMOUS 0x20 /* don't use a file */
+1 -1
tools/include/uapi/asm-generic/mman.h
···
 #ifndef __ASM_GENERIC_MMAN_H
 #define __ASM_GENERIC_MMAN_H
 
-#include <asm-generic/mman-common.h>
+#include <asm-generic/mman-common-tools.h>
 
 #define MAP_GROWSDOWN 0x0100 /* stack-like segment */
 #define MAP_DENYWRITE 0x0800 /* ETXTBSY */
+10 -1
tools/include/uapi/asm-generic/unistd.h
···
 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
 #endif
 
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
+
 #undef __NR_syscalls
-#define __NR_syscalls 424
+#define __NR_syscalls 428
 
 /*
  * 32 bit systems traditionally used different
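perf itself only needs these table entries so that 'perf trace' can resolve the names, but for reference the new number can already be exercised directly via syscall(2). A hedged sketch of calling pidfd_send_signal (number 424 above) on Linux 5.1+; obtaining the pid fd by opening /proc/self reflects the pre-pidfd_open() interface and is an assumption of this sketch, not something taken from the diff:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal 424  /* matches the table entry above */
#endif

int main(void)
{
    int pidfd = open("/proc/self", O_RDONLY);  /* early way to get a pid fd */
    /* Signal 0 performs only the permission/existence check, like kill(pid, 0). */
    long ret = syscall(__NR_pidfd_send_signal, pidfd, 0, NULL, 0);

    printf("pidfd_send_signal -> %ld\n", ret);
    close(pidfd);
    return 0;
}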
+64
tools/include/uapi/drm/i915_drm.h
···
 #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
 #define I915_CONTEXT_DEFAULT_PRIORITY 0
 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+/*
+ * When using the following param, value should be a pointer to
+ * drm_i915_gem_context_param_sseu.
+ */
+#define I915_CONTEXT_PARAM_SSEU 0x7
         __u64 value;
+};
+
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+        /*
+         * Engine class & instance to be configured or queried.
+         */
+        __u16 engine_class;
+        __u16 engine_instance;
+
+        /*
+         * Unused for now. Must be cleared to zero.
+         */
+        __u32 flags;
+
+        /*
+         * Mask of slices to enable for the context. Valid values are a subset
+         * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+         */
+        __u64 slice_mask;
+
+        /*
+         * Mask of subslices to enable for the context. Valid values are a
+         * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+         */
+        __u64 subslice_mask;
+
+        /*
+         * Minimum/Maximum number of EUs to enable per subslice for the
+         * context. min_eus_per_subslice must be inferior or equal to
+         * max_eus_per_subslice.
+         */
+        __u16 min_eus_per_subslice;
+        __u16 max_eus_per_subslice;
+
+        /*
+         * Unused for now. Must be cleared to zero.
+         */
+        __u32 rsvd;
 };
 
 enum drm_i915_oa_format {
+1
tools/include/uapi/linux/fcntl.h
···
 #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
 #define F_SEAL_GROW 0x0004 /* prevent file from growing */
 #define F_SEAL_WRITE 0x0008 /* prevent writes */
+#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
 /* (1U << 31) is reserved for signed error codes */
 
 /*
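The new seal is used with memfds; a small sketch of applying it, assuming a Linux 5.1+ kernel and headers that already carry F_SEAL_FUTURE_WRITE (error handling trimmed). Unlike F_SEAL_WRITE, this seal can be set while existing writable mappings are still in place, blocking only future writes:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    int fd = memfd_create("sealed-buf", MFD_ALLOW_SEALING);

    if (fd < 0 || fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE) < 0)
        perror("memfd/seal");
    else
        printf("seals now: %#x\n", fcntl(fd, F_GET_SEALS));

    close(fd);
    return 0;
}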
+4
tools/include/uapi/linux/mman.h
···
 #define OVERCOMMIT_ALWAYS 1
 #define OVERCOMMIT_NEVER 2
 
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+
 /*
  * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
  * size other than the default is desired. See hugetlb_encode.h.
+2 -2
tools/perf/Makefile.perf
···
 mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
 mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
 
-$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
-	$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
+	$(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
 
 mount_flags_array := $(beauty_outdir)/mount_flags_array.c
 mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+4
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
···
 334 common rseq __x64_sys_rseq
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
+424 common pidfd_send_signal __x64_sys_pidfd_send_signal
+425 common io_uring_setup __x64_sys_io_uring_setup
+426 common io_uring_enter __x64_sys_io_uring_enter
+427 common io_uring_register __x64_sys_io_uring_register
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
+1 -1
tools/perf/check-headers.sh
···
 # diff with extra ignore lines
 check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
 check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
-check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
 check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
 
 # diff non-symmetric files
+63 -14
tools/perf/scripts/python/exported-sql-viewer.py
···
 from PySide.QtCore import *
 from PySide.QtGui import *
 from PySide.QtSql import *
+pyside_version_1 = True
 from decimal import *
 from ctypes import *
 from multiprocessing import Process, Array, Value, Event
···
                 " (" + dsoname(query.value(15)) + ")")
     return data
 
+def BranchDataPrepWA(query):
+    data = []
+    data.append(query.value(0))
+    # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+    data.append("{:>19}".format(query.value(1)))
+    for i in xrange(2, 8):
+        data.append(query.value(i))
+    data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+                " (" + dsoname(query.value(11)) + ")" + " -> " +
+                tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+                " (" + dsoname(query.value(15)) + ")")
+    return data
+
 # Branch data model
 
 class BranchModel(TreeModel):
···
                 " AND evsel_id = " + str(self.event_id) +
                 " ORDER BY samples.id"
                 " LIMIT " + str(glb_chunk_sz))
-        self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+        if pyside_version_1 and sys.version_info[0] == 3:
+            prep = BranchDataPrepWA
+        else:
+            prep = BranchDataPrep
+        self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
         self.fetcher.done.connect(self.Update)
         self.fetcher.Fetch(glb_chunk_sz)
···
         return False
     return True
 
-# SQL data preparation
-
-def SQLTableDataPrep(query, count):
-    data = []
-    for i in xrange(count):
-        data.append(query.value(i))
-    return data
-
 # SQL table data model item
 
 class SQLTableItem():
···
         self.more = True
         self.populated = 0
         self.column_headers = column_headers
-        self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample)
+        self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): self.SQLTableDataPrep(x, y), self.AddSample)
         self.fetcher.done.connect(self.Update)
         self.fetcher.Fetch(glb_chunk_sz)
···
     def columnHeader(self, column):
         return self.column_headers[column]
 
+    def SQLTableDataPrep(self, query, count):
+        data = []
+        for i in xrange(count):
+            data.append(query.value(i))
+        return data
+
 # SQL automatic table data model
 
 class SQLAutoTableModel(SQLTableModel):
···
             QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
             while query.next():
                 column_headers.append(query.value(0))
+        if pyside_version_1 and sys.version_info[0] == 3:
+            if table_name == "samples_view":
+                self.SQLTableDataPrep = self.samples_view_DataPrep
+            if table_name == "samples":
+                self.SQLTableDataPrep = self.samples_DataPrep
         super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent)
+
+    def samples_view_DataPrep(self, query, count):
+        data = []
+        data.append(query.value(0))
+        # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+        data.append("{:>19}".format(query.value(1)))
+        for i in xrange(2, count):
+            data.append(query.value(i))
+        return data
+
+    def samples_DataPrep(self, query, count):
+        data = []
+        for i in xrange(9):
+            data.append(query.value(i))
+        # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+        data.append("{:>19}".format(query.value(9)))
+        for i in xrange(10, count):
+            data.append(query.value(i))
+        return data
 
 # Base class for custom ResizeColumnsToContents
···
         ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
         if not ok:
             return 0, ""
+        if sys.version_info[0] == 2:
+            result = inst.buffer.value
+        else:
+            result = inst.buffer.value.decode()
         # Return instruction length and the disassembled instruction text
         # For now, assume the length is in byte 166
-        return inst.xedd[166], inst.buffer.value
+        return inst.xedd[166], result
 
 def TryOpen(file_name):
     try:
···
     header = f.read(7)
     f.seek(pos)
     magic = header[0:4]
-    eclass = ord(header[4])
-    encoding = ord(header[5])
-    version = ord(header[6])
+    if sys.version_info[0] == 2:
+        eclass = ord(header[4])
+        encoding = ord(header[5])
+        version = ord(header[6])
+    else:
+        eclass = header[4]
+        encoding = header[5]
+        version = header[6]
     if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
         result = True if eclass == 2 else False
     return result
+11 -3
tools/perf/trace/beauty/mmap_flags.sh
···
 #!/bin/sh
 # SPDX-License-Identifier: LGPL-2.1
 
-if [ $# -ne 2 ] ; then
+if [ $# -ne 3 ] ; then
 	[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+	linux_header_dir=tools/include/uapi/linux
 	header_dir=tools/include/uapi/asm-generic
 	arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
 else
-	header_dir=$1
-	arch_header_dir=$2
+	linux_header_dir=$1
+	header_dir=$2
+	arch_header_dir=$3
 fi
 
+linux_mman=${linux_header_dir}/mman.h
 arch_mman=${arch_header_dir}/mman.h
 
 # those in egrep -vw are flags, we want just the bits
···
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
 egrep -q $regex ${arch_mman} && \
 (egrep $regex ${arch_mman} | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q $regex ${linux_mman} && \
+(egrep $regex ${linux_mman} | \
+	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
 	sed -r "s/$regex/\2 \1/g" | \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
 ([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
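The script emits C initializers of the form [ilog2(flag) + 1] = "NAME", and the change above makes it also scrape linux/mman.h, where MAP_SHARED and MAP_PRIVATE now live. A hedged sketch (not the actual generated mmap_flags_array.c) of how such an ilog2-indexed string table decodes an mmap flags word; the flag values are the generic ones visible in the header diffs above:

#include <stdio.h>

/* Entries keyed by bit position + 1, matching the printf format in the script. */
static const char *mmap_flags[65] = {
    [0 + 1] = "SHARED",     /* ilog2(0x01) + 1 */
    [1 + 1] = "PRIVATE",    /* ilog2(0x02) + 1 */
    [5 + 1] = "ANONYMOUS",  /* ilog2(0x20) + 1 */
};

int main(void)
{
    unsigned long flags = 0x22; /* MAP_PRIVATE | MAP_ANONYMOUS */

    for (int bit = 0; bit < 64; bit++)
        if ((flags & (1UL << bit)) && mmap_flags[bit + 1])
            printf("%s ", mmap_flags[bit + 1]);
    printf("\n");
    return 0;
}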
+1
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
···
 		break;
 	case OCSD_INSTR_ISB:
 	case OCSD_INSTR_DSB_DMB:
+	case OCSD_INSTR_WFI_WFE:
 	case OCSD_INSTR_OTHER:
 	default:
 		packet->last_instr_taken_branch = false;
-29
tools/perf/util/evlist.c
···
 	}
 }
 
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
-{
-	struct perf_event_attr attr = {
-		.type = PERF_TYPE_HARDWARE,
-		.config = PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel = 1,
-		.precise_ip = 3,
-	};
-
-	event_attr_init(&attr);
-
-	/*
-	 * Unnamed union member, not supported as struct member named
-	 * initializer in older compilers such as gcc 4.4.7
-	 */
-	attr.sample_period = 1;
-
-	while (attr.precise_ip != 0) {
-		int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
-		if (fd != -1) {
-			close(fd);
-			break;
-		}
-		--attr.precise_ip;
-	}
-
-	pattr->precise_ip = attr.precise_ip;
-}
-
 int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
 {
 	struct perf_evsel *evsel = perf_evsel__new_cycles(precise);
-2
tools/perf/util/evlist.h
···
 void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
 				     struct perf_evsel *tracking_evsel);
 
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
-
 struct perf_evsel *
 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 
+59 -13
tools/perf/util/evsel.c
···
 	if (!precise)
 		goto new_event;
 
-	perf_event_attr__set_max_precise_ip(&attr);
 	/*
 	 * Now let the usual logic to set up the perf_event_attr defaults
 	 * to kick in when we return and before perf_evsel__open() is called.
···
 	evsel = perf_evsel__new(&attr);
 	if (evsel == NULL)
 		goto out;
+
+	evsel->precise_max = true;
 
 	/* use asprintf() because free(evsel) assumes name is allocated */
 	if (asprintf(&evsel->name, "cycles%s%s%.*s",
···
 	}
 
 	if (evsel->precise_max)
-		perf_event_attr__set_max_precise_ip(attr);
+		attr->precise_ip = 3;
 
 	if (opts->all_user) {
 		attr->exclude_kernel = 1;
···
 	return true;
 }
 
+static void display_attr(struct perf_event_attr *attr)
+{
+	if (verbose >= 2) {
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+		fprintf(stderr, "perf_event_attr:\n");
+		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+	}
+}
+
+static int perf_event_open(struct perf_evsel *evsel,
+			   pid_t pid, int cpu, int group_fd,
+			   unsigned long flags)
+{
+	int precise_ip = evsel->attr.precise_ip;
+	int fd;
+
+	while (1) {
+		pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+			  pid, cpu, group_fd, flags);
+
+		fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags);
+		if (fd >= 0)
+			break;
+
+		/*
+		 * Do quick precise_ip fallback if:
+		 *  - there is precise_ip set in perf_event_attr
+		 *  - maximum precise is requested
+		 *  - sys_perf_event_open failed with ENOTSUP error,
+		 *    which is associated with wrong precise_ip
+		 */
+		if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP))
+			break;
+
+		/*
+		 * We tried all the precise_ip values, and it's
+		 * still failing, so leave it to standard fallback.
+		 */
+		if (!evsel->attr.precise_ip) {
+			evsel->attr.precise_ip = precise_ip;
+			break;
+		}
+
+		pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
+		evsel->attr.precise_ip--;
+		pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip);
+		display_attr(&evsel->attr);
+	}
+
+	return fd;
+}
+
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 		     struct thread_map *threads)
 {
···
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
 
-	if (verbose >= 2) {
-		fprintf(stderr, "%.60s\n", graph_dotted_line);
-		fprintf(stderr, "perf_event_attr:\n");
-		perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
-		fprintf(stderr, "%.60s\n", graph_dotted_line);
-	}
+	display_attr(&evsel->attr);
 
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
···
 
 		group_fd = get_group_fd(evsel, cpu, thread);
 retry_open:
-		pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
-			  pid, cpus->map[cpu], group_fd, flags);
-
 		test_attr__ready();
 
-		fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
-					 group_fd, flags);
+		fd = perf_event_open(evsel, pid, cpus->map[cpu],
+				     group_fd, flags);
 
 		FD(evsel, cpu, thread) = fd;
+8 -12
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
···
 		if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
 			decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
 						decoder->tsc_ctc_ratio_d;
-
-		/*
-		 * Allow for timestamps appearing to backwards because a TSC
-		 * packet has slipped past a MTC packet, so allow 2 MTC ticks
-		 * or ...
-		 */
-		decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
-					    decoder->tsc_ctc_ratio_n,
-					    decoder->tsc_ctc_ratio_d);
 	}
-	/* ... or 0x100 paranoia */
-	if (decoder->tsc_slip < 0x100)
-		decoder->tsc_slip = 0x100;
+
+	/*
+	 * A TSC packet can slip past MTC packets so that the timestamp appears
+	 * to go backwards. One estimate is that can be up to about 40 CPU
+	 * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+	 * slippage an order of magnitude more to be on the safe side.
+	 */
+	decoder->tsc_slip = 0x10000;
 
 	intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
 	intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+20 -12
tools/perf/util/machine.c
···
 		machine->vmlinux_map->end = ~0ULL;
 }
 
+static void machine__update_kernel_mmap(struct machine *machine,
+					u64 start, u64 end)
+{
+	struct map *map = machine__kernel_map(machine);
+
+	map__get(map);
+	map_groups__remove(&machine->kmaps, map);
+
+	machine__set_kernel_mmap(machine, start, end);
+
+	map_groups__insert(&machine->kmaps, map);
+	map__put(map);
+}
+
 int machine__create_kernel_maps(struct machine *machine)
 {
 	struct dso *kernel = machine__get_kernel(machine);
···
 			goto out_put;
 		}
 
-		/* we have a real start address now, so re-order the kmaps */
-		map = machine__kernel_map(machine);
-
-		map__get(map);
-		map_groups__remove(&machine->kmaps, map);
-
-		/* assume it's the last in the kmaps */
-		machine__set_kernel_mmap(machine, addr, ~0ULL);
-
-		map_groups__insert(&machine->kmaps, map);
-		map__put(map);
+		/*
+		 * we have a real start address now, so re-order the kmaps
+		 * assume it's the last in the kmaps
+		 */
+		machine__update_kernel_mmap(machine, addr, ~0ULL);
 	}
 
···
 		if (strstr(kernel->long_name, "vmlinux"))
 			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
 
-		machine__set_kernel_mmap(machine, event->mmap.start,
+		machine__update_kernel_mmap(machine, event->mmap.start,
 					 event->mmap.start + event->mmap.len);
 
 		/*
+10
tools/perf/util/pmu.c
···
 
 		if (!is_arm_pmu_core(name)) {
 			pname = pe->pmu ? pe->pmu : "cpu";
+
+			/*
+			 * uncore alias may be from different PMU
+			 * with common prefix
+			 */
+			if (pmu_is_uncore(name) &&
+			    !strncmp(pname, name, strlen(pname)))
+				goto new_alias;
+
 			if (strcmp(pname, name))
 				continue;
 		}
 
+new_alias:
 		/* need type casts to override 'const' */
 		__perf_pmu__new_alias(head, NULL, (char *)pe->name,
 				      (char *)pe->desc, (char *)pe->event,
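The new code path accepts a JSON alias whose "pmu" field is a common prefix of the actual uncore PMU name (only when pmu_is_uncore() holds), so a single alias entry can cover multiple instances of the same uncore unit. A minimal sketch of just the prefix test; the PMU names below are made up for illustration and the pmu_is_uncore() precondition is left out:

#include <stdio.h>
#include <string.h>

/* Same comparison as the new code path: a common prefix is enough. */
static int uncore_alias_matches(const char *pname, const char *name)
{
    return strncmp(pname, name, strlen(pname)) == 0;
}

int main(void)
{
    const char *pname = "uncore_cbox";  /* hypothetical alias table entry */
    const char *pmus[] = { "uncore_cbox_0", "uncore_cbox_1", "cpu" };

    for (size_t i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++)
        printf("%-14s -> %s\n", pmus[i],
               uncore_alias_matches(pname, pmus[i]) ? "alias applies" : "skipped");
    return 0;
}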