Merge tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:
"Hardware tracing:

- ARM:
* Print the size of the buffer consistently in hexadecimal in
ARM Coresight.
* Add Coresight snapshot mode support.
* Update --switch-events docs in 'perf record'.
* Support hardware-based PID tracing.
* Track task context switch for cpu-mode events.

- Vendor events:
* Add metric events JSON file for power10 platform

perf test:

- Get 'perf test' unit tests closer to kunit.

- Topology tests improvements.

- Remove bashisms from some tests.

perf bench:

- Fix memory leak of perf_cpu_map__new() in the futex benchmarks.

libbpf:

- Add some more weak libbpf functions to allow building with the
older libbpf versions present in distros.

libbeauty:

- Translate [gs]etsockopt 'level' argument integer values to
strings.

tools headers UAPI:

- Sync futex_waitv, arch prctl, sound, i915_drm and msr-index files
with the kernel sources.

Documentation:

- Add documentation to 'struct symbol'.

- Synchronize the definition of enum perf_hw_id with code in
tools/perf/design.txt"

* tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (67 commits)
perf tests: Remove bash constructs from stat_all_pmu.sh
perf tests: Remove bash construct from record+zstd_comp_decomp.sh
perf test: Remove bash construct from stat_bpf_counters.sh test
perf bench futex: Fix memory leak of perf_cpu_map__new()
tools arch x86: Sync the msr-index.h copy with the kernel sources
tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
tools headers UAPI: Sync sound/asound.h with the kernel sources
tools headers UAPI: Sync linux/prctl.h with the kernel sources
tools headers UAPI: Sync arch prctl headers with the kernel sources
perf tools: Add more weak libbpf functions
perf bpf: Avoid memory leak from perf_env__insert_btf()
perf symbols: Factor out annotation init/exit
perf symbols: Bit pack to save a byte
perf symbols: Add documentation to 'struct symbol'
tools headers UAPI: Sync files changed by new futex_waitv syscall
perf test bpf: Use ARRAY_SIZE() instead of ad-hoc equivalent, addressing array_size.cocci warning
perf arm-spe: Support hardware-based PID tracing
perf arm-spe: Save context ID in record
perf arm-spe: Update --switch-events docs in 'perf record'
perf arm-spe: Track task context switch for cpu-mode events
...

+2654 -1163
+2
tools/arch/x86/include/asm/msr-index.h
··· 625 625 626 626 #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc 627 627 628 + #define MSR_IA32_XFD 0x000001c4 629 + #define MSR_IA32_XFD_ERR 0x000001c5 628 630 #define MSR_IA32_XSS 0x00000da0 629 631 630 632 #define MSR_IA32_APICBASE 0x0000001b
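The two new constants cover the dynamically-enabled XSTATE (AMX) error reporting MSRs. As a hedged illustration only, not part of this merge, the sketch below reads MSR_IA32_XFD_ERR for CPU 0 through the msr character device, assuming the msr module is loaded and the caller has the required privileges:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_XFD_ERR 0x000001c5

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	/* The msr character device addresses registers by file offset. */
	if (pread(fd, &val, sizeof(val), MSR_IA32_XFD_ERR) != (ssize_t)sizeof(val)) {
		perror("pread");
		close(fd);
		return 1;
	}

	printf("MSR_IA32_XFD_ERR = %#llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}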
+4
tools/arch/x86/include/uapi/asm/prctl.h
··· 10 10 #define ARCH_GET_CPUID 0x1011 11 11 #define ARCH_SET_CPUID 0x1012 12 12 13 + #define ARCH_GET_XCOMP_SUPP 0x1021 14 + #define ARCH_GET_XCOMP_PERM 0x1022 15 + #define ARCH_REQ_XCOMP_PERM 0x1023 16 + 13 17 #define ARCH_MAP_VDSO_X32 0x2001 14 18 #define ARCH_MAP_VDSO_32 0x2002 15 19 #define ARCH_MAP_VDSO_64 0x2003
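These arch_prctl() requests are the userspace side of per-process XSTATE permission, introduced for AMX. A minimal sketch, assuming an x86-64 kernel that implements them, using XFEATURE_XTILEDATA (bit 18) purely as an example feature; this is illustration, not code from this merge:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_GET_XCOMP_SUPP	0x1021
#define ARCH_GET_XCOMP_PERM	0x1022
#define ARCH_REQ_XCOMP_PERM	0x1023
#define XFEATURE_XTILEDATA	18	/* example dynamically-enabled feature */

int main(void)
{
	unsigned long long supported = 0, permitted = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &supported)) {
		perror("ARCH_GET_XCOMP_SUPP");
		return 1;
	}

	/* Ask the kernel to permit use of the AMX tile data state. */
	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
		perror("ARCH_REQ_XCOMP_PERM");
		return 1;
	}

	syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &permitted);
	printf("supported %#llx, permitted %#llx\n", supported, permitted);
	return 0;
}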
+4 -1
tools/include/uapi/asm-generic/unistd.h
··· 880 880 #define __NR_process_mrelease 448 881 881 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) 882 882 883 + #define __NR_futex_waitv 449 884 + __SYSCALL(__NR_futex_waitv, sys_futex_waitv) 885 + 883 886 #undef __NR_syscalls 884 - #define __NR_syscalls 449 887 + #define __NR_syscalls 450 885 888 886 889 /* 887 890 * 32 bit systems traditionally used different
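For reference, a hedged sketch of invoking the new futex_waitv() syscall directly (number 449 as synced above). The struct layout is copied from the UAPI futex header; the single waiter simply times out after one second since nothing wakes it:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef __NR_futex_waitv
#define __NR_futex_waitv 449
#endif
#define FUTEX_32 2	/* futex word is 32 bits wide */

/* Layout copied from the synced include/uapi/linux/futex.h */
struct futex_waitv {
	uint64_t val;
	uint64_t uaddr;
	uint32_t flags;
	uint32_t __reserved;
};

static uint32_t futex_word;

int main(void)
{
	struct futex_waitv waiter = {
		.val	= 0,			/* value we expect *uaddr to hold */
		.uaddr	= (uintptr_t)&futex_word,
		.flags	= FUTEX_32,
	};
	struct timespec timeout;

	/* The timeout is absolute, here against CLOCK_MONOTONIC. */
	clock_gettime(CLOCK_MONOTONIC, &timeout);
	timeout.tv_sec += 1;

	if (syscall(__NR_futex_waitv, &waiter, 1, 0, &timeout, CLOCK_MONOTONIC) < 0)
		fprintf(stderr, "futex_waitv: %s\n", strerror(errno));

	return 0;
}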
+241 -1
tools/include/uapi/drm/i915_drm.h
··· 1522 1522 #define I915_TILING_NONE 0 1523 1523 #define I915_TILING_X 1 1524 1524 #define I915_TILING_Y 2 1525 + /* 1526 + * Do not add new tiling types here. The I915_TILING_* values are for 1527 + * de-tiling fence registers that no longer exist on modern platforms. Although 1528 + * the hardware may support new types of tiling in general (e.g., Tile4), we 1529 + * do not need to add them to the uapi that is specific to now-defunct ioctls. 1530 + */ 1525 1531 #define I915_TILING_LAST I915_TILING_Y 1526 1532 1527 1533 #define I915_BIT_6_SWIZZLE_NONE 0 ··· 1830 1824 * Extensions: 1831 1825 * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) 1832 1826 * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) 1827 + * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) 1833 1828 */ 1834 1829 #define I915_CONTEXT_PARAM_ENGINES 0xa 1835 1830 ··· 1853 1846 * attempted to use it, never re-use this context param number. 1854 1847 */ 1855 1848 #define I915_CONTEXT_PARAM_RINGSIZE 0xc 1849 + 1850 + /* 1851 + * I915_CONTEXT_PARAM_PROTECTED_CONTENT: 1852 + * 1853 + * Mark that the context makes use of protected content, which will result 1854 + * in the context being invalidated when the protected content session is. 1855 + * Given that the protected content session is killed on suspend, the device 1856 + * is kept awake for the lifetime of a protected context, so the user should 1857 + * make sure to dispose of them once done. 1858 + * This flag can only be set at context creation time and, when set to true, 1859 + * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE 1860 + * to false. This flag can't be set to true in conjunction with setting the 1861 + * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: 1862 + * 1863 + * .. code-block:: C 1864 + * 1865 + * struct drm_i915_gem_context_create_ext_setparam p_protected = { 1866 + * .base = { 1867 + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, 1868 + * }, 1869 + * .param = { 1870 + * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, 1871 + * .value = 1, 1872 + * } 1873 + * }; 1874 + * struct drm_i915_gem_context_create_ext_setparam p_norecover = { 1875 + * .base = { 1876 + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, 1877 + * .next_extension = to_user_pointer(&p_protected), 1878 + * }, 1879 + * .param = { 1880 + * .param = I915_CONTEXT_PARAM_RECOVERABLE, 1881 + * .value = 0, 1882 + * } 1883 + * }; 1884 + * struct drm_i915_gem_context_create_ext create = { 1885 + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, 1886 + * .extensions = to_user_pointer(&p_norecover); 1887 + * }; 1888 + * 1889 + * ctx_id = gem_context_create_ext(drm_fd, &create); 1890 + * 1891 + * In addition to the normal failure cases, setting this flag during context 1892 + * creation can result in the following errors: 1893 + * 1894 + * -ENODEV: feature not available 1895 + * -EPERM: trying to mark a recoverable or not bannable context as protected 1896 + */ 1897 + #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd 1856 1898 /* Must be kept compact -- no holes and well documented */ 1857 1899 1858 1900 __u64 value; ··· 2106 2050 } __attribute__((packed)) name__ 2107 2051 2108 2052 /** 2053 + * struct i915_context_engines_parallel_submit - Configure engine for 2054 + * parallel submission. 2055 + * 2056 + * Setup a slot in the context engine map to allow multiple BBs to be submitted 2057 + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU 2058 + * in parallel. 
Multiple hardware contexts are created internally in the i915 to 2059 + * run these BBs. Once a slot is configured for N BBs only N BBs can be 2060 + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user 2061 + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how 2062 + * many BBs there are based on the slot's configuration. The N BBs are the last 2063 + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. 2064 + * 2065 + * The default placement behavior is to create implicit bonds between each 2066 + * context if each context maps to more than 1 physical engine (e.g. context is 2067 + * a virtual engine). Also we only allow contexts of same engine class and these 2068 + * contexts must be in logically contiguous order. Examples of the placement 2069 + * behavior are described below. Lastly, the default is to not allow BBs to be 2070 + * preempted mid-batch. Rather insert coordinated preemption points on all 2071 + * hardware contexts between each set of BBs. Flags could be added in the future 2072 + * to change both of these default behaviors. 2073 + * 2074 + * Returns -EINVAL if hardware context placement configuration is invalid or if 2075 + * the placement configuration isn't supported on the platform / submission 2076 + * interface. 2077 + * Returns -ENODEV if extension isn't supported on the platform / submission 2078 + * interface. 2079 + * 2080 + * .. code-block:: none 2081 + * 2082 + * Examples syntax: 2083 + * CS[X] = generic engine of same class, logical instance X 2084 + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE 2085 + * 2086 + * Example 1 pseudo code: 2087 + * set_engines(INVALID) 2088 + * set_parallel(engine_index=0, width=2, num_siblings=1, 2089 + * engines=CS[0],CS[1]) 2090 + * 2091 + * Results in the following valid placement: 2092 + * CS[0], CS[1] 2093 + * 2094 + * Example 2 pseudo code: 2095 + * set_engines(INVALID) 2096 + * set_parallel(engine_index=0, width=2, num_siblings=2, 2097 + * engines=CS[0],CS[2],CS[1],CS[3]) 2098 + * 2099 + * Results in the following valid placements: 2100 + * CS[0], CS[1] 2101 + * CS[2], CS[3] 2102 + * 2103 + * This can be thought of as two virtual engines, each containing two 2104 + * engines thereby making a 2D array. However, there are bonds tying the 2105 + * entries together and placing restrictions on how they can be scheduled. 2106 + * Specifically, the scheduler can choose only vertical columns from the 2D 2107 + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the 2108 + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice 2109 + * versa. Same for CS[2] requires also using CS[3]. 2110 + * VE[0] = CS[0], CS[2] 2111 + * VE[1] = CS[1], CS[3] 2112 + * 2113 + * Example 3 pseudo code: 2114 + * set_engines(INVALID) 2115 + * set_parallel(engine_index=0, width=2, num_siblings=2, 2116 + * engines=CS[0],CS[1],CS[1],CS[3]) 2117 + * 2118 + * Results in the following valid and invalid placements: 2119 + * CS[0], CS[1] 2120 + * CS[1], CS[3] - Not logically contiguous, return -EINVAL 2121 + */ 2122 + struct i915_context_engines_parallel_submit { 2123 + /** 2124 + * @base: base user extension. 
2125 + */ 2126 + struct i915_user_extension base; 2127 + 2128 + /** 2129 + * @engine_index: slot for parallel engine 2130 + */ 2131 + __u16 engine_index; 2132 + 2133 + /** 2134 + * @width: number of contexts per parallel engine or in other words the 2135 + * number of batches in each submission 2136 + */ 2137 + __u16 width; 2138 + 2139 + /** 2140 + * @num_siblings: number of siblings per context or in other words the 2141 + * number of possible placements for each submission 2142 + */ 2143 + __u16 num_siblings; 2144 + 2145 + /** 2146 + * @mbz16: reserved for future use; must be zero 2147 + */ 2148 + __u16 mbz16; 2149 + 2150 + /** 2151 + * @flags: all undefined flags must be zero, currently not defined flags 2152 + */ 2153 + __u64 flags; 2154 + 2155 + /** 2156 + * @mbz64: reserved for future use; must be zero 2157 + */ 2158 + __u64 mbz64[3]; 2159 + 2160 + /** 2161 + * @engines: 2-d array of engine instances to configure parallel engine 2162 + * 2163 + * length = width (i) * num_siblings (j) 2164 + * index = j + i * num_siblings 2165 + */ 2166 + struct i915_engine_class_instance engines[0]; 2167 + 2168 + } __packed; 2169 + 2170 + #define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ 2171 + struct i915_user_extension base; \ 2172 + __u16 engine_index; \ 2173 + __u16 width; \ 2174 + __u16 num_siblings; \ 2175 + __u16 mbz16; \ 2176 + __u64 flags; \ 2177 + __u64 mbz64[3]; \ 2178 + struct i915_engine_class_instance engines[N__]; \ 2179 + } __attribute__((packed)) name__ 2180 + 2181 + /** 2109 2182 * DOC: Context Engine Map uAPI 2110 2183 * 2111 2184 * Context engine map is a new way of addressing engines when submitting batch- ··· 2293 2108 __u64 extensions; /* linked chain of extension blocks, 0 terminates */ 2294 2109 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ 2295 2110 #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ 2111 + #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ 2296 2112 struct i915_engine_class_instance engines[0]; 2297 2113 } __attribute__((packed)); 2298 2114 ··· 2912 2726 2913 2727 /** @flags: Engine flags. */ 2914 2728 __u64 flags; 2729 + #define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) 2915 2730 2916 2731 /** @capabilities: Capabilities of this engine. */ 2917 2732 __u64 capabilities; 2918 2733 #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) 2919 2734 #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) 2920 2735 2736 + /** @logical_instance: Logical instance of engine */ 2737 + __u16 logical_instance; 2738 + 2921 2739 /** @rsvd1: Reserved fields. */ 2922 - __u64 rsvd1[4]; 2740 + __u16 rsvd1[3]; 2741 + /** @rsvd2: Reserved fields. */ 2742 + __u64 rsvd2[3]; 2923 2743 }; 2924 2744 2925 2745 /** ··· 3171 2979 * 3172 2980 * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see 3173 2981 * struct drm_i915_gem_create_ext_memory_regions. 2982 + * 2983 + * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see 2984 + * struct drm_i915_gem_create_ext_protected_content. 3174 2985 */ 3175 2986 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 2987 + #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 3176 2988 __u64 extensions; 3177 2989 }; 3178 2990 ··· 3233 3037 */ 3234 3038 __u64 regions; 3235 3039 }; 3040 + 3041 + /** 3042 + * struct drm_i915_gem_create_ext_protected_content - The 3043 + * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. 
3044 + * 3045 + * If this extension is provided, buffer contents are expected to be protected 3046 + * by PXP encryption and require decryption for scan out and processing. This 3047 + * is only possible on platforms that have PXP enabled, on all other scenarios 3048 + * using this extension will cause the ioctl to fail and return -ENODEV. The 3049 + * flags parameter is reserved for future expansion and must currently be set 3050 + * to zero. 3051 + * 3052 + * The buffer contents are considered invalid after a PXP session teardown. 3053 + * 3054 + * The encryption is guaranteed to be processed correctly only if the object 3055 + * is submitted with a context created using the 3056 + * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks 3057 + * at submission time on the validity of the objects involved. 3058 + * 3059 + * Below is an example on how to create a protected object: 3060 + * 3061 + * .. code-block:: C 3062 + * 3063 + * struct drm_i915_gem_create_ext_protected_content protected_ext = { 3064 + * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, 3065 + * .flags = 0, 3066 + * }; 3067 + * struct drm_i915_gem_create_ext create_ext = { 3068 + * .size = PAGE_SIZE, 3069 + * .extensions = (uintptr_t)&protected_ext, 3070 + * }; 3071 + * 3072 + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); 3073 + * if (err) ... 3074 + */ 3075 + struct drm_i915_gem_create_ext_protected_content { 3076 + /** @base: Extension link. See struct i915_user_extension. */ 3077 + struct i915_user_extension base; 3078 + /** @flags: reserved for future usage, currently MBZ */ 3079 + __u32 flags; 3080 + }; 3081 + 3082 + /* ID of the protected content session managed by i915 when PXP is active */ 3083 + #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf 3236 3084 3237 3085 #if defined(__cplusplus) 3238 3086 }
+3
tools/include/uapi/linux/prctl.h
··· 268 268 # define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ 269 269 # define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ 270 270 # define PR_SCHED_CORE_MAX 4 271 + # define PR_SCHED_CORE_SCOPE_THREAD 0 272 + # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 273 + # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 271 274 272 275 #endif /* _LINUX_PRCTL_H */
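These scope values select the pid type operated on by the core-scheduling prctl(). A minimal sketch, assuming a kernel built with CONFIG_SCHED_CORE, that creates a cookie covering the calling thread group; illustration only, not code from this merge:

#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SCHED_CORE
#define PR_SCHED_CORE			62
#define PR_SCHED_CORE_CREATE		1
#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
#endif

int main(void)
{
	/* arg3 is the target pid (0 == current), arg4 the scope, arg5 must be 0 */
	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0,
		  PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0))
		perror("PR_SCHED_CORE_CREATE");
	else
		printf("core scheduling cookie created for this thread group\n");
	return 0;
}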
+1 -1
tools/include/uapi/sound/asound.h
··· 1002 1002 #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) 1003 1003 #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) 1004 1004 #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ 1005 - // (1 << 3) is unused. 1005 + /* (1 << 3) is unused. */ 1006 1006 #define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ 1007 1007 #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ 1008 1008 #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
+1 -1
tools/perf/Documentation/perf-record.txt
··· 469 469 470 470 --switch-events:: 471 471 Record context switch events i.e. events of type PERF_RECORD_SWITCH or 472 - PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) 472 + PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE) 473 473 switch events will be enabled automatically, which can be suppressed by 474 474 by the option --no-switch-events. 475 475
+11 -11
tools/perf/Makefile.perf
··· 516 516 $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl) 517 517 $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@ 518 518 519 - socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c 520 - socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh 521 - 522 - $(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl) 523 - $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@ 524 - 525 - socket_arrays := $(beauty_outdir)/socket_arrays.c 519 + socket_arrays := $(beauty_outdir)/socket.c 526 520 socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh 527 521 528 - $(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl) 529 - $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@ 522 + $(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl) 523 + $(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@ 524 + 525 + sockaddr_arrays := $(beauty_outdir)/sockaddr.c 526 + sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh 527 + 528 + $(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl) 529 + $(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@ 530 530 531 531 vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c 532 532 vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux ··· 736 736 $(sndrv_ctl_ioctl_array) \ 737 737 $(kcmp_type_array) \ 738 738 $(kvm_ioctl_array) \ 739 - $(socket_ipproto_array) \ 740 739 $(socket_arrays) \ 740 + $(sockaddr_arrays) \ 741 741 $(vhost_virtio_ioctl_array) \ 742 742 $(madvise_behavior_array) \ 743 743 $(mmap_flags_array) \ ··· 1113 1113 $(OUTPUT)$(sndrv_pcm_ioctl_array) \ 1114 1114 $(OUTPUT)$(kvm_ioctl_array) \ 1115 1115 $(OUTPUT)$(kcmp_type_array) \ 1116 - $(OUTPUT)$(socket_ipproto_array) \ 1117 1116 $(OUTPUT)$(socket_arrays) \ 1117 + $(OUTPUT)$(sockaddr_arrays) \ 1118 1118 $(OUTPUT)$(vhost_virtio_ioctl_array) \ 1119 1119 $(OUTPUT)$(perf_ioctl_array) \ 1120 1120 $(OUTPUT)$(prctl_option_array) \
+1 -1
tools/perf/arch/arm/include/arch-tests.h
··· 2 2 #ifndef ARCH_TESTS_H 3 3 #define ARCH_TESTS_H 4 4 5 - extern struct test arch_tests[]; 5 + extern struct test_suite *arch_tests[]; 6 6 7 7 #endif
+4 -12
tools/perf/arch/arm/tests/arch-tests.c
··· 3 3 #include "tests/tests.h" 4 4 #include "arch-tests.h" 5 5 6 - struct test arch_tests[] = { 6 + struct test_suite *arch_tests[] = { 7 7 #ifdef HAVE_DWARF_UNWIND_SUPPORT 8 - { 9 - .desc = "DWARF unwind", 10 - .func = test__dwarf_unwind, 11 - }, 8 + &suite__dwarf_unwind, 12 9 #endif 13 - { 14 - .desc = "Vectors page", 15 - .func = test__vectors_page, 16 - }, 17 - { 18 - .func = NULL, 19 - }, 10 + &suite__vectors_page, 11 + NULL, 20 12 };
+3 -2
tools/perf/arch/arm/tests/vectors-page.c
··· 9 9 10 10 #define VECTORS__MAP_NAME "[vectors]" 11 11 12 - int test__vectors_page(struct test *test __maybe_unused, 13 - int subtest __maybe_unused) 12 + static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 14 13 { 15 14 void *start, *end; 16 15 ··· 21 22 22 23 return TEST_OK; 23 24 } 25 + 26 + DEFINE_SUITE("Vectors page", vectors_page);
+1 -1
tools/perf/arch/arm64/include/arch-tests.h
··· 2 2 #ifndef ARCH_TESTS_H 3 3 #define ARCH_TESTS_H 4 4 5 - extern struct test arch_tests[]; 5 + extern struct test_suite *arch_tests[]; 6 6 7 7 #endif
+3 -8
tools/perf/arch/arm64/tests/arch-tests.c
··· 3 3 #include "tests/tests.h" 4 4 #include "arch-tests.h" 5 5 6 - struct test arch_tests[] = { 6 + struct test_suite *arch_tests[] = { 7 7 #ifdef HAVE_DWARF_UNWIND_SUPPORT 8 - { 9 - .desc = "DWARF unwind", 10 - .func = test__dwarf_unwind, 11 - }, 8 + &suite__dwarf_unwind, 12 9 #endif 13 - { 14 - .func = NULL, 15 - }, 10 + NULL, 16 11 };
+282 -1
tools/perf/arch/arm64/util/arm-spe.c
··· 23 23 #include "../../../util/auxtrace.h" 24 24 #include "../../../util/record.h" 25 25 #include "../../../util/arm-spe.h" 26 + #include <tools/libc_compat.h> // reallocarray 26 27 27 28 #define KiB(x) ((x) * 1024) 28 29 #define MiB(x) ((x) * 1024 * 1024) ··· 32 31 struct auxtrace_record itr; 33 32 struct perf_pmu *arm_spe_pmu; 34 33 struct evlist *evlist; 34 + int wrapped_cnt; 35 + bool *wrapped; 35 36 }; 36 37 37 38 static void arm_spe_set_timestamp(struct auxtrace_record *itr, ··· 87 84 return 0; 88 85 } 89 86 87 + static void 88 + arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts, 89 + bool privileged) 90 + { 91 + /* 92 + * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor 93 + * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for 94 + * unprivileged users. 95 + * 96 + * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for 97 + * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages 98 + * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the 99 + * user is likely to get an error as they exceed their mlock limmit. 100 + */ 101 + 102 + /* 103 + * No size were given to '-S' or '-m,', so go with the default 104 + */ 105 + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { 106 + if (privileged) { 107 + opts->auxtrace_mmap_pages = MiB(4) / page_size; 108 + } else { 109 + opts->auxtrace_mmap_pages = KiB(128) / page_size; 110 + if (opts->mmap_pages == UINT_MAX) 111 + opts->mmap_pages = KiB(256) / page_size; 112 + } 113 + } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) { 114 + opts->mmap_pages = KiB(256) / page_size; 115 + } 116 + 117 + /* 118 + * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the 119 + * auxtrace mmap area. 120 + */ 121 + if (!opts->auxtrace_snapshot_size) 122 + opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size; 123 + 124 + /* 125 + * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big 126 + * enough to fit the requested snapshot size. 127 + */ 128 + if (!opts->auxtrace_mmap_pages) { 129 + size_t sz = opts->auxtrace_snapshot_size; 130 + 131 + sz = round_up(sz, page_size) / page_size; 132 + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); 133 + } 134 + } 135 + 90 136 static int arm_spe_recording_options(struct auxtrace_record *itr, 91 137 struct evlist *evlist, 92 138 struct record_opts *opts) ··· 167 115 if (!opts->full_auxtrace) 168 116 return 0; 169 117 118 + /* 119 + * we are in snapshot mode. 120 + */ 121 + if (opts->auxtrace_snapshot_mode) { 122 + /* 123 + * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with 124 + * default values. 125 + */ 126 + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) 127 + arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged); 128 + 129 + /* 130 + * Snapshot size can't be bigger than the auxtrace area. 131 + */ 132 + if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { 133 + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", 134 + opts->auxtrace_snapshot_size, 135 + opts->auxtrace_mmap_pages * (size_t)page_size); 136 + return -EINVAL; 137 + } 138 + 139 + /* 140 + * Something went wrong somewhere - this shouldn't happen. 
141 + */ 142 + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { 143 + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); 144 + return -EINVAL; 145 + } 146 + } 147 + 170 148 /* We are in full trace mode but '-m,xyz' wasn't specified */ 171 149 if (!opts->auxtrace_mmap_pages) { 172 150 if (privileged) { ··· 220 138 } 221 139 } 222 140 141 + if (opts->auxtrace_snapshot_mode) 142 + pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME, 143 + opts->auxtrace_snapshot_size); 223 144 224 145 /* 225 146 * To obtain the auxtrace buffer file descriptor, the auxtrace event ··· 251 166 tracking_evsel->core.attr.sample_period = 1; 252 167 253 168 /* In per-cpu case, always need the time of mmap events etc */ 254 - if (!perf_cpu_map__empty(cpus)) 169 + if (!perf_cpu_map__empty(cpus)) { 255 170 evsel__set_sample_bit(tracking_evsel, TIME); 171 + evsel__set_sample_bit(tracking_evsel, CPU); 172 + 173 + /* also track task context switch */ 174 + if (!record_opts__no_switch_events(opts)) 175 + tracking_evsel->core.attr.context_switch = 1; 176 + } 177 + 178 + return 0; 179 + } 180 + 181 + static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, 182 + struct record_opts *opts, 183 + const char *str) 184 + { 185 + unsigned long long snapshot_size = 0; 186 + char *endptr; 187 + 188 + if (str) { 189 + snapshot_size = strtoull(str, &endptr, 0); 190 + if (*endptr || snapshot_size > SIZE_MAX) 191 + return -1; 192 + } 193 + 194 + opts->auxtrace_snapshot_mode = true; 195 + opts->auxtrace_snapshot_size = snapshot_size; 196 + 197 + return 0; 198 + } 199 + 200 + static int arm_spe_snapshot_start(struct auxtrace_record *itr) 201 + { 202 + struct arm_spe_recording *ptr = 203 + container_of(itr, struct arm_spe_recording, itr); 204 + struct evsel *evsel; 205 + 206 + evlist__for_each_entry(ptr->evlist, evsel) { 207 + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) 208 + return evsel__disable(evsel); 209 + } 210 + return -EINVAL; 211 + } 212 + 213 + static int arm_spe_snapshot_finish(struct auxtrace_record *itr) 214 + { 215 + struct arm_spe_recording *ptr = 216 + container_of(itr, struct arm_spe_recording, itr); 217 + struct evsel *evsel; 218 + 219 + evlist__for_each_entry(ptr->evlist, evsel) { 220 + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) 221 + return evsel__enable(evsel); 222 + } 223 + return -EINVAL; 224 + } 225 + 226 + static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx) 227 + { 228 + bool *wrapped; 229 + int cnt = ptr->wrapped_cnt, new_cnt, i; 230 + 231 + /* 232 + * No need to allocate, so return early. 233 + */ 234 + if (idx < cnt) 235 + return 0; 236 + 237 + /* 238 + * Make ptr->wrapped as big as idx. 239 + */ 240 + new_cnt = idx + 1; 241 + 242 + /* 243 + * Free'ed in arm_spe_recording_free(). 244 + */ 245 + wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool)); 246 + if (!wrapped) 247 + return -ENOMEM; 248 + 249 + /* 250 + * init new allocated values. 
251 + */ 252 + for (i = cnt; i < new_cnt; i++) 253 + wrapped[i] = false; 254 + 255 + ptr->wrapped_cnt = new_cnt; 256 + ptr->wrapped = wrapped; 257 + 258 + return 0; 259 + } 260 + 261 + static bool arm_spe_buffer_has_wrapped(unsigned char *buffer, 262 + size_t buffer_size, u64 head) 263 + { 264 + u64 i, watermark; 265 + u64 *buf = (u64 *)buffer; 266 + size_t buf_size = buffer_size; 267 + 268 + /* 269 + * Defensively handle the case where head might be continually increasing - if its value is 270 + * equal or greater than the size of the ring buffer, then we can safely determine it has 271 + * wrapped around. Otherwise, continue to detect if head might have wrapped. 272 + */ 273 + if (head >= buffer_size) 274 + return true; 275 + 276 + /* 277 + * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer. 278 + */ 279 + watermark = buf_size - 512; 280 + 281 + /* 282 + * The value of head is somewhere within the size of the ring buffer. This can be that there 283 + * hasn't been enough data to fill the ring buffer yet or the trace time was so long that 284 + * head has numerically wrapped around. To find we need to check if we have data at the 285 + * very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed 286 + * out and there is a fresh mapping with every new session. 287 + */ 288 + 289 + /* 290 + * head is less than 512 byte from the end of the ring buffer. 291 + */ 292 + if (head > watermark) 293 + watermark = head; 294 + 295 + /* 296 + * Speed things up by using 64 bit transactions (see "u64 *buf" above) 297 + */ 298 + watermark /= sizeof(u64); 299 + buf_size /= sizeof(u64); 300 + 301 + /* 302 + * If we find trace data at the end of the ring buffer, head has been there and has 303 + * numerically wrapped around at least once. 304 + */ 305 + for (i = watermark; i < buf_size; i++) 306 + if (buf[i]) 307 + return true; 308 + 309 + return false; 310 + } 311 + 312 + static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, 313 + struct auxtrace_mmap *mm, unsigned char *data, 314 + u64 *head, u64 *old) 315 + { 316 + int err; 317 + bool wrapped; 318 + struct arm_spe_recording *ptr = 319 + container_of(itr, struct arm_spe_recording, itr); 320 + 321 + /* 322 + * Allocate memory to keep track of wrapping if this is the first 323 + * time we deal with this *mm. 324 + */ 325 + if (idx >= ptr->wrapped_cnt) { 326 + err = arm_spe_alloc_wrapped_array(ptr, idx); 327 + if (err) 328 + return err; 329 + } 330 + 331 + /* 332 + * Check to see if *head has wrapped around. If it hasn't only the 333 + * amount of data between *head and *old is snapshot'ed to avoid 334 + * bloating the perf.data file with zeros. But as soon as *head has 335 + * wrapped around the entire size of the AUX ring buffer it taken. 336 + */ 337 + wrapped = ptr->wrapped[idx]; 338 + if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { 339 + wrapped = true; 340 + ptr->wrapped[idx] = true; 341 + } 342 + 343 + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", 344 + __func__, idx, (size_t)*old, (size_t)*head, mm->len); 345 + 346 + /* 347 + * No wrap has occurred, we can just use *head and *old. 348 + */ 349 + if (!wrapped) 350 + return 0; 351 + 352 + /* 353 + * *head has wrapped around - adjust *head and *old to pickup the 354 + * entire content of the AUX buffer. 
355 + */ 356 + if (*head >= mm->len) { 357 + *old = *head - mm->len; 358 + } else { 359 + *head += mm->len; 360 + *old = *head - mm->len; 361 + } 256 362 257 363 return 0; 258 364 } ··· 462 186 struct arm_spe_recording *sper = 463 187 container_of(itr, struct arm_spe_recording, itr); 464 188 189 + free(sper->wrapped); 465 190 free(sper); 466 191 } 467 192 ··· 484 207 485 208 sper->arm_spe_pmu = arm_spe_pmu; 486 209 sper->itr.pmu = arm_spe_pmu; 210 + sper->itr.snapshot_start = arm_spe_snapshot_start; 211 + sper->itr.snapshot_finish = arm_spe_snapshot_finish; 212 + sper->itr.find_snapshot = arm_spe_find_snapshot; 213 + sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; 487 214 sper->itr.recording_options = arm_spe_recording_options; 488 215 sper->itr.info_priv_size = arm_spe_info_priv_size; 489 216 sper->itr.info_fill = arm_spe_info_fill;
+1 -1
tools/perf/arch/powerpc/include/arch-tests.h
··· 2 2 #ifndef ARCH_TESTS_H 3 3 #define ARCH_TESTS_H 4 4 5 - extern struct test arch_tests[]; 5 + extern struct test_suite *arch_tests[]; 6 6 7 7 #endif
+4 -8
tools/perf/arch/powerpc/tests/arch-tests.c
··· 3 3 #include "tests/tests.h" 4 4 #include "arch-tests.h" 5 5 6 - struct test arch_tests[] = { 6 + 7 + struct test_suite *arch_tests[] = { 7 8 #ifdef HAVE_DWARF_UNWIND_SUPPORT 8 - { 9 - .desc = "Test dwarf unwind", 10 - .func = test__dwarf_unwind, 11 - }, 9 + &suite__dwarf_unwind, 12 10 #endif 13 - { 14 - .func = NULL, 15 - }, 11 + NULL, 16 12 };
+1
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
··· 370 370 446 common landlock_restrict_self sys_landlock_restrict_self 371 371 447 common memfd_secret sys_memfd_secret 372 372 448 common process_mrelease sys_process_mrelease 373 + 449 common futex_waitv sys_futex_waitv 373 374 374 375 # 375 376 # Due to a historical design error, certain syscalls are numbered differently
+7 -7
tools/perf/arch/x86/include/arch-tests.h
··· 2 2 #ifndef ARCH_TESTS_H 3 3 #define ARCH_TESTS_H 4 4 5 - struct test; 5 + struct test_suite; 6 6 7 7 /* Tests */ 8 - int test__rdpmc(struct test *test, int subtest); 9 - int test__insn_x86(struct test *test, int subtest); 10 - int test__intel_pt_pkt_decoder(struct test *test, int subtest); 11 - int test__bp_modify(struct test *test, int subtest); 12 - int test__x86_sample_parsing(struct test *test, int subtest); 8 + int test__rdpmc(struct test_suite *test, int subtest); 9 + int test__insn_x86(struct test_suite *test, int subtest); 10 + int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); 11 + int test__bp_modify(struct test_suite *test, int subtest); 12 + int test__x86_sample_parsing(struct test_suite *test, int subtest); 13 13 14 - extern struct test arch_tests[]; 14 + extern struct test_suite *arch_tests[]; 15 15 16 16 #endif
+19 -30
tools/perf/arch/x86/tests/arch-tests.c
··· 3 3 #include "tests/tests.h" 4 4 #include "arch-tests.h" 5 5 6 - struct test arch_tests[] = { 7 - { 8 - .desc = "x86 rdpmc", 9 - .func = test__rdpmc, 10 - }, 11 - #ifdef HAVE_DWARF_UNWIND_SUPPORT 12 - { 13 - .desc = "DWARF unwind", 14 - .func = test__dwarf_unwind, 15 - }, 16 - #endif 6 + DEFINE_SUITE("x86 rdpmc", rdpmc); 17 7 #ifdef HAVE_AUXTRACE_SUPPORT 18 - { 19 - .desc = "x86 instruction decoder - new instructions", 20 - .func = test__insn_x86, 21 - }, 22 - { 23 - .desc = "Intel PT packet decoder", 24 - .func = test__intel_pt_pkt_decoder, 25 - }, 8 + DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); 9 + DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder); 26 10 #endif 27 11 #if defined(__x86_64__) 28 - { 29 - .desc = "x86 bp modify", 30 - .func = test__bp_modify, 31 - }, 12 + DEFINE_SUITE("x86 bp modify", bp_modify); 32 13 #endif 33 - { 34 - .desc = "x86 Sample parsing", 35 - .func = test__x86_sample_parsing, 36 - }, 37 - { 38 - .func = NULL, 39 - }, 14 + DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing); 40 15 16 + struct test_suite *arch_tests[] = { 17 + &suite__rdpmc, 18 + #ifdef HAVE_DWARF_UNWIND_SUPPORT 19 + &suite__dwarf_unwind, 20 + #endif 21 + #ifdef HAVE_AUXTRACE_SUPPORT 22 + &suite__insn_x86, 23 + &suite__intel_pt_pkt_decoder, 24 + #endif 25 + #if defined(__x86_64__) 26 + &suite__bp_modify, 27 + #endif 28 + &suite__x86_sample_parsing, 29 + NULL, 41 30 };
+1 -1
tools/perf/arch/x86/tests/bp-modify.c
··· 204 204 return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; 205 205 } 206 206 207 - int test__bp_modify(struct test *test __maybe_unused, 207 + int test__bp_modify(struct test_suite *test __maybe_unused, 208 208 int subtest __maybe_unused) 209 209 { 210 210 TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
+1 -1
tools/perf/arch/x86/tests/insn-x86.c
··· 173 173 * verbose (-v) option to see all the instructions and whether or not they 174 174 * decoded successfully. 175 175 */ 176 - int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused) 176 + int test__insn_x86(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 177 177 { 178 178 int ret = 0; 179 179
+1 -1
tools/perf/arch/x86/tests/intel-cqm.c
··· 37 37 * the last read counter value to avoid triggering a WARN_ON_ONCE() in 38 38 * smp_call_function_many() caused by sending IPIs from NMI context. 39 39 */ 40 - int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused) 40 + int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 41 41 { 42 42 struct evlist *evlist = NULL; 43 43 struct evsel *evsel = NULL;
+1 -1
tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
··· 289 289 * This test feeds byte sequences to the Intel PT packet decoder and checks the 290 290 * results. Changes to the packet context are also checked. 291 291 */ 292 - int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused) 292 + int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 293 293 { 294 294 struct test_data *d = data; 295 295 int ret;
+1 -1
tools/perf/arch/x86/tests/rdpmc.c
··· 157 157 return 0; 158 158 } 159 159 160 - int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) 160 + int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 161 161 { 162 162 int status = 0; 163 163 int wret = 0;
+1 -1
tools/perf/arch/x86/tests/sample-parsing.c
··· 115 115 * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. 116 116 * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. 117 117 */ 118 - int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) 118 + int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 119 119 { 120 120 return do_test(PERF_SAMPLE_WEIGHT_STRUCT); 121 121 }
+1
tools/perf/bench/futex-lock-pi.c
··· 233 233 print_summary(); 234 234 235 235 free(worker); 236 + perf_cpu_map__put(cpu); 236 237 return ret; 237 238 err: 238 239 usage_with_options(bench_futex_lock_pi_usage, options);
+1
tools/perf/bench/futex-requeue.c
··· 294 294 print_summary(); 295 295 296 296 free(worker); 297 + perf_cpu_map__put(cpu); 297 298 return ret; 298 299 err: 299 300 usage_with_options(bench_futex_requeue_usage, options);
+1
tools/perf/bench/futex-wake-parallel.c
··· 329 329 print_summary(); 330 330 331 331 free(blocked_worker); 332 + perf_cpu_map__put(cpu); 332 333 return ret; 333 334 } 334 335 #endif /* HAVE_PTHREAD_BARRIER */
+1
tools/perf/bench/futex-wake.c
··· 222 222 print_summary(); 223 223 224 224 free(worker); 225 + perf_cpu_map__put(cpu); 225 226 return ret; 226 227 }
+4
tools/perf/builtin-trace.c
··· 979 979 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, 980 980 { .name = "getrlimit", 981 981 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 982 + { .name = "getsockopt", 983 + .arg = { [1] = STRARRAY(level, socket_level), }, }, 982 984 { .name = "gettid", .errpid = true, }, 983 985 { .name = "ioctl", 984 986 .arg = { ··· 1123 1121 .arg = { [0] = STRARRAY(which, itimers), }, }, 1124 1122 { .name = "setrlimit", 1125 1123 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 1124 + { .name = "setsockopt", 1125 + .arg = { [1] = STRARRAY(level, socket_level), }, }, 1126 1126 { .name = "socket", 1127 1127 .arg = { [0] = STRARRAY(family, socket_families), 1128 1128 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+3
tools/perf/design.txt
··· 106 106 PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, 107 107 PERF_COUNT_HW_BRANCH_MISSES = 5, 108 108 PERF_COUNT_HW_BUS_CYCLES = 6, 109 + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, 110 + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, 111 + PERF_COUNT_HW_REF_CPU_CYCLES = 9, 109 112 }; 110 113 111 114 These are standardized types of events that work relatively uniformly
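The newly documented ids are the generalized hardware events accepted by perf_event_open(2) with PERF_TYPE_HARDWARE. A hedged example counting front-end stall cycles for the current thread, assuming the PMU maps this generic event:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0 /* this thread */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
		printf("stalled front-end cycles: %lld\n", count);

	close(fd);
	return 0;
}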
+676
tools/perf/pmu-events/arch/powerpc/power10/metrics.json
··· 1 + [ 2 + { 3 + "BriefDescription": "Percentage of cycles that are run cycles", 4 + "MetricExpr": "PM_RUN_CYC / PM_CYC * 100", 5 + "MetricGroup": "General", 6 + "MetricName": "RUN_CYCLES_RATE", 7 + "ScaleUnit": "1%" 8 + }, 9 + { 10 + "BriefDescription": "Average cycles per completed instruction", 11 + "MetricExpr": "PM_CYC / PM_INST_CMPL", 12 + "MetricGroup": "CPI", 13 + "MetricName": "CYCLES_PER_INSTRUCTION" 14 + }, 15 + { 16 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason", 17 + "MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL", 18 + "MetricGroup": "CPI", 19 + "MetricName": "DISPATCHED_CPI" 20 + }, 21 + { 22 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush", 23 + "MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL", 24 + "MetricGroup": "CPI", 25 + "MetricName": "DISPATCHED_FLUSH_CPI" 26 + }, 27 + { 28 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss", 29 + "MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL", 30 + "MetricGroup": "CPI", 31 + "MetricName": "DISPATCHED_TRANSLATION_CPI" 32 + }, 33 + { 34 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss", 35 + "MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL", 36 + "MetricGroup": "CPI", 37 + "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI" 38 + }, 39 + { 40 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss", 41 + "MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL", 42 + "MetricGroup": "CPI", 43 + "MetricName": "DISPATCHED_ITLB_MISS_CPI" 44 + }, 45 + { 46 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss", 47 + "MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL", 48 + "MetricGroup": "CPI", 49 + "MetricName": "DISPATCHED_IC_MISS_CPI" 50 + }, 51 + { 52 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2", 53 + "MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL", 54 + "MetricGroup": "CPI", 55 + "MetricName": "DISPATCHED_IC_L2_CPI" 56 + }, 57 + { 58 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3", 59 + "MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL", 60 + "MetricGroup": "CPI", 61 + "MetricName": "DISPATCHED_IC_L3_CPI" 62 + }, 63 + { 64 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3", 65 + "MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL", 66 + "MetricGroup": "CPI", 67 + "MetricName": "DISPATCHED_IC_L3MISS_CPI" 68 + }, 69 + { 70 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict", 71 + "MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL", 72 + "MetricGroup": "CPI", 73 + "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI" 74 + }, 75 + { 76 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict", 77 + "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / 
PM_RUN_INST_CMPL", 78 + "MetricGroup": "CPI", 79 + "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI" 80 + }, 81 + { 82 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict", 83 + "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL", 84 + "MetricGroup": "CPI", 85 + "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI" 86 + }, 87 + { 88 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict", 89 + "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL", 90 + "MetricGroup": "CPI", 91 + "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI" 92 + }, 93 + { 94 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict", 95 + "MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL", 96 + "MetricGroup": "CPI", 97 + "MetricName": "DISPATCHED_BR_MPRED_CPI" 98 + }, 99 + { 100 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason", 101 + "MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL", 102 + "MetricGroup": "CPI", 103 + "MetricName": "DISPATCHED_HELD_CPI" 104 + }, 105 + { 106 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch", 107 + "MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL", 108 + "MetricGroup": "CPI", 109 + "MetricName": "DISP_HELD_STALL_SYNC_CPI" 110 + }, 111 + { 112 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard", 113 + "MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL", 114 + "MetricGroup": "CPI", 115 + "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI" 116 + }, 117 + { 118 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full", 119 + "MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL", 120 + "MetricGroup": "CPI", 121 + "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI" 122 + }, 123 + { 124 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full", 125 + "MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL", 126 + "MetricGroup": "CPI", 127 + "MetricName": "DISPATCHED_HELD_RENAME_CPI" 128 + }, 129 + { 130 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full", 131 + "MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL", 132 + "MetricGroup": "CPI", 133 + "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI" 134 + }, 135 + { 136 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full", 137 + "MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL", 138 + "MetricGroup": "CPI", 139 + "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI" 140 + }, 141 + { 142 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason", 143 + "MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / 
PM_RUN_INST_CMPL", 144 + "MetricGroup": "CPI", 145 + "MetricName": "DISPATCHED_HELD_OTHER_CPI" 146 + }, 147 + { 148 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason", 149 + "MetricExpr": "PM_ISSUE_STALL / PM_RUN_INST_CMPL", 150 + "MetricGroup": "CPI", 151 + "MetricName": "ISSUE_STALL_CPI" 152 + }, 153 + { 154 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units", 155 + "MetricExpr": "PM_EXEC_STALL / PM_RUN_INST_CMPL", 156 + "MetricGroup": "CPI", 157 + "MetricName": "EXECUTION_STALL_CPI" 158 + }, 159 + { 160 + "BriefDescription": "Average cycles per completed instruction spent executing an NTC instruction that gets flushed some time after dispatch", 161 + "MetricExpr": "PM_EXEC_STALL_NTC_FLUSH / PM_RUN_INST_CMPL", 162 + "MetricGroup": "CPI", 163 + "MetricName": "NTC_FLUSH_STALL_CPI" 164 + }, 165 + { 166 + "BriefDescription": "Average cycles per completed instruction when the NTF instruction finishes at dispatch", 167 + "MetricExpr": "PM_EXEC_STALL_FIN_AT_DISP / PM_RUN_INST_CMPL", 168 + "MetricGroup": "CPI", 169 + "MetricName": "FIN_AT_DISP_STALL_CPI" 170 + }, 171 + { 172 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the branch unit", 173 + "MetricExpr": "PM_EXEC_STALL_BRU / PM_RUN_INST_CMPL", 174 + "MetricGroup": "CPI", 175 + "MetricName": "BRU_STALL_CPI" 176 + }, 177 + { 178 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a simple fixed point instruction that is executing in the LSU", 179 + "MetricExpr": "PM_EXEC_STALL_SIMPLE_FX / PM_RUN_INST_CMPL", 180 + "MetricGroup": "CPI", 181 + "MetricName": "SIMPLE_FX_STALL_CPI" 182 + }, 183 + { 184 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the VSU", 185 + "MetricExpr": "PM_EXEC_STALL_VSU / PM_RUN_INST_CMPL", 186 + "MetricGroup": "CPI", 187 + "MetricName": "VSU_STALL_CPI" 188 + }, 189 + { 190 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units", 191 + "MetricExpr": "PM_EXEC_STALL_TRANSLATION / PM_RUN_INST_CMPL", 192 + "MetricGroup": "CPI", 193 + "MetricName": "TRANSLATION_STALL_CPI" 194 + }, 195 + { 196 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load or store that suffered a translation miss", 197 + "MetricExpr": "PM_EXEC_STALL_DERAT_ONLY_MISS / PM_RUN_INST_CMPL", 198 + "MetricGroup": "CPI", 199 + "MetricName": "DERAT_ONLY_MISS_STALL_CPI" 200 + }, 201 + { 202 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is recovering from a TLB miss", 203 + "MetricExpr": "PM_EXEC_STALL_DERAT_DTLB_MISS / PM_RUN_INST_CMPL", 204 + "MetricGroup": "CPI", 205 + "MetricName": "DERAT_DTLB_MISS_STALL_CPI" 206 + }, 207 + { 208 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the LSU", 209 + "MetricExpr": "PM_EXEC_STALL_LSU / PM_RUN_INST_CMPL", 210 + "MetricGroup": "CPI", 211 + "MetricName": "LSU_STALL_CPI" 212 + }, 213 + { 214 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load that is executing in the LSU", 215 + "MetricExpr": "PM_EXEC_STALL_LOAD / PM_RUN_INST_CMPL", 216 + "MetricGroup": "CPI", 217 + "MetricName": "LOAD_STALL_CPI" 218 + }, 219 + { 
220 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3", 221 + "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL", 222 + "MetricGroup": "CPI", 223 + "MetricName": "DMISS_L2L3_STALL_CPI" 224 + }, 225 + { 226 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, with an RC dispatch conflict", 227 + "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL", 228 + "MetricGroup": "CPI", 229 + "MetricName": "DMISS_L2L3_CONFLICT_STALL_CPI" 230 + }, 231 + { 232 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, without an RC dispatch conflict", 233 + "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT / PM_RUN_INST_CMPL", 234 + "MetricGroup": "CPI", 235 + "MetricName": "DMISS_L2L3_NOCONFLICT_STALL_CPI" 236 + }, 237 + { 238 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a source beyond the local L2 and local L3", 239 + "MetricExpr": "PM_EXEC_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL", 240 + "MetricGroup": "CPI", 241 + "MetricName": "DMISS_L3MISS_STALL_CPI" 242 + }, 243 + { 244 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a neighbor chiplet's L2 or L3 in the same chip", 245 + "MetricExpr": "PM_EXEC_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL", 246 + "MetricGroup": "CPI", 247 + "MetricName": "DMISS_L21_L31_STALL_CPI" 248 + }, 249 + { 250 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from L4, local memory or OpenCAPI chip", 251 + "MetricExpr": "PM_EXEC_STALL_DMISS_LMEM / PM_RUN_INST_CMPL", 252 + "MetricGroup": "CPI", 253 + "MetricName": "DMISS_LMEM_STALL_CPI" 254 + }, 255 + { 256 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a remote chip (cache, L4, memory or OpenCAPI) in the same group", 257 + "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_CHIP / PM_RUN_INST_CMPL", 258 + "MetricGroup": "CPI", 259 + "MetricName": "DMISS_OFF_CHIP_STALL_CPI" 260 + }, 261 + { 262 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a distant chip (cache, L4, memory or OpenCAPI chip)", 263 + "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_NODE / PM_RUN_INST_CMPL", 264 + "MetricGroup": "CPI", 265 + "MetricName": "DMISS_OFF_NODE_STALL_CPI" 266 + }, 267 + { 268 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a TLBIEL instruction", 269 + "MetricExpr": "PM_EXEC_STALL_TLBIEL / PM_RUN_INST_CMPL", 270 + "MetricGroup": "CPI", 271 + "MetricName": "TLBIEL_STALL_CPI" 272 + }, 273 + { 274 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is finishing a load after its data has been reloaded from a data source beyond the local L1, OR when the LSU is processing an L1-hit, OR when the NTF instruction merged with another load in the LMQ", 275 + "MetricExpr": "PM_EXEC_STALL_LOAD_FINISH / PM_RUN_INST_CMPL", 276 + "MetricGroup": "CPI", 277 + "MetricName": "LOAD_FINISH_STALL_CPI" 278 + }, 279 + { 280 + 
"BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store that is executing in the LSU", 281 + "MetricExpr": "PM_EXEC_STALL_STORE / PM_RUN_INST_CMPL", 282 + "MetricGroup": "CPI", 283 + "MetricName": "STORE_STALL_CPI" 284 + }, 285 + { 286 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is in the store unit outside of handling store misses or other special store operations", 287 + "MetricExpr": "PM_EXEC_STALL_STORE_PIPE / PM_RUN_INST_CMPL", 288 + "MetricGroup": "CPI", 289 + "MetricName": "STORE_PIPE_STALL_CPI" 290 + }, 291 + { 292 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store whose cache line was not resident in the L1 and had to wait for allocation of the missing line into the L1", 293 + "MetricExpr": "PM_EXEC_STALL_STORE_MISS / PM_RUN_INST_CMPL", 294 + "MetricGroup": "CPI", 295 + "MetricName": "STORE_MISS_STALL_CPI" 296 + }, 297 + { 298 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a TLBIE instruction waiting for a response from the L2", 299 + "MetricExpr": "PM_EXEC_STALL_TLBIE / PM_RUN_INST_CMPL", 300 + "MetricGroup": "CPI", 301 + "MetricName": "TLBIE_STALL_CPI" 302 + }, 303 + { 304 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a PTESYNC instruction", 305 + "MetricExpr": "PM_EXEC_STALL_PTESYNC / PM_RUN_INST_CMPL", 306 + "MetricGroup": "CPI", 307 + "MetricName": "PTESYNC_STALL_CPI" 308 + }, 309 + { 310 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because the thread was blocked", 311 + "MetricExpr": "PM_CMPL_STALL / PM_RUN_INST_CMPL", 312 + "MetricGroup": "CPI", 313 + "MetricName": "COMPLETION_STALL_CPI" 314 + }, 315 + { 316 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because it was interrupted by ANY exception", 317 + "MetricExpr": "PM_CMPL_STALL_EXCEPTION / PM_RUN_INST_CMPL", 318 + "MetricGroup": "CPI", 319 + "MetricName": "EXCEPTION_COMPLETION_STALL_CPI" 320 + }, 321 + { 322 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is stuck at finish waiting for the non-speculative finish of either a STCX instruction waiting for its result or a load waiting for non-critical sectors of data and ECC", 323 + "MetricExpr": "PM_CMPL_STALL_MEM_ECC / PM_RUN_INST_CMPL", 324 + "MetricGroup": "CPI", 325 + "MetricName": "MEM_ECC_COMPLETION_STALL_CPI" 326 + }, 327 + { 328 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a STCX instruction waiting for resolution from the nest", 329 + "MetricExpr": "PM_CMPL_STALL_STCX / PM_RUN_INST_CMPL", 330 + "MetricGroup": "CPI", 331 + "MetricName": "STCX_COMPLETION_STALL_CPI" 332 + }, 333 + { 334 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a LWSYNC instruction waiting to complete", 335 + "MetricExpr": "PM_CMPL_STALL_LWSYNC / PM_RUN_INST_CMPL", 336 + "MetricGroup": "CPI", 337 + "MetricName": "LWSYNC_COMPLETION_STALL_CPI" 338 + }, 339 + { 340 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a HWSYNC instruction stuck at finish waiting for a response from the L2", 341 + "MetricExpr": "PM_CMPL_STALL_HWSYNC / PM_RUN_INST_CMPL", 342 + "MetricGroup": "CPI", 343 + "MetricName": "HWSYNC_COMPLETION_STALL_CPI" 344 + }, 345 + { 346 + "BriefDescription": "Average 
cycles per completed instruction when the NTC instruction required special handling before completion", 347 + "MetricExpr": "PM_CMPL_STALL_SPECIAL / PM_RUN_INST_CMPL", 348 + "MetricGroup": "CPI", 349 + "MetricName": "SPECIAL_COMPLETION_STALL_CPI" 350 + }, 351 + { 352 + "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread", 353 + "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL", 354 + "MetricGroup": "CPI", 355 + "MetricName": "DISPATCHED_FETCH_CPI" 356 + }, 357 + { 358 + "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management", 359 + "MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL", 360 + "MetricGroup": "CPI", 361 + "MetricName": "DISPATCHED_HELD_HALT_CPI" 362 + }, 363 + { 364 + "BriefDescription": "Percentage of flushes per completed instruction", 365 + "MetricExpr": "PM_FLUSH / PM_RUN_INST_CMPL * 100", 366 + "MetricGroup": "Others", 367 + "MetricName": "FLUSH_RATE", 368 + "ScaleUnit": "1%" 369 + }, 370 + { 371 + "BriefDescription": "Percentage of flushes due to a branch mispredict per completed instruction", 372 + "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100", 373 + "MetricGroup": "Others", 374 + "MetricName": "BR_MPRED_FLUSH_RATE", 375 + "ScaleUnit": "1%" 376 + }, 377 + { 378 + "BriefDescription": "Percentage of branch mispredictions per completed instruction", 379 + "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL", 380 + "MetricGroup": "Others", 381 + "MetricName": "BRANCH_MISPREDICTION_RATE" 382 + }, 383 + { 384 + "BriefDescription": "Percentage of finished loads that missed in the L1", 385 + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", 386 + "MetricGroup": "Others", 387 + "MetricName": "L1_LD_MISS_RATIO", 388 + "ScaleUnit": "1%" 389 + }, 390 + { 391 + "BriefDescription": "Percentage of completed instructions that were loads that missed the L1", 392 + "MetricExpr": "PM_LD_MISS_L1 / PM_RUN_INST_CMPL * 100", 393 + "MetricGroup": "Others", 394 + "MetricName": "L1_LD_MISS_RATE", 395 + "ScaleUnit": "1%" 396 + }, 397 + { 398 + "BriefDescription": "Percentage of completed instructions when the DPTEG required for the load/store instruction in execution was missing from the TLB", 399 + "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL * 100", 400 + "MetricGroup": "Others", 401 + "MetricName": "DTLB_MISS_RATE", 402 + "ScaleUnit": "1%" 403 + }, 404 + { 405 + "BriefDescription": "Average number of completed instructions dispatched per instruction completed", 406 + "MetricExpr": "PM_INST_DISP / PM_RUN_INST_CMPL", 407 + "MetricGroup": "General", 408 + "MetricName": "DISPATCH_PER_INST_CMPL" 409 + }, 410 + { 411 + "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1 or L2", 412 + "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100", 413 + "MetricGroup": "General", 414 + "MetricName": "L2_LD_MISS_RATE", 415 + "ScaleUnit": "1%" 416 + }, 417 + { 418 + "BriefDescription": "Percentage of completed instructions that were demand fetches that missed the L1 icache", 419 + "MetricExpr": "PM_L1_ICACHE_MISS / PM_RUN_INST_CMPL * 100", 420 + "MetricGroup": "Instruction_Misses", 421 + "MetricName": "L1_INST_MISS_RATE", 422 + "ScaleUnit": "1%" 423 + }, 424 + { 425 + "BriefDescription": "Percentage of completed instructions that were demand fetches that reloaded from beyond the L3 icache", 426 + "MetricExpr": 
"PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100", 427 + "MetricGroup": "General", 428 + "MetricName": "L3_INST_MISS_RATE", 429 + "ScaleUnit": "1%" 430 + }, 431 + { 432 + "BriefDescription": "Average number of completed instructions per cycle", 433 + "MetricExpr": "PM_INST_CMPL / PM_CYC", 434 + "MetricGroup": "General", 435 + "MetricName": "IPC" 436 + }, 437 + { 438 + "BriefDescription": "Average number of cycles per completed instruction group", 439 + "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", 440 + "MetricGroup": "General", 441 + "MetricName": "CYCLES_PER_COMPLETED_INSTRUCTIONS_SET" 442 + }, 443 + { 444 + "BriefDescription": "Percentage of cycles when at least 1 instruction dispatched", 445 + "MetricExpr": "PM_1PLUS_PPC_DISP / PM_RUN_CYC * 100", 446 + "MetricGroup": "General", 447 + "MetricName": "CYCLES_ATLEAST_ONE_INST_DISPATCHED", 448 + "ScaleUnit": "1%" 449 + }, 450 + { 451 + "BriefDescription": "Average number of finished loads per completed instruction", 452 + "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", 453 + "MetricGroup": "General", 454 + "MetricName": "LOADS_PER_INST" 455 + }, 456 + { 457 + "BriefDescription": "Average number of finished stores per completed instruction", 458 + "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", 459 + "MetricGroup": "General", 460 + "MetricName": "STORES_PER_INST" 461 + }, 462 + { 463 + "BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction", 464 + "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100", 465 + "MetricGroup": "dL1_Reloads", 466 + "MetricName": "DL1_RELOAD_FROM_L2_MISS_RATE", 467 + "ScaleUnit": "1%" 468 + }, 469 + { 470 + "BriefDescription": "Percentage of demand loads that reloaded from beyond the L3 per completed instruction", 471 + "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100", 472 + "MetricGroup": "dL1_Reloads", 473 + "MetricName": "DL1_RELOAD_FROM_L3_MISS_RATE", 474 + "ScaleUnit": "1%" 475 + }, 476 + { 477 + "BriefDescription": "Percentage of DERAT misses with 4k page size per completed instruction", 478 + "MetricExpr": "PM_DERAT_MISS_4K / PM_RUN_INST_CMPL * 100", 479 + "MetricGroup": "Translation", 480 + "MetricName": "DERAT_4K_MISS_RATE", 481 + "ScaleUnit": "1%" 482 + }, 483 + { 484 + "BriefDescription": "Percentage of DERAT misses with 64k page size per completed instruction", 485 + "MetricExpr": "PM_DERAT_MISS_64K / PM_RUN_INST_CMPL * 100", 486 + "MetricGroup": "Translation", 487 + "MetricName": "DERAT_64K_MISS_RATE", 488 + "ScaleUnit": "1%" 489 + }, 490 + { 491 + "BriefDescription": "Average number of run cycles per completed instruction", 492 + "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", 493 + "MetricGroup": "General", 494 + "MetricName": "RUN_CPI" 495 + }, 496 + { 497 + "BriefDescription": "Percentage of DERAT misses per completed instruction", 498 + "MetricExpr": "PM_DERAT_MISS / PM_RUN_INST_CMPL * 100", 499 + "MetricGroup": "Translation", 500 + "MetricName": "DERAT_MISS_RATE", 501 + "ScaleUnit": "1%" 502 + }, 503 + { 504 + "BriefDescription": "Average number of completed instructions per run cycle", 505 + "MetricExpr": "PM_RUN_INST_CMPL / PM_RUN_CYC", 506 + "MetricGroup": "General", 507 + "MetricName": "RUN_IPC" 508 + }, 509 + { 510 + "BriefDescription": "Average number of completed instructions per instruction group", 511 + "MetricExpr": "PM_RUN_INST_CMPL / PM_1PLUS_PPC_CMPL", 512 + "MetricGroup": "General", 513 + "MetricName": "AVERAGE_COMPLETED_INSTRUCTION_SET_SIZE" 514 + }, 515 + { 516 + "BriefDescription": "Average number of finished 
instructions per completed instructions", 517 + "MetricExpr": "PM_INST_FIN / PM_RUN_INST_CMPL", 518 + "MetricGroup": "General", 519 + "MetricName": "INST_FIN_PER_CMPL" 520 + }, 521 + { 522 + "BriefDescription": "Average cycles per completed instruction when the NTF instruction is completing and the finish was overlooked", 523 + "MetricExpr": "PM_EXEC_STALL_UNKNOWN / PM_RUN_INST_CMPL", 524 + "MetricGroup": "General", 525 + "MetricName": "EXEC_STALL_UNKOWN_CPI" 526 + }, 527 + { 528 + "BriefDescription": "Percentage of finished branches that were taken", 529 + "MetricExpr": "PM_BR_TAKEN_CMPL / PM_BR_FIN * 100", 530 + "MetricGroup": "General", 531 + "MetricName": "TAKEN_BRANCHES", 532 + "ScaleUnit": "1%" 533 + }, 534 + { 535 + "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1, L2, or the L3", 536 + "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100", 537 + "MetricGroup": "General", 538 + "MetricName": "L3_LD_MISS_RATE", 539 + "ScaleUnit": "1%" 540 + }, 541 + { 542 + "BriefDescription": "Average number of finished branches per completed instruction", 543 + "MetricExpr": "PM_BR_FIN / PM_RUN_INST_CMPL", 544 + "MetricGroup": "General", 545 + "MetricName": "BRANCHES_PER_INST" 546 + }, 547 + { 548 + "BriefDescription": "Average number of instructions finished in the LSU per completed instruction", 549 + "MetricExpr": "PM_LSU_FIN / PM_RUN_INST_CMPL", 550 + "MetricGroup": "General", 551 + "MetricName": "LSU_PER_INST" 552 + }, 553 + { 554 + "BriefDescription": "Average number of instructions finished in the VSU per completed instruction", 555 + "MetricExpr": "PM_VSU_FIN / PM_RUN_INST_CMPL", 556 + "MetricGroup": "General", 557 + "MetricName": "VSU_PER_INST" 558 + }, 559 + { 560 + "BriefDescription": "Average number of TLBIE instructions finished in the LSU per completed instruction", 561 + "MetricExpr": "PM_TLBIE_FIN / PM_RUN_INST_CMPL", 562 + "MetricGroup": "General", 563 + "MetricName": "TLBIE_PER_INST" 564 + }, 565 + { 566 + "BriefDescription": "Average number of STCX instructions finshed per completed instruction", 567 + "MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL", 568 + "MetricGroup": "General", 569 + "MetricName": "STXC_PER_INST" 570 + }, 571 + { 572 + "BriefDescription": "Average number of LARX instructions finshed per completed instruction", 573 + "MetricExpr": "PM_LARX_FIN / PM_RUN_INST_CMPL", 574 + "MetricGroup": "General", 575 + "MetricName": "LARX_PER_INST" 576 + }, 577 + { 578 + "BriefDescription": "Average number of PTESYNC instructions finshed per completed instruction", 579 + "MetricExpr": "PM_PTESYNC_FIN / PM_RUN_INST_CMPL", 580 + "MetricGroup": "General", 581 + "MetricName": "PTESYNC_PER_INST" 582 + }, 583 + { 584 + "BriefDescription": "Average number of simple fixed-point instructions finshed in the store unit per completed instruction", 585 + "MetricExpr": "PM_FX_LSU_FIN / PM_RUN_INST_CMPL", 586 + "MetricGroup": "General", 587 + "MetricName": "FX_PER_INST" 588 + }, 589 + { 590 + "BriefDescription": "Percentage of demand load misses that reloaded the L1 cache", 591 + "MetricExpr": "PM_LD_DEMAND_MISS_L1 / PM_LD_MISS_L1 * 100", 592 + "MetricGroup": "General", 593 + "MetricName": "DL1_MISS_RELOADS", 594 + "ScaleUnit": "1%" 595 + }, 596 + { 597 + "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L2", 598 + "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_DEMAND_MISS_L1 * 100", 599 + "MetricGroup": "dL1_Reloads", 600 + "MetricName": "DL1_RELOAD_FROM_L2_MISS", 601 + "ScaleUnit": "1%" 602 
+ }, 603 + { 604 + "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L3", 605 + "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_DEMAND_MISS_L1 * 100", 606 + "MetricGroup": "dL1_Reloads", 607 + "MetricName": "DL1_RELOAD_FROM_L3_MISS", 608 + "ScaleUnit": "1%" 609 + }, 610 + { 611 + "BriefDescription": "Percentage of cycles stalled due to the NTC instruction waiting for a load miss to resolve from a source beyond the local L2 and local L3", 612 + "MetricExpr": "DMISS_L3MISS_STALL_CPI / RUN_CPI * 100", 613 + "MetricGroup": "General", 614 + "MetricName": "DCACHE_MISS_CPI", 615 + "ScaleUnit": "1%" 616 + }, 617 + { 618 + "BriefDescription": "Percentage of DERAT misses with 2M page size per completed instruction", 619 + "MetricExpr": "PM_DERAT_MISS_2M / PM_RUN_INST_CMPL * 100", 620 + "MetricGroup": "Translation", 621 + "MetricName": "DERAT_2M_MISS_RATE", 622 + "ScaleUnit": "1%" 623 + }, 624 + { 625 + "BriefDescription": "Percentage of DERAT misses with 16M page size per completed instruction", 626 + "MetricExpr": "PM_DERAT_MISS_16M / PM_RUN_INST_CMPL * 100", 627 + "MetricGroup": "Translation", 628 + "MetricName": "DERAT_16M_MISS_RATE", 629 + "ScaleUnit": "1%" 630 + }, 631 + { 632 + "BriefDescription": "DERAT miss ratio for 4K page size", 633 + "MetricExpr": "PM_DERAT_MISS_4K / PM_DERAT_MISS", 634 + "MetricGroup": "Translation", 635 + "MetricName": "DERAT_4K_MISS_RATIO" 636 + }, 637 + { 638 + "BriefDescription": "DERAT miss ratio for 2M page size", 639 + "MetricExpr": "PM_DERAT_MISS_2M / PM_DERAT_MISS", 640 + "MetricGroup": "Translation", 641 + "MetricName": "DERAT_2M_MISS_RATIO" 642 + }, 643 + { 644 + "BriefDescription": "DERAT miss ratio for 16M page size", 645 + "MetricExpr": "PM_DERAT_MISS_16M / PM_DERAT_MISS", 646 + "MetricGroup": "Translation", 647 + "MetricName": "DERAT_16M_MISS_RATIO" 648 + }, 649 + { 650 + "BriefDescription": "DERAT miss ratio for 64K page size", 651 + "MetricExpr": "PM_DERAT_MISS_64K / PM_DERAT_MISS", 652 + "MetricGroup": "Translation", 653 + "MetricName": "DERAT_64K_MISS_RATIO" 654 + }, 655 + { 656 + "BriefDescription": "Percentage of DERAT misses that resulted in TLB reloads", 657 + "MetricExpr": "PM_DTLB_MISS / PM_DERAT_MISS * 100", 658 + "MetricGroup": "Translation", 659 + "MetricName": "DERAT_MISS_RELOAD", 660 + "ScaleUnit": "1%" 661 + }, 662 + { 663 + "BriefDescription": "Percentage of icache misses that were reloaded from beyond the local L3", 664 + "MetricExpr": "PM_INST_FROM_L3MISS / PM_L1_ICACHE_MISS * 100", 665 + "MetricGroup": "Instruction_Misses", 666 + "MetricName": "INST_FROM_L3_MISS", 667 + "ScaleUnit": "1%" 668 + }, 669 + { 670 + "BriefDescription": "Percentage of icache reloads from the beyond the L3 per completed instruction", 671 + "MetricExpr": "PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100", 672 + "MetricGroup": "Instruction_Misses", 673 + "MetricName": "INST_FROM_L3_MISS_RATE", 674 + "ScaleUnit": "1%" 675 + } 676 + ]
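Most of the power10 metric entries above follow one of two shapes: a stall or finish counter divided by PM_RUN_INST_CMPL (instructions completed while the run latch is set) for the *_CPI and *_RATE metrics, or one counter divided by a closely related counter for the *_RATIO metrics, with the percentage-style entries additionally multiplied by 100 and tagged "ScaleUnit": "1%". As a worked example, using counter values that are purely illustrative and not taken from any real run:

    \mathrm{STORE\_STALL\_CPI} \;=\; \frac{\mathrm{PM\_EXEC\_STALL\_STORE}}{\mathrm{PM\_RUN\_INST\_CMPL}} \;=\; \frac{2.5\times10^{6}}{1.0\times10^{6}} \;=\; 2.5\ \text{stall cycles per completed instruction}

The DCACHE_MISS_CPI entry is the notable exception: its expression divides two derived metrics (DMISS_L3MISS_STALL_CPI / RUN_CPI) rather than two raw counters.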
+4 -2
tools/perf/tests/api-io.c
··· 289 289 return ret; 290 290 } 291 291 292 - int test__api_io(struct test *test __maybe_unused, 293 - int subtest __maybe_unused) 292 + static int test__api_io(struct test_suite *test __maybe_unused, 293 + int subtest __maybe_unused) 294 294 { 295 295 int ret = 0; 296 296 ··· 302 302 ret = TEST_FAIL; 303 303 return ret; 304 304 } 305 + 306 + DEFINE_SUITE("Test api io", api_io);
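This api-io change is the first of the many mechanical conversions in this series: each test's entry point becomes a static function taking a struct test_suite pointer, and the file exports its suite through DEFINE_SUITE(). The macro and struct definitions live in tools/perf/tests/tests.h, which is not among the hunks shown here, so the following is only a sketch of what they plausibly look like, inferred from how suite__api_io and the other suites are consumed in builtin-test.c further down:

    /* Sketch only -- the real definitions are in tools/perf/tests/tests.h,
     * which this diff does not include. */
    struct test_suite;
    typedef int (*test_fnptr)(struct test_suite *test, int subtest);

    struct test_case {
            const char *name;
            const char *desc;
            const char *skip_reason;
            test_fnptr run_case;
    };

    struct test_suite {
            const char *desc;
            struct test_case *test_cases;
            void *priv;
    };

    #define TEST_CASE(description, _name)                           \
            { .name = #_name, .desc = description,                  \
              .run_case = test__##_name, }

    #define DEFINE_SUITE(description, _name)                        \
            struct test_case tests__##_name[] = {                   \
                    TEST_CASE(description, _name),                  \
                    { .name = NULL, },                              \
            };                                                      \
            struct test_suite suite__##_name = {                    \
                    .desc = description,                            \
                    .test_cases = tests__##_name,                   \
            }

A single-case suite therefore keeps behaving like the old flat struct test entry, while multi-case files (bpf.c, clang.c and llvm.c below) can hand builtin-test.c an explicit test_case array instead of the old get_nr()/get_desc() callbacks.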
+3 -1
tools/perf/tests/attr.c
··· 178 178 return system(cmd) ? TEST_FAIL : TEST_OK; 179 179 } 180 180 181 - int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) 181 + static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 182 182 { 183 183 struct stat st; 184 184 char path_perf[PATH_MAX]; ··· 207 207 208 208 return TEST_SKIP; 209 209 } 210 + 211 + DEFINE_SUITE("Setup struct perf_event_attr", attr);
+3 -1
tools/perf/tests/backward-ring-buffer.c
··· 82 82 } 83 83 84 84 85 - int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused) 85 + static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 86 86 { 87 87 int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; 88 88 char pid[16], sbuf[STRERR_BUFSIZE]; ··· 167 167 evlist__delete(evlist); 168 168 return ret; 169 169 } 170 + 171 + DEFINE_SUITE("Read backward ring buffer", backward_ring_buffer);
+3 -1
tools/perf/tests/bitmap.c
··· 40 40 return ret; 41 41 } 42 42 43 - int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused) 43 + static int test__bitmap_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 44 44 { 45 45 TEST_ASSERT_VAL("failed to convert map", test_bitmap("1")); 46 46 TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5")); ··· 51 51 TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40")); 52 52 return 0; 53 53 } 54 + 55 + DEFINE_SUITE("Print bitmap", bitmap_print);
+20 -16
tools/perf/tests/bp_account.c
··· 19 19 #include "../perf-sys.h" 20 20 #include "cloexec.h" 21 21 22 + /* 23 + * PowerPC and S390 do not support creation of instruction breakpoints using the 24 + * perf_event interface. 25 + * 26 + * Just disable the test for these architectures until these issues are 27 + * resolved. 28 + */ 29 + #if defined(__powerpc__) || defined(__s390x__) 30 + #define BP_ACCOUNT_IS_SUPPORTED 0 31 + #else 32 + #define BP_ACCOUNT_IS_SUPPORTED 1 33 + #endif 34 + 22 35 static volatile long the_var; 23 36 24 37 static noinline int test_function(void) ··· 186 173 * we create another watchpoint to ensure 187 174 * the slot accounting is correct 188 175 */ 189 - int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused) 176 + static int test__bp_accounting(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 190 177 { 191 178 int has_ioctl = detect_ioctl(); 192 179 int wp_cnt = detect_cnt(false); 193 180 int bp_cnt = detect_cnt(true); 194 181 int share = detect_share(wp_cnt, bp_cnt); 182 + 183 + if (!BP_ACCOUNT_IS_SUPPORTED) { 184 + pr_debug("Test not supported on this architecture"); 185 + return TEST_SKIP; 186 + } 195 187 196 188 pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n", 197 189 wp_cnt, bp_cnt, has_ioctl, share); ··· 207 189 return bp_accounting(wp_cnt, share); 208 190 } 209 191 210 - bool test__bp_account_is_supported(void) 211 - { 212 - /* 213 - * PowerPC and S390 do not support creation of instruction 214 - * breakpoints using the perf_event interface. 215 - * 216 - * Just disable the test for these architectures until these 217 - * issues are resolved. 218 - */ 219 - #if defined(__powerpc__) || defined(__s390x__) 220 - return false; 221 - #else 222 - return true; 223 - #endif 224 - } 192 + DEFINE_SUITE("Breakpoint accounting", bp_accounting);
+7 -27
tools/perf/tests/bp_signal.c
··· 161 161 return count; 162 162 } 163 163 164 - int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused) 164 + static int test__bp_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 165 165 { 166 166 struct sigaction sa; 167 167 long long count1, count2, count3; 168 + 169 + if (!BP_SIGNAL_IS_SUPPORTED) { 170 + pr_debug("Test not supported on this architecture"); 171 + return TEST_SKIP; 172 + } 168 173 169 174 /* setup SIGIO signal handler */ 170 175 memset(&sa, 0, sizeof(struct sigaction)); ··· 290 285 TEST_OK : TEST_FAIL; 291 286 } 292 287 293 - bool test__bp_signal_is_supported(void) 294 - { 295 - /* 296 - * PowerPC and S390 do not support creation of instruction 297 - * breakpoints using the perf_event interface. 298 - * 299 - * ARM requires explicit rounding down of the instruction 300 - * pointer in Thumb mode, and then requires the single-step 301 - * to be handled explicitly in the overflow handler to avoid 302 - * stepping into the SIGIO handler and getting stuck on the 303 - * breakpointed instruction. 304 - * 305 - * Since arm64 has the same issue with arm for the single-step 306 - * handling, this case also gets stuck on the breakpointed 307 - * instruction. 308 - * 309 - * Just disable the test for these architectures until these 310 - * issues are resolved. 311 - */ 312 - #if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \ 313 - defined(__aarch64__) 314 - return false; 315 - #else 316 - return true; 317 - #endif 318 - } 288 + DEFINE_SUITE("Breakpoint overflow signal handler", bp_signal);
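The breakpoint tests also drop their is_supported() callbacks. bp_account.c now carries its own BP_ACCOUNT_IS_SUPPORTED constant (visible in its hunk above), while bp_signal.c and bp_signal_overflow.c check BP_SIGNAL_IS_SUPPORTED, whose definition is not in the hunks shown; presumably it is a similar compile-time constant in a shared header, along the lines of this sketch:

    /* Hypothetical sketch -- the real definition is outside the hunks shown.
     * ARM and arm64 are excluded as well, for the single-step handling
     * problem described in the comment removed above. */
    #if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \
        defined(__aarch64__)
    #define BP_SIGNAL_IS_SUPPORTED 0
    #else
    #define BP_SIGNAL_IS_SUPPORTED 1
    #endif

The user-visible effect is that an unsupported architecture now reports these tests as Skip from inside the test body, instead of builtin-test.c hiding them from the list or printing "Disabled" as it did before.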
+8 -1
tools/perf/tests/bp_signal_overflow.c
··· 59 59 #define EXECUTIONS 10000 60 60 #define THRESHOLD 100 61 61 62 - int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused) 62 + static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 63 63 { 64 64 struct perf_event_attr pe; 65 65 struct sigaction sa; 66 66 long long count; 67 67 int fd, i, fails = 0; 68 + 69 + if (!BP_SIGNAL_IS_SUPPORTED) { 70 + pr_debug("Test not supported on this architecture"); 71 + return TEST_SKIP; 72 + } 68 73 69 74 /* setup SIGIO signal handler */ 70 75 memset(&sa, 0, sizeof(struct sigaction)); ··· 138 133 139 134 return fails ? TEST_FAIL : TEST_OK; 140 135 } 136 + 137 + DEFINE_SUITE("Breakpoint overflow sampling", bp_signal_overflow);
+54 -31
tools/perf/tests/bpf.c
··· 62 62 63 63 static struct { 64 64 enum test_llvm__testcase prog_id; 65 - const char *desc; 66 65 const char *name; 67 66 const char *msg_compile_fail; 68 67 const char *msg_load_fail; ··· 71 72 } bpf_testcase_table[] = { 72 73 { 73 74 .prog_id = LLVM_TESTCASE_BASE, 74 - .desc = "Basic BPF filtering", 75 75 .name = "[basic_bpf_test]", 76 76 .msg_compile_fail = "fix 'perf test LLVM' first", 77 77 .msg_load_fail = "load bpf object failed", ··· 79 81 }, 80 82 { 81 83 .prog_id = LLVM_TESTCASE_BASE, 82 - .desc = "BPF pinning", 83 84 .name = "[bpf_pinning]", 84 85 .msg_compile_fail = "fix kbuild first", 85 86 .msg_load_fail = "check your vmlinux setting?", ··· 89 92 #ifdef HAVE_BPF_PROLOGUE 90 93 { 91 94 .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, 92 - .desc = "BPF prologue generation", 93 95 .name = "[bpf_prologue_test]", 94 96 .msg_compile_fail = "fix kbuild first", 95 97 .msg_load_fail = "check your vmlinux setting?", ··· 279 283 return ret; 280 284 } 281 285 282 - int test__bpf_subtest_get_nr(void) 283 - { 284 - return (int)ARRAY_SIZE(bpf_testcase_table); 285 - } 286 - 287 - const char *test__bpf_subtest_get_desc(int i) 288 - { 289 - if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table)) 290 - return NULL; 291 - return bpf_testcase_table[i].desc; 292 - } 293 - 294 286 static int check_env(void) 295 287 { 296 288 int err; ··· 297 313 } 298 314 299 315 err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, 300 - sizeof(insns) / sizeof(insns[0]), 316 + ARRAY_SIZE(insns), 301 317 license, kver_int, NULL, 0); 302 318 if (err < 0) { 303 319 pr_err("Missing basic BPF support, skip this test: %s\n", ··· 309 325 return 0; 310 326 } 311 327 312 - int test__bpf(struct test *test __maybe_unused, int i) 328 + static int test__bpf(int i) 313 329 { 314 330 int err; 315 331 ··· 327 343 err = __test__bpf(i); 328 344 return err; 329 345 } 346 + #endif 330 347 348 + static int test__basic_bpf_test(struct test_suite *test __maybe_unused, 349 + int subtest __maybe_unused) 350 + { 351 + #ifdef HAVE_LIBBPF_SUPPORT 352 + return test__bpf(0); 331 353 #else 332 - int test__bpf_subtest_get_nr(void) 333 - { 334 - return 0; 335 - } 336 - 337 - const char *test__bpf_subtest_get_desc(int i __maybe_unused) 338 - { 339 - return NULL; 340 - } 341 - 342 - int test__bpf(struct test *test __maybe_unused, int i __maybe_unused) 343 - { 344 354 pr_debug("Skip BPF test because BPF support is not compiled\n"); 345 355 return TEST_SKIP; 346 - } 347 356 #endif 357 + } 358 + 359 + static int test__bpf_pinning(struct test_suite *test __maybe_unused, 360 + int subtest __maybe_unused) 361 + { 362 + #ifdef HAVE_LIBBPF_SUPPORT 363 + return test__bpf(1); 364 + #else 365 + pr_debug("Skip BPF test because BPF support is not compiled\n"); 366 + return TEST_SKIP; 367 + #endif 368 + } 369 + 370 + static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, 371 + int subtest __maybe_unused) 372 + { 373 + #if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) 374 + return test__bpf(2); 375 + #else 376 + pr_debug("Skip BPF test because BPF support is not compiled\n"); 377 + return TEST_SKIP; 378 + #endif 379 + } 380 + 381 + 382 + static struct test_case bpf_tests[] = { 383 + #ifdef HAVE_LIBBPF_SUPPORT 384 + TEST_CASE("Basic BPF filtering", basic_bpf_test), 385 + TEST_CASE("BPF pinning", bpf_pinning), 386 + #ifdef HAVE_BPF_PROLOGUE 387 + TEST_CASE("BPF prologue generation", bpf_prologue_test), 388 + #else 389 + TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), 390 + #endif 391 + #else 392 + 
TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"), 393 + TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"), 394 + TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), 395 + #endif 396 + { .name = NULL, } 397 + }; 398 + 399 + struct test_suite suite__bpf = { 400 + .desc = "BPF filter", 401 + .test_cases = bpf_tests, 402 + };
+178 -400
tools/perf/tests/builtin-test.c
··· 30 30 31 31 static bool dont_fork; 32 32 33 - struct test __weak arch_tests[] = { 34 - { 35 - .func = NULL, 36 - }, 33 + struct test_suite *__weak arch_tests[] = { 34 + NULL, 37 35 }; 38 36 39 - static struct test generic_tests[] = { 40 - { 41 - .desc = "vmlinux symtab matches kallsyms", 42 - .func = test__vmlinux_matches_kallsyms, 43 - }, 44 - { 45 - .desc = "Detect openat syscall event", 46 - .func = test__openat_syscall_event, 47 - }, 48 - { 49 - .desc = "Detect openat syscall event on all cpus", 50 - .func = test__openat_syscall_event_on_all_cpus, 51 - }, 52 - { 53 - .desc = "Read samples using the mmap interface", 54 - .func = test__basic_mmap, 55 - }, 56 - { 57 - .desc = "Test data source output", 58 - .func = test__mem, 59 - }, 60 - { 61 - .desc = "Parse event definition strings", 62 - .func = test__parse_events, 63 - }, 64 - { 65 - .desc = "Simple expression parser", 66 - .func = test__expr, 67 - }, 68 - { 69 - .desc = "PERF_RECORD_* events & perf_sample fields", 70 - .func = test__PERF_RECORD, 71 - }, 72 - { 73 - .desc = "Parse perf pmu format", 74 - .func = test__pmu, 75 - }, 76 - { 77 - .desc = "PMU events", 78 - .func = test__pmu_events, 79 - .subtest = { 80 - .skip_if_fail = false, 81 - .get_nr = test__pmu_events_subtest_get_nr, 82 - .get_desc = test__pmu_events_subtest_get_desc, 83 - .skip_reason = test__pmu_events_subtest_skip_reason, 84 - }, 85 - 86 - }, 87 - { 88 - .desc = "DSO data read", 89 - .func = test__dso_data, 90 - }, 91 - { 92 - .desc = "DSO data cache", 93 - .func = test__dso_data_cache, 94 - }, 95 - { 96 - .desc = "DSO data reopen", 97 - .func = test__dso_data_reopen, 98 - }, 99 - { 100 - .desc = "Roundtrip evsel->name", 101 - .func = test__perf_evsel__roundtrip_name_test, 102 - }, 103 - { 104 - .desc = "Parse sched tracepoints fields", 105 - .func = test__perf_evsel__tp_sched_test, 106 - }, 107 - { 108 - .desc = "syscalls:sys_enter_openat event fields", 109 - .func = test__syscall_openat_tp_fields, 110 - }, 111 - { 112 - .desc = "Setup struct perf_event_attr", 113 - .func = test__attr, 114 - }, 115 - { 116 - .desc = "Match and link multiple hists", 117 - .func = test__hists_link, 118 - }, 119 - { 120 - .desc = "'import perf' in python", 121 - .func = test__python_use, 122 - }, 123 - { 124 - .desc = "Breakpoint overflow signal handler", 125 - .func = test__bp_signal, 126 - .is_supported = test__bp_signal_is_supported, 127 - }, 128 - { 129 - .desc = "Breakpoint overflow sampling", 130 - .func = test__bp_signal_overflow, 131 - .is_supported = test__bp_signal_is_supported, 132 - }, 133 - { 134 - .desc = "Breakpoint accounting", 135 - .func = test__bp_accounting, 136 - .is_supported = test__bp_account_is_supported, 137 - }, 138 - { 139 - .desc = "Watchpoint", 140 - .func = test__wp, 141 - .is_supported = test__wp_is_supported, 142 - .subtest = { 143 - .skip_if_fail = false, 144 - .get_nr = test__wp_subtest_get_nr, 145 - .get_desc = test__wp_subtest_get_desc, 146 - .skip_reason = test__wp_subtest_skip_reason, 147 - }, 148 - }, 149 - { 150 - .desc = "Number of exit events of a simple workload", 151 - .func = test__task_exit, 152 - }, 153 - { 154 - .desc = "Software clock events period values", 155 - .func = test__sw_clock_freq, 156 - }, 157 - { 158 - .desc = "Object code reading", 159 - .func = test__code_reading, 160 - }, 161 - { 162 - .desc = "Sample parsing", 163 - .func = test__sample_parsing, 164 - }, 165 - { 166 - .desc = "Use a dummy software event to keep tracking", 167 - .func = test__keep_tracking, 168 - }, 169 - { 170 - .desc = "Parse with no 
sample_id_all bit set", 171 - .func = test__parse_no_sample_id_all, 172 - }, 173 - { 174 - .desc = "Filter hist entries", 175 - .func = test__hists_filter, 176 - }, 177 - { 178 - .desc = "Lookup mmap thread", 179 - .func = test__mmap_thread_lookup, 180 - }, 181 - { 182 - .desc = "Share thread maps", 183 - .func = test__thread_maps_share, 184 - }, 185 - { 186 - .desc = "Sort output of hist entries", 187 - .func = test__hists_output, 188 - }, 189 - { 190 - .desc = "Cumulate child hist entries", 191 - .func = test__hists_cumulate, 192 - }, 193 - { 194 - .desc = "Track with sched_switch", 195 - .func = test__switch_tracking, 196 - }, 197 - { 198 - .desc = "Filter fds with revents mask in a fdarray", 199 - .func = test__fdarray__filter, 200 - }, 201 - { 202 - .desc = "Add fd to a fdarray, making it autogrow", 203 - .func = test__fdarray__add, 204 - }, 205 - { 206 - .desc = "kmod_path__parse", 207 - .func = test__kmod_path__parse, 208 - }, 209 - { 210 - .desc = "Thread map", 211 - .func = test__thread_map, 212 - }, 213 - { 214 - .desc = "LLVM search and compile", 215 - .func = test__llvm, 216 - .subtest = { 217 - .skip_if_fail = true, 218 - .get_nr = test__llvm_subtest_get_nr, 219 - .get_desc = test__llvm_subtest_get_desc, 220 - }, 221 - }, 222 - { 223 - .desc = "Session topology", 224 - .func = test__session_topology, 225 - }, 226 - { 227 - .desc = "BPF filter", 228 - .func = test__bpf, 229 - .subtest = { 230 - .skip_if_fail = true, 231 - .get_nr = test__bpf_subtest_get_nr, 232 - .get_desc = test__bpf_subtest_get_desc, 233 - }, 234 - }, 235 - { 236 - .desc = "Synthesize thread map", 237 - .func = test__thread_map_synthesize, 238 - }, 239 - { 240 - .desc = "Remove thread map", 241 - .func = test__thread_map_remove, 242 - }, 243 - { 244 - .desc = "Synthesize cpu map", 245 - .func = test__cpu_map_synthesize, 246 - }, 247 - { 248 - .desc = "Synthesize stat config", 249 - .func = test__synthesize_stat_config, 250 - }, 251 - { 252 - .desc = "Synthesize stat", 253 - .func = test__synthesize_stat, 254 - }, 255 - { 256 - .desc = "Synthesize stat round", 257 - .func = test__synthesize_stat_round, 258 - }, 259 - { 260 - .desc = "Synthesize attr update", 261 - .func = test__event_update, 262 - }, 263 - { 264 - .desc = "Event times", 265 - .func = test__event_times, 266 - }, 267 - { 268 - .desc = "Read backward ring buffer", 269 - .func = test__backward_ring_buffer, 270 - }, 271 - { 272 - .desc = "Print cpu map", 273 - .func = test__cpu_map_print, 274 - }, 275 - { 276 - .desc = "Merge cpu map", 277 - .func = test__cpu_map_merge, 278 - }, 279 - 280 - { 281 - .desc = "Probe SDT events", 282 - .func = test__sdt_event, 283 - }, 284 - { 285 - .desc = "is_printable_array", 286 - .func = test__is_printable_array, 287 - }, 288 - { 289 - .desc = "Print bitmap", 290 - .func = test__bitmap_print, 291 - }, 292 - { 293 - .desc = "perf hooks", 294 - .func = test__perf_hooks, 295 - }, 296 - { 297 - .desc = "builtin clang support", 298 - .func = test__clang, 299 - .subtest = { 300 - .skip_if_fail = true, 301 - .get_nr = test__clang_subtest_get_nr, 302 - .get_desc = test__clang_subtest_get_desc, 303 - } 304 - }, 305 - { 306 - .desc = "unit_number__scnprintf", 307 - .func = test__unit_number__scnprint, 308 - }, 309 - { 310 - .desc = "mem2node", 311 - .func = test__mem2node, 312 - }, 313 - { 314 - .desc = "time utils", 315 - .func = test__time_utils, 316 - }, 317 - { 318 - .desc = "Test jit_write_elf", 319 - .func = test__jit_write_elf, 320 - }, 321 - { 322 - .desc = "Test libpfm4 support", 323 - .func = test__pfm, 324 - 
.subtest = { 325 - .skip_if_fail = true, 326 - .get_nr = test__pfm_subtest_get_nr, 327 - .get_desc = test__pfm_subtest_get_desc, 328 - } 329 - }, 330 - { 331 - .desc = "Test api io", 332 - .func = test__api_io, 333 - }, 334 - { 335 - .desc = "maps__merge_in", 336 - .func = test__maps__merge_in, 337 - }, 338 - { 339 - .desc = "Demangle Java", 340 - .func = test__demangle_java, 341 - }, 342 - { 343 - .desc = "Demangle OCaml", 344 - .func = test__demangle_ocaml, 345 - }, 346 - { 347 - .desc = "Parse and process metrics", 348 - .func = test__parse_metric, 349 - }, 350 - { 351 - .desc = "PE file support", 352 - .func = test__pe_file_parsing, 353 - }, 354 - { 355 - .desc = "Event expansion for cgroups", 356 - .func = test__expand_cgroup_events, 357 - }, 358 - { 359 - .desc = "Convert perf time to TSC", 360 - .func = test__perf_time_to_tsc, 361 - .is_supported = test__tsc_is_supported, 362 - }, 363 - { 364 - .desc = "dlfilter C API", 365 - .func = test__dlfilter, 366 - }, 367 - { 368 - .func = NULL, 369 - }, 37 + static struct test_suite *generic_tests[] = { 38 + &suite__vmlinux_matches_kallsyms, 39 + &suite__openat_syscall_event, 40 + &suite__openat_syscall_event_on_all_cpus, 41 + &suite__basic_mmap, 42 + &suite__mem, 43 + &suite__parse_events, 44 + &suite__expr, 45 + &suite__PERF_RECORD, 46 + &suite__pmu, 47 + &suite__pmu_events, 48 + &suite__dso_data, 49 + &suite__dso_data_cache, 50 + &suite__dso_data_reopen, 51 + &suite__perf_evsel__roundtrip_name_test, 52 + &suite__perf_evsel__tp_sched_test, 53 + &suite__syscall_openat_tp_fields, 54 + &suite__attr, 55 + &suite__hists_link, 56 + &suite__python_use, 57 + &suite__bp_signal, 58 + &suite__bp_signal_overflow, 59 + &suite__bp_accounting, 60 + &suite__wp, 61 + &suite__task_exit, 62 + &suite__sw_clock_freq, 63 + &suite__code_reading, 64 + &suite__sample_parsing, 65 + &suite__keep_tracking, 66 + &suite__parse_no_sample_id_all, 67 + &suite__hists_filter, 68 + &suite__mmap_thread_lookup, 69 + &suite__thread_maps_share, 70 + &suite__hists_output, 71 + &suite__hists_cumulate, 72 + &suite__switch_tracking, 73 + &suite__fdarray__filter, 74 + &suite__fdarray__add, 75 + &suite__kmod_path__parse, 76 + &suite__thread_map, 77 + &suite__llvm, 78 + &suite__session_topology, 79 + &suite__bpf, 80 + &suite__thread_map_synthesize, 81 + &suite__thread_map_remove, 82 + &suite__cpu_map_synthesize, 83 + &suite__synthesize_stat_config, 84 + &suite__synthesize_stat, 85 + &suite__synthesize_stat_round, 86 + &suite__event_update, 87 + &suite__event_times, 88 + &suite__backward_ring_buffer, 89 + &suite__cpu_map_print, 90 + &suite__cpu_map_merge, 91 + &suite__sdt_event, 92 + &suite__is_printable_array, 93 + &suite__bitmap_print, 94 + &suite__perf_hooks, 95 + &suite__clang, 96 + &suite__unit_number__scnprint, 97 + &suite__mem2node, 98 + &suite__time_utils, 99 + &suite__jit_write_elf, 100 + &suite__pfm, 101 + &suite__api_io, 102 + &suite__maps__merge_in, 103 + &suite__demangle_java, 104 + &suite__demangle_ocaml, 105 + &suite__parse_metric, 106 + &suite__pe_file_parsing, 107 + &suite__expand_cgroup_events, 108 + &suite__perf_time_to_tsc, 109 + &suite__dlfilter, 110 + NULL, 370 111 }; 371 112 372 - static struct test *tests[] = { 113 + static struct test_suite **tests[] = { 373 114 generic_tests, 374 115 arch_tests, 375 116 }; 117 + 118 + static int num_subtests(const struct test_suite *t) 119 + { 120 + int num; 121 + 122 + if (!t->test_cases) 123 + return 0; 124 + 125 + num = 0; 126 + while (t->test_cases[num].name) 127 + num++; 128 + 129 + return num; 130 + } 131 + 132 + static 
bool has_subtests(const struct test_suite *t) 133 + { 134 + return num_subtests(t) > 1; 135 + } 136 + 137 + static const char *skip_reason(const struct test_suite *t, int subtest) 138 + { 139 + if (t->test_cases && subtest >= 0) 140 + return t->test_cases[subtest].skip_reason; 141 + 142 + return NULL; 143 + } 144 + 145 + static const char *test_description(const struct test_suite *t, int subtest) 146 + { 147 + if (t->test_cases && subtest >= 0) 148 + return t->test_cases[subtest].desc; 149 + 150 + return t->desc; 151 + } 152 + 153 + static test_fnptr test_function(const struct test_suite *t, int subtest) 154 + { 155 + if (subtest <= 0) 156 + return t->test_cases[0].run_case; 157 + 158 + return t->test_cases[subtest].run_case; 159 + } 376 160 377 161 static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) 378 162 { ··· 182 398 return false; 183 399 } 184 400 185 - static int run_test(struct test *test, int subtest) 401 + static int run_test(struct test_suite *test, int subtest) 186 402 { 187 403 int status, err = -1, child = dont_fork ? 0 : fork(); 188 404 char sbuf[STRERR_BUFSIZE]; ··· 214 430 } 215 431 } 216 432 217 - err = test->func(test, subtest); 433 + err = test_function(test, subtest)(test, subtest); 218 434 if (!dont_fork) 219 435 exit(err); 220 436 } ··· 234 450 return err; 235 451 } 236 452 237 - #define for_each_test(j, t) \ 453 + #define for_each_test(j, k, t) \ 238 454 for (j = 0; j < ARRAY_SIZE(tests); j++) \ 239 - for (t = &tests[j][0]; t->func; t++) 455 + for (k = 0, t = tests[j][k]; tests[j][k]; k++, t = tests[j][k]) 240 456 241 - static int test_and_print(struct test *t, bool force_skip, int subtest) 457 + static int test_and_print(struct test_suite *t, int subtest) 242 458 { 243 459 int err; 244 460 245 - if (!force_skip) { 246 - pr_debug("\n--- start ---\n"); 247 - err = run_test(t, subtest); 248 - pr_debug("---- end ----\n"); 249 - } else { 250 - pr_debug("\n--- force skipped ---\n"); 251 - err = TEST_SKIP; 252 - } 461 + pr_debug("\n--- start ---\n"); 462 + err = run_test(t, subtest); 463 + pr_debug("---- end ----\n"); 253 464 254 - if (!t->subtest.get_nr) 465 + if (!has_subtests(t)) 255 466 pr_debug("%s:", t->desc); 256 467 else 257 468 pr_debug("%s subtest %d:", t->desc, subtest + 1); ··· 256 477 pr_info(" Ok\n"); 257 478 break; 258 479 case TEST_SKIP: { 259 - const char *skip_reason = NULL; 260 - if (t->subtest.skip_reason) 261 - skip_reason = t->subtest.skip_reason(subtest); 262 - if (skip_reason) 263 - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason); 480 + const char *reason = skip_reason(t, subtest); 481 + 482 + if (reason) 483 + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", reason); 264 484 else 265 485 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); 266 486 } ··· 358 580 const char *file; 359 581 }; 360 582 361 - static int shell_test__run(struct test *test, int subdir __maybe_unused) 583 + static int shell_test__run(struct test_suite *test, int subdir __maybe_unused) 362 584 { 363 585 int err; 364 586 char script[PATH_MAX]; ··· 400 622 for_each_shell_test(entlist, n_dirs, st.dir, ent) { 401 623 int curr = i++; 402 624 char desc[256]; 403 - struct test test = { 404 - .desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name), 405 - .func = shell_test__run, 625 + struct test_case test_cases[] = { 626 + { 627 + .desc = shell_test__description(desc, 628 + sizeof(desc), 629 + st.dir, 630 + ent->d_name), 631 + .run_case = shell_test__run, 632 + }, 633 + { .name = NULL, } 634 + }; 
635 + struct test_suite test_suite = { 636 + .desc = test_cases[0].desc, 637 + .test_cases = test_cases, 406 638 .priv = &st, 407 639 }; 408 640 409 - if (!perf_test__matches(test.desc, curr, argc, argv)) 641 + if (!perf_test__matches(test_suite.desc, curr, argc, argv)) 410 642 continue; 411 643 412 644 st.file = ent->d_name; 413 - pr_info("%2d: %-*s:", i, width, test.desc); 645 + pr_info("%2d: %-*s:", i, width, test_suite.desc); 414 646 415 647 if (intlist__find(skiplist, i)) { 416 648 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); 417 649 continue; 418 650 } 419 651 420 - test_and_print(&test, false, -1); 652 + test_and_print(&test_suite, 0); 421 653 } 422 654 423 655 for (e = 0; e < n_dirs; e++) ··· 438 650 439 651 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) 440 652 { 441 - struct test *t; 442 - unsigned int j; 653 + struct test_suite *t; 654 + unsigned int j, k; 443 655 int i = 0; 444 656 int width = shell_tests__max_desc_width(); 445 657 446 - for_each_test(j, t) { 447 - int len = strlen(t->desc); 658 + for_each_test(j, k, t) { 659 + int len = strlen(test_description(t, -1)); 448 660 449 661 if (width < len) 450 662 width = len; 451 663 } 452 664 453 - for_each_test(j, t) { 454 - int curr = i++, err; 665 + for_each_test(j, k, t) { 666 + int curr = i++; 455 667 int subi; 456 668 457 - if (!perf_test__matches(t->desc, curr, argc, argv)) { 669 + if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { 458 670 bool skip = true; 459 671 int subn; 460 672 461 - if (!t->subtest.get_nr) 462 - continue; 463 - 464 - subn = t->subtest.get_nr(); 673 + subn = num_subtests(t); 465 674 466 675 for (subi = 0; subi < subn; subi++) { 467 - if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) 676 + if (perf_test__matches(test_description(t, subi), 677 + curr, argc, argv)) 468 678 skip = false; 469 679 } 470 680 ··· 470 684 continue; 471 685 } 472 686 473 - if (t->is_supported && !t->is_supported()) { 474 - pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); 475 - continue; 476 - } 477 - 478 - pr_info("%2d: %-*s:", i, width, t->desc); 687 + pr_info("%2d: %-*s:", i, width, test_description(t, -1)); 479 688 480 689 if (intlist__find(skiplist, i)) { 481 690 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); 482 691 continue; 483 692 } 484 693 485 - if (!t->subtest.get_nr) { 486 - test_and_print(t, false, -1); 694 + if (!has_subtests(t)) { 695 + test_and_print(t, -1); 487 696 } else { 488 - int subn = t->subtest.get_nr(); 697 + int subn = num_subtests(t); 489 698 /* 490 699 * minus 2 to align with normal testcases. 491 700 * For subtest we print additional '.x' in number. ··· 490 709 * 35.1: Basic BPF llvm compiling test : Ok 491 710 */ 492 711 int subw = width > 2 ? 
width - 2 : width; 493 - bool skip = false; 494 712 495 713 if (subn <= 0) { 496 714 color_fprintf(stderr, PERF_COLOR_YELLOW, ··· 499 719 pr_info("\n"); 500 720 501 721 for (subi = 0; subi < subn; subi++) { 502 - int len = strlen(t->subtest.get_desc(subi)); 722 + int len = strlen(test_description(t, subi)); 503 723 504 724 if (subw < len) 505 725 subw = len; 506 726 } 507 727 508 728 for (subi = 0; subi < subn; subi++) { 509 - if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) 729 + if (!perf_test__matches(test_description(t, subi), 730 + curr, argc, argv)) 510 731 continue; 511 732 512 733 pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, 513 - t->subtest.get_desc(subi)); 514 - err = test_and_print(t, skip, subi); 515 - if (err != TEST_OK && t->subtest.skip_if_fail) 516 - skip = true; 734 + test_description(t, subi)); 735 + test_and_print(t, subi); 517 736 } 518 737 } 519 738 } ··· 538 759 for_each_shell_test(entlist, n_dirs, path, ent) { 539 760 int curr = i++; 540 761 char bf[256]; 541 - struct test t = { 762 + struct test_suite t = { 542 763 .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name), 543 764 }; 544 765 ··· 557 778 558 779 static int perf_test__list(int argc, const char **argv) 559 780 { 560 - unsigned int j; 561 - struct test *t; 781 + unsigned int j, k; 782 + struct test_suite *t; 562 783 int i = 0; 563 784 564 - for_each_test(j, t) { 785 + for_each_test(j, k, t) { 565 786 int curr = i++; 566 787 567 - if (!perf_test__matches(t->desc, curr, argc, argv) || 568 - (t->is_supported && !t->is_supported())) 788 + if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) 569 789 continue; 570 790 571 - pr_info("%2d: %s\n", i, t->desc); 791 + pr_info("%2d: %s\n", i, test_description(t, -1)); 572 792 573 - if (t->subtest.get_nr) { 574 - int subn = t->subtest.get_nr(); 793 + if (has_subtests(t)) { 794 + int subn = num_subtests(t); 575 795 int subi; 576 796 577 797 for (subi = 0; subi < subn; subi++) 578 798 pr_info("%2d:%1d: %s\n", i, subi + 1, 579 - t->subtest.get_desc(subi)); 799 + test_description(t, subi)); 580 800 } 581 801 } 582 802
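With the table of function pointers replaced by an array of test_suite pointers, builtin-test.c no longer needs per-test subtest callbacks, the is_supported() hook, or the skip_if_fail handling. Registering a brand-new test under the new scheme comes down to three small steps; the example below is hypothetical (the name "foo" and its file are made up for illustration and are not part of this pull):

    /* 1) tools/perf/tests/foo.c -- define the test and its suite. */
    static int test__foo(struct test_suite *test __maybe_unused,
                         int subtest __maybe_unused)
    {
            return TEST_OK;
    }

    DEFINE_SUITE("Foo smoke test", foo);

    /* 2) declare the suite in tests.h, e.g. with a DECLARE_SUITE(foo)-style
     *    helper if one exists, or a plain:
     *        extern struct test_suite suite__foo;
     * 3) add &suite__foo to generic_tests[] in builtin-test.c. */

Shell tests keep working unchanged: as the hunk above shows, each script is wrapped on the fly in a one-case test_suite whose priv carries the script's directory and file name.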
+20 -34
tools/perf/tests/clang.c
··· 3 3 #include "c++/clang-c.h" 4 4 #include <linux/kernel.h> 5 5 6 - static struct { 7 - int (*func)(void); 8 - const char *desc; 9 - } clang_testcase_table[] = { 10 - #ifdef HAVE_LIBCLANGLLVM_SUPPORT 11 - { 12 - .func = test__clang_to_IR, 13 - .desc = "builtin clang compile C source to IR", 14 - }, 15 - { 16 - .func = test__clang_to_obj, 17 - .desc = "builtin clang compile C source to ELF object", 18 - }, 19 - #endif 20 - }; 21 - 22 - int test__clang_subtest_get_nr(void) 23 - { 24 - return (int)ARRAY_SIZE(clang_testcase_table); 25 - } 26 - 27 - const char *test__clang_subtest_get_desc(int i) 28 - { 29 - if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table)) 30 - return NULL; 31 - return clang_testcase_table[i].desc; 32 - } 33 - 34 6 #ifndef HAVE_LIBCLANGLLVM_SUPPORT 35 - int test__clang(struct test *test __maybe_unused, int i __maybe_unused) 7 + static int test__clang_to_IR(struct test_suite *test __maybe_unused, 8 + int subtest __maybe_unused) 36 9 { 37 10 return TEST_SKIP; 38 11 } 39 - #else 40 - int test__clang(struct test *test __maybe_unused, int i) 12 + 13 + static int test__clang_to_obj(struct test_suite *test __maybe_unused, 14 + int subtest __maybe_unused) 41 15 { 42 - if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table)) 43 - return TEST_FAIL; 44 - return clang_testcase_table[i].func(); 16 + return TEST_SKIP; 45 17 } 46 18 #endif 19 + 20 + static struct test_case clang_tests[] = { 21 + TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR, 22 + "not compiled in"), 23 + TEST_CASE_REASON("builtin clang compile C source to ELF object", 24 + clang_to_obj, 25 + "not compiled in"), 26 + { .name = NULL, } 27 + }; 28 + 29 + struct test_suite suite__clang = { 30 + .desc = "builtin clang support", 31 + .test_cases = clang_tests, 32 + };
+3 -1
tools/perf/tests/code-reading.c
··· 716 716 return err; 717 717 } 718 718 719 - int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused) 719 + static int test__code_reading(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 720 720 { 721 721 int ret; 722 722 ··· 743 743 return -1; 744 744 }; 745 745 } 746 + 747 + DEFINE_SUITE("Object code reading", code_reading);
+7 -3
tools/perf/tests/cpumap.c
··· 75 75 } 76 76 77 77 78 - int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) 78 + static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 79 79 { 80 80 struct perf_cpu_map *cpus; 81 81 ··· 111 111 return !strcmp(buf, str); 112 112 } 113 113 114 - int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused) 114 + static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 115 115 { 116 116 TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1")); 117 117 TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5")); ··· 123 123 return 0; 124 124 } 125 125 126 - int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused) 126 + static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 127 127 { 128 128 struct perf_cpu_map *a = perf_cpu_map__new("4,2,1"); 129 129 struct perf_cpu_map *b = perf_cpu_map__new("4,5,7"); ··· 137 137 perf_cpu_map__put(c); 138 138 return 0; 139 139 } 140 + 141 + DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize); 142 + DEFINE_SUITE("Print cpu map", cpu_map_print); 143 + DEFINE_SUITE("Merge cpu map", cpu_map_merge);
+3 -1
tools/perf/tests/demangle-java-test.c
··· 7 7 #include "debug.h" 8 8 #include "demangle-java.h" 9 9 10 - int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused) 10 + static int test__demangle_java(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 11 11 { 12 12 int ret = TEST_OK; 13 13 char *buf = NULL; ··· 40 40 41 41 return ret; 42 42 } 43 + 44 + DEFINE_SUITE("Demangle Java", demangle_java);
+3 -1
tools/perf/tests/demangle-ocaml-test.c
··· 7 7 #include "debug.h" 8 8 #include "demangle-ocaml.h" 9 9 10 - int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused) 10 + static int test__demangle_ocaml(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 11 11 { 12 12 int ret = TEST_OK; 13 13 char *buf = NULL; ··· 41 41 42 42 return ret; 43 43 } 44 + 45 + DEFINE_SUITE("Demangle OCaml", demangle_ocaml);
+3 -1
tools/perf/tests/dlfilter-test.c
··· 398 398 } 399 399 } 400 400 401 - int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused) 401 + static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 402 402 { 403 403 struct test_data td = {.fd = -1}; 404 404 int pid = getpid(); ··· 414 414 test_data__free(&td); 415 415 return err; 416 416 } 417 + 418 + DEFINE_SUITE("dlfilter C API", dlfilter);
+7 -3
tools/perf/tests/dso-data.c
··· 113 113 return fd; 114 114 } 115 115 116 - int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused) 116 + static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 117 117 { 118 118 struct machine machine; 119 119 struct dso *dso; ··· 248 248 return setrlimit(RLIMIT_NOFILE, &rlim); 249 249 } 250 250 251 - int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused) 251 + static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 252 252 { 253 253 struct machine machine; 254 254 long nr_end, nr = open_files_cnt(); ··· 318 318 return ret; 319 319 } 320 320 321 - int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused) 321 + static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 322 322 { 323 323 struct machine machine; 324 324 long nr_end, nr = open_files_cnt(), lim = new_limit(3); ··· 393 393 TEST_ASSERT_VAL("failed leaking files", nr == nr_end); 394 394 return 0; 395 395 } 396 + 397 + DEFINE_SUITE("DSO data read", dso_data); 398 + DEFINE_SUITE("DSO data cache", dso_data_cache); 399 + DEFINE_SUITE("DSO data reopen", dso_data_reopen);
+4 -1
tools/perf/tests/dwarf-unwind.c
··· 195 195 return ret; 196 196 } 197 197 198 - int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) 198 + static int test__dwarf_unwind(struct test_suite *test __maybe_unused, 199 + int subtest __maybe_unused) 199 200 { 200 201 struct machine *machine; 201 202 struct thread *thread; ··· 238 237 machine__delete(machine); 239 238 return err; 240 239 } 240 + 241 + DEFINE_SUITE("Test dwarf unwind", dwarf_unwind);
+3 -1
tools/perf/tests/event-times.c
··· 216 216 * and checks that enabled and running times 217 217 * match. 218 218 */ 219 - int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused) 219 + static int test__event_times(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 220 220 { 221 221 int err, ret = 0; 222 222 ··· 239 239 #undef _T 240 240 return ret; 241 241 } 242 + 243 + DEFINE_SUITE("Event times", event_times);
+3 -1
tools/perf/tests/event_update.c
··· 83 83 return 0; 84 84 } 85 85 86 - int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused) 86 + static int test__event_update(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 87 87 { 88 88 struct evsel *evsel; 89 89 struct event_name tmp; ··· 123 123 evlist__delete(evlist); 124 124 return 0; 125 125 } 126 + 127 + DEFINE_SUITE("Synthesize attr update", event_update);
+4 -1
tools/perf/tests/evsel-roundtrip-name.c
··· 99 99 #define perf_evsel__name_array_test(names, distance) \ 100 100 __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance) 101 101 102 - int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused) 102 + static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused, 103 + int subtest __maybe_unused) 103 104 { 104 105 int err = 0, ret = 0; 105 106 ··· 121 120 122 121 return ret; 123 122 } 123 + 124 + DEFINE_SUITE("Roundtrip evsel->name", perf_evsel__roundtrip_name_test);
+4 -1
tools/perf/tests/evsel-tp-sched.c
··· 32 32 return ret; 33 33 } 34 34 35 - int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) 35 + static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unused, 36 + int subtest __maybe_unused) 36 37 { 37 38 struct evsel *evsel = evsel__newtp("sched", "sched_switch"); 38 39 int ret = 0; ··· 88 87 evsel__delete(evsel); 89 88 return ret; 90 89 } 90 + 91 + DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test);
+4 -2
tools/perf/tests/expand-cgroup.c
··· 221 221 return ret; 222 222 } 223 223 224 - int test__expand_cgroup_events(struct test *test __maybe_unused, 225 - int subtest __maybe_unused) 224 + static int test__expand_cgroup_events(struct test_suite *test __maybe_unused, 225 + int subtest __maybe_unused) 226 226 { 227 227 int ret; 228 228 ··· 240 240 241 241 return ret; 242 242 } 243 + 244 + DEFINE_SUITE("Event expansion for cgroups", expand_cgroup_events);
+36 -2
tools/perf/tests/expr.c
··· 62 62 return 0; 63 63 } 64 64 65 - int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) 65 + static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 66 66 { 67 67 struct expr_id_data *val_ptr; 68 68 const char *p; 69 - double val; 69 + double val, num_cpus, num_cores, num_dies, num_packages; 70 70 int ret; 71 71 struct expr_parse_ctx *ctx; 72 72 ··· 134 134 TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", 135 135 (void **)&val_ptr)); 136 136 137 + expr__ctx_clear(ctx); 138 + TEST_ASSERT_VAL("find ids", 139 + expr__find_ids("dash\\-event1 - dash\\-event2", 140 + NULL, ctx) == 0); 141 + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); 142 + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", 143 + (void **)&val_ptr)); 144 + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", 145 + (void **)&val_ptr)); 146 + 137 147 /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ 138 148 expr__ctx_clear(ctx); 139 149 TEST_ASSERT_VAL("find ids", ··· 161 151 NULL, ctx) == 0); 162 152 TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0); 163 153 154 + /* Test toplogy constants appear well ordered. */ 155 + expr__ctx_clear(ctx); 156 + TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0); 157 + TEST_ASSERT_VAL("#num_cores", expr__parse(&num_cores, ctx, "#num_cores") == 0); 158 + TEST_ASSERT_VAL("#num_cpus >= #num_cores", num_cpus >= num_cores); 159 + TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); 160 + TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); 161 + TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); 162 + TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); 163 + 164 + /* 165 + * Source count returns the number of events aggregating in a leader 166 + * event including the leader. Check parsing yields an id. 167 + */ 168 + expr__ctx_clear(ctx); 169 + TEST_ASSERT_VAL("source count", 170 + expr__find_ids("source_count(EVENT1)", 171 + NULL, ctx) == 0); 172 + TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1); 173 + TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", 174 + (void **)&val_ptr)); 175 + 164 176 expr__ctx_free(ctx); 165 177 166 178 return 0; 167 179 } 180 + 181 + DEFINE_SUITE("Simple expression parser", expr);
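Beyond the suite conversion, the expression-parser test grows three new checks: event names with escaped dashes survive expr__find_ids(), the topology constants come back well ordered (#num_cpus >= #num_cores >= #num_dies >= #num_packages), and source_count(EVENT1) yields an id for EVENT1. A minimal sketch of driving the same parser outside the test, reusing the calls visible in this hunk (expr__ctx_new() and the header paths are assumptions, as they are not shown here):

    #include "util/expr.h"    /* assumed header location */
    #include "util/debug.h"   /* for pr_info(), assumed */

    /* Sketch: evaluate a topology-derived value with the metric
     * expression parser used by perf's metric code. */
    static int print_cores_per_package(void)
    {
            struct expr_parse_ctx *ctx = expr__ctx_new();   /* assumed ctor */
            double val;
            int ret = -1;

            if (!ctx)
                    return -1;
            if (expr__parse(&val, ctx, "#num_cores / #num_packages") == 0) {
                    pr_info("cores per package: %f\n", val);
                    ret = 0;
            }
            expr__ctx_free(ctx);
            return ret;
    }

The new ordering assertions matter for metrics that use these constants: a JSON expression can rely on #num_cores never being smaller than #num_dies, and so on down the topology.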
+5 -2
tools/perf/tests/fdarray.c
··· 28 28 return printed + fdarray__fprintf(fda, fp); 29 29 } 30 30 31 - int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused) 31 + static int test__fdarray__filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 32 32 { 33 33 int nr_fds, err = TEST_FAIL; 34 34 struct fdarray *fda = fdarray__new(5, 5); ··· 89 89 return err; 90 90 } 91 91 92 - int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused) 92 + static int test__fdarray__add(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 93 93 { 94 94 int err = TEST_FAIL; 95 95 struct fdarray *fda = fdarray__new(2, 2); ··· 158 158 out: 159 159 return err; 160 160 } 161 + 162 + DEFINE_SUITE("Filter fds with revents mask in a fdarray", fdarray__filter); 163 + DEFINE_SUITE("Add fd to a fdarray, making it autogrow", fdarray__add);
+4 -2
tools/perf/tests/genelf.c
··· 16 16 17 17 #define TEMPL "/tmp/perf-test-XXXXXX" 18 18 19 - int test__jit_write_elf(struct test *test __maybe_unused, 20 - int subtest __maybe_unused) 19 + static int test__jit_write_elf(struct test_suite *test __maybe_unused, 20 + int subtest __maybe_unused) 21 21 { 22 22 #ifdef HAVE_JITDUMP 23 23 static unsigned char x86_code[] = { ··· 49 49 return TEST_SKIP; 50 50 #endif 51 51 } 52 + 53 + DEFINE_SUITE("Test jit_write_elf", jit_write_elf);
+3 -1
tools/perf/tests/hists_cumulate.c
··· 689 689 return err; 690 690 } 691 691 692 - int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused) 692 + static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 693 693 { 694 694 int err = TEST_FAIL; 695 695 struct machines machines; ··· 736 736 737 737 return err; 738 738 } 739 + 740 + DEFINE_SUITE("Cumulate child hist entries", hists_cumulate);
+3 -1
tools/perf/tests/hists_filter.c
··· 101 101 return TEST_FAIL; 102 102 } 103 103 104 - int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused) 104 + static int test__hists_filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 105 105 { 106 106 int err = TEST_FAIL; 107 107 struct machines machines; ··· 325 325 326 326 return err; 327 327 } 328 + 329 + DEFINE_SUITE("Filter hist entries", hists_filter);
+3 -1
tools/perf/tests/hists_link.c
··· 264 264 return __validate_link(leader, 0) || __validate_link(other, 1); 265 265 } 266 266 267 - int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused) 267 + static int test__hists_link(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 268 268 { 269 269 int err = -1; 270 270 struct hists *hists, *first_hists; ··· 339 339 340 340 return err; 341 341 } 342 + 343 + DEFINE_SUITE("Match and link multiple hists", hists_link);
+3 -1
tools/perf/tests/hists_output.c
··· 575 575 return err; 576 576 } 577 577 578 - int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused) 578 + static int test__hists_output(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 579 579 { 580 580 int err = TEST_FAIL; 581 581 struct machines machines; ··· 623 623 624 624 return err; 625 625 } 626 + 627 + DEFINE_SUITE("Sort output of hist entries", hists_output);
+3 -1
tools/perf/tests/is_printable_array.c
··· 5 5 #include "debug.h" 6 6 #include "print_binary.h" 7 7 8 - int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused) 8 + static int test__is_printable_array(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 9 9 { 10 10 char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 }; 11 11 char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 }; ··· 36 36 37 37 return TEST_OK; 38 38 } 39 + 40 + DEFINE_SUITE("is_printable_array", is_printable_array);
+3 -1
tools/perf/tests/keep-tracking.c
··· 61 61 * when an event is disabled but a dummy software event is not disabled. If the 62 62 * test passes %0 is returned, otherwise %-1 is returned. 63 63 */ 64 - int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) 64 + static int test__keep_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 65 65 { 66 66 struct record_opts opts = { 67 67 .mmap_pages = UINT_MAX, ··· 160 160 161 161 return err; 162 162 } 163 + 164 + DEFINE_SUITE("Use a dummy software event to keep tracking", keep_tracking);
+3 -1
tools/perf/tests/kmod-path.c
··· 47 47 #define M(path, c, e) \ 48 48 TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) 49 49 50 - int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused) 50 + static int test__kmod_path__parse(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 51 51 { 52 52 /* path alloc_name kmod comp name */ 53 53 T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]"); ··· 159 159 160 160 return 0; 161 161 } 162 + 163 + DEFINE_SUITE("kmod_path__parse", kmod_path__parse);
+60 -20
tools/perf/tests/llvm.c
··· 124 124 return ret; 125 125 } 126 126 127 - int test__llvm(struct test *test __maybe_unused, int subtest) 127 + static int test__llvm(int subtest) 128 128 { 129 129 int ret; 130 130 void *obj_buf = NULL; ··· 148 148 149 149 return ret; 150 150 } 151 + #endif //HAVE_LIBBPF_SUPPORT 151 152 152 - int test__llvm_subtest_get_nr(void) 153 + static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused, 154 + int subtest __maybe_unused) 153 155 { 154 - return __LLVM_TESTCASE_MAX; 155 - } 156 - 157 - const char *test__llvm_subtest_get_desc(int subtest) 158 - { 159 - if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) 160 - return NULL; 161 - 162 - return bpf_source_table[subtest].desc; 163 - } 164 - #else //HAVE_LIBBPF_SUPPORT 165 - int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused) 166 - { 156 + #ifdef HAVE_LIBBPF_SUPPORT 157 + return test__llvm(LLVM_TESTCASE_BASE); 158 + #else 159 + pr_debug("Skip LLVM test because BPF support is not compiled\n"); 167 160 return TEST_SKIP; 161 + #endif 168 162 } 169 163 170 - int test__llvm_subtest_get_nr(void) 164 + static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused, 165 + int subtest __maybe_unused) 171 166 { 172 - return 0; 167 + #ifdef HAVE_LIBBPF_SUPPORT 168 + return test__llvm(LLVM_TESTCASE_KBUILD); 169 + #else 170 + pr_debug("Skip LLVM test because BPF support is not compiled\n"); 171 + return TEST_SKIP; 172 + #endif 173 173 } 174 174 175 - const char *test__llvm_subtest_get_desc(int subtest __maybe_unused) 175 + static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused, 176 + int subtest __maybe_unused) 176 177 { 177 - return NULL; 178 + #ifdef HAVE_LIBBPF_SUPPORT 179 + return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE); 180 + #else 181 + pr_debug("Skip LLVM test because BPF support is not compiled\n"); 182 + return TEST_SKIP; 183 + #endif 178 184 } 179 - #endif // HAVE_LIBBPF_SUPPORT 185 + 186 + static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused, 187 + int subtest __maybe_unused) 188 + { 189 + #ifdef HAVE_LIBBPF_SUPPORT 190 + return test__llvm(LLVM_TESTCASE_BPF_RELOCATION); 191 + #else 192 + pr_debug("Skip LLVM test because BPF support is not compiled\n"); 193 + return TEST_SKIP; 194 + #endif 195 + } 196 + 197 + 198 + static struct test_case llvm_tests[] = { 199 + #ifdef HAVE_LIBBPF_SUPPORT 200 + TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog), 201 + TEST_CASE("kbuild searching", llvm__bpf_test_kbuild_prog), 202 + TEST_CASE("Compile source for BPF prologue generation", 203 + llvm__bpf_test_prologue_prog), 204 + TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation), 205 + #else 206 + TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"), 207 + TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"), 208 + TEST_CASE_REASON("Compile source for BPF prologue generation", 209 + llvm__bpf_test_prologue_prog, "not compiled in"), 210 + TEST_CASE_REASON("Compile source for BPF relocation", 211 + llvm__bpf_test_relocation, "not compiled in"), 212 + #endif 213 + { .name = NULL, } 214 + }; 215 + 216 + struct test_suite suite__llvm = { 217 + .desc = "LLVM search and compile", 218 + .test_cases = llvm_tests, 219 + };
+3 -1
tools/perf/tests/maps.c
··· 33 33 return TEST_OK; 34 34 } 35 35 36 - int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) 36 + static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 37 37 { 38 38 struct maps maps; 39 39 unsigned int i; ··· 120 120 maps__exit(&maps); 121 121 return TEST_OK; 122 122 } 123 + 124 + DEFINE_SUITE("maps__merge_in", maps__merge_in);
+3 -1
tools/perf/tests/mem.c
··· 23 23 return 0; 24 24 } 25 25 26 - int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused) 26 + static int test__mem(struct test_suite *text __maybe_unused, int subtest __maybe_unused) 27 27 { 28 28 int ret = 0; 29 29 union perf_mem_data_src src; ··· 56 56 57 57 return ret; 58 58 } 59 + 60 + DEFINE_SUITE("Test data source output", mem);
+3 -1
tools/perf/tests/mem2node.c
··· 43 43 return bm && map ? bm : NULL; 44 44 } 45 45 46 - int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused) 46 + static int test__mem2node(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 47 47 { 48 48 struct mem2node map; 49 49 struct memory_node nodes[3]; ··· 77 77 mem2node__exit(&map); 78 78 return 0; 79 79 } 80 + 81 + DEFINE_SUITE("mem2node", mem2node);
+3 -1
tools/perf/tests/mmap-basic.c
··· 29 29 * Then it checks if the number of syscalls reported as perf events by 30 30 * the kernel corresponds to the number of syscalls made. 31 31 */ 32 - int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused) 32 + static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 33 33 { 34 34 int err = -1; 35 35 union perf_event *event; ··· 164 164 perf_thread_map__put(threads); 165 165 return err; 166 166 } 167 + 168 + DEFINE_SUITE("Read samples using the mmap interface", basic_mmap);
+3 -1
tools/perf/tests/mmap-thread-lookup.c
··· 224 224 * 225 225 * by using all thread objects. 226 226 */ 227 - int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused) 227 + static int test__mmap_thread_lookup(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 228 228 { 229 229 /* perf_event__synthesize_threads synthesize */ 230 230 TEST_ASSERT_VAL("failed with sythesizing all", ··· 236 236 237 237 return 0; 238 238 } 239 + 240 + DEFINE_SUITE("Lookup mmap thread", mmap_thread_lookup);
+4 -1
tools/perf/tests/openat-syscall-all-cpus.c
··· 19 19 #include "stat.h" 20 20 #include "util/counts.h" 21 21 22 - int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused) 22 + static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, 23 + int subtest __maybe_unused) 23 24 { 24 25 int err = -1, fd, cpu; 25 26 struct perf_cpu_map *cpus; ··· 128 127 perf_thread_map__put(threads); 129 128 return err; 130 129 } 130 + 131 + DEFINE_SUITE("Detect openat syscall event on all cpus", openat_syscall_event_on_all_cpus);
+4 -1
tools/perf/tests/openat-syscall-tp-fields.c
··· 22 22 #define AT_FDCWD -100 23 23 #endif 24 24 25 - int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused) 25 + static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused, 26 + int subtest __maybe_unused) 26 27 { 27 28 struct record_opts opts = { 28 29 .target = { ··· 143 142 out: 144 143 return err; 145 144 } 145 + 146 + DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields);
+4 -1
tools/perf/tests/openat-syscall.c
··· 13 13 #include "tests.h" 14 14 #include "util/counts.h" 15 15 16 - int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused) 16 + static int test__openat_syscall_event(struct test_suite *test __maybe_unused, 17 + int subtest __maybe_unused) 17 18 { 18 19 int err = -1, fd; 19 20 struct evsel *evsel; ··· 67 66 perf_thread_map__put(threads); 68 67 return err; 69 68 } 69 + 70 + DEFINE_SUITE("Detect openat syscall event", openat_syscall_event);
+3 -1
tools/perf/tests/parse-events.c
··· 2276 2276 return test_event(&e); 2277 2277 } 2278 2278 2279 - int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused) 2279 + static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 2280 2280 { 2281 2281 int ret1, ret2 = 0; 2282 2282 char *event, *alias; ··· 2319 2319 2320 2320 return ret2; 2321 2321 } 2322 + 2323 + DEFINE_SUITE("Parse event definition strings", parse_events);
+3 -1
tools/perf/tests/parse-metric.c
··· 369 369 return 0; 370 370 } 371 371 372 - int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) 372 + static int test__parse_metric(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 373 373 { 374 374 TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); 375 375 TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); ··· 383 383 } 384 384 return 0; 385 385 } 386 + 387 + DEFINE_SUITE("Parse and process metrics", parse_metric);
+4 -1
tools/perf/tests/parse-no-sample-id-all.c
··· 67 67 * 68 68 * Return: %0 on success, %-1 if the test fails. 69 69 */ 70 - int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused) 70 + static int test__parse_no_sample_id_all(struct test_suite *test __maybe_unused, 71 + int subtest __maybe_unused) 71 72 { 72 73 int err; 73 74 ··· 104 103 105 104 return 0; 106 105 } 106 + 107 + DEFINE_SUITE("Parse with no sample_id_all bit set", parse_no_sample_id_all);
+4 -2
tools/perf/tests/pe-file-parsing.c
··· 68 68 return TEST_OK; 69 69 } 70 70 71 - int test__pe_file_parsing(struct test *test __maybe_unused, 71 + static int test__pe_file_parsing(struct test_suite *test __maybe_unused, 72 72 int subtest __maybe_unused) 73 73 { 74 74 struct stat st; ··· 89 89 90 90 #else 91 91 92 - int test__pe_file_parsing(struct test *test __maybe_unused, 92 + static int test__pe_file_parsing(struct test_suite *test __maybe_unused, 93 93 int subtest __maybe_unused) 94 94 { 95 95 return TEST_SKIP; 96 96 } 97 97 98 98 #endif 99 + 100 + DEFINE_SUITE("PE file support", pe_file_parsing);
+3 -1
tools/perf/tests/perf-hooks.c
··· 26 26 raise(SIGSEGV); 27 27 } 28 28 29 - int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) 29 + static int test__perf_hooks(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 30 30 { 31 31 int hook_flags = 0; 32 32 ··· 45 45 return TEST_FAIL; 46 46 return TEST_OK; 47 47 } 48 + 49 + DEFINE_SUITE("perf hooks", perf_hooks);
+3 -1
tools/perf/tests/perf-record.c
··· 41 41 return cpu; 42 42 } 43 43 44 - int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused) 44 + static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 45 45 { 46 46 struct record_opts opts = { 47 47 .target = { ··· 332 332 out: 333 333 return (err < 0 || errs > 0) ? -1 : 0; 334 334 } 335 + 336 + DEFINE_SUITE("PERF_RECORD_* events & perf_sample fields", PERF_RECORD);
+17 -13
tools/perf/tests/perf-time-to-tsc.c
··· 23 23 #include "pmu.h" 24 24 #include "pmu-hybrid.h" 25 25 26 + /* 27 + * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just 28 + * enable the test for x86_64/i386 and Arm64 archs. 29 + */ 30 + #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) 31 + #define TSC_IS_SUPPORTED 1 32 + #else 33 + #define TSC_IS_SUPPORTED 0 34 + #endif 35 + 26 36 #define CHECK__(x) { \ 27 37 while ((x) < 0) { \ 28 38 pr_debug(#x " failed!\n"); \ ··· 55 45 * %0 is returned, otherwise %-1 is returned. If TSC conversion is not 56 46 * supported then then the test passes but " (not supported)" is printed. 57 47 */ 58 - int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) 48 + static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 59 49 { 60 50 struct record_opts opts = { 61 51 .mmap_pages = UINT_MAX, ··· 78 68 u64 test_tsc, comm1_tsc, comm2_tsc; 79 69 u64 test_time, comm1_time = 0, comm2_time = 0; 80 70 struct mmap *md; 71 + 72 + if (!TSC_IS_SUPPORTED) { 73 + pr_debug("Test not supported on this architecture"); 74 + return TEST_SKIP; 75 + } 81 76 82 77 threads = thread_map__new(-1, getpid(), UINT_MAX); 83 78 CHECK_NOT_NULL__(threads); ··· 200 185 return err; 201 186 } 202 187 203 - bool test__tsc_is_supported(void) 204 - { 205 - /* 206 - * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. 207 - * Just enable the test for x86_64/i386 and Arm64 archs. 208 - */ 209 - #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) 210 - return true; 211 - #else 212 - return false; 213 - #endif 214 - } 188 + DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc);
+25 -44
tools/perf/tests/pfm.c
··· 12 12 #include <linux/kernel.h> 13 13 14 14 #ifdef HAVE_LIBPFM 15 - static int test__pfm_events(void); 16 - static int test__pfm_group(void); 17 - #endif 18 - 19 - static const struct { 20 - int (*func)(void); 21 - const char *desc; 22 - } pfm_testcase_table[] = { 23 - #ifdef HAVE_LIBPFM 24 - { 25 - .func = test__pfm_events, 26 - .desc = "test of individual --pfm-events", 27 - }, 28 - { 29 - .func = test__pfm_group, 30 - .desc = "test groups of --pfm-events", 31 - }, 32 - #endif 33 - }; 34 - 35 - #ifdef HAVE_LIBPFM 36 15 static int count_pfm_events(struct perf_evlist *evlist) 37 16 { 38 17 struct perf_evsel *evsel; ··· 23 44 return count; 24 45 } 25 46 26 - static int test__pfm_events(void) 47 + static int test__pfm_events(struct test_suite *test __maybe_unused, 48 + int subtest __maybe_unused) 27 49 { 28 50 struct evlist *evlist; 29 51 struct option opt; ··· 84 104 return 0; 85 105 } 86 106 87 - static int test__pfm_group(void) 107 + static int test__pfm_group(struct test_suite *test __maybe_unused, 108 + int subtest __maybe_unused) 88 109 { 89 110 struct evlist *evlist; 90 111 struct option opt; ··· 168 187 } 169 188 return 0; 170 189 } 171 - #endif 172 - 173 - const char *test__pfm_subtest_get_desc(int i) 174 - { 175 - if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) 176 - return NULL; 177 - return pfm_testcase_table[i].desc; 178 - } 179 - 180 - int test__pfm_subtest_get_nr(void) 181 - { 182 - return (int)ARRAY_SIZE(pfm_testcase_table); 183 - } 184 - 185 - int test__pfm(struct test *test __maybe_unused, int i __maybe_unused) 186 - { 187 - #ifdef HAVE_LIBPFM 188 - if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) 189 - return TEST_FAIL; 190 - return pfm_testcase_table[i].func(); 191 190 #else 191 + static int test__pfm_events(struct test_suite *test __maybe_unused, 192 + int subtest __maybe_unused) 193 + { 192 194 return TEST_SKIP; 193 - #endif 194 195 } 196 + 197 + static int test__pfm_group(struct test_suite *test __maybe_unused, 198 + int subtest __maybe_unused) 199 + { 200 + return TEST_SKIP; 201 + } 202 + #endif 203 + 204 + static struct test_case pfm_tests[] = { 205 + TEST_CASE_REASON("test of individual --pfm-events", pfm_events, "not compiled in"), 206 + TEST_CASE_REASON("test groups of --pfm-events", pfm_group, "not compiled in"), 207 + { .name = NULL, } 208 + }; 209 + 210 + struct test_suite suite__pfm = { 211 + .desc = "Test libpfm4 support", 212 + .test_cases = pfm_tests, 213 + };
+19 -54
tools/perf/tests/pmu-events.c
··· 418 418 } 419 419 420 420 /* Verify generated events from pmu-events.c are as expected */ 421 - static int test_pmu_event_table(void) 421 + static int test__pmu_event_table(struct test_suite *test __maybe_unused, 422 + int subtest __maybe_unused) 422 423 { 423 424 const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); 424 425 const struct pmu_events_map *map = __test_pmu_get_events_map(); ··· 706 705 }; 707 706 708 707 /* Test that aliases generated are as expected */ 709 - static int test_aliases(void) 708 + static int test__aliases(struct test_suite *test __maybe_unused, 709 + int subtest __maybe_unused) 710 710 { 711 711 struct perf_pmu *pmu = NULL; 712 712 unsigned long i; ··· 894 892 895 893 } 896 894 897 - static int test_parsing(void) 895 + static int test__parsing(struct test_suite *test __maybe_unused, 896 + int subtest __maybe_unused) 898 897 { 899 898 const struct pmu_events_map *cpus_map = pmu_events_map__find(); 900 899 const struct pmu_events_map *map; ··· 1037 1034 * or all defined cpus via the 'fake_pmu' 1038 1035 * in parse_events. 1039 1036 */ 1040 - static int test_parsing_fake(void) 1037 + static int test__parsing_fake(struct test_suite *test __maybe_unused, 1038 + int subtest __maybe_unused) 1041 1039 { 1042 1040 const struct pmu_events_map *map; 1043 1041 const struct pmu_event *pe; ··· 1072 1068 return 0; 1073 1069 } 1074 1070 1075 - static const struct { 1076 - int (*func)(void); 1077 - const char *desc; 1078 - } pmu_events_testcase_table[] = { 1079 - { 1080 - .func = test_pmu_event_table, 1081 - .desc = "PMU event table sanity", 1082 - }, 1083 - { 1084 - .func = test_aliases, 1085 - .desc = "PMU event map aliases", 1086 - }, 1087 - { 1088 - .func = test_parsing, 1089 - .desc = "Parsing of PMU event table metrics", 1090 - }, 1091 - { 1092 - .func = test_parsing_fake, 1093 - .desc = "Parsing of PMU event table metrics with fake PMUs", 1094 - }, 1071 + static struct test_case pmu_events_tests[] = { 1072 + TEST_CASE("PMU event table sanity", pmu_event_table), 1073 + TEST_CASE("PMU event map aliases", aliases), 1074 + TEST_CASE_REASON("Parsing of PMU event table metrics", parsing, 1075 + "some metrics failed"), 1076 + TEST_CASE("Parsing of PMU event table metrics with fake PMUs", parsing_fake), 1077 + { .name = NULL, } 1095 1078 }; 1096 1079 1097 - const char *test__pmu_events_subtest_get_desc(int subtest) 1098 - { 1099 - if (subtest < 0 || 1100 - subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 1101 - return NULL; 1102 - return pmu_events_testcase_table[subtest].desc; 1103 - } 1104 - 1105 - const char *test__pmu_events_subtest_skip_reason(int subtest) 1106 - { 1107 - if (subtest < 0 || 1108 - subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 1109 - return NULL; 1110 - if (pmu_events_testcase_table[subtest].func != test_parsing) 1111 - return NULL; 1112 - return "some metrics failed"; 1113 - } 1114 - 1115 - int test__pmu_events_subtest_get_nr(void) 1116 - { 1117 - return (int)ARRAY_SIZE(pmu_events_testcase_table); 1118 - } 1119 - 1120 - int test__pmu_events(struct test *test __maybe_unused, int subtest) 1121 - { 1122 - if (subtest < 0 || 1123 - subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) 1124 - return TEST_FAIL; 1125 - return pmu_events_testcase_table[subtest].func(); 1126 - } 1080 + struct test_suite suite__pmu_events = { 1081 + .desc = "PMU events", 1082 + .test_cases = pmu_events_tests, 1083 + };
+3 -1
tools/perf/tests/pmu.c
··· 137 137 return &terms; 138 138 } 139 139 140 - int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) 140 + static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 141 141 { 142 142 char *format = test_format_dir_get(); 143 143 LIST_HEAD(formats); ··· 177 177 test_format_dir_put(format); 178 178 return ret; 179 179 } 180 + 181 + DEFINE_SUITE("Parse perf pmu format", pmu);
+3 -1
tools/perf/tests/python-use.c
··· 9 9 #include "tests.h" 10 10 #include "util/debug.h" 11 11 12 - int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) 12 + static int test__python_use(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 13 13 { 14 14 char *cmd; 15 15 int ret; ··· 23 23 free(cmd); 24 24 return ret; 25 25 } 26 + 27 + DEFINE_SUITE("'import perf' in python", python_use);
+3 -1
tools/perf/tests/sample-parsing.c
··· 368 368 * checks sample format bits separately and together. If the test passes %0 is 369 369 * returned, otherwise %-1 is returned. 370 370 */ 371 - int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) 371 + static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 372 372 { 373 373 const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; 374 374 u64 sample_type; ··· 426 426 427 427 return 0; 428 428 } 429 + 430 + DEFINE_SUITE("Sample parsing", sample_parsing);
+4 -2
tools/perf/tests/sdt.c
··· 76 76 return ret; 77 77 } 78 78 79 - int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) 79 + static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused) 80 80 { 81 81 int ret = TEST_FAIL; 82 82 char __tempdir[] = "./test-buildid-XXXXXX"; ··· 114 114 return ret; 115 115 } 116 116 #else 117 - int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) 117 + static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused) 118 118 { 119 119 pr_debug("Skip SDT event test because SDT support is not compiled\n"); 120 120 return TEST_SKIP; 121 121 } 122 122 #endif 123 + 124 + DEFINE_SUITE("Probe SDT events", sdt_event);
+1 -1
tools/perf/tests/shell/record+zstd_comp_decomp.sh
··· 12 12 13 13 collect_z_record() { 14 14 echo "Collecting compressed record file:" 15 - [[ "$(uname -m)" != s390x ]] && gflag='-g' 15 + [ "$(uname -m)" != s390x ] && gflag='-g' 16 16 $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ 17 17 dd count=500 if=/dev/urandom of=/dev/null 18 18 }
+2 -2
tools/perf/tests/shell/stat_all_pmu.sh
··· 7 7 for p in $(perf list --raw-dump pmu); do 8 8 echo "Testing $p" 9 9 result=$(perf stat -e "$p" true 2>&1) 10 - if [[ ! "$result" =~ "$p" ]] && [[ ! "$result" =~ "<not supported>" ]]; then 10 + if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then 11 11 # We failed to see the event and it is supported. Possibly the workload was 12 12 # too small so retry with something longer. 13 13 result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) 14 - if [[ ! "$result" =~ "$p" ]]; then 14 + if ! echo "$result" | grep -q "$p" ; then 15 15 echo "Event '$p' not printed in:" 16 16 echo "$result" 17 17 exit 1
+1 -1
tools/perf/tests/shell/stat_bpf_counters.sh
··· 23 23 24 24 # skip if --bpf-counters is not supported 25 25 if ! perf stat --bpf-counters true > /dev/null 2>&1; then 26 - if [ "$1" == "-v" ]; then 26 + if [ "$1" = "-v" ]; then 27 27 echo "Skipping: --bpf-counters not supported" 28 28 perf --no-pager stat --bpf-counters true || true 29 29 fi
+89
tools/perf/tests/shell/test_arm_spe.sh
··· 1 + #!/bin/sh 2 + # Check Arm SPE trace data recording and synthesized samples 3 + 4 + # Uses the 'perf record' to record trace data of Arm SPE events; 5 + # then verify if any SPE event samples are generated by SPE with 6 + # 'perf script' and 'perf report' commands. 7 + 8 + # SPDX-License-Identifier: GPL-2.0 9 + # German Gomez <german.gomez@arm.com>, 2021 10 + 11 + skip_if_no_arm_spe_event() { 12 + perf list | egrep -q 'arm_spe_[0-9]+//' && return 0 13 + 14 + # arm_spe event doesn't exist 15 + return 2 16 + } 17 + 18 + skip_if_no_arm_spe_event || exit 2 19 + 20 + perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) 21 + glb_err=0 22 + 23 + cleanup_files() 24 + { 25 + rm -f ${perfdata} 26 + exit $glb_err 27 + } 28 + 29 + trap cleanup_files exit term int 30 + 31 + arm_spe_report() { 32 + if [ $2 != 0 ]; then 33 + echo "$1: FAIL" 34 + glb_err=$2 35 + else 36 + echo "$1: PASS" 37 + fi 38 + } 39 + 40 + perf_script_samples() { 41 + echo "Looking at perf.data file for dumping samples:" 42 + 43 + # from arm-spe.c/arm_spe_synth_events() 44 + events="(ld1-miss|ld1-access|llc-miss|lld-access|tlb-miss|tlb-access|branch-miss|remote-access|memory)" 45 + 46 + # Below is an example of the samples dumping: 47 + # dd 3048 [002] 1 l1d-access: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) 48 + # dd 3048 [002] 1 tlb-access: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) 49 + # dd 3048 [002] 1 memory: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) 50 + perf script -F,-time -i ${perfdata} 2>&1 | \ 51 + egrep " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1 52 + } 53 + 54 + perf_report_samples() { 55 + echo "Looking at perf.data file for reporting samples:" 56 + 57 + # Below is an example of the samples reporting: 58 + # 73.04% 73.04% dd libc-2.27.so [.] _dl_addr 59 + # 7.71% 7.71% dd libc-2.27.so [.] getenv 60 + # 2.59% 2.59% dd ld-2.27.so [.] strcmp 61 + perf report --stdio -i ${perfdata} 2>&1 | \ 62 + egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1 63 + } 64 + 65 + arm_spe_snapshot_test() { 66 + echo "Recording trace with snapshot mode $perfdata" 67 + perf record -o ${perfdata} -e arm_spe// -S \ 68 + -- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 & 69 + PERFPID=$! 70 + 71 + # Wait for perf program 72 + sleep 1 73 + 74 + # Send signal to snapshot trace data 75 + kill -USR2 $PERFPID 76 + 77 + # Stop perf program 78 + kill $PERFPID 79 + wait $PERFPID 80 + 81 + perf_script_samples dd && 82 + perf_report_samples dd 83 + 84 + err=$? 85 + arm_spe_report "SPE snapshot testing" $err 86 + } 87 + 88 + arm_spe_snapshot_test 89 + exit $glb_err
+8 -3
tools/perf/tests/stat.c
··· 47 47 return 0; 48 48 } 49 49 50 - int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused) 50 + static int test__synthesize_stat_config(struct test_suite *test __maybe_unused, 51 + int subtest __maybe_unused) 51 52 { 52 53 struct perf_stat_config stat_config = { 53 54 .aggr_mode = AGGR_CORE, ··· 78 77 return 0; 79 78 } 80 79 81 - int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused) 80 + static int test__synthesize_stat(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 82 81 { 83 82 struct perf_counts_values count; 84 83 ··· 104 103 return 0; 105 104 } 106 105 107 - int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused) 106 + static int test__synthesize_stat_round(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 108 107 { 109 108 TEST_ASSERT_VAL("failed to synthesize stat_config", 110 109 !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL, ··· 112 111 113 112 return 0; 114 113 } 114 + 115 + DEFINE_SUITE("Synthesize stat config", synthesize_stat_config); 116 + DEFINE_SUITE("Synthesize stat", synthesize_stat); 117 + DEFINE_SUITE("Synthesize stat round", synthesize_stat_round);
+3 -1
tools/perf/tests/sw-clock.c
··· 133 133 return err; 134 134 } 135 135 136 - int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused) 136 + static int test__sw_clock_freq(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 137 137 { 138 138 int ret; 139 139 ··· 143 143 144 144 return ret; 145 145 } 146 + 147 + DEFINE_SUITE("Software clock events period values", sw_clock_freq);
+3 -1
tools/perf/tests/switch-tracking.c
··· 321 321 * evsel->core.system_wide and evsel->tracking flags (respectively) with other events 322 322 * sometimes enabled or disabled. 323 323 */ 324 - int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) 324 + static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 325 325 { 326 326 const char *sched_switch = "sched:sched_switch"; 327 327 struct switch_tracking switch_tracking = { .tids = NULL, }; ··· 588 588 err = -1; 589 589 goto out; 590 590 } 591 + 592 + DEFINE_SUITE("Track with sched_switch", switch_tracking);
+3 -1
tools/perf/tests/task-exit.c
··· 39 39 * if the number of exit event reported by the kernel is 1 or not 40 40 * in order to check the kernel returns correct number of event. 41 41 */ 42 - int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused) 42 + static int test__task_exit(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 43 43 { 44 44 int err = -1; 45 45 union perf_event *event; ··· 151 151 evlist__delete(evlist); 152 152 return err; 153 153 } 154 + 155 + DEFINE_SUITE("Number of exit events of a simple workload", task_exit);
+136 -102
tools/perf/tests/tests.h
··· 27 27 TEST_SKIP = -2, 28 28 }; 29 29 30 - struct test { 30 + struct test_suite; 31 + 32 + typedef int (*test_fnptr)(struct test_suite *, int); 33 + 34 + struct test_case { 35 + const char *name; 31 36 const char *desc; 32 - int (*func)(struct test *test, int subtest); 33 - struct { 34 - bool skip_if_fail; 35 - int (*get_nr)(void); 36 - const char *(*get_desc)(int subtest); 37 - const char *(*skip_reason)(int subtest); 38 - } subtest; 39 - bool (*is_supported)(void); 37 + const char *skip_reason; 38 + test_fnptr run_case; 39 + }; 40 + 41 + struct test_suite { 42 + const char *desc; 43 + struct test_case *test_cases; 40 44 void *priv; 41 45 }; 42 46 43 - /* Tests */ 44 - int test__vmlinux_matches_kallsyms(struct test *test, int subtest); 45 - int test__openat_syscall_event(struct test *test, int subtest); 46 - int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest); 47 - int test__basic_mmap(struct test *test, int subtest); 48 - int test__PERF_RECORD(struct test *test, int subtest); 49 - int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest); 50 - int test__perf_evsel__tp_sched_test(struct test *test, int subtest); 51 - int test__syscall_openat_tp_fields(struct test *test, int subtest); 52 - int test__pmu(struct test *test, int subtest); 53 - int test__pmu_events(struct test *test, int subtest); 54 - const char *test__pmu_events_subtest_get_desc(int subtest); 55 - const char *test__pmu_events_subtest_skip_reason(int subtest); 56 - int test__pmu_events_subtest_get_nr(void); 57 - int test__attr(struct test *test, int subtest); 58 - int test__dso_data(struct test *test, int subtest); 59 - int test__dso_data_cache(struct test *test, int subtest); 60 - int test__dso_data_reopen(struct test *test, int subtest); 61 - int test__parse_events(struct test *test, int subtest); 62 - int test__hists_link(struct test *test, int subtest); 63 - int test__python_use(struct test *test, int subtest); 64 - int test__bp_signal(struct test *test, int subtest); 65 - int test__bp_signal_overflow(struct test *test, int subtest); 66 - int test__bp_accounting(struct test *test, int subtest); 67 - int test__wp(struct test *test, int subtest); 68 - const char *test__wp_subtest_get_desc(int subtest); 69 - const char *test__wp_subtest_skip_reason(int subtest); 70 - int test__wp_subtest_get_nr(void); 71 - int test__task_exit(struct test *test, int subtest); 72 - int test__mem(struct test *test, int subtest); 73 - int test__sw_clock_freq(struct test *test, int subtest); 74 - int test__code_reading(struct test *test, int subtest); 75 - int test__sample_parsing(struct test *test, int subtest); 76 - int test__keep_tracking(struct test *test, int subtest); 77 - int test__parse_no_sample_id_all(struct test *test, int subtest); 78 - int test__dwarf_unwind(struct test *test, int subtest); 79 - int test__expr(struct test *test, int subtest); 80 - int test__hists_filter(struct test *test, int subtest); 81 - int test__mmap_thread_lookup(struct test *test, int subtest); 82 - int test__thread_maps_share(struct test *test, int subtest); 83 - int test__hists_output(struct test *test, int subtest); 84 - int test__hists_cumulate(struct test *test, int subtest); 85 - int test__switch_tracking(struct test *test, int subtest); 86 - int test__fdarray__filter(struct test *test, int subtest); 87 - int test__fdarray__add(struct test *test, int subtest); 88 - int test__kmod_path__parse(struct test *test, int subtest); 89 - int test__thread_map(struct test *test, int subtest); 90 - int test__llvm(struct test 
*test, int subtest); 91 - const char *test__llvm_subtest_get_desc(int subtest); 92 - int test__llvm_subtest_get_nr(void); 93 - int test__bpf(struct test *test, int subtest); 94 - const char *test__bpf_subtest_get_desc(int subtest); 95 - int test__bpf_subtest_get_nr(void); 96 - int test__session_topology(struct test *test, int subtest); 97 - int test__thread_map_synthesize(struct test *test, int subtest); 98 - int test__thread_map_remove(struct test *test, int subtest); 99 - int test__cpu_map_synthesize(struct test *test, int subtest); 100 - int test__synthesize_stat_config(struct test *test, int subtest); 101 - int test__synthesize_stat(struct test *test, int subtest); 102 - int test__synthesize_stat_round(struct test *test, int subtest); 103 - int test__event_update(struct test *test, int subtest); 104 - int test__event_times(struct test *test, int subtest); 105 - int test__backward_ring_buffer(struct test *test, int subtest); 106 - int test__cpu_map_print(struct test *test, int subtest); 107 - int test__cpu_map_merge(struct test *test, int subtest); 108 - int test__sdt_event(struct test *test, int subtest); 109 - int test__is_printable_array(struct test *test, int subtest); 110 - int test__bitmap_print(struct test *test, int subtest); 111 - int test__perf_hooks(struct test *test, int subtest); 112 - int test__clang(struct test *test, int subtest); 113 - const char *test__clang_subtest_get_desc(int subtest); 114 - int test__clang_subtest_get_nr(void); 115 - int test__unit_number__scnprint(struct test *test, int subtest); 116 - int test__mem2node(struct test *t, int subtest); 117 - int test__maps__merge_in(struct test *t, int subtest); 118 - int test__time_utils(struct test *t, int subtest); 119 - int test__jit_write_elf(struct test *test, int subtest); 120 - int test__api_io(struct test *test, int subtest); 121 - int test__demangle_java(struct test *test, int subtest); 122 - int test__demangle_ocaml(struct test *test, int subtest); 123 - int test__pfm(struct test *test, int subtest); 124 - const char *test__pfm_subtest_get_desc(int subtest); 125 - int test__pfm_subtest_get_nr(void); 126 - int test__parse_metric(struct test *test, int subtest); 127 - int test__pe_file_parsing(struct test *test, int subtest); 128 - int test__expand_cgroup_events(struct test *test, int subtest); 129 - int test__perf_time_to_tsc(struct test *test, int subtest); 130 - int test__dlfilter(struct test *test, int subtest); 47 + #define DECLARE_SUITE(name) \ 48 + extern struct test_suite suite__##name; 131 49 132 - bool test__bp_signal_is_supported(void); 133 - bool test__bp_account_is_supported(void); 134 - bool test__wp_is_supported(void); 135 - bool test__tsc_is_supported(void); 50 + #define TEST_CASE(description, _name) \ 51 + { \ 52 + .name = #_name, \ 53 + .desc = description, \ 54 + .run_case = test__##_name, \ 55 + } 56 + 57 + #define TEST_CASE_REASON(description, _name, _reason) \ 58 + { \ 59 + .name = #_name, \ 60 + .desc = description, \ 61 + .run_case = test__##_name, \ 62 + .skip_reason = _reason, \ 63 + } 64 + 65 + #define DEFINE_SUITE(description, _name) \ 66 + struct test_case tests__##_name[] = { \ 67 + TEST_CASE(description, _name), \ 68 + { .name = NULL, } \ 69 + }; \ 70 + struct test_suite suite__##_name = { \ 71 + .desc = description, \ 72 + .test_cases = tests__##_name, \ 73 + } 74 + 75 + /* Tests */ 76 + DECLARE_SUITE(vmlinux_matches_kallsyms); 77 + DECLARE_SUITE(openat_syscall_event); 78 + DECLARE_SUITE(openat_syscall_event_on_all_cpus); 79 + DECLARE_SUITE(basic_mmap); 80 + 
DECLARE_SUITE(PERF_RECORD); 81 + DECLARE_SUITE(perf_evsel__roundtrip_name_test); 82 + DECLARE_SUITE(perf_evsel__tp_sched_test); 83 + DECLARE_SUITE(syscall_openat_tp_fields); 84 + DECLARE_SUITE(pmu); 85 + DECLARE_SUITE(pmu_events); 86 + DECLARE_SUITE(attr); 87 + DECLARE_SUITE(dso_data); 88 + DECLARE_SUITE(dso_data_cache); 89 + DECLARE_SUITE(dso_data_reopen); 90 + DECLARE_SUITE(parse_events); 91 + DECLARE_SUITE(hists_link); 92 + DECLARE_SUITE(python_use); 93 + DECLARE_SUITE(bp_signal); 94 + DECLARE_SUITE(bp_signal_overflow); 95 + DECLARE_SUITE(bp_accounting); 96 + DECLARE_SUITE(wp); 97 + DECLARE_SUITE(task_exit); 98 + DECLARE_SUITE(mem); 99 + DECLARE_SUITE(sw_clock_freq); 100 + DECLARE_SUITE(code_reading); 101 + DECLARE_SUITE(sample_parsing); 102 + DECLARE_SUITE(keep_tracking); 103 + DECLARE_SUITE(parse_no_sample_id_all); 104 + DECLARE_SUITE(dwarf_unwind); 105 + DECLARE_SUITE(expr); 106 + DECLARE_SUITE(hists_filter); 107 + DECLARE_SUITE(mmap_thread_lookup); 108 + DECLARE_SUITE(thread_maps_share); 109 + DECLARE_SUITE(hists_output); 110 + DECLARE_SUITE(hists_cumulate); 111 + DECLARE_SUITE(switch_tracking); 112 + DECLARE_SUITE(fdarray__filter); 113 + DECLARE_SUITE(fdarray__add); 114 + DECLARE_SUITE(kmod_path__parse); 115 + DECLARE_SUITE(thread_map); 116 + DECLARE_SUITE(llvm); 117 + DECLARE_SUITE(bpf); 118 + DECLARE_SUITE(session_topology); 119 + DECLARE_SUITE(thread_map_synthesize); 120 + DECLARE_SUITE(thread_map_remove); 121 + DECLARE_SUITE(cpu_map_synthesize); 122 + DECLARE_SUITE(synthesize_stat_config); 123 + DECLARE_SUITE(synthesize_stat); 124 + DECLARE_SUITE(synthesize_stat_round); 125 + DECLARE_SUITE(event_update); 126 + DECLARE_SUITE(event_times); 127 + DECLARE_SUITE(backward_ring_buffer); 128 + DECLARE_SUITE(cpu_map_print); 129 + DECLARE_SUITE(cpu_map_merge); 130 + DECLARE_SUITE(sdt_event); 131 + DECLARE_SUITE(is_printable_array); 132 + DECLARE_SUITE(bitmap_print); 133 + DECLARE_SUITE(perf_hooks); 134 + DECLARE_SUITE(clang); 135 + DECLARE_SUITE(unit_number__scnprint); 136 + DECLARE_SUITE(mem2node); 137 + DECLARE_SUITE(maps__merge_in); 138 + DECLARE_SUITE(time_utils); 139 + DECLARE_SUITE(jit_write_elf); 140 + DECLARE_SUITE(api_io); 141 + DECLARE_SUITE(demangle_java); 142 + DECLARE_SUITE(demangle_ocaml); 143 + DECLARE_SUITE(pfm); 144 + DECLARE_SUITE(parse_metric); 145 + DECLARE_SUITE(pe_file_parsing); 146 + DECLARE_SUITE(expand_cgroup_events); 147 + DECLARE_SUITE(perf_time_to_tsc); 148 + DECLARE_SUITE(dlfilter); 149 + 150 + /* 151 + * PowerPC and S390 do not support creation of instruction breakpoints using the 152 + * perf_event interface. 153 + * 154 + * ARM requires explicit rounding down of the instruction pointer in Thumb mode, 155 + * and then requires the single-step to be handled explicitly in the overflow 156 + * handler to avoid stepping into the SIGIO handler and getting stuck on the 157 + * breakpointed instruction. 158 + * 159 + * Since arm64 has the same issue with arm for the single-step handling, this 160 + * case also gets stuck on the breakpointed instruction. 161 + * 162 + * Just disable the test for these architectures until these issues are 163 + * resolved. 
164 + */ 165 + #if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || defined(__aarch64__) 166 + #define BP_SIGNAL_IS_SUPPORTED 0 167 + #else 168 + #define BP_SIGNAL_IS_SUPPORTED 1 169 + #endif 136 170 137 171 #ifdef HAVE_DWARF_UNWIND_SUPPORT 138 172 struct thread; ··· 176 142 #endif 177 143 178 144 #if defined(__arm__) 179 - int test__vectors_page(struct test *test, int subtest); 145 + DECLARE_SUITE(vectors_page); 180 146 #endif 181 147 182 148 #endif /* TESTS_H */
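Purely illustrative: expanding DEFINE_SUITE("Session topology", session_topology) by hand, following the TEST_CASE() and DEFINE_SUITE() macros added above, gives the single-case shape most of the conversions in this series rely on:

	struct test_case tests__session_topology[] = {
		{
			.name     = "session_topology",
			.desc     = "Session topology",
			.run_case = test__session_topology,
		},
		{ .name = NULL, },
	};

	struct test_suite suite__session_topology = {
		.desc       = "Session topology",
		.test_cases = tests__session_topology,
	};

Suites with more than one case (the llvm, pfm, pmu_events and watchpoint conversions in this series) skip the macro and populate the NULL-terminated test_case array directly, optionally attaching a skip_reason via TEST_CASE_REASON().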
+7 -3
tools/perf/tests/thread-map.c
··· 19 19 #define NAME (const char *) "perf" 20 20 #define NAMEUL (unsigned long) NAME 21 21 22 - int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused) 22 + static int test__thread_map(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 23 23 { 24 24 struct perf_thread_map *map; 25 25 ··· 86 86 return 0; 87 87 } 88 88 89 - int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) 89 + static int test__thread_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 90 90 { 91 91 struct perf_thread_map *threads; 92 92 ··· 106 106 return 0; 107 107 } 108 108 109 - int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused) 109 + static int test__thread_map_remove(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 110 110 { 111 111 struct perf_thread_map *threads; 112 112 char *str; ··· 145 145 perf_thread_map__put(threads); 146 146 return 0; 147 147 } 148 + 149 + DEFINE_SUITE("Thread map", thread_map); 150 + DEFINE_SUITE("Synthesize thread map", thread_map_synthesize); 151 + DEFINE_SUITE("Remove thread map", thread_map_remove);
+3 -1
tools/perf/tests/thread-maps-share.c
··· 4 4 #include "thread.h" 5 5 #include "debug.h" 6 6 7 - int test__thread_maps_share(struct test *test __maybe_unused, int subtest __maybe_unused) 7 + static int test__thread_maps_share(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 8 8 { 9 9 struct machines machines; 10 10 struct machine *machine; ··· 96 96 machines__exit(&machines); 97 97 return 0; 98 98 } 99 + 100 + DEFINE_SUITE("Share thread maps", thread_maps_share);
+3 -1
tools/perf/tests/time-utils-test.c
··· 131 131 return pass; 132 132 } 133 133 134 - int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) 134 + static int test__time_utils(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 135 135 { 136 136 bool pass = true; 137 137 ··· 249 249 250 250 return pass ? 0 : TEST_FAIL; 251 251 } 252 + 253 + DEFINE_SUITE("time utils", time_utils);
+3 -1
tools/perf/tests/topology.c
··· 175 175 return 0; 176 176 } 177 177 178 - int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused) 178 + static int test__session_topology(struct test_suite *test __maybe_unused, int subtest __maybe_unused) 179 179 { 180 180 char path[PATH_MAX]; 181 181 struct perf_cpu_map *map; ··· 201 201 unlink(path); 202 202 return ret; 203 203 } 204 + 205 + DEFINE_SUITE("Session topology", session_topology);
+3 -1
tools/perf/tests/unit_number__scnprintf.c
··· 7 7 #include "units.h" 8 8 #include "debug.h" 9 9 10 - int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused) 10 + static int test__unit_number__scnprint(struct test_suite *t __maybe_unused, int subtest __maybe_unused) 11 11 { 12 12 struct { 13 13 u64 n; ··· 38 38 39 39 return TEST_OK; 40 40 } 41 + 42 + DEFINE_SUITE("unit_number__scnprintf", unit_number__scnprint);
+4 -1
tools/perf/tests/vmlinux-kallsyms.c
··· 111 111 return false; 112 112 } 113 113 114 - int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused) 114 + static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, 115 + int subtest __maybe_unused) 115 116 { 116 117 int err = -1; 117 118 struct rb_node *nd; ··· 353 352 machine__exit(&vmlinux); 354 353 return err; 355 354 } 355 + 356 + DEFINE_SUITE("vmlinux symtab matches kallsyms", vmlinux_matches_kallsyms);
+35 -89
tools/perf/tests/wp.c
··· 48 48 attr->exclude_hv = 1; 49 49 } 50 50 51 + #ifndef __s390x__ 51 52 static int __event(int wp_type, void *wp_addr, unsigned long wp_len) 52 53 { 53 54 int fd; ··· 62 61 63 62 return fd; 64 63 } 64 + #endif 65 65 66 - static int wp_ro_test(void) 66 + static int test__wp_ro(struct test_suite *test __maybe_unused, 67 + int subtest __maybe_unused) 67 68 { 69 + #if defined(__s390x__) || defined(__x86_64__) || defined(__i386__) 70 + return TEST_SKIP; 71 + #else 68 72 int fd; 69 73 unsigned long tmp, tmp1 = rand(); 70 74 ··· 85 79 86 80 close(fd); 87 81 return 0; 82 + #endif 88 83 } 89 84 90 - static int wp_wo_test(void) 85 + static int test__wp_wo(struct test_suite *test __maybe_unused, 86 + int subtest __maybe_unused) 91 87 { 88 + #if defined(__s390x__) 89 + return TEST_SKIP; 90 + #else 92 91 int fd; 93 92 unsigned long tmp, tmp1 = rand(); 94 93 ··· 109 98 110 99 close(fd); 111 100 return 0; 101 + #endif 112 102 } 113 103 114 - static int wp_rw_test(void) 104 + static int test__wp_rw(struct test_suite *test __maybe_unused, 105 + int subtest __maybe_unused) 115 106 { 107 + #if defined(__s390x__) 108 + return TEST_SKIP; 109 + #else 116 110 int fd; 117 111 unsigned long tmp, tmp1 = rand(); 118 112 ··· 134 118 135 119 close(fd); 136 120 return 0; 121 + #endif 137 122 } 138 123 139 - static int wp_modify_test(void) 124 + static int test__wp_modify(struct test_suite *test __maybe_unused, 125 + int subtest __maybe_unused) 140 126 { 127 + #if defined(__s390x__) 128 + return TEST_SKIP; 129 + #else 141 130 int fd, ret; 142 131 unsigned long tmp = rand(); 143 132 struct perf_event_attr new_attr; ··· 184 163 185 164 close(fd); 186 165 return 0; 187 - } 188 - 189 - static bool wp_ro_supported(void) 190 - { 191 - #if defined (__x86_64__) || defined (__i386__) 192 - return false; 193 - #else 194 - return true; 195 166 #endif 196 167 } 197 168 198 - static const char *wp_ro_skip_msg(void) 199 - { 200 - #if defined (__x86_64__) || defined (__i386__) 201 - return "missing hardware support"; 202 - #else 203 - return NULL; 204 - #endif 205 - } 206 - 207 - static struct { 208 - const char *desc; 209 - int (*target_func)(void); 210 - bool (*is_supported)(void); 211 - const char *(*skip_msg)(void); 212 - } wp_testcase_table[] = { 213 - { 214 - .desc = "Read Only Watchpoint", 215 - .target_func = &wp_ro_test, 216 - .is_supported = &wp_ro_supported, 217 - .skip_msg = &wp_ro_skip_msg, 218 - }, 219 - { 220 - .desc = "Write Only Watchpoint", 221 - .target_func = &wp_wo_test, 222 - }, 223 - { 224 - .desc = "Read / Write Watchpoint", 225 - .target_func = &wp_rw_test, 226 - }, 227 - { 228 - .desc = "Modify Watchpoint", 229 - .target_func = &wp_modify_test, 230 - }, 169 + static struct test_case wp_tests[] = { 170 + TEST_CASE_REASON("Read Only Watchpoint", wp_ro, "missing hardware support"), 171 + TEST_CASE_REASON("Write Only Watchpoint", wp_wo, "missing hardware support"), 172 + TEST_CASE_REASON("Read / Write Watchpoint", wp_rw, "missing hardware support"), 173 + TEST_CASE_REASON("Modify Watchpoint", wp_modify, "missing hardware support"), 174 + { .name = NULL, } 231 175 }; 232 176 233 - int test__wp_subtest_get_nr(void) 234 - { 235 - return (int)ARRAY_SIZE(wp_testcase_table); 236 - } 237 - 238 - const char *test__wp_subtest_get_desc(int i) 239 - { 240 - if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) 241 - return NULL; 242 - return wp_testcase_table[i].desc; 243 - } 244 - 245 - const char *test__wp_subtest_skip_reason(int i) 246 - { 247 - if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) 248 - return NULL; 
249 - if (!wp_testcase_table[i].skip_msg) 250 - return NULL; 251 - return wp_testcase_table[i].skip_msg(); 252 - } 253 - 254 - int test__wp(struct test *test __maybe_unused, int i) 255 - { 256 - if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) 257 - return TEST_FAIL; 258 - 259 - if (wp_testcase_table[i].is_supported && 260 - !wp_testcase_table[i].is_supported()) 261 - return TEST_SKIP; 262 - 263 - return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; 264 - } 265 - 266 - /* The s390 so far does not have support for 267 - * instruction breakpoint using the perf_event_open() system call. 268 - */ 269 - bool test__wp_is_supported(void) 270 - { 271 - #if defined(__s390x__) 272 - return false; 273 - #else 274 - return true; 275 - #endif 276 - } 177 + struct test_suite suite__wp = { 178 + .desc = "Watchpoint", 179 + .test_cases = wp_tests, 180 + };
+5
tools/perf/trace/beauty/beauty.h
··· 62 62 63 63 extern struct strarray strarray__socket_families; 64 64 65 + extern struct strarray strarray__socket_level; 66 + 65 67 /** 66 68 * augmented_arg: extra payload for syscall pointer arguments 67 69 ··· 231 229 232 230 size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg); 233 231 #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol 232 + 233 + size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg); 234 + #define SCA_SK_LEVEL syscall_arg__scnprintf_socket_level 234 235 235 236 size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); 236 237 #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
+1 -1
tools/perf/trace/beauty/sockaddr.c
··· 7 7 #include <sys/un.h> 8 8 #include <arpa/inet.h> 9 9 10 - #include "trace/beauty/generated/socket_arrays.c" 10 + #include "trace/beauty/generated/sockaddr.c" 11 11 DEFINE_STRARRAY(socket_families, "PF_"); 12 12 13 13 static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+24
tools/perf/trace/beauty/sockaddr.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: LGPL-2.1 3 + 4 + # This one uses a copy from the kernel sources headers that is in a 5 + # place used just for these tools/perf/beauty/ usage, we shouldn't not 6 + # put it in tools/include/linux otherwise they would be used in the 7 + # normal compiler building process and would drag needless stuff from the 8 + # kernel. 9 + 10 + # When what these scripts need is already in tools/include/ then use it, 11 + # otherwise grab and check the copy from the kernel sources just for these 12 + # string table building scripts. 13 + 14 + [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/ 15 + 16 + printf "static const char *socket_families[] = {\n" 17 + # #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ 18 + regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*' 19 + 20 + egrep $regex ${header_dir}/socket.h | \ 21 + sed -r "s/$regex/\2 \1/g" | \ 22 + xargs printf "\t[%s] = \"%s\",\n" | \ 23 + egrep -v "\"(UNIX|MAX)\"" 24 + printf "};\n"
+20 -1
tools/perf/trace/beauty/socket.c
··· 9 9 #include <sys/types.h> 10 10 #include <sys/socket.h> 11 11 12 + #include "trace/beauty/generated/socket.c" 13 + 12 14 static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size, bool show_prefix) 13 15 { 14 - #include "trace/beauty/generated/socket_ipproto_array.c" 15 16 static DEFINE_STRARRAY(socket_ipproto, "IPPROTO_"); 16 17 17 18 return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", show_prefix, protocol); ··· 26 25 return socket__scnprintf_ipproto(arg->val, bf, size, arg->show_string_prefix); 27 26 28 27 return syscall_arg__scnprintf_int(bf, size, arg); 28 + } 29 + 30 + static size_t socket__scnprintf_level(int level, char *bf, size_t size, bool show_prefix) 31 + { 32 + #if defined(__alpha__) || defined(__hppa__) || defined(__mips__) || defined(__sparc__) 33 + const int sol_socket = 0xffff; 34 + #else 35 + const int sol_socket = 1; 36 + #endif 37 + if (level == sol_socket) 38 + return scnprintf(bf, size, "%sSOCKET", show_prefix ? "SOL_" : ""); 39 + 40 + return strarray__scnprintf(&strarray__socket_level, bf, size, "%d", show_prefix, level); 41 + } 42 + 43 + size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg) 44 + { 45 + return socket__scnprintf_level(arg->val, bf, size, arg->show_string_prefix); 29 46 }
+21 -17
tools/perf/trace/beauty/socket.sh
··· 1 1 #!/bin/sh 2 2 # SPDX-License-Identifier: LGPL-2.1 3 3 4 - # This one uses a copy from the kernel sources headers that is in a 5 - # place used just for these tools/perf/beauty/ usage, we shouldn't not 6 - # put it in tools/include/linux otherwise they would be used in the 7 - # normal compiler building process and would drag needless stuff from the 8 - # kernel. 4 + if [ $# -gt 0 ] ; then 5 + uapi_header_dir=$1 6 + beauty_header_dir=$2 7 + else 8 + uapi_header_dir=tools/include/uapi/linux/ 9 + beauty_header_dir=tools/perf/trace/beauty/include/linux/ 10 + fi 9 11 10 - # When what these scripts need is already in tools/include/ then use it, 11 - # otherwise grab and check the copy from the kernel sources just for these 12 - # string table building scripts. 12 + printf "static const char *socket_ipproto[] = {\n" 13 + ipproto_regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*' 13 14 14 - [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/ 15 + egrep $ipproto_regex ${uapi_header_dir}/in.h | \ 16 + sed -r "s/$ipproto_regex/\2 \1/g" | \ 17 + sort -n | xargs printf "\t[%s] = \"%s\",\n" 18 + printf "};\n\n" 15 19 16 - printf "static const char *socket_families[] = {\n" 17 - # #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ 18 - regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*' 20 + printf "static const char *socket_level[] = {\n" 21 + socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?' 19 22 20 - egrep $regex ${header_dir}/socket.h | \ 21 - sed -r "s/$regex/\2 \1/g" | \ 22 - xargs printf "\t[%s] = \"%s\",\n" | \ 23 - egrep -v "\"(UNIX|MAX)\"" 24 - printf "};\n" 23 + egrep $socket_level_regex ${beauty_header_dir}/socket.h | \ 24 + sed -r "s/$socket_level_regex/\2 \1/g" | \ 25 + sort -n | xargs printf "\t[%s] = \"%s\",\n" 26 + printf "};\n\n" 27 + 28 + printf 'DEFINE_STRARRAY(socket_level, "SOL_");\n'
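The regenerated socket_level[] table feeds the 'level' beautifier added in socket.c above. A small sketch of the expected behaviour (socket__scnprintf_level() is static to socket.c, so this is illustrative only, and it assumes SOL_TCP, value 6, is among the SOL_* defines the script scrapes):

	char bf[32];

	/* SOL_SOCKET is special-cased because its value differs per arch
	 * (1 on most, 0xffff on alpha/hppa/mips/sparc). */
	socket__scnprintf_level(1, bf, sizeof(bf), true);	/* "SOL_SOCKET" on x86 */

	/* Anything else is looked up in the generated socket_level[] table. */
	socket__scnprintf_level(6, bf, sizeof(bf), true);	/* e.g. "SOL_TCP" */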
-12
tools/perf/trace/beauty/socket_ipproto.sh
··· 1 - #!/bin/sh 2 - # SPDX-License-Identifier: LGPL-2.1 3 - 4 - [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ 5 - 6 - printf "static const char *socket_ipproto[] = {\n" 7 - regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*' 8 - 9 - egrep $regex ${header_dir}/in.h | \ 10 - sed -r "s/$regex/\2 \1/g" | \ 11 - sort | xargs printf "\t[%s] = \"%s\",\n" 12 - printf "};\n"
+12 -1
tools/perf/util/annotate.c
··· 1255 1255 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1256 1256 } 1257 1257 1258 + void annotation__init(struct annotation *notes) 1259 + { 1260 + pthread_mutex_init(&notes->lock, NULL); 1261 + } 1262 + 1263 + void annotation__exit(struct annotation *notes) 1264 + { 1265 + annotated_source__delete(notes->src); 1266 + pthread_mutex_destroy(&notes->lock); 1267 + } 1268 + 1258 1269 static void annotation_line__add(struct annotation_line *al, struct list_head *head) 1259 1270 { 1260 1271 list_add_tail(&al->node, head); ··· 3143 3132 notes->nr_events = nr_pcnt; 3144 3133 3145 3134 annotation__update_column_widths(notes); 3146 - sym->annotate2 = true; 3135 + sym->annotate2 = 1; 3147 3136 3148 3137 return 0; 3149 3138
+3
tools/perf/util/annotate.h
··· 299 299 struct annotated_source *src; 300 300 }; 301 301 302 + void annotation__init(struct annotation *notes); 303 + void annotation__exit(struct annotation *notes); 304 + 302 305 static inline int annotation__cycles_width(struct annotation *notes) 303 306 { 304 307 if (notes->have_cycles && notes->options->show_minmax_cycle)
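A sketch of the intended pairing for the new helpers (assuming the existing symbol__annotation() accessor; the real call sites live in the symbol setup/teardown paths rather than here):

	struct annotation *notes = symbol__annotation(sym);

	annotation__init(notes);	/* initializes notes->lock */
	/* ... annotate and browse the symbol ... */
	annotation__exit(notes);	/* frees notes->src, destroys notes->lock */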
+2
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
··· 151 151 u64 payload, ip; 152 152 153 153 memset(&decoder->record, 0x0, sizeof(decoder->record)); 154 + decoder->record.context_id = (u64)-1; 154 155 155 156 while (1) { 156 157 err = arm_spe_get_next_packet(decoder); ··· 181 180 case ARM_SPE_COUNTER: 182 181 break; 183 182 case ARM_SPE_CONTEXT: 183 + decoder->record.context_id = payload; 184 184 break; 185 185 case ARM_SPE_OP_TYPE: 186 186 if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
+1
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
··· 38 38 u64 timestamp; 39 39 u64 virt_addr; 40 40 u64 phys_addr; 41 + u64 context_id; 41 42 }; 42 43 43 44 struct arm_spe_insn;
+94 -28
tools/perf/util/arm-spe.c
··· 71 71 u64 kernel_start; 72 72 73 73 unsigned long num_events; 74 + u8 use_ctx_pkt_for_pid; 74 75 }; 75 76 76 77 struct arm_spe_queue { ··· 101 100 const char *color = PERF_COLOR_BLUE; 102 101 103 102 color_fprintf(stdout, color, 104 - ". ... ARM SPE data: size %zu bytes\n", 103 + ". ... ARM SPE data: size %#zx bytes\n", 105 104 len); 106 105 107 106 while (len) { ··· 225 224 return ip >= spe->kernel_start ? 226 225 PERF_RECORD_MISC_KERNEL : 227 226 PERF_RECORD_MISC_USER; 227 + } 228 + 229 + static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 230 + struct auxtrace_queue *queue) 231 + { 232 + struct arm_spe_queue *speq = queue->priv; 233 + pid_t tid; 234 + 235 + tid = machine__get_current_tid(spe->machine, speq->cpu); 236 + if (tid != -1) { 237 + speq->tid = tid; 238 + thread__zput(speq->thread); 239 + } else 240 + speq->tid = queue->tid; 241 + 242 + if ((!speq->thread) && (speq->tid != -1)) { 243 + speq->thread = machine__find_thread(spe->machine, -1, 244 + speq->tid); 245 + } 246 + 247 + if (speq->thread) { 248 + speq->pid = speq->thread->pid_; 249 + if (queue->cpu == -1) 250 + speq->cpu = speq->thread->cpu; 251 + } 252 + } 253 + 254 + static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) 255 + { 256 + struct arm_spe *spe = speq->spe; 257 + int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); 258 + 259 + if (err) 260 + return err; 261 + 262 + arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); 263 + 264 + return 0; 228 265 } 229 266 230 267 static void arm_spe_prep_sample(struct arm_spe *spe, ··· 499 460 * can correlate samples between Arm SPE trace data and other 500 461 * perf events with correct time ordering. 501 462 */ 463 + 464 + /* 465 + * Update pid/tid info. 466 + */ 467 + record = &speq->decoder->record; 468 + if (!spe->timeless_decoding && record->context_id != (u64)-1) { 469 + ret = arm_spe_set_tid(speq, record->context_id); 470 + if (ret) 471 + return ret; 472 + 473 + spe->use_ctx_pkt_for_pid = true; 474 + } 475 + 502 476 ret = arm_spe_sample(speq); 503 477 if (ret) 504 478 return ret; ··· 638 586 return timeless_decoding; 639 587 } 640 588 641 - static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 642 - struct auxtrace_queue *queue) 643 - { 644 - struct arm_spe_queue *speq = queue->priv; 645 - pid_t tid; 646 - 647 - tid = machine__get_current_tid(spe->machine, speq->cpu); 648 - if (tid != -1) { 649 - speq->tid = tid; 650 - thread__zput(speq->thread); 651 - } else 652 - speq->tid = queue->tid; 653 - 654 - if ((!speq->thread) && (speq->tid != -1)) { 655 - speq->thread = machine__find_thread(spe->machine, -1, 656 - speq->tid); 657 - } 658 - 659 - if (speq->thread) { 660 - speq->pid = speq->thread->pid_; 661 - if (queue->cpu == -1) 662 - speq->cpu = speq->thread->cpu; 663 - } 664 - } 665 - 666 589 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 667 590 { 668 591 unsigned int queue_nr; ··· 668 641 ts = timestamp; 669 642 } 670 643 671 - arm_spe_set_pid_tid_cpu(spe, queue); 644 + /* 645 + * A previous context-switch event has set pid/tid in the machine's context, so 646 + * here we need to update the pid/tid in the thread and SPE queue. 
647 + */ 648 + if (!spe->use_ctx_pkt_for_pid) 649 + arm_spe_set_pid_tid_cpu(spe, queue); 672 650 673 651 ret = arm_spe_run_decoder(speq, &ts); 674 652 if (ret < 0) { ··· 713 681 return 0; 714 682 } 715 683 684 + static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, 685 + struct perf_sample *sample) 686 + { 687 + pid_t pid, tid; 688 + int cpu; 689 + 690 + if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) 691 + return 0; 692 + 693 + pid = event->context_switch.next_prev_pid; 694 + tid = event->context_switch.next_prev_tid; 695 + cpu = sample->cpu; 696 + 697 + if (tid == -1) 698 + pr_warning("context_switch event has no tid\n"); 699 + 700 + return machine__set_current_tid(spe->machine, cpu, pid, tid); 701 + } 702 + 716 703 static int arm_spe_process_event(struct perf_session *session, 717 704 union perf_event *event, 718 705 struct perf_sample *sample, ··· 769 718 } 770 719 } else if (timestamp) { 771 720 err = arm_spe_process_queues(spe, timestamp); 721 + if (err) 722 + return err; 723 + 724 + if (!spe->use_ctx_pkt_for_pid && 725 + (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || 726 + event->header.type == PERF_RECORD_SWITCH)) 727 + err = arm_spe_context_switch(spe, event, sample); 772 728 } 773 729 774 730 return err; ··· 841 783 return arm_spe_process_timeless_queues(spe, -1, 842 784 MAX_TIMESTAMP - 1); 843 785 844 - return arm_spe_process_queues(spe, MAX_TIMESTAMP); 786 + ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 787 + if (ret) 788 + return ret; 789 + 790 + if (!spe->use_ctx_pkt_for_pid) 791 + ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 792 + "Matching of TIDs to SPE events could be inaccurate.\n"); 793 + 794 + return 0; 845 795 } 846 796 847 797 static void arm_spe_free_queue(void *priv)
+32 -1
tools/perf/util/bpf-event.c
··· 33 33 return err ? ERR_PTR(err) : btf; 34 34 } 35 35 36 + struct bpf_program * __weak 37 + bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 38 + { 39 + #pragma GCC diagnostic push 40 + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 41 + return bpf_program__next(prev, obj); 42 + #pragma GCC diagnostic pop 43 + } 44 + 45 + struct bpf_map * __weak 46 + bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 47 + { 48 + #pragma GCC diagnostic push 49 + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 50 + return bpf_map__next(prev, obj); 51 + #pragma GCC diagnostic pop 52 + } 53 + 54 + const void * __weak 55 + btf__raw_data(const struct btf *btf_ro, __u32 *size) 56 + { 57 + #pragma GCC diagnostic push 58 + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 59 + return btf__get_raw_data(btf_ro, size); 60 + #pragma GCC diagnostic pop 61 + } 62 + 36 63 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) 37 64 { 38 65 int ret = 0; ··· 146 119 node->data_size = data_size; 147 120 memcpy(node->data, data, data_size); 148 121 149 - perf_env__insert_btf(env, node); 122 + if (!perf_env__insert_btf(env, node)) { 123 + /* Insertion failed because of a duplicate. */ 124 + free(node); 125 + return -1; 126 + } 150 127 return 0; 151 128 } 152 129
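The three __weak wrappers added above are a link-time compatibility shim: when perf is built against a libbpf that already exports bpf_object__next_program(), bpf_object__next_map() and btf__raw_data(), the library's strong definitions win; against an older libbpf, the weak fallbacks call the previous (now deprecated) APIs, with the pragmas silencing the deprecation warnings. A generic sketch of the pattern, using hypothetical symbols rather than perf/libbpf ones (__weak in the hunk above is the tools headers' shorthand for this attribute):

    /* Exported by old and new versions of the library alike. */
    int old_api(int x);

    /* Used only if the linked library does not define new_api() itself;
     * a library that does provides a strong definition that overrides this. */
    __attribute__((weak)) int new_api(int x)
    {
    	return old_api(x);	/* same behaviour under the old name */
    }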
+3 -5
tools/perf/util/c++/clang-c.h
··· 12 12 extern void perf_clang__init(void); 13 13 extern void perf_clang__cleanup(void); 14 14 15 - extern int test__clang_to_IR(void); 16 - extern int test__clang_to_obj(void); 15 + struct test_suite; 16 + extern int test__clang_to_IR(struct test_suite *test, int subtest); 17 + extern int test__clang_to_obj(struct test_suite *test, int subtest); 17 18 18 19 extern int perf_clang__compile_bpf(const char *filename, 19 20 void **p_obj_buf, ··· 26 25 27 26 static inline void perf_clang__init(void) { } 28 27 static inline void perf_clang__cleanup(void) { } 29 - 30 - static inline int test__clang_to_IR(void) { return -1; } 31 - static inline int test__clang_to_obj(void) { return -1;} 32 28 33 29 static inline int 34 30 perf_clang__compile_bpf(const char *filename __maybe_unused,
+4 -2
tools/perf/util/c++/clang-test.cpp
··· 35 35 } 36 36 37 37 extern "C" { 38 - int test__clang_to_IR(void) 38 + int test__clang_to_IR(struct test_suite *test __maybe_unused, 39 + int subtest __maybe_unused) 39 40 { 40 41 perf_clang_scope _scope; 41 42 ··· 49 48 return -1; 50 49 } 51 50 52 - int test__clang_to_obj(void) 51 + int test__clang_to_obj(struct test_suite *test __maybe_unused, 52 + int subtest __maybe_unused) 53 53 { 54 54 perf_clang_scope _scope; 55 55
+41 -37
tools/perf/util/cputopo.c
··· 14 14 #include "env.h" 15 15 #include "pmu-hybrid.h" 16 16 17 - #define CORE_SIB_FMT \ 17 + #define PACKAGE_CPUS_FMT \ 18 + "%s/devices/system/cpu/cpu%d/topology/package_cpus_list" 19 + #define PACKAGE_CPUS_FMT_OLD \ 18 20 "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" 19 - #define DIE_SIB_FMT \ 21 + #define DIE_CPUS_FMT \ 20 22 "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" 21 - #define THRD_SIB_FMT \ 22 - "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" 23 - #define THRD_SIB_FMT_NEW \ 23 + #define CORE_CPUS_FMT \ 24 24 "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" 25 + #define CORE_CPUS_FMT_OLD \ 26 + "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" 25 27 #define NODE_ONLINE_FMT \ 26 28 "%s/devices/system/node/online" 27 29 #define NODE_MEMINFO_FMT \ ··· 41 39 u32 i = 0; 42 40 int ret = -1; 43 41 44 - scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT, 42 + scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT, 45 43 sysfs__mountpoint(), cpu); 44 + if (access(filename, F_OK) == -1) { 45 + scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD, 46 + sysfs__mountpoint(), cpu); 47 + } 46 48 fp = fopen(filename, "r"); 47 49 if (!fp) 48 50 goto try_dies; ··· 60 54 if (p) 61 55 *p = '\0'; 62 56 63 - for (i = 0; i < tp->core_sib; i++) { 64 - if (!strcmp(buf, tp->core_siblings[i])) 57 + for (i = 0; i < tp->package_cpus_lists; i++) { 58 + if (!strcmp(buf, tp->package_cpus_list[i])) 65 59 break; 66 60 } 67 - if (i == tp->core_sib) { 68 - tp->core_siblings[i] = buf; 69 - tp->core_sib++; 61 + if (i == tp->package_cpus_lists) { 62 + tp->package_cpus_list[i] = buf; 63 + tp->package_cpus_lists++; 70 64 buf = NULL; 71 65 len = 0; 72 66 } 73 67 ret = 0; 74 68 75 69 try_dies: 76 - if (!tp->die_siblings) 70 + if (!tp->die_cpus_list) 77 71 goto try_threads; 78 72 79 - scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, 73 + scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, 80 74 sysfs__mountpoint(), cpu); 81 75 fp = fopen(filename, "r"); 82 76 if (!fp) ··· 91 85 if (p) 92 86 *p = '\0'; 93 87 94 - for (i = 0; i < tp->die_sib; i++) { 95 - if (!strcmp(buf, tp->die_siblings[i])) 88 + for (i = 0; i < tp->die_cpus_lists; i++) { 89 + if (!strcmp(buf, tp->die_cpus_list[i])) 96 90 break; 97 91 } 98 - if (i == tp->die_sib) { 99 - tp->die_siblings[i] = buf; 100 - tp->die_sib++; 92 + if (i == tp->die_cpus_lists) { 93 + tp->die_cpus_list[i] = buf; 94 + tp->die_cpus_lists++; 101 95 buf = NULL; 102 96 len = 0; 103 97 } 104 98 ret = 0; 105 99 106 100 try_threads: 107 - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, 101 + scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT, 108 102 sysfs__mountpoint(), cpu); 109 103 if (access(filename, F_OK) == -1) { 110 - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, 104 + scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD, 111 105 sysfs__mountpoint(), cpu); 112 106 } 113 107 fp = fopen(filename, "r"); ··· 121 115 if (p) 122 116 *p = '\0'; 123 117 124 - for (i = 0; i < tp->thread_sib; i++) { 125 - if (!strcmp(buf, tp->thread_siblings[i])) 118 + for (i = 0; i < tp->core_cpus_lists; i++) { 119 + if (!strcmp(buf, tp->core_cpus_list[i])) 126 120 break; 127 121 } 128 - if (i == tp->thread_sib) { 129 - tp->thread_siblings[i] = buf; 130 - tp->thread_sib++; 122 + if (i == tp->core_cpus_lists) { 123 + tp->core_cpus_list[i] = buf; 124 + tp->core_cpus_lists++; 131 125 buf = NULL; 132 126 } 133 127 ret = 0; ··· 145 139 if (!tp) 146 140 return; 147 141 148 - for (i = 0 ; i < tp->core_sib; i++) 149 - zfree(&tp->core_siblings[i]); 142 + for (i = 0 ; i < tp->package_cpus_lists; 
i++) 143 + zfree(&tp->package_cpus_list[i]); 150 144 151 - if (tp->die_sib) { 152 - for (i = 0 ; i < tp->die_sib; i++) 153 - zfree(&tp->die_siblings[i]); 154 - } 145 + for (i = 0 ; i < tp->die_cpus_lists; i++) 146 + zfree(&tp->die_cpus_list[i]); 155 147 156 - for (i = 0 ; i < tp->thread_sib; i++) 157 - zfree(&tp->thread_siblings[i]); 148 + for (i = 0 ; i < tp->core_cpus_lists; i++) 149 + zfree(&tp->core_cpus_list[i]); 158 150 159 151 free(tp); 160 152 } ··· 168 164 if (strncmp(uts.machine, "x86_64", 6)) 169 165 return false; 170 166 171 - scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, 167 + scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, 172 168 sysfs__mountpoint(), 0); 173 169 if (access(filename, F_OK) == -1) 174 170 return false; ··· 209 205 210 206 tp = addr; 211 207 addr += sizeof(*tp); 212 - tp->core_siblings = addr; 208 + tp->package_cpus_list = addr; 213 209 addr += sz; 214 210 if (has_die) { 215 - tp->die_siblings = addr; 211 + tp->die_cpus_list = addr; 216 212 addr += sz; 217 213 } 218 - tp->thread_siblings = addr; 214 + tp->core_cpus_list = addr; 219 215 220 216 for (i = 0; i < nr; i++) { 221 217 if (!cpu_map__has(map, i))
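The access()/fallback sequence above follows the sysfs ABI rename: newer kernels expose topology/package_cpus_list and topology/core_cpus_list, while older ones only provide core_siblings_list and thread_siblings_list (die_cpus_list has no legacy name because older kernels had no die level). A self-contained sketch of the same pattern outside perf, with a hard-coded sysfs mount point and an invented helper name:

    #include <stdio.h>
    #include <unistd.h>

    static FILE *open_topology_file(int cpu, const char *new_name, const char *old_name)
    {
    	char path[256];

    	/* Prefer the current ABI file name... */
    	snprintf(path, sizeof(path),
    		 "/sys/devices/system/cpu/cpu%d/topology/%s", cpu, new_name);
    	if (access(path, F_OK) == -1)
    		/* ...and fall back to the legacy name on older kernels. */
    		snprintf(path, sizeof(path),
    			 "/sys/devices/system/cpu/cpu%d/topology/%s", cpu, old_name);
    	return fopen(path, "r");
    }

e.g. open_topology_file(0, "package_cpus_list", "core_siblings_list").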
+27 -6
tools/perf/util/cputopo.h
··· 5 5 #include <linux/types.h> 6 6 7 7 struct cpu_topology { 8 - u32 core_sib; 9 - u32 die_sib; 10 - u32 thread_sib; 11 - char **core_siblings; 12 - char **die_siblings; 13 - char **thread_siblings; 8 + /* The number of unique package_cpus_lists below. */ 9 + u32 package_cpus_lists; 10 + /* The number of unique die_cpu_lists below. */ 11 + u32 die_cpus_lists; 12 + /* The number of unique core_cpu_lists below. */ 13 + u32 core_cpus_lists; 14 + /* 15 + * An array of strings where each string is unique and read from 16 + * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI 17 + * each of these is a human-readable list of CPUs sharing the same 18 + * physical_package_id. The format is like 0-3, 8-11, 14,17. 19 + */ 20 + const char **package_cpus_list; 21 + /* 22 + * An array of string where each string is unique and from 23 + * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI 24 + * each of these is a human-readable list of CPUs within the same die. 25 + * The format is like 0-3, 8-11, 14,17. 26 + */ 27 + const char **die_cpus_list; 28 + /* 29 + * An array of string where each string is unique and from 30 + * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI 31 + * each of these is a human-readable list of CPUs within the same 32 + * core. The format is like 0-3, 8-11, 14,17. 33 + */ 34 + const char **core_cpus_list; 14 35 }; 15 36 16 37 struct numa_topology_node {
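A hypothetical example of the deduplication these fields describe: on a two-package machine with CPUs 0-3 in package 0 and CPUs 4-7 in package 1, every CPU in a package reports the same list, so only two unique strings survive:

    cpu0..cpu3: package_cpus_list reads "0-3"
    cpu4..cpu7: package_cpus_list reads "4-7"
    => tp->package_cpus_list = { "0-3", "4-7" }, tp->package_cpus_lists == 2

The same counting applies to the die and core arrays, which is what the new #num_packages/#num_dies/#num_cores literals in expr.c below report.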
+1 -1
tools/perf/util/cs-etm.c
··· 537 537 538 538 fprintf(stdout, "\n"); 539 539 color_fprintf(stdout, color, 540 - ". ... CoreSight %s Trace data: size %zu bytes\n", 540 + ". ... CoreSight %s Trace data: size %#zx bytes\n", 541 541 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 542 542 543 543 do {
+4 -1
tools/perf/util/env.c
··· 75 75 return node; 76 76 } 77 77 78 - void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) 78 + bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) 79 79 { 80 80 struct rb_node *parent = NULL; 81 81 __u32 btf_id = btf_node->id; 82 82 struct btf_node *node; 83 83 struct rb_node **p; 84 + bool ret = true; 84 85 85 86 down_write(&env->bpf_progs.lock); 86 87 p = &env->bpf_progs.btfs.rb_node; ··· 95 94 p = &(*p)->rb_right; 96 95 } else { 97 96 pr_debug("duplicated btf %u\n", btf_id); 97 + ret = false; 98 98 goto out; 99 99 } 100 100 } ··· 105 103 env->bpf_progs.btfs_cnt++; 106 104 out: 107 105 up_write(&env->bpf_progs.lock); 106 + return ret; 108 107 } 109 108 110 109 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+1 -1
tools/perf/util/env.h
··· 167 167 struct bpf_prog_info_node *info_node); 168 168 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, 169 169 __u32 prog_id); 170 - void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); 170 + bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); 171 171 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); 172 172 173 173 int perf_env__numa_node(struct perf_env *env, int cpu);
+12
tools/perf/util/evsel.c
··· 3037 3037 { 3038 3038 evsel->core.leader = &leader->core; 3039 3039 } 3040 + 3041 + int evsel__source_count(const struct evsel *evsel) 3042 + { 3043 + struct evsel *pos; 3044 + int count = 0; 3045 + 3046 + evlist__for_each_entry(evsel->evlist, pos) { 3047 + if (pos->metric_leader == evsel) 3048 + count++; 3049 + } 3050 + return count; 3051 + }
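A short, hedged illustration of what this helper returns (based on my reading of how metric_leader is assigned when a metric event expands to multiple PMU instances; the hunk itself only shows the counting loop):

    /* evsels A, B and C are three instances of the same metric event:
     *   A->metric_leader == A, B->metric_leader == A, C->metric_leader == A
     * => evsel__source_count(A) == 3
     */

This is the number that source_count() in a metric expression evaluates to (see the expr.y and stat-shadow.c changes below).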
+1
tools/perf/util/evsel.h
··· 489 489 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); 490 490 bool evsel__is_leader(struct evsel *evsel); 491 491 void evsel__set_leader(struct evsel *evsel, struct evsel *leader); 492 + int evsel__source_count(const struct evsel *evsel); 492 493 493 494 /* 494 495 * Macro to swap the bit-field postition and size.
+60 -5
tools/perf/util/expr.c
··· 5 5 #include <stdlib.h> 6 6 #include <string.h> 7 7 #include "metricgroup.h" 8 + #include "cpumap.h" 9 + #include "cputopo.h" 8 10 #include "debug.h" 9 11 #include "expr.h" 10 12 #include "expr-bison.h" 11 13 #include "expr-flex.h" 14 + #include "smt.h" 12 15 #include <linux/kernel.h> 13 16 #include <linux/zalloc.h> 14 17 #include <ctype.h> 18 + #include <math.h> 15 19 16 20 #ifdef PARSER_DEBUG 17 21 extern int expr_debug; ··· 23 19 24 20 struct expr_id_data { 25 21 union { 26 - double val; 22 + struct { 23 + double val; 24 + int source_count; 25 + } val; 27 26 struct { 28 27 double val; 29 28 const char *metric_name; ··· 144 137 /* Caller must make sure id is allocated */ 145 138 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) 146 139 { 140 + return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1); 141 + } 142 + 143 + /* Caller must make sure id is allocated */ 144 + int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, 145 + double val, int source_count) 146 + { 147 147 struct expr_id_data *data_ptr = NULL, *old_data = NULL; 148 148 char *old_key = NULL; 149 149 int ret; ··· 158 144 data_ptr = malloc(sizeof(*data_ptr)); 159 145 if (!data_ptr) 160 146 return -ENOMEM; 161 - data_ptr->val = val; 147 + data_ptr->val.val = val; 148 + data_ptr->val.source_count = source_count; 162 149 data_ptr->kind = EXPR_ID_DATA__VALUE; 163 150 164 151 ret = hashmap__set(ctx->ids, id, data_ptr, ··· 255 240 256 241 switch (data->kind) { 257 242 case EXPR_ID_DATA__VALUE: 258 - pr_debug2("lookup(%s): val %f\n", id, data->val); 243 + pr_debug2("lookup(%s): val %f\n", id, data->val.val); 259 244 break; 260 245 case EXPR_ID_DATA__REF: 261 246 pr_debug2("lookup(%s): ref metric name %s\n", id, ··· 266 251 pr_debug("%s failed to count\n", id); 267 252 return -1; 268 253 } 269 - pr_debug("processing metric: %s EXIT: %f\n", id, data->val); 254 + pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val); 270 255 break; 271 256 case EXPR_ID_DATA__REF_VALUE: 272 257 pr_debug2("lookup(%s): ref val %f metric name %s\n", id, ··· 381 366 double expr_id_data__value(const struct expr_id_data *data) 382 367 { 383 368 if (data->kind == EXPR_ID_DATA__VALUE) 384 - return data->val; 369 + return data->val.val; 385 370 assert(data->kind == EXPR_ID_DATA__REF_VALUE); 386 371 return data->ref.val; 372 + } 373 + 374 + double expr_id_data__source_count(const struct expr_id_data *data) 375 + { 376 + assert(data->kind == EXPR_ID_DATA__VALUE); 377 + return data->val.source_count; 378 + } 379 + 380 + double expr__get_literal(const char *literal) 381 + { 382 + static struct cpu_topology *topology; 383 + 384 + if (!strcmp("#smt_on", literal)) 385 + return smt_on() > 0 ? 1.0 : 0.0; 386 + 387 + if (!strcmp("#num_cpus", literal)) 388 + return cpu__max_present_cpu(); 389 + 390 + /* 391 + * Assume that topology strings are consistent, such as CPUs "0-1" 392 + * wouldn't be listed as "0,1", and so after deduplication the number of 393 + * these strings gives an indication of the number of packages, dies, 394 + * etc. 
395 + */ 396 + if (!topology) { 397 + topology = cpu_topology__new(); 398 + if (!topology) { 399 + pr_err("Error creating CPU topology"); 400 + return NAN; 401 + } 402 + } 403 + if (!strcmp("#num_packages", literal)) 404 + return topology->package_cpus_lists; 405 + if (!strcmp("#num_dies", literal)) 406 + return topology->die_cpus_lists; 407 + if (!strcmp("#num_cores", literal)) 408 + return topology->core_cpus_lists; 409 + 410 + pr_err("Unrecognized literal '%s'", literal); 411 + return NAN; 387 412 }
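#smt_on was previously a dedicated token in the lexer; expr__get_literal() generalizes it so that any token matching the literal pattern in expr.l below (#[0-9a-zA-Z_.\-]+) is resolved here, with unknown literals returning NAN and failing the parse. Hypothetical expressions the parser can now evaluate without the tool pre-seeding these values as IDs (event names are invented):

    instructions / #num_cpus
    llc_misses_event / #num_packages
    core_clks / 2 if #smt_on else core_clks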
+4
tools/perf/util/expr.h
··· 40 40 void expr__del_id(struct expr_parse_ctx *ctx, const char *id); 41 41 int expr__add_id(struct expr_parse_ctx *ctx, const char *id); 42 42 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); 43 + int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, 44 + double val, int source_count); 43 45 int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); 44 46 int expr__get_id(struct expr_parse_ctx *ctx, const char *id, 45 47 struct expr_id_data **data); ··· 57 55 struct expr_parse_ctx *ids); 58 56 59 57 double expr_id_data__value(const struct expr_id_data *data); 58 + double expr_id_data__source_count(const struct expr_id_data *data); 59 + double expr__get_literal(const char *literal); 60 60 61 61 #endif
+15 -1
tools/perf/util/expr.l
··· 6 6 #include <linux/compiler.h> 7 7 #include "expr.h" 8 8 #include "expr-bison.h" 9 + #include <math.h> 9 10 10 11 char *expr_get_text(yyscan_t yyscanner); 11 12 YYSTYPE *expr_get_lval(yyscan_t yyscanner); ··· 78 77 yylval->str = normalize(yylval->str, runtime); 79 78 return token; 80 79 } 80 + 81 + static int literal(yyscan_t scanner) 82 + { 83 + YYSTYPE *yylval = expr_get_lval(scanner); 84 + 85 + yylval->num = expr__get_literal(expr_get_text(scanner)); 86 + if (isnan(yylval->num)) 87 + return EXPR_ERROR; 88 + 89 + return LITERAL; 90 + } 81 91 %} 82 92 83 93 number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) ··· 97 85 spec \\{sch} 98 86 sym [0-9a-zA-Z_\.:@?]+ 99 87 symbol ({spec}|{sym})+ 88 + literal #[0-9a-zA-Z_\.\-]+ 100 89 101 90 %% 102 91 struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); ··· 107 94 min { return MIN; } 108 95 if { return IF; } 109 96 else { return ELSE; } 110 - #smt_on { return SMT_ON; } 97 + source_count { return SOURCE_COUNT; } 98 + {literal} { return literal(yyscanner); } 111 99 {number} { return value(yyscanner); } 112 100 {symbol} { return str(yyscanner, ID, sctx->runtime); } 113 101 "|" { return '|'; }
+42 -31
tools/perf/util/expr.y
··· 3 3 #define YYDEBUG 1 4 4 #include <assert.h> 5 5 #include <math.h> 6 + #include <stdlib.h> 6 7 #include "util/debug.h" 7 - #include "smt.h" 8 8 #define IN_EXPR_Y 1 9 9 #include "expr.h" 10 10 %} ··· 37 37 } ids; 38 38 } 39 39 40 - %token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR 40 + %token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR 41 41 %left MIN MAX IF 42 42 %left '|' 43 43 %left '^' ··· 46 46 %left '-' '+' 47 47 %left '*' '/' '%' 48 48 %left NEG NOT 49 - %type <num> NUMBER 49 + %type <num> NUMBER LITERAL 50 50 %type <str> ID 51 51 %destructor { free ($$); } <str> 52 52 %type <ids> expr if_expr ··· 80 80 .val = BOTTOM, 81 81 .ids = ids__union(ids1.ids, ids2.ids), 82 82 }; 83 + return result; 84 + } 85 + 86 + static struct ids handle_id(struct expr_parse_ctx *ctx, char *id, 87 + bool compute_ids, bool source_count) 88 + { 89 + struct ids result; 90 + 91 + if (!compute_ids) { 92 + /* 93 + * Compute the event's value from ID. If the ID isn't known then 94 + * it isn't used to compute the formula so set to NAN. 95 + */ 96 + struct expr_id_data *data; 97 + 98 + result.val = NAN; 99 + if (expr__resolve_id(ctx, id, &data) == 0) { 100 + result.val = source_count 101 + ? expr_id_data__source_count(data) 102 + : expr_id_data__value(data); 103 + } 104 + result.ids = NULL; 105 + free(id); 106 + } else { 107 + /* 108 + * Set the value to BOTTOM to show that any value is possible 109 + * when the event is computed. Create a set of just the ID. 110 + */ 111 + result.val = BOTTOM; 112 + result.ids = ids__new(); 113 + if (!result.ids || ids__insert(result.ids, id)) { 114 + pr_err("Error creating IDs for '%s'", id); 115 + free(id); 116 + } 117 + } 83 118 return result; 84 119 } 85 120 ··· 203 168 $$.val = $1; 204 169 $$.ids = NULL; 205 170 } 206 - | ID 207 - { 208 - if (!compute_ids) { 209 - /* 210 - * Compute the event's value from ID. If the ID isn't known then 211 - * it isn't used to compute the formula so set to NAN. 212 - */ 213 - struct expr_id_data *data; 214 - 215 - $$.val = NAN; 216 - if (expr__resolve_id(ctx, $1, &data) == 0) 217 - $$.val = expr_id_data__value(data); 218 - 219 - $$.ids = NULL; 220 - free($1); 221 - } else { 222 - /* 223 - * Set the value to BOTTOM to show that any value is possible 224 - * when the event is computed. Create a set of just the ID. 225 - */ 226 - $$.val = BOTTOM; 227 - $$.ids = ids__new(); 228 - if (!$$.ids || ids__insert($$.ids, $1)) 229 - YYABORT; 230 - } 231 - } 171 + | ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); } 172 + | SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); } 232 173 | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } 233 174 | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } 234 175 | expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } ··· 291 280 $$ = union_expr($3, $5); 292 281 } 293 282 } 294 - | SMT_ON 283 + | LITERAL 295 284 { 296 - $$.val = smt_on() > 0 ? 1.0 : 0.0; 285 + $$.val = $1; 297 286 $$.ids = NULL; 298 287 } 299 288 ;
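The new SOURCE_COUNT token gives expressions access to how many evsels were aggregated into an ID's value (computed by evsel__source_count() above and fed in by stat-shadow.c below via expr__add_id_val_source_count()). A hypothetical use, with an invented uncore event that expands to one evsel per memory-controller instance:

    uncore_cas_count_read / source_count(uncore_cas_count_read)

which turns the summed value into a per-instance average; an event that is not aggregated should have a source count of 1, so the division is then a no-op.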
+10 -10
tools/perf/util/header.c
··· 583 583 if (!tp) 584 584 return -1; 585 585 586 - ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib)); 586 + ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists)); 587 587 if (ret < 0) 588 588 goto done; 589 589 590 - for (i = 0; i < tp->core_sib; i++) { 591 - ret = do_write_string(ff, tp->core_siblings[i]); 590 + for (i = 0; i < tp->package_cpus_lists; i++) { 591 + ret = do_write_string(ff, tp->package_cpus_list[i]); 592 592 if (ret < 0) 593 593 goto done; 594 594 } 595 - ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib)); 595 + ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists)); 596 596 if (ret < 0) 597 597 goto done; 598 598 599 - for (i = 0; i < tp->thread_sib; i++) { 600 - ret = do_write_string(ff, tp->thread_siblings[i]); 599 + for (i = 0; i < tp->core_cpus_lists; i++) { 600 + ret = do_write_string(ff, tp->core_cpus_list[i]); 601 601 if (ret < 0) 602 602 break; 603 603 } ··· 617 617 return ret; 618 618 } 619 619 620 - if (!tp->die_sib) 620 + if (!tp->die_cpus_lists) 621 621 goto done; 622 622 623 - ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); 623 + ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists)); 624 624 if (ret < 0) 625 625 goto done; 626 626 627 - for (i = 0; i < tp->die_sib; i++) { 628 - ret = do_write_string(ff, tp->die_siblings[i]); 627 + for (i = 0; i < tp->die_cpus_lists; i++) { 628 + ret = do_write_string(ff, tp->die_cpus_list[i]); 629 629 if (ret < 0) 630 630 goto done; 631 631 }
+6 -1
tools/perf/util/stat-shadow.c
··· 829 829 struct saved_value *v; 830 830 struct stats *stats; 831 831 u64 metric_total = 0; 832 + int source_count; 832 833 833 834 if (!strcmp(metric_events[i]->name, "duration_time")) { 834 835 stats = &walltime_nsecs_stats; 835 836 scale = 1e-9; 837 + source_count = 1; 836 838 } else { 837 839 v = saved_value_lookup(metric_events[i], cpu, false, 838 840 STAT_NONE, 0, st, ··· 843 841 break; 844 842 stats = &v->stats; 845 843 scale = 1.0; 844 + source_count = evsel__source_count(metric_events[i]); 846 845 847 846 if (v->metric_other) 848 847 metric_total = v->metric_total; ··· 852 849 if (!n) 853 850 return -ENOMEM; 854 851 855 - expr__add_id_val(pctx, n, metric_total ? : avg_stats(stats) * scale); 852 + expr__add_id_val_source_count(pctx, n, 853 + metric_total ? : avg_stats(stats) * scale, 854 + source_count); 856 855 } 857 856 858 857 for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+8 -1
tools/perf/util/symbol.c
··· 274 274 if (symbol_conf.priv_size) { 275 275 if (symbol_conf.init_annotation) { 276 276 struct annotation *notes = (void *)sym; 277 - pthread_mutex_init(&notes->lock, NULL); 277 + annotation__init(notes); 278 278 } 279 279 sym = ((void *)sym) + symbol_conf.priv_size; 280 280 } ··· 294 294 295 295 void symbol__delete(struct symbol *sym) 296 296 { 297 + if (symbol_conf.priv_size) { 298 + if (symbol_conf.init_annotation) { 299 + struct annotation *notes = symbol__annotation(sym); 300 + 301 + annotation__exit(notes); 302 + } 303 + } 297 304 free(((void *)sym) - symbol_conf.priv_size); 298 305 } 299 306
+15 -4
tools/perf/util/symbol.h
··· 40 40 GElf_Shdr *shp, const char *name, size_t *idx); 41 41 #endif 42 42 43 - /** struct symbol - symtab entry 44 - * 45 - * @ignore - resolvable but tools ignore it (e.g. idle routines) 43 + /** 44 + * A symtab entry. When allocated this may be preceded by an annotation (see 45 + * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node 46 + * to sort by name (see struct symbol_name_rb_node). 46 47 */ 47 48 struct symbol { 48 49 struct rb_node rb_node; 50 + /** Range of symbol [start, end). */ 49 51 u64 start; 50 52 u64 end; 53 + /** Length of the string name. */ 51 54 u16 namelen; 55 + /** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */ 52 56 u8 type:4; 57 + /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */ 53 58 u8 binding:4; 59 + /** Set true for kernel symbols of idle routines. */ 54 60 u8 idle:1; 61 + /** Resolvable but tools ignore it (e.g. idle routines). */ 55 62 u8 ignore:1; 63 + /** Symbol for an inlined function. */ 56 64 u8 inlined:1; 65 + /** Has symbol__annotate2 been performed. */ 66 + u8 annotate2:1; 67 + /** Architecture specific. Unused except on PPC where it holds st_other. */ 57 68 u8 arch_sym; 58 - bool annotate2; 69 + /** The name of length namelen associated with the symbol. */ 59 70 char name[]; 60 71 }; 61 72
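For readers new to this layout, an illustrative picture of the allocation the comment refers to (inferred from symbol__new()/symbol__delete() in symbol.c above; not a literal struct definition):

    /* calloc(symbol_conf.priv_size + sizeof(struct symbol) + namelen + ...)
     *
     *   base                          -> struct annotation (when init_annotation
     *                                    is set), plus other per-symbol private data
     *   base + symbol_conf.priv_size  -> struct symbol, the pointer callers get back
     */

This is why symbol__delete() frees ((void *)sym) - symbol_conf.priv_size, and with this series it first runs annotation__exit() on the annotation that precedes the symbol.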