Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'perf-tools-for-v5.13-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool updates from Arnaldo Carvalho de Melo:
"perf stat:

- Add support for hybrid PMUs on systems such as Intel Alderlake,
with its mix of BIG/little core/atom CPUs.

- Introduce 'bperf' to share hardware PMCs with BPF.

- New --iostat option to collect and present IO stats on Intel
hardware.

This functionality is based on recently introduced sysfs attributes
for Intel® Xeon® Scalable processor family (code name Skylake-SP)
in commit bb42b3d39781 ("perf/x86/intel/uncore: Expose an Uncore
unit to IIO PMON mapping")

It is intended to provide four I/O performance metrics, in MB, for
each PCIe root port:

- Inbound Read: I/O devices below root port read from the host memory
- Inbound Write: I/O devices below root port write to the host memory
- Outbound Read: CPU reads from I/O devices below root port
- Outbound Write: CPU writes to I/O devices below root port

- Align CSV output for summary.

- Clarify --null use cases: Assess raw overhead of 'perf stat' or
measure just wall clock time.

- Improve readability of shadow stats.

perf record:

- Change the COMM when starting the workload so that --exclude-perf
is honoured.

- Improve the 'Workload failed' message, printing the events and what
was exec'ed.

- Fix cross-arch support for TIME_CONV.

perf report:

- Add option to disable raw event ordering.

- Dump the contents of PERF_RECORD_TIME_CONV in 'perf report -D'.

- Improvements to --stat output, which shows information about
PERF_RECORD_ events.

- Preserve identifier id in OCaml demangler.

perf annotate:

- Show full source location with 'l' hotkey in the 'perf annotate'
TUI.

- Add line numbers, as in the TUI, and source locations at EOL to
'perf annotate' --stdio mode.

- Add --demangle and --demangle-kernel to 'perf annotate'.

- Allow configuring annotate.demangle{,_kernel} in 'perf config'.

- Fix sample events lost in stdio mode.

perf data:

- Allow converting a perf.data file to JSON.

libperf:

- Add support for user space counter access.

- Update topdown documentation to permit rdpmc calls.

perf test:

- Add 'perf test' for 'perf stat' CSV output.

- Add 'perf test' entries to test the hybrid PMU support.

- Cleanup 'perf test daemon' if its 'perf test' is interrupted.

- Handle metric reuse in pmu-events parsing 'perf test' entry.

- Add test for PE executable support.

- Add timeout for wait for daemon start in its 'perf test' entries.

Build:

- Enable libtraceevent dynamic linking.

- Improve feature detection output.

- Fix caching of feature checks.

- First round of updates for tools copies of kernel headers.

- Enable warnings when compiling BPF programs.

Vendor specific events:

- Intel:
- Add missing skylake & icelake model numbers.

- arm64:
- Add Hisi hip08 L1, L2 and L3 metrics.
- Add Fujitsu A64FX PMU events.

- PowerPC:
- Initial JSON/events list for power10 platform.
- Remove unsupported power9 metrics.

- AMD:
- Add Zen3 events.
- Fix broken L2 Cache Hits from L2 HWPF metric.
- Use lowercase for all the event codes and umasks.

Hardware tracing:

- arm64:
- Update CoreSight ETM metadata format.
- Fix bitmap for CS-ETM option.
- Support PID tracing in config.
- Detect pid in VMID for kernel running at EL2.

Arch specific updates:

- MIPS:
- Support MIPS unwinding and dwarf-regs.
- Generate mips syscalls_n64.c syscall table.

- PowerPC:
- Add support for PERF_SAMPLE_WEIGHT_STRUCT on PowerPC.
- Support pipeline stage cycles for powerpc.

libbeauty:

- Fix fsconfig generator"

* tag 'perf-tools-for-v5.13-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (132 commits)
perf build: Defer printing detected features to the end of all feature checks
tools build: Allow deferring printing the results of feature detection
perf build: Regenerate the FEATURE_DUMP file after extra feature checks
perf session: Dump PERF_RECORD_TIME_CONV event
perf session: Add swap operation for event TIME_CONV
perf jit: Let convert_timestamp() to be backwards-compatible
perf tools: Change fields type in perf_record_time_conv
perf tools: Enable libtraceevent dynamic linking
perf Documentation: Document intel-hybrid support
perf tests: Skip 'perf stat metrics (shadow stat) test' for hybrid
perf tests: Support 'Convert perf time to TSC' test for hybrid
perf tests: Support 'Session topology' test for hybrid
perf tests: Support 'Parse and process metrics' test for hybrid
perf tests: Support 'Track with sched_switch' test for hybrid
perf tests: Skip 'Setup struct perf_event_attr' test for hybrid
perf tests: Add hybrid cases for 'Roundtrip evsel->name' test
perf tests: Add hybrid cases for 'Parse event definition strings' test
perf record: Uniquify hybrid event name
perf stat: Warn group events from different hybrid PMU
perf stat: Filter out unmatched aggregation for hybrid event
...

+9952 -883
+2
MAINTAINERS
···
 R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
 R: Jiri Olsa <jolsa@redhat.com>
 R: Namhyung Kim <namhyung@kernel.org>
+L: linux-perf-users@vger.kernel.org
 L: linux-kernel@vger.kernel.org
 S: Supported
+W: https://perf.wiki.kernel.org/
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 F: arch/*/events/*
 F: arch/*/events/*/*
+18 -10
tools/build/Makefile.feature
···
 libpython-version \
 libslang \
 libslang-include-subdir \
+libtraceevent \
 libcrypto \
 libunwind \
 pthread-attr-setaffinity-np \
···
 feature_verbose := 1
 endif

-ifeq ($(feature_display),1)
-  $(info )
-  $(info Auto-detecting system features:)
-  $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
-  ifneq ($(feature_verbose),1)
+feature_display_entries = $(eval $(feature_display_entries_code))
+define feature_display_entries_code
+  ifeq ($(feature_display),1)
+    $(info )
+    $(info Auto-detecting system features:)
+    $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
+    ifneq ($(feature_verbose),1)
+      $(info )
+    endif
+  endif
+
+  ifeq ($(feature_verbose),1)
+    TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS))
+    $(foreach feat,$(TMP),$(call feature_print_status,$(feat),))
     $(info )
   endif
-endif
+endef

-ifeq ($(feature_verbose),1)
-  TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS))
-  $(foreach feat,$(TMP),$(call feature_print_status,$(feat),))
-  $(info )
+ifeq ($(FEATURE_DISPLAY_DEFERRED),)
+  $(call feature_display_entries)
 endif
+4
tools/build/feature/Makefile
···
 test-libpython-version.bin \
 test-libslang.bin \
 test-libslang-include-subdir.bin \
+test-libtraceevent.bin \
 test-libcrypto.bin \
 test-libunwind.bin \
 test-libunwind-debug-frame.bin \
···
 $(OUTPUT)test-libslang-include-subdir.bin:
     $(BUILD) -lslang
+
+$(OUTPUT)test-libtraceevent.bin:
+    $(BUILD) -ltraceevent

 $(OUTPUT)test-libcrypto.bin:
     $(BUILD) -lcrypto
+12
tools/build/feature/test-libtraceevent.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include <traceevent/trace-seq.h>
+
+int main(void)
+{
+    int rv = 0;
+    struct trace_seq s;
+    trace_seq_init(&s);
+    rv += !(s.state == TRACE_SEQ__GOOD);
+    trace_seq_destroy(&s);
+    return rv;
+}
+75
tools/include/linux/math64.h
···
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+
+#ifdef __x86_64__
+static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
+{
+    u64 q;
+
+    asm ("mulq %2; divq %3" : "=a" (q)
+         : "a" (a), "rm" (b), "rm" (c)
+         : "rdx");
+
+    return q;
+}
+#define mul_u64_u64_div64 mul_u64_u64_div64
+#endif
+
+#ifdef __SIZEOF_INT128__
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
+{
+    return (u64)(((unsigned __int128)a * b) >> shift);
+}
+
+#else
+
+#ifdef __i386__
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+    u32 high, low;
+
+    asm ("mull %[b]" : "=a" (low), "=d" (high)
+         : [a] "a" (a), [b] "rm" (b) );
+
+    return low | ((u64)high) << 32;
+}
+#else
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+    return (u64)a * b;
+}
+#endif
+
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
+{
+    u32 ah, al;
+    u64 ret;
+
+    al = a;
+    ah = a >> 32;
+
+    ret = mul_u32_u32(al, b) >> shift;
+    if (ah)
+        ret += mul_u32_u32(ah, b) << (32 - shift);
+
+    return ret;
+}
+
+#endif /* __SIZEOF_INT128__ */
+
+#ifndef mul_u64_u64_div64
+static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
+{
+    u64 quot, rem;
+
+    quot = a / c;
+    rem = a % c;
+
+    return quot * b + (rem * b) / c;
+}
+#endif
+
+#endif /* _LINUX_MATH64_H */
+3
tools/include/linux/types.h
···
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;

+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
 typedef struct {
     int counter;
 } atomic_t;
+15
tools/include/uapi/linux/perf_event.h
···
 };

 /*
+ * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ * PERF_TYPE_HARDWARE:         0xEEEEEEEE000000AA
+ *                             AA: hardware event ID
+ *                             EEEEEEEE: PMU type ID
+ * PERF_TYPE_HW_CACHE:         0xEEEEEEEE00DDCCBB
+ *                             BB: hardware cache ID
+ *                             CC: hardware cache op ID
+ *                             DD: hardware cache op result ID
+ *                             EEEEEEEE: PMU type ID
+ * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ */
+#define PERF_PMU_TYPE_SHIFT    32
+#define PERF_HW_EVENT_MASK     0xffffffff
+
+/*
  * Generalized performance event event_id types, used by the
  * attr.event_id parameter of the sys_perf_event_open()
  * syscall:
+3
tools/lib/perf/Documentation/libperf.txt
···
              struct perf_thread_map *threads);
   void perf_evsel__close(struct perf_evsel *evsel);
   void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+  int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+  void perf_evsel__munmap(struct perf_evsel *evsel);
+  void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
   int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                struct perf_counts_values *count);
   int perf_evsel__enable(struct perf_evsel *evsel);
+80
tools/lib/perf/evsel.c
···
 #include <stdlib.h>
 #include <internal/xyarray.h>
 #include <internal/cpumap.h>
+#include <internal/mmap.h>
 #include <internal/threadmap.h>
 #include <internal/lib.h>
 #include <linux/string.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>

 void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
 {
···
 }

 #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)

 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
···
     }

     return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+    evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap));
+
+    return evsel->mmap != NULL ? 0 : -ENOMEM;
 }

 static int
···
         perf_evsel__close_fd_cpu(evsel, cpu);
 }

+void perf_evsel__munmap(struct perf_evsel *evsel)
+{
+    int cpu, thread;
+
+    if (evsel->fd == NULL || evsel->mmap == NULL)
+        return;
+
+    for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+        for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+            int fd = FD(evsel, cpu, thread);
+            struct perf_mmap *map = MMAP(evsel, cpu, thread);
+
+            if (fd < 0)
+                continue;
+
+            perf_mmap__munmap(map);
+        }
+    }
+
+    xyarray__delete(evsel->mmap);
+    evsel->mmap = NULL;
+}
+
+int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
+{
+    int ret, cpu, thread;
+    struct perf_mmap_param mp = {
+        .prot = PROT_READ | PROT_WRITE,
+        .mask = (pages * page_size) - 1,
+    };
+
+    if (evsel->fd == NULL || evsel->mmap)
+        return -EINVAL;
+
+    if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
+        return -ENOMEM;
+
+    for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+        for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+            int fd = FD(evsel, cpu, thread);
+            struct perf_mmap *map = MMAP(evsel, cpu, thread);
+
+            if (fd < 0)
+                continue;
+
+            perf_mmap__init(map, NULL, false, NULL);
+
+            ret = perf_mmap__mmap(map, &mp, fd, cpu);
+            if (ret) {
+                perf_evsel__munmap(evsel);
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+{
+    if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL)
+        return NULL;
+
+    return MMAP(evsel, cpu, thread)->base;
+}
+
 int perf_evsel__read_size(struct perf_evsel *evsel)
 {
     u64 read_format = evsel->attr.read_format;
···
     if (FD(evsel, cpu, thread) < 0)
         return -EINVAL;
+
+    if (MMAP(evsel, cpu, thread) &&
+        !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+        return 0;

     if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
         return -errno;
+1
tools/lib/perf/include/internal/evsel.h
···
     struct perf_cpu_map    *own_cpus;
     struct perf_thread_map *threads;
     struct xyarray         *fd;
+    struct xyarray         *mmap;
     struct xyarray         *sample_id;
     u64                    *id;
     u32                     ids;
+3
tools/lib/perf/include/internal/mmap.h
···
 #define PERF_SAMPLE_MAX_SIZE (1 << 16)

 struct perf_mmap;
+struct perf_counts_values;

 typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
···
 void perf_mmap__put(struct perf_mmap *map);

 u64 perf_mmap__read_head(struct perf_mmap *map);
+
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);

 #endif /* __LIBPERF_INTERNAL_MMAP_H */
+32
tools/lib/perf/include/internal/tests.h
···
 #define __LIBPERF_INTERNAL_TESTS_H

 #include <stdio.h>
+#include <unistd.h>

 int tests_failed;
+int tests_verbose;
+
+static inline int get_verbose(char **argv, int argc)
+{
+    int c;
+    int verbose = 0;
+
+    while ((c = getopt(argc, argv, "v")) != -1) {
+        switch (c)
+        {
+        case 'v':
+            verbose = 1;
+            break;
+        default:
+            break;
+        }
+    }
+    return verbose;
+}

 #define __T_START                                       \
 do {                                                    \
+    tests_verbose = get_verbose(argv, argc);            \
     fprintf(stdout, "- running %s...", __FILE__);       \
     fflush(NULL);                                       \
     tests_failed = 0;                                   \
···
         tests_failed++;                 \
         return -1;                      \
     }                                   \
+} while (0)
+
+#define __T_VERBOSE(...)                \
+do {                                    \
+    if (tests_verbose) {                \
+        if (tests_verbose == 1) {       \
+            fputc('\n', stderr);        \
+            tests_verbose++;            \
+        }                               \
+        fprintf(stderr, ##__VA_ARGS__); \
+    }                                   \
 } while (0)

 #endif /* __LIBPERF_INTERNAL_TESTS_H */
+8 -1
tools/lib/perf/include/internal/xyarray.h
···
 void xyarray__delete(struct xyarray *xy);
 void xyarray__reset(struct xyarray *xy);

-static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+static inline void *__xyarray__entry(struct xyarray *xy, int x, int y)
 {
     return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+static inline void *xyarray__entry(struct xyarray *xy, size_t x, size_t y)
+{
+    if (x >= xy->max_x || y >= xy->max_y)
+        return NULL;
+    return __xyarray__entry(xy, x, y);
 }

 static inline int xyarray__max_y(struct xyarray *xy)
+31
tools/lib/perf/include/perf/bpf_perf.h
···
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_BPF_PERF_H
+#define __LIBPERF_BPF_PERF_H
+
+#include <linux/types.h>  /* for __u32 */
+
+/*
+ * bpf_perf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map.  The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the
+ * leader prog, and the diff_map. Each perf-stat session holds a reference
+ * to the bpf_link to make sure the leader prog is attached to sched_switch
+ * tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+    __u32 link_id;
+    __u32 diff_map_id;
+};
+
+/* default attr_map name */
+#define BPF_PERF_DEFAULT_ATTR_MAP_PATH "perf_attr_map"
+
+#endif /* __LIBPERF_BPF_PERF_H */
+5 -2
tools/lib/perf/include/perf/event.h
···
 #include <linux/bpf.h>
 #include <sys/types.h> /* pid_t */

+#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
+
 struct perf_record_mmap {
     struct perf_event_header header;
     __u32                    pid, tid;
···
     __u64                   time_zero;
     __u64                   time_cycles;
     __u64                   time_mask;
-    bool                    cap_user_time_zero;
-    bool                    cap_user_time_short;
+    __u8                    cap_user_time_zero;
+    __u8                    cap_user_time_short;
+    __u8                    reserved[6];    /* For alignment */
 };

 struct perf_record_header_feature {
+3
tools/lib/perf/include/perf/evsel.h
···
                  struct perf_thread_map *threads);
 LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
 LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
 LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
                  struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+3
tools/lib/perf/libperf.map
···
         perf_evsel__disable;
         perf_evsel__open;
         perf_evsel__close;
+        perf_evsel__mmap;
+        perf_evsel__munmap;
+        perf_evsel__mmap_base;
         perf_evsel__read;
         perf_evsel__cpus;
         perf_evsel__threads;
+88
tools/lib/perf/mmap.c
···
 #include <linux/perf_event.h>
 #include <perf/mmap.h>
 #include <perf/event.h>
+#include <perf/evsel.h>
 #include <internal/mmap.h>
 #include <internal/lib.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include "internal.h"

 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
···
     map->prev = map->start;

     return event;
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+static u64 read_perf_counter(unsigned int counter)
+{
+    unsigned int low, high;
+
+    asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+    return low | ((u64)high) << 32;
+}
+
+static u64 read_timestamp(void)
+{
+    unsigned int low, high;
+
+    asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+    return low | ((u64)high) << 32;
+}
+#else
+static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
+static u64 read_timestamp(void) { return 0; }
+#endif
+
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
+{
+    struct perf_event_mmap_page *pc = map->base;
+    u32 seq, idx, time_mult = 0, time_shift = 0;
+    u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
+
+    if (!pc || !pc->cap_user_rdpmc)
+        return -1;
+
+    do {
+        seq = READ_ONCE(pc->lock);
+        barrier();
+
+        count->ena = READ_ONCE(pc->time_enabled);
+        count->run = READ_ONCE(pc->time_running);
+
+        if (pc->cap_user_time && count->ena != count->run) {
+            cyc = read_timestamp();
+            time_mult = READ_ONCE(pc->time_mult);
+            time_shift = READ_ONCE(pc->time_shift);
+            time_offset = READ_ONCE(pc->time_offset);
+
+            if (pc->cap_user_time_short) {
+                time_cycles = READ_ONCE(pc->time_cycles);
+                time_mask = READ_ONCE(pc->time_mask);
+            }
+        }
+
+        idx = READ_ONCE(pc->index);
+        cnt = READ_ONCE(pc->offset);
+        if (pc->cap_user_rdpmc && idx) {
+            s64 evcnt = read_perf_counter(idx - 1);
+            u16 width = READ_ONCE(pc->pmc_width);
+
+            evcnt <<= 64 - width;
+            evcnt >>= 64 - width;
+            cnt += evcnt;
+        } else
+            return -1;
+
+        barrier();
+    } while (READ_ONCE(pc->lock) != seq);
+
+    if (count->ena != count->run) {
+        u64 delta;
+
+        /* Adjust for cap_usr_time_short, a nop if not */
+        cyc = time_cycles + ((cyc - time_cycles) & time_mask);
+
+        delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
+
+        count->ena += delta;
+        if (idx)
+            count->run += delta;
+
+        cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
+    }
+
+    count->val = cnt;
+
+    return 0;
 }
+4 -2
tools/lib/perf/tests/Makefile
···
 TESTS_SO := $(addsuffix -so,$(TESTS))
 TESTS_A := $(addsuffix -a,$(TESTS))

+TEST_ARGS := $(if $(V),-v)
+
 # Set compile option CFLAGS
 ifdef EXTRA_CFLAGS
 CFLAGS := $(EXTRA_CFLAGS)
···
 run:
     @echo "running static:"
-    @for i in $(TESTS_A); do ./$$i; done
+    @for i in $(TESTS_A); do ./$$i $(TEST_ARGS); done
     @echo "running dynamic:"
-    @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done
+    @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i $(TEST_ARGS); done

 clean:
     $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO)
+66
tools/lib/perf/tests/test-evsel.c
···
     return 0;
 }

+static int test_stat_user_read(int event)
+{
+    struct perf_counts_values counts = { .val = 0 };
+    struct perf_thread_map *threads;
+    struct perf_evsel *evsel;
+    struct perf_event_mmap_page *pc;
+    struct perf_event_attr attr = {
+        .type   = PERF_TYPE_HARDWARE,
+        .config = event,
+    };
+    int err, i;
+
+    threads = perf_thread_map__new_dummy();
+    __T("failed to create threads", threads);
+
+    perf_thread_map__set_pid(threads, 0, 0);
+
+    evsel = perf_evsel__new(&attr);
+    __T("failed to create evsel", evsel);
+
+    err = perf_evsel__open(evsel, NULL, threads);
+    __T("failed to open evsel", err == 0);
+
+    err = perf_evsel__mmap(evsel, 0);
+    __T("failed to mmap evsel", err == 0);
+
+    pc = perf_evsel__mmap_base(evsel, 0, 0);
+
+#if defined(__i386__) || defined(__x86_64__)
+    __T("userspace counter access not supported", pc->cap_user_rdpmc);
+    __T("userspace counter access not enabled", pc->index);
+    __T("userspace counter width not set", pc->pmc_width >= 32);
+#endif
+
+    perf_evsel__read(evsel, 0, 0, &counts);
+    __T("failed to read value for evsel", counts.val != 0);
+
+    for (i = 0; i < 5; i++) {
+        volatile int count = 0x10000 << i;
+        __u64 start, end, last = 0;
+
+        __T_VERBOSE("\tloop = %u, ", count);
+
+        perf_evsel__read(evsel, 0, 0, &counts);
+        start = counts.val;
+
+        while (count--) ;
+
+        perf_evsel__read(evsel, 0, 0, &counts);
+        end = counts.val;
+
+        __T("invalid counter data", (end - start) > last);
+        last = end - start;
+        __T_VERBOSE("count = %llu\n", end - start);
+    }
+
+    perf_evsel__munmap(evsel);
+    perf_evsel__close(evsel);
+    perf_evsel__delete(evsel);
+
+    perf_thread_map__put(threads);
+    return 0;
+}
+
 int main(int argc, char **argv)
 {
     __T_START;
···
     test_stat_cpu();
     test_stat_thread();
     test_stat_thread_enable();
+    test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
+    test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);

     __T_END;
     return tests_failed == 0 ? 0 : -1;
+1
tools/perf/.gitignore
···
 output.svg
 perf-archive
 perf-with-kcore
+perf-iostat
 tags
 TAGS
 cscope*
+214
tools/perf/Documentation/intel-hybrid.txt
··· 1 + Intel hybrid support 2 + -------------------- 3 + Support for Intel hybrid events within perf tools. 4 + 5 + For some Intel platforms, such as AlderLake, which is hybrid platform and 6 + it consists of atom cpu and core cpu. Each cpu has dedicated event list. 7 + Part of events are available on core cpu, part of events are available 8 + on atom cpu and even part of events are available on both. 9 + 10 + Kernel exports two new cpu pmus via sysfs: 11 + /sys/devices/cpu_core 12 + /sys/devices/cpu_atom 13 + 14 + The 'cpus' files are created under the directories. For example, 15 + 16 + cat /sys/devices/cpu_core/cpus 17 + 0-15 18 + 19 + cat /sys/devices/cpu_atom/cpus 20 + 16-23 21 + 22 + It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus. 23 + 24 + Quickstart 25 + 26 + List hybrid event 27 + ----------------- 28 + 29 + As before, use perf-list to list the symbolic event. 30 + 31 + perf list 32 + 33 + inst_retired.any 34 + [Fixed Counter: Counts the number of instructions retired. Unit: cpu_atom] 35 + inst_retired.any 36 + [Number of instructions retired. Fixed Counter - architectural event. Unit: cpu_core] 37 + 38 + The 'Unit: xxx' is added to brief description to indicate which pmu 39 + the event is belong to. Same event name but with different pmu can 40 + be supported. 41 + 42 + Enable hybrid event with a specific pmu 43 + --------------------------------------- 44 + 45 + To enable a core only event or atom only event, following syntax is supported: 46 + 47 + cpu_core/<event name>/ 48 + or 49 + cpu_atom/<event name>/ 50 + 51 + For example, count the 'cycles' event on core cpus. 52 + 53 + perf stat -e cpu_core/cycles/ 54 + 55 + Create two events for one hardware event automatically 56 + ------------------------------------------------------ 57 + 58 + When creating one event and the event is available on both atom and core, 59 + two events are created automatically. One is for atom, the other is for 60 + core. 
Most of hardware events and cache events are available on both 61 + cpu_core and cpu_atom. 62 + 63 + For hardware events, they have pre-defined configs (e.g. 0 for cycles). 64 + But on hybrid platform, kernel needs to know where the event comes from 65 + (from atom or from core). The original perf event type PERF_TYPE_HARDWARE 66 + can't carry pmu information. So now this type is extended to be PMU aware 67 + type. The PMU type ID is stored at attr.config[63:32]. 68 + 69 + PMU type ID is retrieved from sysfs. 70 + /sys/devices/cpu_atom/type 71 + /sys/devices/cpu_core/type 72 + 73 + The new attr.config layout for PERF_TYPE_HARDWARE: 74 + 75 + PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA 76 + AA: hardware event ID 77 + EEEEEEEE: PMU type ID 78 + 79 + Cache event is similar. The type PERF_TYPE_HW_CACHE is extended to be 80 + PMU aware type. The PMU type ID is stored at attr.config[63:32]. 81 + 82 + The new attr.config layout for PERF_TYPE_HW_CACHE: 83 + 84 + PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB 85 + BB: hardware cache ID 86 + CC: hardware cache op ID 87 + DD: hardware cache op result ID 88 + EEEEEEEE: PMU type ID 89 + 90 + When enabling a hardware event without specified pmu, such as, 91 + perf stat -e cycles -a (use system-wide in this example), two events 92 + are created automatically. 
93 + 94 + ------------------------------------------------------------ 95 + perf_event_attr: 96 + size 120 97 + config 0x400000000 98 + sample_type IDENTIFIER 99 + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING 100 + disabled 1 101 + inherit 1 102 + exclude_guest 1 103 + ------------------------------------------------------------ 104 + 105 + and 106 + 107 + ------------------------------------------------------------ 108 + perf_event_attr: 109 + size 120 110 + config 0x800000000 111 + sample_type IDENTIFIER 112 + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING 113 + disabled 1 114 + inherit 1 115 + exclude_guest 1 116 + ------------------------------------------------------------ 117 + 118 + type 0 is PERF_TYPE_HARDWARE. 119 + 0x4 in 0x400000000 indicates it's cpu_core pmu. 120 + 0x8 in 0x800000000 indicates it's cpu_atom pmu (atom pmu type id is random). 121 + 122 + The kernel creates 'cycles' (0x400000000) on cpu0-cpu15 (core cpus), 123 + and create 'cycles' (0x800000000) on cpu16-cpu23 (atom cpus). 124 + 125 + For perf-stat result, it displays two events: 126 + 127 + Performance counter stats for 'system wide': 128 + 129 + 6,744,979 cpu_core/cycles/ 130 + 1,965,552 cpu_atom/cycles/ 131 + 132 + The first 'cycles' is core event, the second 'cycles' is atom event. 133 + 134 + Thread mode example: 135 + -------------------- 136 + 137 + perf-stat reports the scaled counts for hybrid event and with a percentage 138 + displayed. The percentage is the event's running time/enabling time. 139 + 140 + One example, 'triad_loop' runs on cpu16 (atom core), while we can see the 141 + scaled value for core cycles is 160,444,092 and the percentage is 0.47%. 142 + 143 + perf stat -e cycles -- taskset -c 16 ./triad_loop 144 + 145 + As previous, two events are created. 
146 + 147 + ------------------------------------------------------------ 148 + perf_event_attr: 149 + size 120 150 + config 0x400000000 151 + sample_type IDENTIFIER 152 + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING 153 + disabled 1 154 + inherit 1 155 + enable_on_exec 1 156 + exclude_guest 1 157 + ------------------------------------------------------------ 158 + 159 + and 160 + 161 + ------------------------------------------------------------ 162 + perf_event_attr: 163 + size 120 164 + config 0x800000000 165 + sample_type IDENTIFIER 166 + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING 167 + disabled 1 168 + inherit 1 169 + enable_on_exec 1 170 + exclude_guest 1 171 + ------------------------------------------------------------ 172 + 173 + Performance counter stats for 'taskset -c 16 ./triad_loop': 174 + 175 + 233,066,666 cpu_core/cycles/ (0.43%) 176 + 604,097,080 cpu_atom/cycles/ (99.57%) 177 + 178 + perf-record: 179 + ------------ 180 + 181 + If there is no '-e' specified in perf record, on hybrid platform, 182 + it creates two default 'cycles' and adds them to event list. One 183 + is for core, the other is for atom. 184 + 185 + perf-stat: 186 + ---------- 187 + 188 + If there is no '-e' specified in perf stat, on hybrid platform, 189 + besides of software events, following events are created and 190 + added to event list in order. 191 + 192 + cpu_core/cycles/, 193 + cpu_atom/cycles/, 194 + cpu_core/instructions/, 195 + cpu_atom/instructions/, 196 + cpu_core/branches/, 197 + cpu_atom/branches/, 198 + cpu_core/branch-misses/, 199 + cpu_atom/branch-misses/ 200 + 201 + Of course, both perf-stat and perf-record support to enable 202 + hybrid event with a specific pmu. 203 + 204 + e.g. 
205 + perf stat -e cpu_core/cycles/ 206 + perf stat -e cpu_atom/cycles/ 207 + perf stat -e cpu_core/r1a/ 208 + perf stat -e cpu_atom/L1-icache-loads/ 209 + perf stat -e cpu_core/cycles/,cpu_atom/instructions/ 210 + perf stat -e '{cpu_core/cycles/,cpu_core/instructions/}' 211 + 212 + But '{cpu_core/cycles/,cpu_atom/instructions/}' will return a 213 + warning and disable grouping, because the pmus in the group do 214 + not match (cpu_core vs. cpu_atom).
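The config encoding documented above (0x400000000 for cpu_core, 0x800000000 for cpu_atom, with 'cycles' being hardware event id 0) can be sketched as a bit of arithmetic. The `hybrid_config()` helper below is illustrative only, not a perf API; the 0x4/0x8 type ids are taken from the example dumps above.

```python
# Sketch of the hybrid config encoding described above: the PMU type id
# sits in the upper 32 bits of perf_event_attr.config, the hardware
# event id in the lower bits. hybrid_config() is a hypothetical helper.
PERF_COUNT_HW_CPU_CYCLES = 0  # PERF_TYPE_HARDWARE event id for 'cycles'

def hybrid_config(pmu_type_id, hw_event_id):
    return (pmu_type_id << 32) | hw_event_id

print(hex(hybrid_config(0x4, PERF_COUNT_HW_CPU_CYCLES)))  # cpu_core: 0x400000000
print(hex(hybrid_config(0x8, PERF_COUNT_HW_CPU_CYCLES)))  # cpu_atom: 0x800000000
```

This is why the two perf_event_attr dumps above differ only in their config values: both request the same hardware event, on different PMUs.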
+7
tools/perf/Documentation/perf-annotate.txt
··· 124 124 --group:: 125 125 Show event group information together 126 126 127 + --demangle:: 128 + Demangle symbol names to human readable form. It's enabled by default; 129 + disable with --no-demangle. 130 + 131 + --demangle-kernel:: 132 + Demangle kernel symbol names to human readable form (for C++ kernels). 133 + 127 134 --percent-type:: 128 135 Set annotation percent type from following choices: 129 136 global-period, local-period, global-hits, local-hits
+1 -1
tools/perf/Documentation/perf-buildid-cache.txt
··· 57 57 -u:: 58 58 --update=:: 59 59 Update specified file of the cache. Note that this doesn't remove 60 - older entires since those may be still needed for annotating old 60 + older entries since those may be still needed for annotating old 61 61 (or remote) perf.data. Only if there is already a cache which has 62 62 exactly same build-id, that is replaced by new one. It can be used 63 63 to update kallsyms and kernel dso to vmlinux in order to support
+11
tools/perf/Documentation/perf-config.txt
··· 123 123 queue-size = 0 124 124 children = true 125 125 group = true 126 + skip-empty = true 126 127 127 128 [llvm] 128 129 dump-obj = true ··· 394 393 395 394 This option works with tui, stdio2 browsers. 396 395 396 + annotate.demangle:: 397 + Demangle symbol names to human readable form. Default is 'true'. 398 + 399 + annotate.demangle_kernel:: 400 + Demangle kernel symbol names to human readable form. Default is 'true'. 401 + 397 402 hist.*:: 398 403 hist.percentage:: 399 404 This option control the way to calculate overhead of filtered entries - ··· 531 524 99.84% 99.76% noploop noploop [.] main 532 525 0.07% 0.00% noploop ld-2.15.so [.] strcmp 533 526 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del 527 + 528 + report.skip-empty:: 529 + This option controls whether empty results are shown in the --stat 530 + output. If set to true, 'perf report --stat' will not show 0 stats. 534 531 535 532 top.*:: 536 533 top.children::
+4 -1
tools/perf/Documentation/perf-data.txt
··· 17 17 COMMANDS 18 18 -------- 19 19 convert:: 20 - Converts perf data file into another format (only CTF [1] format is support by now). 20 + Converts perf data file into another format. 21 21 It's possible to set data-convert debug variable to get debug messages from conversion, 22 22 like: 23 23 perf --debug data-convert data convert ... ··· 26 26 --------------------- 27 27 --to-ctf:: 28 28 Triggers the CTF conversion, specify the path of CTF data directory. 29 + 30 + --to-json:: 31 + Triggers JSON conversion. Specify the JSON filename to output. 29 32 30 33 --tod:: 31 34 Convert time to wall clock time.
+88
tools/perf/Documentation/perf-iostat.txt
··· 1 + perf-iostat(1) 2 + =============== 3 + 4 + NAME 5 + ---- 6 + perf-iostat - Show I/O performance metrics 7 + 8 + SYNOPSIS 9 + -------- 10 + [verse] 11 + 'perf iostat' list 12 + 'perf iostat' <ports> -- <command> [<options>] 13 + 14 + DESCRIPTION 15 + ----------- 16 + This mode is intended to provide four I/O performance metrics for each PCIe root port: 17 + 18 + - Inbound Read - I/O devices below root port read from the host memory, in MB 19 + 20 + - Inbound Write - I/O devices below root port write to the host memory, in MB 21 + 22 + - Outbound Read - CPU reads from I/O devices below root port, in MB 23 + 24 + - Outbound Write - CPU writes to I/O devices below root port, in MB 25 + 26 + OPTIONS 27 + ------- 28 + <command>...:: 29 + Any command you can specify in a shell. 30 + 31 + list:: 32 + List all PCIe root ports. 33 + 34 + <ports>:: 35 + Select the root ports for monitoring. A comma-separated list is supported. 36 + 37 + EXAMPLES 38 + -------- 39 + 40 + 1. List all PCIe root ports (example for a 2-socket platform): 41 + 42 + $ perf iostat list 43 + S0-uncore_iio_0<0000:00> 44 + S1-uncore_iio_0<0000:80> 45 + S0-uncore_iio_1<0000:17> 46 + S1-uncore_iio_1<0000:85> 47 + S0-uncore_iio_2<0000:3a> 48 + S1-uncore_iio_2<0000:ae> 49 + S0-uncore_iio_3<0000:5d> 50 + S1-uncore_iio_3<0000:d7> 51 + 52 + 2. Collect metrics for all PCIe root ports: 53 + 54 + $ perf iostat -- dd if=/dev/zero of=/dev/nvme0n1 bs=1M oflag=direct 55 + 357708+0 records in 56 + 357707+0 records out 57 + 375083606016 bytes (375 GB, 349 GiB) copied, 215.974 s, 1.7 GB/s 58 + 59 + Performance counter stats for 'system wide': 60 + 61 + port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) 62 + 0000:00 1 0 2 3 63 + 0000:80 0 0 0 0 64 + 0000:17 352552 43 0 21 65 + 0000:85 0 0 0 0 66 + 0000:3a 3 0 0 0 67 + 0000:ae 0 0 0 0 68 + 0000:5d 0 0 0 0 69 + 0000:d7 0 0 0 0 70 + 71 + 3. 
Collect metrics for a comma-separated list of PCIe root ports: 72 + 73 + $ perf iostat 0000:17,0:3a -- dd if=/dev/zero of=/dev/nvme0n1 bs=1M oflag=direct 74 + 357708+0 records in 75 + 357707+0 records out 76 + 375083606016 bytes (375 GB, 349 GiB) copied, 197.08 s, 1.9 GB/s 77 + 78 + Performance counter stats for 'system wide': 79 + 80 + port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) 81 + 0000:17 358559 44 0 22 82 + 0000:3a 3 2 0 0 83 + 84 + 197.081983474 seconds time elapsed 85 + 86 + SEE ALSO 87 + -------- 88 + linkperf:perf-stat[1]
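Each line of the 'perf iostat list' output above pairs an uncore IIO PMU with the PCIe root bus it monitors. A small parsing sketch, assuming only the pattern visible in the sample output (the regex and field names are not taken from the perf-iostat implementation):

```python
import re

# Parse lines like "S0-uncore_iio_0<0000:00>" from 'perf iostat list'.
# The pattern is inferred from the sample output above.
LINE_RE = re.compile(r"S(\d+)-uncore_iio_(\d+)<([0-9a-f]{4}:[0-9a-f]{2})>")

def parse_port(line):
    m = LINE_RE.match(line.strip())
    if not m:
        return None
    socket, iio_unit, root_bus = m.groups()
    return {"socket": int(socket), "iio": int(iio_unit), "root_bus": root_bus}

print(parse_port("S1-uncore_iio_2<0000:ae>"))
# -> {'socket': 1, 'iio': 2, 'root_bus': '0000:ae'}
```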
+1
tools/perf/Documentation/perf-record.txt
··· 695 695 wait -n ${perf_pid} 696 696 exit $? 697 697 698 + include::intel-hybrid.txt[] 698 699 699 700 SEE ALSO 700 701 --------
+9 -1
tools/perf/Documentation/perf-report.txt
··· 112 112 - ins_lat: Instruction latency in core cycles. This is the global instruction 113 113 latency 114 114 - local_ins_lat: Local instruction latency version 115 + - p_stage_cyc: Number of cycles spent in a pipeline stage. 116 + Currently supported only on powerpc. 115 117 116 118 By default, comm, dso and symbol keys are used. 117 119 (i.e. --sort comm,dso,symbol) ··· 225 223 -D:: 226 224 --dump-raw-trace:: 227 225 Dump raw trace in ASCII. 226 + 227 + --disable-order:: 228 + Disable raw trace ordering. 228 229 229 230 -g:: 230 231 --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>:: ··· 477 472 but probably we'll make the default not to show the switch-on/off events 478 473 on the --group mode and if there is only one event besides the off/on ones, 479 474 go straight to the histogram browser, just like 'perf report' with no events 480 - explicitely specified does. 475 + explicitly specified does. 481 476 482 477 --itrace:: 483 478 Options for decoding instruction tracing data. The options are: ··· 570 565 'Avg Cycles%' - block average sampled cycles / sum of total block average 571 566 sampled cycles 572 567 'Avg Cycles' - block average sampled cycles 568 + 569 + --skip-empty:: 570 + Do not print 0 results in the --stat output. 573 571 574 572 include::callchain-overhead-calculation.txt[]  575 573
+28 -1
tools/perf/Documentation/perf-stat.txt
··· 93 93 94 94 1.102235068 seconds time elapsed 95 95 96 + --bpf-counters:: 97 + Use BPF programs to aggregate readings from perf_events. This 98 + allows multiple perf-stat sessions that are counting the same metric (cycles, 99 + instructions, etc.) to share hardware counters. 100 + To use BPF programs on common events by default, use 101 + "perf config stat.bpf-counter-events=<list_of_events>". 102 + 103 + --bpf-attr-map:: 104 + With option "--bpf-counters", different perf-stat sessions share 105 + information about shared BPF programs and maps via a pinned hashmap. 106 + Use "--bpf-attr-map" to specify the path of this pinned hashmap. 107 + The default path is /sys/fs/bpf/perf_attr_map. 108 + 96 109 ifdef::HAVE_LIBPFM[] 97 110 --pfm-events events:: 98 111 Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) ··· 155 142 156 143 -n:: 157 144 --null:: 158 - null run - don't start any counters 145 + null run - Don't start any counters. 146 + 147 + This can be useful to measure just elapsed wall-clock time - or to assess the 148 + raw overhead of perf stat itself, without running any counters. 159 149 160 150 -v:: 161 151 --verbose:: ··· 484 468 --summary:: 485 469 Print summary for interval mode (-I). 486 470 471 + --no-csv-summary:: 472 + Don't print 'summary' in the first column of CSV summary output. 473 + This option must be used with -x and --summary. 474 + 475 + This option can be enabled in perf config by setting the variable 476 + 'stat.no-csv-summary'. 477 + 478 + $ perf config stat.no-csv-summary=true 479 + 487 480 EXAMPLES 488 481 -------- 489 482 ··· 551 526 - optional unit of metric 552 527 553 528 Additional metrics may be printed with all earlier fields being empty. 529 + 530 + include::intel-hybrid.txt[] 554 531 555 532 SEE ALSO 556 533 --------
+1 -1
tools/perf/Documentation/perf-top.txt
··· 317 317 but probably we'll make the default not to show the switch-on/off events 318 318 on the --group mode and if there is only one event besides the off/on ones, 319 319 go straight to the histogram browser, just like 'perf top' with no events 320 - explicitely specified does. 320 + explicitly specified does. 321 321 322 322 --stitch-lbr:: 323 323 Show callgraph with stitched LBRs, which may have more complete
+12
tools/perf/Documentation/perf.txt
··· 76 76 linkperf:perf-stat[1], linkperf:perf-top[1], 77 77 linkperf:perf-record[1], linkperf:perf-report[1], 78 78 linkperf:perf-list[1] 79 + 80 + linkperf:perf-annotate[1],linkperf:perf-archive[1], 81 + linkperf:perf-bench[1], linkperf:perf-buildid-cache[1], 82 + linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], 83 + linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], 84 + linkperf:perf-evlist[1], linkperf:perf-ftrace[1], 85 + linkperf:perf-help[1], linkperf:perf-inject[1], 86 + linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], 87 + linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], 88 + linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], 89 + linkperf:perf-script[1], linkperf:perf-test[1], 90 + linkperf:perf-trace[1], linkperf:perf-version[1]
+18
tools/perf/Documentation/topdown.txt
··· 72 72 The Fixed counter 3 must be the leader of the group. 73 73 74 74 #include <linux/perf_event.h> 75 + #include <sys/mman.h> 75 76 #include <sys/syscall.h> 76 77 #include <unistd.h> 77 78 ··· 96 95 if (slots_fd < 0) 97 96 ... error ... 98 97 98 + /* Memory mapping the fd permits _rdpmc calls from userspace */ 99 + void *slots_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, slots_fd, 0); 100 + if (slots_p == MAP_FAILED) 101 + ... error ... 102 + 99 103 /* 100 104 * Open metrics event file descriptor for current task. 101 105 * Set slots event as the leader of the group. ··· 116 110 if (metrics_fd < 0) 117 111 ... error ... 118 112 113 + /* Memory mapping the fd permits _rdpmc calls from userspace */ 114 + void *metrics_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, metrics_fd, 0); 115 + if (metrics_p == MAP_FAILED) 116 + ... error ... 117 + 118 + Note: the file descriptors returned by the perf_event_open calls must be memory 119 + mapped to permit calls to the _rdpmc instruction. Permission may also be granted 120 + by writing to the /sys/devices/cpu/rdpmc sysfs node. 119 121 120 122 The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used 121 123 to read slots and the topdown metrics at different points of the program: ··· 154 140 as the parallelism and overlap in the CPU program execution will 155 141 cause too much measurement inaccuracy. For example instrumenting 156 142 individual basic blocks is definitely too fine grained. 143 + 144 + _rdpmc calls should not be mixed with reading the metrics and slots counters 145 + through system calls, as the kernel will reset these counters after each system 146 + call. 157 147 158 148 Decoding metrics values 159 149 =======================
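The PERF_METRICS value read via _rdpmc packs the four level-1 topdown metrics as 8-bit fractions of 0xff. The excerpt cuts off before the "Decoding metrics values" section, so treat the field ordering below (retiring, bad speculation, frontend bound, backend bound, from the low byte up) as an assumption based on the PERF_METRICS MSR layout rather than on this document:

```python
# Decode a raw 32-bit PERF_METRICS value into the four level-1 topdown
# metrics. Each byte is a fraction of 0xff; the byte-to-metric mapping
# below is an assumption, not taken from this document.
FIELDS = ("retiring", "bad_speculation", "frontend_bound", "backend_bound")

def decode_topdown(metrics):
    return {name: ((metrics >> (8 * i)) & 0xff) / 0xff
            for i, name in enumerate(FIELDS)}

print(decode_topdown(0x000000ff))  # all slots retiring
```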
+4 -1
tools/perf/Makefile
··· 100 100 # make -C tools/perf -f tests/make 101 101 # 102 102 build-test: 103 - @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out 103 + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg make_static make_with_gtk2 out 104 + 105 + build-test-tarball: 106 + @$(MAKE) -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory out 104 107 105 108 # 106 109 # All other targets get passed through:
+30 -1
tools/perf/Makefile.config
··· 32 32 NO_SYSCALL_TABLE := 0 33 33 endif 34 34 else 35 - ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) 35 + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips)) 36 36 NO_SYSCALL_TABLE := 0 37 37 endif 38 38 endif ··· 85 85 ifeq ($(ARCH),s390) 86 86 NO_PERF_REGS := 0 87 87 CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated 88 + endif 89 + 90 + ifeq ($(ARCH),mips) 91 + NO_PERF_REGS := 0 92 + CFLAGS += -I$(OUTPUT)arch/mips/include/generated 93 + CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi 94 + LIBUNWIND_LIBS = -lunwind -lunwind-mips 88 95 endif 89 96 90 97 ifeq ($(NO_PERF_REGS),0) ··· 299 292 endif 300 293 301 294 ifeq ($(FEATURES_DUMP),) 295 + # We will display at the end of this Makefile.config, using $(call feature_display_entries) 296 + # As we may retry some feature detection here, see the disassembler-four-args case, for instance 297 + FEATURE_DISPLAY_DEFERRED := 1 302 298 include $(srctree)/tools/build/Makefile.feature 303 299 else 304 300 include $(FEATURES_DUMP) ··· 1082 1072 endif 1083 1073 endif 1084 1074 1075 + ifdef LIBTRACEEVENT_DYNAMIC 1076 + $(call feature_check,libtraceevent) 1077 + ifeq ($(feature-libtraceevent), 1) 1078 + EXTLIBS += -ltraceevent 1079 + else 1080 + dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); 1081 + endif 1082 + endif 1083 + 1085 1084 # Among the variables below, these: 1086 1085 # perfexecdir 1087 1086 # perf_include_dir ··· 1227 1208 $(call detected_var,GTK_CFLAGS) 1228 1209 $(call detected_var,PERL_EMBED_CCOPTS) 1229 1210 $(call detected_var,PYTHON_EMBED_CCOPTS) 1211 + 1212 + # re-generate FEATURE-DUMP as we may have called feature_check, found out 1213 + # extra libraries to add to LDFLAGS of some other test and then redo those 1214 + # tests, see the block about libbfd, disassembler-four-args, for instance. 
1215 + $(shell rm -f $(FEATURE_DUMP_FILENAME)) 1216 + $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME))) 1217 + 1218 + ifeq ($(feature_display),1) 1219 + $(call feature_display_entries) 1220 + endif
+12 -4
tools/perf/Makefile.perf
··· 128 128 # 129 129 # Define BUILD_BPF_SKEL to enable BPF skeletons 130 130 # 131 + # Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking 132 + # 131 133 132 134 # As per kernel Makefile, avoid funny character set dependencies 133 135 unexport LC_ALL ··· 285 283 286 284 SCRIPT_SH += perf-archive.sh 287 285 SCRIPT_SH += perf-with-kcore.sh 286 + SCRIPT_SH += perf-iostat.sh 288 287 289 288 grep-libs = $(filter -l%,$(1)) 290 289 strip-libs = $(filter-out -l%,$(1)) ··· 312 309 313 310 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a 314 311 export LIBTRACEEVENT 315 - 316 312 LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list 317 313 318 314 # ··· 376 374 377 375 export PERL_PATH 378 376 379 - PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) $(LIBPERF) 377 + PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF) 380 378 ifndef NO_LIBBPF 381 379 ifndef LIBBPF_DYNAMIC 382 380 PERFLIBS += $(LIBBPF) 383 381 endif 382 + endif 383 + ifndef LIBTRACEEVENT_DYNAMIC 384 + PERFLIBS += $(LIBTRACEEVENT) 384 385 endif 385 386 386 387 # We choose to avoid "if .. else if .. else .. 
endif endif" ··· 953 948 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 954 949 $(call QUIET_INSTALL, perf-with-kcore) \ 955 950 $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 951 + $(call QUIET_INSTALL, perf-iostat) \ 952 + $(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 956 953 ifndef NO_LIBAUDIT 957 954 $(call QUIET_INSTALL, strace/groups) \ 958 955 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \ ··· 1014 1007 SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) 1015 1008 SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) 1016 1009 SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h 1010 + SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h 1017 1011 1018 1012 ifdef BUILD_BPF_SKEL 1019 1013 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool ··· 1029 1021 OUTPUT=$(SKEL_TMP_OUT)/ bootstrap 1030 1022 1031 1023 $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) 1032 - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ 1024 + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ 1033 1025 -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ 1034 1026 1035 1027 $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) ··· 1049 1041 $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) 1050 1042 1051 1043 clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean 1052 - $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) 1044 + $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf-iostat $(LANG_BINDINGS) 1053 1045 $(Q)find $(if $(OUTPUT),$(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete 1054 1046 $(Q)$(RM) $(OUTPUT).config-detected 1055 1047 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
+59 -19
tools/perf/arch/arm/util/cs-etm.c
··· 67 67 char path[PATH_MAX]; 68 68 int err = -EINVAL; 69 69 u32 val; 70 + u64 contextid; 70 71 71 72 ptr = container_of(itr, struct cs_etm_recording, itr); 72 73 cs_etm_pmu = ptr->cs_etm_pmu; ··· 87 86 goto out; 88 87 } 89 88 89 + /* User has configured for PID tracing, respects it. */ 90 + contextid = evsel->core.attr.config & 91 + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); 92 + 90 93 /* 91 - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing 92 - * is supported: 93 - * 0b00000 Context ID tracing is not supported. 94 - * 0b00100 Maximum of 32-bit Context ID size. 95 - * All other values are reserved. 94 + * If user doesn't configure the contextid format, parse PMU format and 95 + * enable PID tracing according to the "contextid" format bits: 96 + * 97 + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; 98 + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. 96 99 */ 97 - val = BMVAL(val, 5, 9); 98 - if (!val || val != 0x4) { 99 - err = -EINVAL; 100 - goto out; 100 + if (!contextid) 101 + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, 102 + "contextid"); 103 + 104 + if (contextid & BIT(ETM_OPT_CTXTID)) { 105 + /* 106 + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID 107 + * tracing is supported: 108 + * 0b00000 Context ID tracing is not supported. 109 + * 0b00100 Maximum of 32-bit Context ID size. 110 + * All other values are reserved. 111 + */ 112 + val = BMVAL(val, 5, 9); 113 + if (!val || val != 0x4) { 114 + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", 115 + CORESIGHT_ETM_PMU_NAME); 116 + err = -EINVAL; 117 + goto out; 118 + } 119 + } 120 + 121 + if (contextid & BIT(ETM_OPT_CTXTID2)) { 122 + /* 123 + * TRCIDR2.VMIDOPT[30:29] != 0 and 124 + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) 125 + * We can't support CONTEXTIDR in VMID if the size of the 126 + * virtual context id is < 32bit. 127 + * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us. 
128 + */ 129 + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { 130 + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", 131 + CORESIGHT_ETM_PMU_NAME); 132 + err = -EINVAL; 133 + goto out; 134 + } 101 135 } 102 136 103 137 /* All good, let the kernel know */ 104 - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); 138 + evsel->core.attr.config |= contextid; 105 139 err = 0; 106 140 107 141 out: 108 - 109 142 return err; 110 143 } 111 144 ··· 208 173 !cpu_map__has(online_cpus, i)) 209 174 continue; 210 175 211 - if (option & ETM_SET_OPT_CTXTID) { 176 + if (option & BIT(ETM_OPT_CTXTID)) { 212 177 err = cs_etm_set_context_id(itr, evsel, i); 213 178 if (err) 214 179 goto out; 215 180 } 216 - if (option & ETM_SET_OPT_TS) { 181 + if (option & BIT(ETM_OPT_TS)) { 217 182 err = cs_etm_set_timestamp(itr, evsel, i); 218 183 if (err) 219 184 goto out; 220 185 } 221 - if (option & ~(ETM_SET_OPT_MASK)) 186 + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) 222 187 /* Nothing else is currently supported */ 223 188 goto out; 224 189 } ··· 378 343 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); 379 344 } 380 345 381 - /* Snapshost size can't be bigger than the auxtrace area */ 346 + /* Snapshot size can't be bigger than the auxtrace area */ 382 347 if (opts->auxtrace_snapshot_size > 383 348 opts->auxtrace_mmap_pages * (size_t)page_size) { 384 349 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", ··· 445 410 evsel__set_sample_bit(cs_etm_evsel, CPU); 446 411 447 412 err = cs_etm_set_option(itr, cs_etm_evsel, 448 - ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); 413 + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); 449 414 if (err) 450 415 goto out; 451 416 } ··· 524 489 config |= BIT(ETM4_CFG_BIT_TS); 525 490 if (config_opts & BIT(ETM_OPT_RETSTK)) 526 491 config |= BIT(ETM4_CFG_BIT_RETSTK); 527 - 492 + if (config_opts & BIT(ETM_OPT_CTXTID2)) 493 + config |= BIT(ETM4_CFG_BIT_VMID) | 494 + BIT(ETM4_CFG_BIT_VMID_OPT); 528 495 return config; 529 496 } 530 497 
··· 613 576 struct auxtrace_record *itr, 614 577 struct perf_record_auxtrace_info *info) 615 578 { 616 - u32 increment; 579 + u32 increment, nr_trc_params; 617 580 u64 magic; 618 581 struct cs_etm_recording *ptr = 619 582 container_of(itr, struct cs_etm_recording, itr); ··· 648 611 649 612 /* How much space was used */ 650 613 increment = CS_ETMV4_PRIV_MAX; 614 + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; 651 615 } else { 652 616 magic = __perf_cs_etmv3_magic; 653 617 /* Get configuration register */ ··· 666 628 667 629 /* How much space was used */ 668 630 increment = CS_ETM_PRIV_MAX; 631 + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; 669 632 } 670 633 671 634 /* Build generic header portion */ 672 635 info->priv[*offset + CS_ETM_MAGIC] = magic; 673 636 info->priv[*offset + CS_ETM_CPU] = cpu; 637 + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; 674 638 /* Where the next CPU entry should start from */ 675 639 *offset += increment; 676 640 } ··· 718 678 719 679 /* First fill out the session header */ 720 680 info->type = PERF_AUXTRACE_CS_ETM; 721 - info->priv[CS_HEADER_VERSION_0] = 0; 681 + info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; 722 682 info->priv[CS_PMU_TYPE_CPUS] = type << 32; 723 683 info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; 724 684 info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
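The TRCIDR2 capability checks added in the cs-etm.c hunk above hinge on the kernel's BMVAL bit-range helper. A standalone sketch of the two tests (`bmval()` mirrors BMVAL(val, lsb, msb); the register value in the example is made up for illustration):

```python
def bmval(val, lsb, msb):
    """Extract bits [msb:lsb] of val, like the kernel's BMVAL() helper."""
    return (val >> lsb) & ((1 << (msb - lsb + 1)) - 1)

def ctxtid_el1_ok(trcidr2):
    # TRCIDR2.CIDSIZE, bits [9:5]: only 0b00100 (32-bit Context ID) is valid
    return bmval(trcidr2, 5, 9) == 0x4

def ctxtid_el2_ok(trcidr2):
    # TRCIDR2.VMIDOPT[30:29] != 0 and TRCIDR2.VMIDSIZE[14:10] >= 4
    return bmval(trcidr2, 29, 30) != 0 and bmval(trcidr2, 10, 14) >= 4

trcidr2 = (0x4 << 5) | (0x4 << 10) | (0x1 << 29)  # made-up register value
print(ctxtid_el1_ok(trcidr2), ctxtid_el2_ok(trcidr2))
# -> True True
```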
+1
tools/perf/arch/arm64/util/Build
··· 2 2 perf-y += machine.o 3 3 perf-y += perf_regs.o 4 4 perf-y += tsc.o 5 + perf-y += pmu.o 5 6 perf-y += kvm-stat.o 6 7 perf-$(CONFIG_DWARF) += dwarf-regs.o 7 8 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+2 -2
tools/perf/arch/arm64/util/kvm-stat.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <errno.h> 3 3 #include <memory.h> 4 - #include "../../util/evsel.h" 5 - #include "../../util/kvm-stat.h" 4 + #include "../../../util/evsel.h" 5 + #include "../../../util/kvm-stat.h" 6 6 #include "arm64_exception_types.h" 7 7 #include "debug.h" 8 8
+3 -3
tools/perf/arch/arm64/util/machine.c
··· 6 6 #include "debug.h" 7 7 #include "symbol.h" 8 8 9 - /* On arm64, kernel text segment start at high memory address, 9 + /* On arm64, kernel text segment starts at high memory address, 10 10 * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory 11 - * address, like 0xffff 0000 00ax xxxx. When only samll amount of 11 + * address, like 0xffff 0000 00ax xxxx. When only small amount of 12 12 * memory is used by modules, gap between end of module's text segment 13 - * and start of kernel text segment may be reach 2G. 13 + * and start of kernel text segment may reach 2G. 14 14 * Therefore do not fill this gap and do not assign it to the kernel dso map. 15 15 */ 16 16
+1 -1
tools/perf/arch/arm64/util/perf_regs.c
··· 108 108 /* [sp], [sp, NUM] or [sp,NUM] */ 109 109 new_len = 7; /* + ( % s p ) NULL */ 110 110 111 - /* If the arugment is [sp], need to fill offset '0' */ 111 + /* If the argument is [sp], need to fill offset '0' */ 112 112 if (rm[2].rm_so == -1) 113 113 new_len += 1; 114 114 else
+25
tools/perf/arch/arm64/util/pmu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "../../../util/cpumap.h" 4 + #include "../../../util/pmu.h" 5 + 6 + struct pmu_events_map *pmu_events_map__find(void) 7 + { 8 + struct perf_pmu *pmu = NULL; 9 + 10 + while ((pmu = perf_pmu__scan(pmu))) { 11 + if (!is_pmu_core(pmu->name)) 12 + continue; 13 + 14 + /* 15 + * The cpumap should cover all CPUs. Otherwise, some CPUs may 16 + * not support some events or have different event IDs. 17 + */ 18 + if (pmu->cpus->nr != cpu__max_cpu()) 19 + return NULL; 20 + 21 + return perf_pmu__find_map(pmu); 22 + } 23 + 24 + return NULL; 25 + }
+2 -2
tools/perf/arch/arm64/util/unwind-libunwind.c
··· 4 4 #ifndef REMOTE_UNWIND_LIBUNWIND 5 5 #include <libunwind.h> 6 6 #include "perf_regs.h" 7 - #include "../../util/unwind.h" 7 + #include "../../../util/unwind.h" 8 8 #endif 9 - #include "../../util/debug.h" 9 + #include "../../../util/debug.h" 10 10 11 11 int LIBUNWIND__ARCH_REG_ID(int regnum) 12 12 {
+22
tools/perf/arch/mips/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + ifndef NO_DWARF 3 + PERF_HAVE_DWARF_REGS := 1 4 + endif 5 + 6 + # Syscall table generation for perf 7 + out := $(OUTPUT)arch/mips/include/generated/asm 8 + header := $(out)/syscalls_n64.c 9 + sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls 10 + sysdef := $(sysprf)/syscall_n64.tbl 11 + systbl := $(sysprf)/mksyscalltbl 12 + 13 + # Create output directory if not already present 14 + _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') 15 + 16 + $(header): $(sysdef) $(systbl) 17 + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ 18 + 19 + clean:: 20 + $(call QUIET_CLEAN, mips) $(RM) $(header) 21 + 22 + archheaders: $(header)
+32
tools/perf/arch/mips/entry/syscalls/mksyscalltbl
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # Generate system call table for perf. Derived from 5 + # s390 script. 6 + # 7 + # Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 8 + # Changed by: Tiezhu Yang <yangtiezhu@loongson.cn> 9 + 10 + SYSCALL_TBL=$1 11 + 12 + if ! test -r $SYSCALL_TBL; then 13 + echo "Could not read input file" >&2 14 + exit 1 15 + fi 16 + 17 + create_table() 18 + { 19 + local max_nr nr abi sc discard 20 + 21 + echo 'static const char *syscalltbl_mips_n64[] = {' 22 + while read nr abi sc discard; do 23 + printf '\t[%d] = "%s",\n' $nr $sc 24 + max_nr=$nr 25 + done 26 + echo '};' 27 + echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr" 28 + } 29 + 30 + grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL \ 31 + |sort -k1 -n \ 32 + |create_table
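The mksyscalltbl pipeline above (grep the n64 rows, numeric sort, emit a C array plus a MAX_ID define) can be mirrored in a few lines of Python; this is a sketch of the same transform, not part of the build, with sample rows taken from syscall_n64.tbl below:

```python
# Mirror mksyscalltbl: turn "<nr> <abi> <name> <entry>" rows into the
# C array and #define that the shell script emits.
def gen_table(rows, abi="n64"):
    out = ["static const char *syscalltbl_mips_n64[] = {"]
    max_nr = 0
    for row in sorted(rows, key=lambda r: int(r.split()[0])):
        nr, row_abi, name = row.split()[:3]
        if row_abi != abi:
            continue  # keep only rows for the requested ABI
        out.append('\t[%s] = "%s",' % (nr, name))
        max_nr = int(nr)
    out.append("};")
    out.append("#define SYSCALLTBL_MIPS_N64_MAX_ID %d" % max_nr)
    return "\n".join(out)

print(gen_table(["1 n64 write sys_write", "0 n64 read sys_read"]))
```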
+358
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
··· 1 + # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note 2 + # 3 + # system call numbers and entry vectors for mips 4 + # 5 + # The format is: 6 + # <number> <abi> <name> <entry point> 7 + # 8 + # The <abi> is always "n64" for this file. 9 + # 10 + 0 n64 read sys_read 11 + 1 n64 write sys_write 12 + 2 n64 open sys_open 13 + 3 n64 close sys_close 14 + 4 n64 stat sys_newstat 15 + 5 n64 fstat sys_newfstat 16 + 6 n64 lstat sys_newlstat 17 + 7 n64 poll sys_poll 18 + 8 n64 lseek sys_lseek 19 + 9 n64 mmap sys_mips_mmap 20 + 10 n64 mprotect sys_mprotect 21 + 11 n64 munmap sys_munmap 22 + 12 n64 brk sys_brk 23 + 13 n64 rt_sigaction sys_rt_sigaction 24 + 14 n64 rt_sigprocmask sys_rt_sigprocmask 25 + 15 n64 ioctl sys_ioctl 26 + 16 n64 pread64 sys_pread64 27 + 17 n64 pwrite64 sys_pwrite64 28 + 18 n64 readv sys_readv 29 + 19 n64 writev sys_writev 30 + 20 n64 access sys_access 31 + 21 n64 pipe sysm_pipe 32 + 22 n64 _newselect sys_select 33 + 23 n64 sched_yield sys_sched_yield 34 + 24 n64 mremap sys_mremap 35 + 25 n64 msync sys_msync 36 + 26 n64 mincore sys_mincore 37 + 27 n64 madvise sys_madvise 38 + 28 n64 shmget sys_shmget 39 + 29 n64 shmat sys_shmat 40 + 30 n64 shmctl sys_old_shmctl 41 + 31 n64 dup sys_dup 42 + 32 n64 dup2 sys_dup2 43 + 33 n64 pause sys_pause 44 + 34 n64 nanosleep sys_nanosleep 45 + 35 n64 getitimer sys_getitimer 46 + 36 n64 setitimer sys_setitimer 47 + 37 n64 alarm sys_alarm 48 + 38 n64 getpid sys_getpid 49 + 39 n64 sendfile sys_sendfile64 50 + 40 n64 socket sys_socket 51 + 41 n64 connect sys_connect 52 + 42 n64 accept sys_accept 53 + 43 n64 sendto sys_sendto 54 + 44 n64 recvfrom sys_recvfrom 55 + 45 n64 sendmsg sys_sendmsg 56 + 46 n64 recvmsg sys_recvmsg 57 + 47 n64 shutdown sys_shutdown 58 + 48 n64 bind sys_bind 59 + 49 n64 listen sys_listen 60 + 50 n64 getsockname sys_getsockname 61 + 51 n64 getpeername sys_getpeername 62 + 52 n64 socketpair sys_socketpair 63 + 53 n64 setsockopt sys_setsockopt 64 + 54 n64 getsockopt sys_getsockopt 65 + 55 n64 
clone __sys_clone 66 + 56 n64 fork __sys_fork 67 + 57 n64 execve sys_execve 68 + 58 n64 exit sys_exit 69 + 59 n64 wait4 sys_wait4 70 + 60 n64 kill sys_kill 71 + 61 n64 uname sys_newuname 72 + 62 n64 semget sys_semget 73 + 63 n64 semop sys_semop 74 + 64 n64 semctl sys_old_semctl 75 + 65 n64 shmdt sys_shmdt 76 + 66 n64 msgget sys_msgget 77 + 67 n64 msgsnd sys_msgsnd 78 + 68 n64 msgrcv sys_msgrcv 79 + 69 n64 msgctl sys_old_msgctl 80 + 70 n64 fcntl sys_fcntl 81 + 71 n64 flock sys_flock 82 + 72 n64 fsync sys_fsync 83 + 73 n64 fdatasync sys_fdatasync 84 + 74 n64 truncate sys_truncate 85 + 75 n64 ftruncate sys_ftruncate 86 + 76 n64 getdents sys_getdents 87 + 77 n64 getcwd sys_getcwd 88 + 78 n64 chdir sys_chdir 89 + 79 n64 fchdir sys_fchdir 90 + 80 n64 rename sys_rename 91 + 81 n64 mkdir sys_mkdir 92 + 82 n64 rmdir sys_rmdir 93 + 83 n64 creat sys_creat 94 + 84 n64 link sys_link 95 + 85 n64 unlink sys_unlink 96 + 86 n64 symlink sys_symlink 97 + 87 n64 readlink sys_readlink 98 + 88 n64 chmod sys_chmod 99 + 89 n64 fchmod sys_fchmod 100 + 90 n64 chown sys_chown 101 + 91 n64 fchown sys_fchown 102 + 92 n64 lchown sys_lchown 103 + 93 n64 umask sys_umask 104 + 94 n64 gettimeofday sys_gettimeofday 105 + 95 n64 getrlimit sys_getrlimit 106 + 96 n64 getrusage sys_getrusage 107 + 97 n64 sysinfo sys_sysinfo 108 + 98 n64 times sys_times 109 + 99 n64 ptrace sys_ptrace 110 + 100 n64 getuid sys_getuid 111 + 101 n64 syslog sys_syslog 112 + 102 n64 getgid sys_getgid 113 + 103 n64 setuid sys_setuid 114 + 104 n64 setgid sys_setgid 115 + 105 n64 geteuid sys_geteuid 116 + 106 n64 getegid sys_getegid 117 + 107 n64 setpgid sys_setpgid 118 + 108 n64 getppid sys_getppid 119 + 109 n64 getpgrp sys_getpgrp 120 + 110 n64 setsid sys_setsid 121 + 111 n64 setreuid sys_setreuid 122 + 112 n64 setregid sys_setregid 123 + 113 n64 getgroups sys_getgroups 124 + 114 n64 setgroups sys_setgroups 125 + 115 n64 setresuid sys_setresuid 126 + 116 n64 getresuid sys_getresuid 127 + 117 n64 setresgid sys_setresgid 128 + 
118 n64 getresgid sys_getresgid 129 + 119 n64 getpgid sys_getpgid 130 + 120 n64 setfsuid sys_setfsuid 131 + 121 n64 setfsgid sys_setfsgid 132 + 122 n64 getsid sys_getsid 133 + 123 n64 capget sys_capget 134 + 124 n64 capset sys_capset 135 + 125 n64 rt_sigpending sys_rt_sigpending 136 + 126 n64 rt_sigtimedwait sys_rt_sigtimedwait 137 + 127 n64 rt_sigqueueinfo sys_rt_sigqueueinfo 138 + 128 n64 rt_sigsuspend sys_rt_sigsuspend 139 + 129 n64 sigaltstack sys_sigaltstack 140 + 130 n64 utime sys_utime 141 + 131 n64 mknod sys_mknod 142 + 132 n64 personality sys_personality 143 + 133 n64 ustat sys_ustat 144 + 134 n64 statfs sys_statfs 145 + 135 n64 fstatfs sys_fstatfs 146 + 136 n64 sysfs sys_sysfs 147 + 137 n64 getpriority sys_getpriority 148 + 138 n64 setpriority sys_setpriority 149 + 139 n64 sched_setparam sys_sched_setparam 150 + 140 n64 sched_getparam sys_sched_getparam 151 + 141 n64 sched_setscheduler sys_sched_setscheduler 152 + 142 n64 sched_getscheduler sys_sched_getscheduler 153 + 143 n64 sched_get_priority_max sys_sched_get_priority_max 154 + 144 n64 sched_get_priority_min sys_sched_get_priority_min 155 + 145 n64 sched_rr_get_interval sys_sched_rr_get_interval 156 + 146 n64 mlock sys_mlock 157 + 147 n64 munlock sys_munlock 158 + 148 n64 mlockall sys_mlockall 159 + 149 n64 munlockall sys_munlockall 160 + 150 n64 vhangup sys_vhangup 161 + 151 n64 pivot_root sys_pivot_root 162 + 152 n64 _sysctl sys_ni_syscall 163 + 153 n64 prctl sys_prctl 164 + 154 n64 adjtimex sys_adjtimex 165 + 155 n64 setrlimit sys_setrlimit 166 + 156 n64 chroot sys_chroot 167 + 157 n64 sync sys_sync 168 + 158 n64 acct sys_acct 169 + 159 n64 settimeofday sys_settimeofday 170 + 160 n64 mount sys_mount 171 + 161 n64 umount2 sys_umount 172 + 162 n64 swapon sys_swapon 173 + 163 n64 swapoff sys_swapoff 174 + 164 n64 reboot sys_reboot 175 + 165 n64 sethostname sys_sethostname 176 + 166 n64 setdomainname sys_setdomainname 177 + 167 n64 create_module sys_ni_syscall 178 + 168 n64 init_module sys_init_module 
179 + 169 n64 delete_module sys_delete_module 180 + 170 n64 get_kernel_syms sys_ni_syscall 181 + 171 n64 query_module sys_ni_syscall 182 + 172 n64 quotactl sys_quotactl 183 + 173 n64 nfsservctl sys_ni_syscall 184 + 174 n64 getpmsg sys_ni_syscall 185 + 175 n64 putpmsg sys_ni_syscall 186 + 176 n64 afs_syscall sys_ni_syscall 187 + # 177 reserved for security 188 + 177 n64 reserved177 sys_ni_syscall 189 + 178 n64 gettid sys_gettid 190 + 179 n64 readahead sys_readahead 191 + 180 n64 setxattr sys_setxattr 192 + 181 n64 lsetxattr sys_lsetxattr 193 + 182 n64 fsetxattr sys_fsetxattr 194 + 183 n64 getxattr sys_getxattr 195 + 184 n64 lgetxattr sys_lgetxattr 196 + 185 n64 fgetxattr sys_fgetxattr 197 + 186 n64 listxattr sys_listxattr 198 + 187 n64 llistxattr sys_llistxattr 199 + 188 n64 flistxattr sys_flistxattr 200 + 189 n64 removexattr sys_removexattr 201 + 190 n64 lremovexattr sys_lremovexattr 202 + 191 n64 fremovexattr sys_fremovexattr 203 + 192 n64 tkill sys_tkill 204 + 193 n64 reserved193 sys_ni_syscall 205 + 194 n64 futex sys_futex 206 + 195 n64 sched_setaffinity sys_sched_setaffinity 207 + 196 n64 sched_getaffinity sys_sched_getaffinity 208 + 197 n64 cacheflush sys_cacheflush 209 + 198 n64 cachectl sys_cachectl 210 + 199 n64 sysmips __sys_sysmips 211 + 200 n64 io_setup sys_io_setup 212 + 201 n64 io_destroy sys_io_destroy 213 + 202 n64 io_getevents sys_io_getevents 214 + 203 n64 io_submit sys_io_submit 215 + 204 n64 io_cancel sys_io_cancel 216 + 205 n64 exit_group sys_exit_group 217 + 206 n64 lookup_dcookie sys_lookup_dcookie 218 + 207 n64 epoll_create sys_epoll_create 219 + 208 n64 epoll_ctl sys_epoll_ctl 220 + 209 n64 epoll_wait sys_epoll_wait 221 + 210 n64 remap_file_pages sys_remap_file_pages 222 + 211 n64 rt_sigreturn sys_rt_sigreturn 223 + 212 n64 set_tid_address sys_set_tid_address 224 + 213 n64 restart_syscall sys_restart_syscall 225 + 214 n64 semtimedop sys_semtimedop 226 + 215 n64 fadvise64 sys_fadvise64_64 227 + 216 n64 timer_create sys_timer_create 228 + 217 
n64 timer_settime sys_timer_settime 229 + 218 n64 timer_gettime sys_timer_gettime 230 + 219 n64 timer_getoverrun sys_timer_getoverrun 231 + 220 n64 timer_delete sys_timer_delete 232 + 221 n64 clock_settime sys_clock_settime 233 + 222 n64 clock_gettime sys_clock_gettime 234 + 223 n64 clock_getres sys_clock_getres 235 + 224 n64 clock_nanosleep sys_clock_nanosleep 236 + 225 n64 tgkill sys_tgkill 237 + 226 n64 utimes sys_utimes 238 + 227 n64 mbind sys_mbind 239 + 228 n64 get_mempolicy sys_get_mempolicy 240 + 229 n64 set_mempolicy sys_set_mempolicy 241 + 230 n64 mq_open sys_mq_open 242 + 231 n64 mq_unlink sys_mq_unlink 243 + 232 n64 mq_timedsend sys_mq_timedsend 244 + 233 n64 mq_timedreceive sys_mq_timedreceive 245 + 234 n64 mq_notify sys_mq_notify 246 + 235 n64 mq_getsetattr sys_mq_getsetattr 247 + 236 n64 vserver sys_ni_syscall 248 + 237 n64 waitid sys_waitid 249 + # 238 was sys_setaltroot 250 + 239 n64 add_key sys_add_key 251 + 240 n64 request_key sys_request_key 252 + 241 n64 keyctl sys_keyctl 253 + 242 n64 set_thread_area sys_set_thread_area 254 + 243 n64 inotify_init sys_inotify_init 255 + 244 n64 inotify_add_watch sys_inotify_add_watch 256 + 245 n64 inotify_rm_watch sys_inotify_rm_watch 257 + 246 n64 migrate_pages sys_migrate_pages 258 + 247 n64 openat sys_openat 259 + 248 n64 mkdirat sys_mkdirat 260 + 249 n64 mknodat sys_mknodat 261 + 250 n64 fchownat sys_fchownat 262 + 251 n64 futimesat sys_futimesat 263 + 252 n64 newfstatat sys_newfstatat 264 + 253 n64 unlinkat sys_unlinkat 265 + 254 n64 renameat sys_renameat 266 + 255 n64 linkat sys_linkat 267 + 256 n64 symlinkat sys_symlinkat 268 + 257 n64 readlinkat sys_readlinkat 269 + 258 n64 fchmodat sys_fchmodat 270 + 259 n64 faccessat sys_faccessat 271 + 260 n64 pselect6 sys_pselect6 272 + 261 n64 ppoll sys_ppoll 273 + 262 n64 unshare sys_unshare 274 + 263 n64 splice sys_splice 275 + 264 n64 sync_file_range sys_sync_file_range 276 + 265 n64 tee sys_tee 277 + 266 n64 vmsplice sys_vmsplice 278 + 267 n64 move_pages 
sys_move_pages 279 + 268 n64 set_robust_list sys_set_robust_list 280 + 269 n64 get_robust_list sys_get_robust_list 281 + 270 n64 kexec_load sys_kexec_load 282 + 271 n64 getcpu sys_getcpu 283 + 272 n64 epoll_pwait sys_epoll_pwait 284 + 273 n64 ioprio_set sys_ioprio_set 285 + 274 n64 ioprio_get sys_ioprio_get 286 + 275 n64 utimensat sys_utimensat 287 + 276 n64 signalfd sys_signalfd 288 + 277 n64 timerfd sys_ni_syscall 289 + 278 n64 eventfd sys_eventfd 290 + 279 n64 fallocate sys_fallocate 291 + 280 n64 timerfd_create sys_timerfd_create 292 + 281 n64 timerfd_gettime sys_timerfd_gettime 293 + 282 n64 timerfd_settime sys_timerfd_settime 294 + 283 n64 signalfd4 sys_signalfd4 295 + 284 n64 eventfd2 sys_eventfd2 296 + 285 n64 epoll_create1 sys_epoll_create1 297 + 286 n64 dup3 sys_dup3 298 + 287 n64 pipe2 sys_pipe2 299 + 288 n64 inotify_init1 sys_inotify_init1 300 + 289 n64 preadv sys_preadv 301 + 290 n64 pwritev sys_pwritev 302 + 291 n64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo 303 + 292 n64 perf_event_open sys_perf_event_open 304 + 293 n64 accept4 sys_accept4 305 + 294 n64 recvmmsg sys_recvmmsg 306 + 295 n64 fanotify_init sys_fanotify_init 307 + 296 n64 fanotify_mark sys_fanotify_mark 308 + 297 n64 prlimit64 sys_prlimit64 309 + 298 n64 name_to_handle_at sys_name_to_handle_at 310 + 299 n64 open_by_handle_at sys_open_by_handle_at 311 + 300 n64 clock_adjtime sys_clock_adjtime 312 + 301 n64 syncfs sys_syncfs 313 + 302 n64 sendmmsg sys_sendmmsg 314 + 303 n64 setns sys_setns 315 + 304 n64 process_vm_readv sys_process_vm_readv 316 + 305 n64 process_vm_writev sys_process_vm_writev 317 + 306 n64 kcmp sys_kcmp 318 + 307 n64 finit_module sys_finit_module 319 + 308 n64 getdents64 sys_getdents64 320 + 309 n64 sched_setattr sys_sched_setattr 321 + 310 n64 sched_getattr sys_sched_getattr 322 + 311 n64 renameat2 sys_renameat2 323 + 312 n64 seccomp sys_seccomp 324 + 313 n64 getrandom sys_getrandom 325 + 314 n64 memfd_create sys_memfd_create 326 + 315 n64 bpf sys_bpf 327 + 316 n64 execveat 
sys_execveat 328 + 317 n64 userfaultfd sys_userfaultfd 329 + 318 n64 membarrier sys_membarrier 330 + 319 n64 mlock2 sys_mlock2 331 + 320 n64 copy_file_range sys_copy_file_range 332 + 321 n64 preadv2 sys_preadv2 333 + 322 n64 pwritev2 sys_pwritev2 334 + 323 n64 pkey_mprotect sys_pkey_mprotect 335 + 324 n64 pkey_alloc sys_pkey_alloc 336 + 325 n64 pkey_free sys_pkey_free 337 + 326 n64 statx sys_statx 338 + 327 n64 rseq sys_rseq 339 + 328 n64 io_pgetevents sys_io_pgetevents 340 + # 329 through 423 are reserved to sync up with other architectures 341 + 424 n64 pidfd_send_signal sys_pidfd_send_signal 342 + 425 n64 io_uring_setup sys_io_uring_setup 343 + 426 n64 io_uring_enter sys_io_uring_enter 344 + 427 n64 io_uring_register sys_io_uring_register 345 + 428 n64 open_tree sys_open_tree 346 + 429 n64 move_mount sys_move_mount 347 + 430 n64 fsopen sys_fsopen 348 + 431 n64 fsconfig sys_fsconfig 349 + 432 n64 fsmount sys_fsmount 350 + 433 n64 fspick sys_fspick 351 + 434 n64 pidfd_open sys_pidfd_open 352 + 435 n64 clone3 __sys_clone3 353 + 436 n64 close_range sys_close_range 354 + 437 n64 openat2 sys_openat2 355 + 438 n64 pidfd_getfd sys_pidfd_getfd 356 + 439 n64 faccessat2 sys_faccessat2 357 + 440 n64 process_madvise sys_process_madvise 358 + 441 n64 epoll_pwait2 sys_epoll_pwait2
+31
tools/perf/arch/mips/include/dwarf-regs-table.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into 4 + * register names. 5 + * 6 + * Copyright (C) 2013 Cavium, Inc. 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License as published by 10 + * the Free Software Foundation; either version 2 of the License, or 11 + * (at your option) any later version. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + */ 19 + 20 + #ifdef DEFINE_DWARF_REGSTR_TABLE 21 + #undef REG_DWARFNUM_NAME 22 + #define REG_DWARFNUM_NAME(reg, idx) [idx] = "$" #reg 23 + static const char * const mips_regstr_tbl[] = { 24 + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", 25 + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", 26 + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", 27 + "$30", "$31", 28 + REG_DWARFNUM_NAME(hi, 64), 29 + REG_DWARFNUM_NAME(lo, 65), 30 + }; 31 + #endif
+84
tools/perf/arch/mips/include/perf_regs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef ARCH_PERF_REGS_H 3 + #define ARCH_PERF_REGS_H 4 + 5 + #include <stdlib.h> 6 + #include <linux/types.h> 7 + #include <asm/perf_regs.h> 8 + 9 + #define PERF_REGS_MAX PERF_REG_MIPS_MAX 10 + #define PERF_REG_IP PERF_REG_MIPS_PC 11 + #define PERF_REG_SP PERF_REG_MIPS_R29 12 + 13 + #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) 14 + 15 + static inline const char *__perf_reg_name(int id) 16 + { 17 + switch (id) { 18 + case PERF_REG_MIPS_PC: 19 + return "PC"; 20 + case PERF_REG_MIPS_R1: 21 + return "$1"; 22 + case PERF_REG_MIPS_R2: 23 + return "$2"; 24 + case PERF_REG_MIPS_R3: 25 + return "$3"; 26 + case PERF_REG_MIPS_R4: 27 + return "$4"; 28 + case PERF_REG_MIPS_R5: 29 + return "$5"; 30 + case PERF_REG_MIPS_R6: 31 + return "$6"; 32 + case PERF_REG_MIPS_R7: 33 + return "$7"; 34 + case PERF_REG_MIPS_R8: 35 + return "$8"; 36 + case PERF_REG_MIPS_R9: 37 + return "$9"; 38 + case PERF_REG_MIPS_R10: 39 + return "$10"; 40 + case PERF_REG_MIPS_R11: 41 + return "$11"; 42 + case PERF_REG_MIPS_R12: 43 + return "$12"; 44 + case PERF_REG_MIPS_R13: 45 + return "$13"; 46 + case PERF_REG_MIPS_R14: 47 + return "$14"; 48 + case PERF_REG_MIPS_R15: 49 + return "$15"; 50 + case PERF_REG_MIPS_R16: 51 + return "$16"; 52 + case PERF_REG_MIPS_R17: 53 + return "$17"; 54 + case PERF_REG_MIPS_R18: 55 + return "$18"; 56 + case PERF_REG_MIPS_R19: 57 + return "$19"; 58 + case PERF_REG_MIPS_R20: 59 + return "$20"; 60 + case PERF_REG_MIPS_R21: 61 + return "$21"; 62 + case PERF_REG_MIPS_R22: 63 + return "$22"; 64 + case PERF_REG_MIPS_R23: 65 + return "$23"; 66 + case PERF_REG_MIPS_R24: 67 + return "$24"; 68 + case PERF_REG_MIPS_R25: 69 + return "$25"; 70 + case PERF_REG_MIPS_R28: 71 + return "$28"; 72 + case PERF_REG_MIPS_R29: 73 + return "$29"; 74 + case PERF_REG_MIPS_R30: 75 + return "$30"; 76 + case PERF_REG_MIPS_R31: 77 + return "$31"; 78 + default: 79 + break; 80 + } 81 + return NULL; 82 + } 83 + 84 + #endif /* ARCH_PERF_REGS_H */
+3
tools/perf/arch/mips/util/Build
··· 1 + perf-y += perf_regs.o 2 + perf-$(CONFIG_DWARF) += dwarf-regs.o 3 + perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+38
tools/perf/arch/mips/util/dwarf-regs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. 4 + * 5 + * Copyright (C) 2013 Cavium, Inc. 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License as published by 9 + * the Free Software Foundation; either version 2 of the License, or 10 + * (at your option) any later version. 11 + * 12 + * This program is distributed in the hope that it will be useful, 13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 + * GNU General Public License for more details. 16 + * 17 + */ 18 + 19 + #include <stdio.h> 20 + #include <dwarf-regs.h> 21 + 22 + static const char *mips_gpr_names[32] = { 23 + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", 24 + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", 25 + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", 26 + "$30", "$31" 27 + }; 28 + 29 + const char *get_arch_regstr(unsigned int n) 30 + { 31 + if (n < 32) 32 + return mips_gpr_names[n]; 33 + if (n == 64) 34 + return "hi"; 35 + if (n == 65) 36 + return "lo"; 37 + return NULL; 38 + }
+6
tools/perf/arch/mips/util/perf_regs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "../../util/perf_regs.h" 3 + 4 + const struct sample_reg sample_reg_masks[] = { 5 + SMPL_REG_END 6 + };
+22
tools/perf/arch/mips/util/unwind-libunwind.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <errno.h> 4 + #include <libunwind.h> 5 + #include "perf_regs.h" 6 + #include "../../util/unwind.h" 7 + #include "util/debug.h" 8 + 9 + int libunwind__arch_reg_id(int regnum) 10 + { 11 + switch (regnum) { 12 + case UNW_MIPS_R1 ... UNW_MIPS_R25: 13 + return regnum - UNW_MIPS_R1 + PERF_REG_MIPS_R1; 14 + case UNW_MIPS_R28 ... UNW_MIPS_R31: 15 + return regnum - UNW_MIPS_R28 + PERF_REG_MIPS_R28; 16 + case UNW_MIPS_PC: 17 + return PERF_REG_MIPS_PC; 18 + default: 19 + pr_err("unwind: invalid reg id %d\n", regnum); 20 + return -EINVAL; 21 + } 22 + }
+2
tools/perf/arch/powerpc/util/Build
··· 4 4 perf-y += perf_regs.o 5 5 perf-y += mem-events.o 6 6 perf-y += sym-handling.o 7 + perf-y += evsel.o 8 + perf-y += event.o 7 9 8 10 perf-$(CONFIG_DWARF) += dwarf-regs.o 9 11 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
+53
tools/perf/arch/powerpc/util/event.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/types.h> 3 + #include <linux/string.h> 4 + #include <linux/zalloc.h> 5 + 6 + #include "../../../util/event.h" 7 + #include "../../../util/synthetic-events.h" 8 + #include "../../../util/machine.h" 9 + #include "../../../util/tool.h" 10 + #include "../../../util/map.h" 11 + #include "../../../util/debug.h" 12 + 13 + void arch_perf_parse_sample_weight(struct perf_sample *data, 14 + const __u64 *array, u64 type) 15 + { 16 + union perf_sample_weight weight; 17 + 18 + weight.full = *array; 19 + if (type & PERF_SAMPLE_WEIGHT) 20 + data->weight = weight.full; 21 + else { 22 + data->weight = weight.var1_dw; 23 + data->ins_lat = weight.var2_w; 24 + data->p_stage_cyc = weight.var3_w; 25 + } 26 + } 27 + 28 + void arch_perf_synthesize_sample_weight(const struct perf_sample *data, 29 + __u64 *array, u64 type) 30 + { 31 + *array = data->weight; 32 + 33 + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { 34 + *array &= 0xffffffff; 35 + *array |= ((u64)data->ins_lat << 32); 36 + } 37 + } 38 + 39 + const char *arch_perf_header_entry(const char *se_header) 40 + { 41 + if (!strcmp(se_header, "Local INSTR Latency")) 42 + return "Finish Cyc"; 43 + else if (!strcmp(se_header, "Pipeline Stage Cycle")) 44 + return "Dispatch Cyc"; 45 + return se_header; 46 + } 47 + 48 + int arch_support_sort_key(const char *sort_key) 49 + { 50 + if (!strcmp(sort_key, "p_stage_cyc")) 51 + return 1; 52 + return 0; 53 + }
+8
tools/perf/arch/powerpc/util/evsel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdio.h> 3 + #include "util/evsel.h" 4 + 5 + void arch_evsel__set_sample_weight(struct evsel *evsel) 6 + { 7 + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); 8 + }
+1 -1
tools/perf/arch/powerpc/util/kvm-stat.c
··· 176 176 } 177 177 178 178 /* 179 - * Incase of powerpc architecture, pmu registers are programmable 179 + * In case of powerpc architecture, pmu registers are programmable 180 180 * by guest kernel. So monitoring guest via host may not provide 181 181 * valid samples with default 'cycles' event. It is better to use 182 182 * 'trace_imc/trace_cycles' event for guest profiling, since it
+1 -1
tools/perf/arch/powerpc/util/utils_header.h
··· 10 10 11 11 #define SPRN_PVR 0x11F /* Processor Version Register */ 12 12 #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ 13 - #define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ 13 + #define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */ 14 14 15 15 #endif /* __PERF_UTIL_HEADER_H */
+1 -1
tools/perf/arch/x86/tests/bp-modify.c
··· 73 73 /* 74 74 * The parent does following steps: 75 75 * - creates a new breakpoint (id 0) for bp_2 function 76 - * - changes that breakponit to bp_1 function 76 + * - changes that breakpoint to bp_1 function 77 77 * - waits for the breakpoint to hit and checks 78 78 * it has proper rip of bp_1 function 79 79 * - detaches the child
+1
tools/perf/arch/x86/util/Build
··· 9 9 perf-y += evlist.o 10 10 perf-y += mem-events.o 11 11 perf-y += evsel.o 12 + perf-y += iostat.o 12 13 13 14 perf-$(CONFIG_DWARF) += dwarf-regs.o 14 15 perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+470
tools/perf/arch/x86/util/iostat.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * perf iostat 4 + * 5 + * Copyright (C) 2020, Intel Corporation 6 + * 7 + * Authors: Alexander Antonov <alexander.antonov@linux.intel.com> 8 + */ 9 + 10 + #include <api/fs/fs.h> 11 + #include <linux/kernel.h> 12 + #include <linux/err.h> 13 + #include <limits.h> 14 + #include <stdio.h> 15 + #include <string.h> 16 + #include <errno.h> 17 + #include <sys/types.h> 18 + #include <sys/stat.h> 19 + #include <fcntl.h> 20 + #include <dirent.h> 21 + #include <unistd.h> 22 + #include <stdlib.h> 23 + #include <regex.h> 24 + #include "util/cpumap.h" 25 + #include "util/debug.h" 26 + #include "util/iostat.h" 27 + #include "util/counts.h" 28 + #include "path.h" 29 + 30 + #ifndef MAX_PATH 31 + #define MAX_PATH 1024 32 + #endif 33 + 34 + #define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d" 35 + #define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH 36 + #define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d" 37 + 38 + /* 39 + * Each metric requiries one IIO event which increments at every 4B transfer 40 + * in corresponding direction. 
The formulas to compute metrics are generic: 41 + * #EventCount * 4B / (1024 * 1024) 42 + */ 43 + static const char * const iostat_metrics[] = { 44 + "Inbound Read(MB)", 45 + "Inbound Write(MB)", 46 + "Outbound Read(MB)", 47 + "Outbound Write(MB)", 48 + }; 49 + 50 + static inline int iostat_metrics_count(void) 51 + { 52 + return sizeof(iostat_metrics) / sizeof(char *); 53 + } 54 + 55 + static const char *iostat_metric_by_idx(int idx) 56 + { 57 + return *(iostat_metrics + idx % iostat_metrics_count()); 58 + } 59 + 60 + struct iio_root_port { 61 + u32 domain; 62 + u8 bus; 63 + u8 die; 64 + u8 pmu_idx; 65 + int idx; 66 + }; 67 + 68 + struct iio_root_ports_list { 69 + struct iio_root_port **rps; 70 + int nr_entries; 71 + }; 72 + 73 + static struct iio_root_ports_list *root_ports; 74 + 75 + static void iio_root_port_show(FILE *output, 76 + const struct iio_root_port * const rp) 77 + { 78 + if (output && rp) 79 + fprintf(output, "S%d-uncore_iio_%d<%04x:%02x>\n", 80 + rp->die, rp->pmu_idx, rp->domain, rp->bus); 81 + } 82 + 83 + static struct iio_root_port *iio_root_port_new(u32 domain, u8 bus, 84 + u8 die, u8 pmu_idx) 85 + { 86 + struct iio_root_port *p = calloc(1, sizeof(*p)); 87 + 88 + if (p) { 89 + p->domain = domain; 90 + p->bus = bus; 91 + p->die = die; 92 + p->pmu_idx = pmu_idx; 93 + } 94 + return p; 95 + } 96 + 97 + static void iio_root_ports_list_free(struct iio_root_ports_list *list) 98 + { 99 + int idx; 100 + 101 + if (list) { 102 + for (idx = 0; idx < list->nr_entries; idx++) 103 + free(list->rps[idx]); 104 + free(list->rps); 105 + free(list); 106 + } 107 + } 108 + 109 + static struct iio_root_port *iio_root_port_find_by_notation( 110 + const struct iio_root_ports_list * const list, u32 domain, u8 bus) 111 + { 112 + int idx; 113 + struct iio_root_port *rp; 114 + 115 + if (list) { 116 + for (idx = 0; idx < list->nr_entries; idx++) { 117 + rp = list->rps[idx]; 118 + if (rp && rp->domain == domain && rp->bus == bus) 119 + return rp; 120 + } 121 + } 122 + return 
NULL; 123 + } 124 + 125 + static int iio_root_ports_list_insert(struct iio_root_ports_list *list, 126 + struct iio_root_port * const rp) 127 + { 128 + struct iio_root_port **tmp_buf; 129 + 130 + if (list && rp) { 131 + rp->idx = list->nr_entries++; 132 + tmp_buf = realloc(list->rps, 133 + list->nr_entries * sizeof(*list->rps)); 134 + if (!tmp_buf) { 135 + pr_err("Failed to realloc memory\n"); 136 + return -ENOMEM; 137 + } 138 + tmp_buf[rp->idx] = rp; 139 + list->rps = tmp_buf; 140 + } 141 + return 0; 142 + } 143 + 144 + static int iio_mapping(u8 pmu_idx, struct iio_root_ports_list * const list) 145 + { 146 + char *buf; 147 + char path[MAX_PATH]; 148 + u32 domain; 149 + u8 bus; 150 + struct iio_root_port *rp; 151 + size_t size; 152 + int ret; 153 + 154 + for (int die = 0; die < cpu__max_node(); die++) { 155 + scnprintf(path, MAX_PATH, PLATFORM_MAPPING_PATH, pmu_idx, die); 156 + if (sysfs__read_str(path, &buf, &size) < 0) { 157 + if (pmu_idx) 158 + goto out; 159 + pr_err("Mode iostat is not supported\n"); 160 + return -1; 161 + } 162 + ret = sscanf(buf, "%04x:%02hhx", &domain, &bus); 163 + free(buf); 164 + if (ret != 2) { 165 + pr_err("Invalid mapping data: iio_%d; die%d\n", 166 + pmu_idx, die); 167 + return -1; 168 + } 169 + rp = iio_root_port_new(domain, bus, die, pmu_idx); 170 + if (!rp || iio_root_ports_list_insert(list, rp)) { 171 + free(rp); 172 + return -ENOMEM; 173 + } 174 + } 175 + out: 176 + return 0; 177 + } 178 + 179 + static u8 iio_pmu_count(void) 180 + { 181 + u8 pmu_idx = 0; 182 + char path[MAX_PATH]; 183 + const char *sysfs = sysfs__mountpoint(); 184 + 185 + if (sysfs) { 186 + for (;; pmu_idx++) { 187 + snprintf(path, sizeof(path), SYSFS_UNCORE_PMU_PATH, 188 + sysfs, pmu_idx); 189 + if (access(path, F_OK) != 0) 190 + break; 191 + } 192 + } 193 + return pmu_idx; 194 + } 195 + 196 + static int iio_root_ports_scan(struct iio_root_ports_list **list) 197 + { 198 + int ret = -ENOMEM; 199 + struct iio_root_ports_list *tmp_list; 200 + u8 pmu_count = 
iio_pmu_count(); 201 + 202 + if (!pmu_count) { 203 + pr_err("Unsupported uncore pmu configuration\n"); 204 + return -1; 205 + } 206 + 207 + tmp_list = calloc(1, sizeof(*tmp_list)); 208 + if (!tmp_list) 209 + goto err; 210 + 211 + for (u8 pmu_idx = 0; pmu_idx < pmu_count; pmu_idx++) { 212 + ret = iio_mapping(pmu_idx, tmp_list); 213 + if (ret) 214 + break; 215 + } 216 + err: 217 + if (!ret) 218 + *list = tmp_list; 219 + else 220 + iio_root_ports_list_free(tmp_list); 221 + 222 + return ret; 223 + } 224 + 225 + static int iio_root_port_parse_str(u32 *domain, u8 *bus, char *str) 226 + { 227 + int ret; 228 + regex_t regex; 229 + /* 230 + * Expected format domain:bus: 231 + * Valid domain range [0:ffff] 232 + * Valid bus range [0:ff] 233 + * Example: 0000:af, 0:3d, 01:7 234 + */ 235 + regcomp(&regex, "^([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})", REG_EXTENDED); 236 + ret = regexec(&regex, str, 0, NULL, 0); 237 + if (ret || sscanf(str, "%08x:%02hhx", domain, bus) != 2) 238 + pr_warning("Unrecognized root port format: %s\n" 239 + "Please use the following format:\n" 240 + "\t [domain]:[bus]\n" 241 + "\t for example: 0000:3d\n", str); 242 + 243 + regfree(&regex); 244 + return ret; 245 + } 246 + 247 + static int iio_root_ports_list_filter(struct iio_root_ports_list **list, 248 + const char *filter) 249 + { 250 + char *tok, *tmp, *filter_copy = NULL; 251 + struct iio_root_port *rp; 252 + u32 domain; 253 + u8 bus; 254 + int ret = -ENOMEM; 255 + struct iio_root_ports_list *tmp_list = calloc(1, sizeof(*tmp_list)); 256 + 257 + if (!tmp_list) 258 + goto err; 259 + 260 + filter_copy = strdup(filter); 261 + if (!filter_copy) 262 + goto err; 263 + 264 + for (tok = strtok_r(filter_copy, ",", &tmp); tok; 265 + tok = strtok_r(NULL, ",", &tmp)) { 266 + if (!iio_root_port_parse_str(&domain, &bus, tok)) { 267 + rp = iio_root_port_find_by_notation(*list, domain, bus); 268 + if (rp) { 269 + (*list)->rps[rp->idx] = NULL; 270 + ret = iio_root_ports_list_insert(tmp_list, rp); 271 + if (ret) { 272 + 
free(rp); 273 + goto err; 274 + } 275 + } else if (!iio_root_port_find_by_notation(tmp_list, 276 + domain, bus)) 277 + pr_warning("Root port %04x:%02x were not found\n", 278 + domain, bus); 279 + } 280 + } 281 + 282 + if (tmp_list->nr_entries == 0) { 283 + pr_err("Requested root ports were not found\n"); 284 + ret = -EINVAL; 285 + } 286 + err: 287 + iio_root_ports_list_free(*list); 288 + if (ret) 289 + iio_root_ports_list_free(tmp_list); 290 + else 291 + *list = tmp_list; 292 + 293 + free(filter_copy); 294 + return ret; 295 + } 296 + 297 + static int iostat_event_group(struct evlist *evl, 298 + struct iio_root_ports_list *list) 299 + { 300 + int ret; 301 + int idx; 302 + const char *iostat_cmd_template = 303 + "{uncore_iio_%x/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ 304 + uncore_iio_%x/event=0x83,umask=0x01,ch_mask=0xF,fc_mask=0x07/,\ 305 + uncore_iio_%x/event=0xc0,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ 306 + uncore_iio_%x/event=0xc0,umask=0x01,ch_mask=0xF,fc_mask=0x07/}"; 307 + const int len_template = strlen(iostat_cmd_template) + 1; 308 + struct evsel *evsel = NULL; 309 + int metrics_count = iostat_metrics_count(); 310 + char *iostat_cmd = calloc(len_template, 1); 311 + 312 + if (!iostat_cmd) 313 + return -ENOMEM; 314 + 315 + for (idx = 0; idx < list->nr_entries; idx++) { 316 + sprintf(iostat_cmd, iostat_cmd_template, 317 + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx, 318 + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx); 319 + ret = parse_events(evl, iostat_cmd, NULL); 320 + if (ret) 321 + goto err; 322 + } 323 + 324 + evlist__for_each_entry(evl, evsel) { 325 + evsel->priv = list->rps[evsel->idx / metrics_count]; 326 + } 327 + list->nr_entries = 0; 328 + err: 329 + iio_root_ports_list_free(list); 330 + free(iostat_cmd); 331 + return ret; 332 + } 333 + 334 + int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config) 335 + { 336 + if (evlist->core.nr_entries > 0) { 337 + pr_warning("The -e and -M options are not supported." 
338 + "All chosen events/metrics will be dropped\n"); 339 + evlist__delete(evlist); 340 + evlist = evlist__new(); 341 + if (!evlist) 342 + return -ENOMEM; 343 + } 344 + 345 + config->metric_only = true; 346 + config->aggr_mode = AGGR_GLOBAL; 347 + 348 + return iostat_event_group(evlist, root_ports); 349 + } 350 + 351 + int iostat_parse(const struct option *opt, const char *str, 352 + int unset __maybe_unused) 353 + { 354 + int ret; 355 + struct perf_stat_config *config = (struct perf_stat_config *)opt->data; 356 + 357 + ret = iio_root_ports_scan(&root_ports); 358 + if (!ret) { 359 + config->iostat_run = true; 360 + if (!str) 361 + iostat_mode = IOSTAT_RUN; 362 + else if (!strcmp(str, "list")) 363 + iostat_mode = IOSTAT_LIST; 364 + else { 365 + iostat_mode = IOSTAT_RUN; 366 + ret = iio_root_ports_list_filter(&root_ports, str); 367 + } 368 + } 369 + return ret; 370 + } 371 + 372 + void iostat_list(struct evlist *evlist, struct perf_stat_config *config) 373 + { 374 + struct evsel *evsel; 375 + struct iio_root_port *rp = NULL; 376 + 377 + evlist__for_each_entry(evlist, evsel) { 378 + if (rp != evsel->priv) { 379 + rp = evsel->priv; 380 + iio_root_port_show(config->output, rp); 381 + } 382 + } 383 + } 384 + 385 + void iostat_release(struct evlist *evlist) 386 + { 387 + struct evsel *evsel; 388 + struct iio_root_port *rp = NULL; 389 + 390 + evlist__for_each_entry(evlist, evsel) { 391 + if (rp != evsel->priv) { 392 + rp = evsel->priv; 393 + free(evsel->priv); 394 + } 395 + } 396 + } 397 + 398 + void iostat_prefix(struct evlist *evlist, 399 + struct perf_stat_config *config, 400 + char *prefix, struct timespec *ts) 401 + { 402 + struct iio_root_port *rp = evlist->selected->priv; 403 + 404 + if (rp) { 405 + if (ts) 406 + sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s", 407 + ts->tv_sec, ts->tv_nsec, 408 + config->csv_sep, rp->domain, rp->bus, 409 + config->csv_sep); 410 + else 411 + sprintf(prefix, "%04x:%02x%s", rp->domain, rp->bus, 412 + config->csv_sep); 413 + } 414 + } 415 + 
416 + void iostat_print_header_prefix(struct perf_stat_config *config) 417 + { 418 + if (config->csv_output) 419 + fputs("port,", config->output); 420 + else if (config->interval) 421 + fprintf(config->output, "# time port "); 422 + else 423 + fprintf(config->output, " port "); 424 + } 425 + 426 + void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, 427 + struct perf_stat_output_ctx *out) 428 + { 429 + double iostat_value = 0; 430 + u64 prev_count_val = 0; 431 + const char *iostat_metric = iostat_metric_by_idx(evsel->idx); 432 + u8 die = ((struct iio_root_port *)evsel->priv)->die; 433 + struct perf_counts_values *count = perf_counts(evsel->counts, die, 0); 434 + 435 + if (count->run && count->ena) { 436 + if (evsel->prev_raw_counts && !out->force_header) { 437 + struct perf_counts_values *prev_count = 438 + perf_counts(evsel->prev_raw_counts, die, 0); 439 + 440 + prev_count_val = prev_count->val; 441 + prev_count->val = count->val; 442 + } 443 + iostat_value = (count->val - prev_count_val) / 444 + ((double) count->run / count->ena); 445 + } 446 + out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, 447 + iostat_value / (256 * 1024)); 448 + } 449 + 450 + void iostat_print_counters(struct evlist *evlist, 451 + struct perf_stat_config *config, struct timespec *ts, 452 + char *prefix, iostat_print_counter_t print_cnt_cb) 453 + { 454 + void *perf_device = NULL; 455 + struct evsel *counter = evlist__first(evlist); 456 + 457 + evlist__set_selected(evlist, counter); 458 + iostat_prefix(evlist, config, prefix, ts); 459 + fprintf(config->output, "%s", prefix); 460 + evlist__for_each_entry(evlist, counter) { 461 + perf_device = evlist->selected->priv; 462 + if (perf_device && perf_device != counter->priv) { 463 + evlist__set_selected(evlist, counter); 464 + iostat_prefix(evlist, config, prefix, ts); 465 + fprintf(config->output, "\n%s", prefix); 466 + } 467 + print_cnt_cb(config, counter, prefix); 468 + } 469 + fputc('\n', 
config->output); 470 + }
+2 -2
tools/perf/arch/x86/util/perf_regs.c
··· 165 165 /* 166 166 * Max x86 register name length is 5(ex: %r15d). So, 6th char 167 167 * should always contain NULL. This helps to find register name 168 - * length using strlen, insted of maintaing one more variable. 168 + * length using strlen, instead of maintaining one more variable. 169 169 */ 170 170 #define SDT_REG_NAME_SIZE 6 171 171 ··· 207 207 * and displacement 0 (Both sign and displacement 0 are 208 208 * optional so it may be empty). Use one more character 209 209 * to hold last NULL so that strlen can be used to find 210 - * prefix length, instead of maintaing one more variable. 210 + * prefix length, instead of maintaining one more variable. 211 211 */ 212 212 char prefix[3] = {0}; 213 213
+2 -2
tools/perf/bench/epoll-wait.c
··· 17 17 * While the second model, enabled via --multiq option, uses multiple 18 18 * queueing (which refers to one epoll instance per worker). For example, 19 19 * short lived tcp connections in a high throughput httpd server will 20 - * ditribute the accept()'ing connections across CPUs. In this case each 20 + * distribute the accept()'ing connections across CPUs. In this case each 21 21 * worker does a limited amount of processing. 22 22 * 23 23 * [queue A] ---> [worker] ··· 198 198 199 199 do { 200 200 /* 201 - * Block undefinitely waiting for the IN event. 201 + * Block indefinitely waiting for the IN event. 202 202 * In order to stress the epoll_wait(2) syscall, 203 203 * call it event per event, instead of a larger 204 204 * batch (max)limit.
+1 -1
tools/perf/bench/inject-buildid.c
··· 372 372 len += synthesize_flush(data); 373 373 } 374 374 375 - /* tihs makes the child to finish */ 375 + /* this makes the child to finish */ 376 376 close(data->input_pipe[1]); 377 377 378 378 wait4(data->pid, &status, 0, &rusage);
+1 -1
tools/perf/bench/numa.c
··· 42 42 #endif 43 43 44 44 /* 45 - * Regular printout to the terminal, supressed if -q is specified: 45 + * Regular printout to the terminal, suppressed if -q is specified: 46 46 */ 47 47 #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) 48 48
+23 -18
tools/perf/builtin-annotate.c
··· 239 239 } 240 240 241 241 /* 242 - * XXX filtered samples can still have branch entires pointing into our 242 + * XXX filtered samples can still have branch entries pointing into our 243 243 * symbol and are missed. 244 244 */ 245 245 process_branch_stack(sample->branch_stack, al, sample); ··· 374 374 } else { 375 375 hist_entry__tty_annotate(he, evsel, ann); 376 376 nd = rb_next(nd); 377 - /* 378 - * Since we have a hist_entry per IP for the same 379 - * symbol, free he->ms.sym->src to signal we already 380 - * processed this symbol. 381 - */ 382 - zfree(&notes->src->cycles_hist); 383 - zfree(&notes->src); 384 377 } 385 378 } 386 379 } ··· 404 411 goto out; 405 412 406 413 if (dump_trace) { 407 - perf_session__fprintf_nr_events(session, stdout); 408 - evlist__fprintf_nr_events(session->evlist, stdout); 414 + perf_session__fprintf_nr_events(session, stdout, false); 415 + evlist__fprintf_nr_events(session->evlist, stdout, false); 409 416 goto out; 410 417 } 411 418 ··· 418 425 total_nr_samples = 0; 419 426 evlist__for_each_entry(session->evlist, pos) { 420 427 struct hists *hists = evsel__hists(pos); 421 - u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 428 + u32 nr_samples = hists->stats.nr_samples; 422 429 423 430 if (nr_samples > 0) { 424 431 total_nr_samples += nr_samples; ··· 531 538 "Strip first N entries of source file path name in programs (with --prefix)"), 532 539 OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", 533 540 "objdump binary to use for disassembly and annotations"), 541 + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, 542 + "Enable symbol demangling"), 543 + OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, 544 + "Enable kernel symbol demangling"), 534 545 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 535 546 "Show event group information together"), 536 547 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, ··· 616 619 617 620 setup_browser(true); 618 621 619 - if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) { 622 + /* 623 + * Events of different processes may correspond to the same 624 + * symbol, we do not care about the processes in annotate, 625 + * set sort order to avoid repeated output. 626 + */ 627 + sort_order = "dso,symbol"; 628 + 629 + /* 630 + * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle 631 + * if branch info is in perf data in TUI mode. 632 + */ 633 + if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) 620 634 sort__mode = SORT_MODE__BRANCH; 621 - if (setup_sorting(annotate.session->evlist) < 0) 622 - usage_with_options(annotate_usage, options); 623 - } else { 624 - if (setup_sorting(NULL) < 0) 625 - usage_with_options(annotate_usage, options); 626 - } 635 + 636 + if (setup_sorting(NULL) < 0) 637 + usage_with_options(annotate_usage, options); 627 638 628 639 ret = __cmd_annotate(&annotate); 629 640
-3
tools/perf/builtin-daemon.c
··· 6 6 #include <linux/zalloc.h> 7 7 #include <linux/string.h> 8 8 #include <linux/limits.h> 9 - #include <linux/string.h> 10 9 #include <string.h> 11 10 #include <sys/file.h> 12 11 #include <signal.h> ··· 23 24 #include <sys/signalfd.h> 24 25 #include <sys/wait.h> 25 26 #include <poll.h> 26 - #include <sys/stat.h> 27 - #include <time.h> 28 27 #include "builtin.h" 29 28 #include "perf.h" 30 29 #include "debug.h"
+18 -8
tools/perf/builtin-data.c
··· 7 7 #include "debug.h" 8 8 #include <subcmd/parse-options.h> 9 9 #include "data-convert.h" 10 - #include "data-convert-bt.h" 11 10 12 11 typedef int (*data_cmd_fn_t)(int argc, const char **argv); 13 12 ··· 54 55 55 56 static int cmd_data_convert(int argc, const char **argv) 56 57 { 57 - const char *to_ctf = NULL; 58 + const char *to_json = NULL; 59 + const char *to_ctf = NULL; 58 60 struct perf_data_convert_opts opts = { 59 61 .force = false, 60 62 .all = false, ··· 63 63 const struct option options[] = { 64 64 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 65 65 OPT_STRING('i', "input", &input_name, "file", "input file name"), 66 + OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"), 66 67 #ifdef HAVE_LIBBABELTRACE_SUPPORT 67 68 OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), 68 69 OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"), ··· 73 72 OPT_END() 74 73 }; 75 74 76 - #ifndef HAVE_LIBBABELTRACE_SUPPORT 77 - pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); 78 - return -1; 79 - #endif 80 - 81 75 argc = parse_options(argc, argv, options, 82 76 data_convert_usage, 0); 83 77 if (argc) { ··· 80 84 return -1; 81 85 } 82 86 87 + if (to_json && to_ctf) { 88 + pr_err("You cannot specify both --to-ctf and --to-json.\n"); 89 + return -1; 90 + } 91 + if (!to_json && !to_ctf) { 92 + pr_err("You must specify one of --to-ctf or --to-json.\n"); 93 + return -1; 94 + } 95 + 96 + if (to_json) 97 + return bt_convert__perf2json(input_name, to_json, &opts); 98 + 83 99 if (to_ctf) { 84 100 #ifdef HAVE_LIBBABELTRACE_SUPPORT 85 101 return bt_convert__perf2ctf(input_name, to_ctf, &opts); 86 102 #else 87 - pr_err("The libbabeltrace support is not compiled in.\n"); 103 + pr_err("The libbabeltrace support is not compiled in. perf should be " 104 + "compiled with environment variables LIBBABELTRACE=1 and " 105 + "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); 88 106 return -1; 89 107 #endif 90 108 }
+1 -1
tools/perf/builtin-diff.c
··· 1796 1796 data__for_each_file(i, d) { 1797 1797 1798 1798 /* 1799 - * Baseline or compute realted columns: 1799 + * Baseline or compute related columns: 1800 1800 * 1801 1801 * PERF_HPP_DIFF__BASELINE 1802 1802 * PERF_HPP_DIFF__DELTA
+1 -1
tools/perf/builtin-lock.c
··· 49 49 50 50 /* 51 51 * FIXME: evsel__intval() returns u64, 52 - * so address of lockdep_map should be dealed as 64bit. 52 + * so address of lockdep_map should be treated as 64bit. 53 53 * Is there more better solution? 54 54 */ 55 55 void *addr; /* address of lockdep_map, used as ID */
+49 -6
tools/perf/builtin-record.c
··· 47 47 #include "util/util.h" 48 48 #include "util/pfm.h" 49 49 #include "util/clockid.h" 50 + #include "util/pmu-hybrid.h" 51 + #include "util/evlist-hybrid.h" 50 52 #include "asm/bug.h" 51 53 #include "perf.h" 52 54 ··· 1605 1603 } 1606 1604 } 1607 1605 1606 + static void record__uniquify_name(struct record *rec) 1607 + { 1608 + struct evsel *pos; 1609 + struct evlist *evlist = rec->evlist; 1610 + char *new_name; 1611 + int ret; 1612 + 1613 + if (!perf_pmu__has_hybrid()) 1614 + return; 1615 + 1616 + evlist__for_each_entry(evlist, pos) { 1617 + if (!evsel__is_hybrid(pos)) 1618 + continue; 1619 + 1620 + if (strchr(pos->name, '/')) 1621 + continue; 1622 + 1623 + ret = asprintf(&new_name, "%s/%s/", 1624 + pos->pmu_name, pos->name); 1625 + if (ret) { 1626 + free(pos->name); 1627 + pos->name = new_name; 1628 + } 1629 + } 1630 + } 1631 + 1608 1632 static int __cmd_record(struct record *rec, int argc, const char **argv) 1609 1633 { 1610 1634 int err; ··· 1734 1706 */ 1735 1707 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 1736 1708 rec->opts.sample_id = true; 1709 + 1710 + record__uniquify_name(rec); 1737 1711 1738 1712 if (record__open(rec) != 0) { 1739 1713 err = -1; ··· 2007 1977 record__auxtrace_snapshot_exit(rec); 2008 1978 2009 1979 if (forks && workload_exec_errno) { 2010 - char msg[STRERR_BUFSIZE]; 1980 + char msg[STRERR_BUFSIZE], strevsels[2048]; 2011 1981 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2012 - pr_err("Workload failed: %s\n", emsg); 1982 + 1983 + evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 1984 + 1985 + pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 1986 + strevsels, argv[0], emsg); 2013 1987 err = -1; 2014 1988 goto out_child; 2015 1989 } ··· 2820 2786 if (record.opts.overwrite) 2821 2787 record.opts.tail_synthesize = true; 2822 2788 2823 - if (rec->evlist->core.nr_entries == 0 && 2824 - __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 2825 - pr_err("Not enough memory for event selector list\n"); 2826 - goto out; 2789 + if (rec->evlist->core.nr_entries == 0) { 2790 + if (perf_pmu__has_hybrid()) { 2791 + err = evlist__add_default_hybrid(rec->evlist, 2792 + !record.opts.no_samples); 2793 + } else { 2794 + err = __evlist__add_default(rec->evlist, 2795 + !record.opts.no_samples); 2796 + } 2797 + 2798 + if (err < 0) { 2799 + pr_err("Not enough memory for event selector list\n"); 2800 + goto out; 2801 + } 2827 2802 } 2828 2803 2829 2804 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
+37 -6
tools/perf/builtin-report.c
··· 84 84 bool nonany_branch_mode; 85 85 bool group_set; 86 86 bool stitch_lbr; 87 + bool disable_order; 88 + bool skip_empty; 87 89 int max_stack; 88 90 struct perf_read_values show_threads_values; 89 91 struct annotation_options annotation_opts; ··· 133 131 134 132 if (!strcmp(var, "report.sort_order")) { 135 133 default_sort_order = strdup(value); 134 + return 0; 135 + } 136 + 137 + if (!strcmp(var, "report.skip-empty")) { 138 + rep->skip_empty = perf_config_bool(var, value); 136 139 return 0; 137 140 } 138 141 ··· 442 435 { 443 436 size_t ret; 444 437 char unit; 445 - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 438 + unsigned long nr_samples = hists->stats.nr_samples; 446 439 u64 nr_events = hists->stats.total_period; 447 440 struct evsel *evsel = hists_to_evsel(hists); 448 441 char buf[512]; ··· 470 463 nr_samples += pos_hists->stats.nr_non_filtered_samples; 471 464 nr_events += pos_hists->stats.total_non_filtered_period; 472 465 } else { 473 - nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; 466 + nr_samples += pos_hists->stats.nr_samples; 474 467 nr_events += pos_hists->stats.total_period; 475 468 } 476 469 } ··· 534 527 const char *evname = evsel__name(pos); 535 528 536 529 if (symbol_conf.event_group && !evsel__is_group_leader(pos)) 530 + continue; 531 + 532 + if (rep->skip_empty && !hists->stats.nr_samples) 537 533 continue; 538 534 539 535 hists__fprintf_nr_sample_events(hists, rep, evname, stdout); ··· 717 707 ui_progress__finish(); 718 708 } 719 709 710 + static int count_sample_event(struct perf_tool *tool __maybe_unused, 711 + union perf_event *event __maybe_unused, 712 + struct perf_sample *sample __maybe_unused, 713 + struct evsel *evsel, 714 + struct machine *machine __maybe_unused) 715 + { 716 + struct hists *hists = evsel__hists(evsel); 717 + 718 + hists__inc_nr_events(hists); 719 + return 0; 720 + } 721 + 720 722 static void stats_setup(struct report *rep) 721 723 { 722 724 memset(&rep->tool, 0, sizeof(rep->tool)); 725 + rep->tool.sample = count_sample_event; 723 726 rep->tool.no_warn = true; 724 727 } 725 728 ··· 740 717 { 741 718 struct perf_session *session = rep->session; 742 719 743 - perf_session__fprintf_nr_events(session, stdout); 720 + perf_session__fprintf_nr_events(session, stdout, rep->skip_empty); 721 + evlist__fprintf_nr_events(session->evlist, stdout, rep->skip_empty); 744 722 return 0; 745 723 } 746 724 ··· 953 929 perf_session__fprintf_dsos(session, stdout); 954 930 955 931 if (dump_trace) { 956 - perf_session__fprintf_nr_events(session, stdout); 957 - evlist__fprintf_nr_events(session->evlist, stdout); 932 + perf_session__fprintf_nr_events(session, stdout, 933 + rep->skip_empty); 934 + evlist__fprintf_nr_events(session->evlist, stdout, 935 + rep->skip_empty); 958 936 return 0; 959 937 } 960 938 } ··· 1165 1139 .pretty_printing_style = "normal", 1166 1140 .socket_filter = -1, 1167 1141 .annotation_opts = annotation__default_options, 1142 + .skip_empty = true, 1168 1143 }; 1169 1144 const struct option options[] = { 1170 1145 OPT_STRING('i', "input", &input_name, "file", ··· 1323 1296 OPTS_EVSWITCH(&report.evswitch), 1324 1297 OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode, 1325 1298 "Sort all blocks by 'Sampled Cycles%'"), 1299 + OPT_BOOLEAN(0, "disable-order", &report.disable_order, 1300 + "Disable raw trace ordering"), 1301 + OPT_BOOLEAN(0, "skip-empty", &report.skip_empty, 1302 + "Do not display empty (or dummy) events in the output"), 1326 1303 OPT_END() 1327 1304 }; 1328 1305 struct perf_data data = { ··· 1360 1329 if (report.mmaps_mode) 1361 1330 report.tasks_mode = true; 1362 1331 1363 - if (dump_trace) 1332 + if (dump_trace && report.disable_order) 1364 1333 report.tool.ordered_events = false; 1365 1334 1366 1335 if (quiet)
+1 -1
tools/perf/builtin-sched.c
··· 1712 1712 { 1713 1713 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1714 1714 1715 - /* run the fork event through the perf machineruy */ 1715 + /* run the fork event through the perf machinery */ 1716 1716 perf_event__process_fork(tool, event, sample, machine); 1717 1717 1718 1718 /* and then run additional processing needed for this command */
+10 -12
tools/perf/builtin-script.c
··· 314 314 return (struct evsel_script *)evsel->priv; 315 315 } 316 316 317 - static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, 318 - struct perf_data *data) 317 + static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) 319 318 { 320 319 struct evsel_script *es = zalloc(sizeof(*es)); 321 320 ··· 334 335 return NULL; 335 336 } 336 337 337 - static void perf_evsel_script__delete(struct evsel_script *es) 338 + static void evsel_script__delete(struct evsel_script *es) 338 339 { 339 340 zfree(&es->filename); 340 341 fclose(es->fp); ··· 342 343 free(es); 343 344 } 344 345 345 - static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) 346 + static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) 346 347 { 347 348 struct stat st; 348 349 ··· 2218 2219 2219 2220 if (!evsel->priv) { 2220 2221 if (scr->per_event_dump) { 2221 - evsel->priv = perf_evsel_script__new(evsel, 2222 - scr->session->data); 2222 + evsel->priv = evsel_script__new(evsel, scr->session->data); 2223 2223 } else { 2224 2224 es = zalloc(sizeof(*es)); 2225 2225 if (!es) ··· 2473 2475 evlist__for_each_entry(evlist, evsel) { 2474 2476 if (!evsel->priv) 2475 2477 break; 2476 - perf_evsel_script__delete(evsel->priv); 2478 + evsel_script__delete(evsel->priv); 2477 2479 evsel->priv = NULL; 2478 2480 } 2479 2481 } ··· 2486 2488 /* 2487 2489 * Already setup? I.e. we may be called twice in cases like 2488 2490 * Intel PT, one for the intel_pt// and dummy events, then 2489 - * for the evsels syntheized from the auxtrace info. 2491 + * for the evsels synthesized from the auxtrace info. 2490 2492 * 2491 2493 * Ses perf_script__process_auxtrace_info. 2492 2494 */ 2493 2495 if (evsel->priv != NULL) 2494 2496 continue; 2495 2497 2496 - evsel->priv = perf_evsel_script__new(evsel, script->session->data); 2498 + evsel->priv = evsel_script__new(evsel, script->session->data); 2497 2499 if (evsel->priv == NULL) 2498 2500 goto out_err_fclose; 2499 2501 } ··· 2528 2530 evlist__for_each_entry(script->session->evlist, evsel) { 2529 2531 struct evsel_script *es = evsel->priv; 2530 2532 2531 - perf_evsel_script__fprintf(es, stdout); 2532 - perf_evsel_script__delete(es); 2533 + evsel_script__fprintf(es, stdout); 2534 + evsel_script__delete(es); 2533 2535 evsel->priv = NULL; 2534 2536 } 2535 2537 } ··· 3083 3085 * 3084 3086 * Fixme: All existing "xxx-record" are all in good formats "-e event ", 3085 3087 * which is covered well now. And new parsing code should be added to 3086 - * cover the future complexing formats like event groups etc. 3088 + * cover the future complex formats like event groups etc. 3087 3089 */ 3088 3090 static int check_ev_match(char *dir_name, char *scriptname, 3089 3091 struct perf_session *session)
+107 -23
tools/perf/builtin-stat.c
··· 48 48 #include "util/pmu.h" 49 49 #include "util/event.h" 50 50 #include "util/evlist.h" 51 + #include "util/evlist-hybrid.h" 51 52 #include "util/evsel.h" 52 53 #include "util/debug.h" 53 54 #include "util/color.h" ··· 69 68 #include "util/affinity.h" 70 69 #include "util/pfm.h" 71 70 #include "util/bpf_counter.h" 71 + #include "util/iostat.h" 72 + #include "util/pmu-hybrid.h" 72 73 #include "asm/bug.h" 73 74 74 75 #include <linux/time64.h> ··· 163 160 }; 164 161 165 162 static struct evlist *evsel_list; 163 + static bool all_counters_use_bpf = true; 166 164 167 165 static struct target target = { 168 166 .uid = UINT_MAX, ··· 216 212 .walltime_nsecs_stats = &walltime_nsecs_stats, 217 213 .big_num = true, 218 214 .ctl_fd = -1, 219 - .ctl_fd_ack = -1 215 + .ctl_fd_ack = -1, 216 + .iostat_run = false, 220 217 }; 221 218 222 219 static bool cpus_map_matched(struct evsel *a, struct evsel *b) ··· 243 238 { 244 239 struct evsel *evsel, *pos, *leader; 245 240 char buf[1024]; 241 + 242 + if (evlist__has_hybrid(evlist)) 243 + evlist__warn_hybrid_group(evlist); 246 244 247 245 evlist__for_each_entry(evlist, evsel) { 248 246 leader = evsel->leader; ··· 407 399 struct affinity affinity; 408 400 int i, ncpus, cpu; 409 401 402 + if (all_counters_use_bpf) 403 + return 0; 404 + 410 405 if (affinity__setup(&affinity) < 0) 411 406 return -1; 412 407 ··· 423 412 424 413 evlist__for_each_entry(evsel_list, counter) { 425 414 if (evsel__cpu_iter_skip(counter, cpu)) 415 + continue; 416 + if (evsel__is_bpf(counter)) 426 417 continue; 427 418 if (!counter->err) { 428 419 counter->err = read_counter_cpu(counter, rs, ··· 442 429 int err; 443 430 444 431 evlist__for_each_entry(evsel_list, counter) { 432 + if (!evsel__is_bpf(counter)) 433 + continue; 434 + 445 435 err = bpf_counter__read(counter); 446 436 if (err) 447 437 return err; ··· 455 439 static void read_counters(struct timespec *rs) 456 440 { 457 441 struct evsel *counter; 458 - int err; 459 442 460 443 if (!stat_config.stop_read_counter) { 461 - if (target__has_bpf(&target)) 462 - err = read_bpf_map_counters(); 463 - else 464 - err = read_affinity_counters(rs); 465 - if (err < 0) 444 + if (read_bpf_map_counters() || 445 + read_affinity_counters(rs)) 466 446 return; 467 447 } 468 448 ··· 547 535 struct evsel *evsel; 548 536 int err; 549 537 550 - if (target__has_bpf(&target)) { 551 - evlist__for_each_entry(evsel_list, evsel) { 552 - err = bpf_counter__enable(evsel); 553 - if (err) 554 - return err; 555 - } 538 + evlist__for_each_entry(evsel_list, evsel) { 539 + if (!evsel__is_bpf(evsel)) 540 + continue; 541 + 542 + err = bpf_counter__enable(evsel); 543 + if (err) 544 + return err; 556 545 } 557 546 558 547 if (stat_config.initial_delay < 0) { ··· 797 784 if (affinity__setup(&affinity) < 0) 798 785 return -1; 799 786 800 - if (target__has_bpf(&target)) { 801 - evlist__for_each_entry(evsel_list, counter) { 802 - if (bpf_counter__load(counter, &target)) 803 - return -1; 804 - } 787 + evlist__for_each_entry(evsel_list, counter) { 788 + if (bpf_counter__load(counter, &target)) 789 + return -1; 790 + if (!evsel__is_bpf(counter)) 791 + all_counters_use_bpf = false; 805 792 } 806 793 807 794 evlist__for_each_cpu (evsel_list, i, cpu) { 795 + /* 796 + * bperf calls evsel__open_per_cpu() in bperf__load(), so 797 + * no need to call it again here.
798 + */ 799 + if (target.use_bpf) 800 + break; 808 801 affinity__set(&affinity, cpu); 809 802 810 803 evlist__for_each_entry(evsel_list, counter) { 811 804 if (evsel__cpu_iter_skip(counter, cpu)) 812 805 continue; 813 806 if (counter->reset_group || counter->errored) 807 + continue; 808 + if (evsel__is_bpf(counter)) 814 809 continue; 815 810 try_again: 816 811 if (create_perf_stat_counter(counter, &stat_config, &target, ··· 946 925 /* 947 926 * Enable counters and exec the command: 948 927 */ 949 - t0 = rdclock(); 950 - clock_gettime(CLOCK_MONOTONIC, &ref_time); 951 - 952 928 if (forks) { 953 929 evlist__start_workload(evsel_list); 954 930 err = enable_counters(); 955 931 if (err) 956 932 return -1; 933 + 934 + t0 = rdclock(); 935 + clock_gettime(CLOCK_MONOTONIC, &ref_time); 957 936 958 937 if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) 959 938 status = dispatch_events(forks, timeout, interval, &times); ··· 975 954 err = enable_counters(); 976 955 if (err) 977 956 return -1; 957 + 958 + t0 = rdclock(); 959 + clock_gettime(CLOCK_MONOTONIC, &ref_time); 960 + 978 961 status = dispatch_events(forks, timeout, interval, &times); 979 962 } 980 963 ··· 1108 1083 stat_config.big_num = (set != 0); 1109 1084 } 1110 1085 1086 + void perf_stat__set_no_csv_summary(int set) 1087 + { 1088 + stat_config.no_csv_summary = (set != 0); 1089 + } 1090 + 1111 1091 static int stat__set_big_num(const struct option *opt __maybe_unused, 1112 1092 const char *s __maybe_unused, int unset) 1113 1093 { ··· 1176 1146 #ifdef HAVE_BPF_SKEL 1177 1147 OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", 1178 1148 "stat events on existing bpf program id"), 1149 + OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, 1150 + "use bpf program to count events"), 1151 + OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path", 1152 + "path to perf_event_attr map"), 1179 1153 #endif 1180 1154 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1181 1155 "system-wide collection from all CPUs"), ··· 1269 1235 "threads of same physical core"), 1270 1236 OPT_BOOLEAN(0, "summary", &stat_config.summary, 1271 1237 "print summary for interval mode"), 1238 + OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, 1239 + "don't print 'summary' for CSV summary output"), 1272 1240 OPT_BOOLEAN(0, "quiet", &stat_config.quiet, 1273 1241 "don't print output (useful with record)"), 1274 1242 #ifdef HAVE_LIBPFM ··· 1283 1247 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 1284 1248 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 1285 1249 parse_control_option), 1250 + OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default", 1251 + "measure I/O performance metrics provided by arch/platform", 1252 + iostat_parse), 1286 1253 OPT_END() 1287 1254 }; 1288 1255 ··· 1644 1605 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1645 1606 1646 1607 }; 1608 + struct perf_event_attr default_sw_attrs[] = { 1609 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1610 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1611 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1612 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1613 + }; 1647 1614 1648 1615 /* 1649 1616 * Detailed stats (-d), covering the L1 and last level data caches: ··· 1750 1705 bzero(&errinfo, sizeof(errinfo)); 1751 1706 if (transaction_run) { 1752 1707 /* Handle -T as -M transaction. Once platform specific metrics 1753 - * support has been added to the json files, all archictures 1708 + * support has been added to the json files, all architectures 1754 1709 * will use this approach. To determine transaction support 1755 1710 * on an architecture test for such a metric name.
1756 1711 */ ··· 1886 1841 } 1887 1842 1888 1843 if (!evsel_list->core.nr_entries) { 1844 + if (perf_pmu__has_hybrid()) { 1845 + const char *hybrid_str = "cycles,instructions,branches,branch-misses"; 1846 + 1847 + if (target__has_cpu(&target)) 1848 + default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK; 1849 + 1850 + if (evlist__add_default_attrs(evsel_list, 1851 + default_sw_attrs) < 0) { 1852 + return -1; 1853 + } 1854 + 1855 + err = parse_events(evsel_list, hybrid_str, &errinfo); 1856 + if (err) { 1857 + fprintf(stderr, 1858 + "Cannot set up hybrid events %s: %d\n", 1859 + hybrid_str, err); 1860 + parse_events_print_error(&errinfo, hybrid_str); 1861 + return -1; 1862 + } 1863 + return err; 1864 + } 1865 + 1889 1866 if (target__has_cpu(&target)) 1890 1867 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 1891 1868 ··· 2387 2320 goto out; 2388 2321 } 2389 2322 2323 + if (stat_config.iostat_run) { 2324 + status = iostat_prepare(evsel_list, &stat_config); 2325 + if (status) 2326 + goto out; 2327 + if (iostat_mode == IOSTAT_LIST) { 2328 + iostat_list(evsel_list, &stat_config); 2329 + goto out; 2330 + } else if (verbose) 2331 + iostat_list(evsel_list, &stat_config); 2332 + } 2333 + 2390 2334 if (add_default_attributes()) 2391 2335 goto out; 2392 2336 ··· 2434 2356 } 2435 2357 2436 2358 evlist__check_cpu_maps(evsel_list); 2359 + 2360 + if (perf_pmu__has_hybrid()) 2361 + stat_config.no_merge = true; 2437 2362 2438 2363 /* 2439 2364 * Initialize thread_map with comm names, ··· 2540 2459 /* 2541 2460 * We synthesize the kernel mmap record just so that older tools 2542 2461 * don't emit warnings about not being able to resolve symbols 2543 - * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2462 + * due to /proc/sys/kernel/kptr_restrict settings and instead provide 2544 2463 * a saner message about no samples being in the perf.data file. 
2545 2464 * 2546 2465 * This also serves to suppress a warning about f_header.data.size == 0 ··· 2576 2495 perf_stat__exit_aggr_mode(); 2577 2496 evlist__free_stats(evsel_list); 2578 2497 out: 2498 + if (stat_config.iostat_run) 2499 + iostat_release(evsel_list); 2500 + 2579 2501 zfree(&stat_config.walltime_run); 2580 2502 2581 2503 if (smi_cost && smi_reset)
+9 -13
tools/perf/builtin-top.c
··· 328 328 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 329 329 330 330 if (!top->record_opts.overwrite && 331 - (hists->stats.nr_lost_warned != 332 - hists->stats.nr_events[PERF_RECORD_LOST])) { 333 - hists->stats.nr_lost_warned = 334 - hists->stats.nr_events[PERF_RECORD_LOST]; 331 + (top->evlist->stats.nr_lost_warned != 332 + top->evlist->stats.nr_events[PERF_RECORD_LOST])) { 333 + top->evlist->stats.nr_lost_warned = 334 + top->evlist->stats.nr_events[PERF_RECORD_LOST]; 335 335 color_fprintf(stdout, PERF_COLOR_RED, 336 336 "WARNING: LOST %d chunks, Check IO/CPU overload", 337 - hists->stats.nr_lost_warned); 337 + top->evlist->stats.nr_lost_warned); 338 338 ++printed; 339 339 } 340 340 ··· 852 852 perf_top__process_lost(struct perf_top *top, union perf_event *event, 853 853 struct evsel *evsel) 854 854 { 855 - struct hists *hists = evsel__hists(evsel); 856 - 857 855 top->lost += event->lost.lost; 858 856 top->lost_total += event->lost.lost; 859 - hists->stats.total_lost += event->lost.lost; 857 + evsel->evlist->stats.total_lost += event->lost.lost; 860 858 } 861 859 862 860 static void ··· 862 864 union perf_event *event, 863 865 struct evsel *evsel) 864 866 { 865 - struct hists *hists = evsel__hists(evsel); 866 - 867 867 top->lost += event->lost_samples.lost; 868 868 top->lost_total += event->lost_samples.lost; 869 - hists->stats.total_lost_samples += event->lost_samples.lost; 869 + evsel->evlist->stats.total_lost_samples += event->lost_samples.lost; 870 870 } 871 871 872 872 static u64 last_timestamp; ··· 1201 1205 } else if (event->header.type == PERF_RECORD_LOST_SAMPLES) { 1202 1206 perf_top__process_lost_samples(top, event, evsel); 1203 1207 } else if (event->header.type < PERF_RECORD_MAX) { 1204 - hists__inc_nr_events(evsel__hists(evsel), event->header.type); 1208 + events_stats__inc(&session->evlist->stats, event->header.type); 1205 1209 machine__process_event(machine, event, &sample); 1206 1210 } else 1207 1211 
++session->evlist->stats.nr_unknown_events; ··· 1603 1607 if (status) { 1604 1608 /* 1605 1609 * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise 1606 - * warn the user explicitely. 1610 + * warn the user explicitly. 1607 1611 */ 1608 1612 eprintf(status == ENOSYS ? 1 : 0, verbose, 1609 1613 "Couldn't read the cpuid for this machine: %s\n",
+1
tools/perf/check-headers.sh
··· 153 153 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl 154 154 check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl 155 155 check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl 156 + check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl 156 157 157 158 for i in $BEAUTY_FILES; do 158 159 beauty_check $i -B
+1
tools/perf/command-list.txt
··· 14 14 perf-evlist mainporcelain common 15 15 perf-ftrace mainporcelain common 16 16 perf-inject mainporcelain common 17 + perf-iostat mainporcelain common 17 18 perf-kallsyms mainporcelain common 18 19 perf-kmem mainporcelain common 19 20 perf-kvm mainporcelain common
+2 -2
tools/perf/examples/bpf/augmented_raw_syscalls.c
··· 262 262 /* 263 263 * Jump to syscall specific augmenter, even if the default one, 264 264 * "!raw_syscalls:unaugmented" that will just return 1 to return the 265 - * unagmented tracepoint payload. 265 + * unaugmented tracepoint payload. 266 266 */ 267 267 bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); 268 268 ··· 282 282 /* 283 283 * Jump to syscall specific return augmenter, even if the default one, 284 284 * "!raw_syscalls:unaugmented" that will just return 1 to return the 285 - * unagmented tracepoint payload. 285 + * unaugmented tracepoint payload. 286 286 */ 287 287 bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); 288 288 /*
+2 -2
tools/perf/jvmti/jvmti_agent.c
··· 390 390 rec.p.total_size += size; 391 391 392 392 /* 393 - * If JVM is multi-threaded, nultiple concurrent calls to agent 393 + * If JVM is multi-threaded, multiple concurrent calls to agent 394 394 * may be possible, so protect file writes 395 395 */ 396 396 flockfile(fp); ··· 457 457 rec.p.total_size = size; 458 458 459 459 /* 460 - * If JVM is multi-threaded, nultiple concurrent calls to agent 460 + * If JVM is multi-threaded, multiple concurrent calls to agent 461 461 * may be possible, so protect file writes 462 462 */ 463 463 flockfile(fp);
+12
tools/perf/perf-iostat.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + # perf iostat 4 + # Alexander Antonov <alexander.antonov@linux.intel.com> 5 + 6 + if [[ "$1" == "list" ]] || [[ "$1" =~ ([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})(,)? ]]; then 7 + DELIMITER="=" 8 + else 9 + DELIMITER=" " 10 + fi 11 + 12 + perf stat --iostat$DELIMITER$*
+228
tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
··· 210 210 "BriefDescription": "Attributable Level 2 data TLB refill" 211 211 }, 212 212 { 213 + "PublicDescription": "Attributable Level 2 instruction TLB refill.", 214 + "EventCode": "0x2E", 215 + "EventName": "L2I_TLB_REFILL", 216 + "BriefDescription": "Attributable Level 2 instruction TLB refill." 217 + }, 218 + { 213 219 "PublicDescription": "Attributable Level 2 data or unified TLB access", 214 220 "EventCode": "0x2F", 215 221 "EventName": "L2D_TLB", 216 222 "BriefDescription": "Attributable Level 2 data or unified TLB access" 223 + }, 224 + { 225 + "PublicDescription": "Attributable Level 2 instruction TLB access.", 226 + "EventCode": "0x30", 227 + "EventName": "L2I_TLB", 228 + "BriefDescription": "Attributable Level 2 instruction TLB access." 217 229 }, 218 230 { 219 231 "PublicDescription": "Access to another socket in a multi-socket system", ··· 256 244 "EventCode": "0x37", 257 245 "EventName": "LL_CACHE_MISS_RD", 258 246 "BriefDescription": "Last level cache miss, read" 247 + }, 248 + { 249 + "PublicDescription": "SIMD Instruction architecturally executed.", 250 + "EventCode": "0x8000", 251 + "EventName": "SIMD_INST_RETIRED", 252 + "BriefDescription": "SIMD Instruction architecturally executed." 253 + }, 254 + { 255 + "PublicDescription": "Instruction architecturally executed, SVE.", 256 + "EventCode": "0x8002", 257 + "EventName": "SVE_INST_RETIRED", 258 + "BriefDescription": "Instruction architecturally executed, SVE." 259 + }, 260 + { 261 + "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", 262 + "EventCode": "0x8008", 263 + "EventName": "UOP_SPEC", 264 + "BriefDescription": "Microarchitectural operation, Operations speculatively executed." 265 + }, 266 + { 267 + "PublicDescription": "SVE Math accelerator Operations speculatively executed.", 268 + "EventCode": "0x800E", 269 + "EventName": "SVE_MATH_SPEC", 270 + "BriefDescription": "SVE Math accelerator Operations speculatively executed." 
  },
  { "PublicDescription": "Floating-point Operations speculatively executed.", "EventCode": "0x8010", "EventName": "FP_SPEC", "BriefDescription": "Floating-point Operations speculatively executed." },
  { "PublicDescription": "Floating-point FMA Operations speculatively executed.", "EventCode": "0x8028", "EventName": "FP_FMA_SPEC", "BriefDescription": "Floating-point FMA Operations speculatively executed." },
  { "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.", "EventCode": "0x8034", "EventName": "FP_RECPE_SPEC", "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed." },
  { "PublicDescription": "Floating-point convert Operations speculatively executed.", "EventCode": "0x8038", "EventName": "FP_CVT_SPEC", "BriefDescription": "Floating-point convert Operations speculatively executed." },
  { "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.", "EventCode": "0x8043", "EventName": "ASE_SVE_INT_SPEC", "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed." },
  { "PublicDescription": "SVE predicated Operations speculatively executed.", "EventCode": "0x8074", "EventName": "SVE_PRED_SPEC", "BriefDescription": "SVE predicated Operations speculatively executed." },
  { "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", "EventCode": "0x807C", "EventName": "SVE_MOVPRFX_SPEC", "BriefDescription": "SVE MOVPRFX Operations speculatively executed." },
  { "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.", "EventCode": "0x807F", "EventName": "SVE_MOVPRFX_U_SPEC", "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed." },
  { "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.", "EventCode": "0x8085", "EventName": "ASE_SVE_LD_SPEC", "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed." },
  { "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.", "EventCode": "0x8086", "EventName": "ASE_SVE_ST_SPEC", "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed." },
  { "PublicDescription": "Prefetch Operations speculatively executed.", "EventCode": "0x8087", "EventName": "PRF_SPEC", "BriefDescription": "Prefetch Operations speculatively executed." },
  { "PublicDescription": "General-purpose register load Operations speculatively executed.", "EventCode": "0x8089", "EventName": "BASE_LD_REG_SPEC", "BriefDescription": "General-purpose register load Operations speculatively executed." },
  { "PublicDescription": "General-purpose register store Operations speculatively executed.", "EventCode": "0x808A", "EventName": "BASE_ST_REG_SPEC", "BriefDescription": "General-purpose register store Operations speculatively executed." },
  { "PublicDescription": "SVE unpredicated load register Operations speculatively executed.", "EventCode": "0x8091", "EventName": "SVE_LDR_REG_SPEC", "BriefDescription": "SVE unpredicated load register Operations speculatively executed." },
  { "PublicDescription": "SVE unpredicated store register Operations speculatively executed.", "EventCode": "0x8092", "EventName": "SVE_STR_REG_SPEC", "BriefDescription": "SVE unpredicated store register Operations speculatively executed." },
  { "PublicDescription": "SVE load predicate register Operations speculatively executed.", "EventCode": "0x8095", "EventName": "SVE_LDR_PREG_SPEC", "BriefDescription": "SVE load predicate register Operations speculatively executed." },
  { "PublicDescription": "SVE store predicate register Operations speculatively executed.", "EventCode": "0x8096", "EventName": "SVE_STR_PREG_SPEC", "BriefDescription": "SVE store predicate register Operations speculatively executed." },
  { "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.", "EventCode": "0x809F", "EventName": "SVE_PRF_CONTIG_SPEC", "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed." },
  { "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.", "EventCode": "0x80A5", "EventName": "ASE_SVE_LD_MULTI_SPEC", "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed." },
  { "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.", "EventCode": "0x80A6", "EventName": "ASE_SVE_ST_MULTI_SPEC", "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed." },
  { "PublicDescription": "SVE gather-load Operations speculatively executed.", "EventCode": "0x80AD", "EventName": "SVE_LD_GATHER_SPEC", "BriefDescription": "SVE gather-load Operations speculatively executed." },
  { "PublicDescription": "SVE scatter-store Operations speculatively executed.", "EventCode": "0x80AE", "EventName": "SVE_ST_SCATTER_SPEC", "BriefDescription": "SVE scatter-store Operations speculatively executed." },
  { "PublicDescription": "SVE gather-prefetch Operations speculatively executed.", "EventCode": "0x80AF", "EventName": "SVE_PRF_GATHER_SPEC", "BriefDescription": "SVE gather-prefetch Operations speculatively executed." },
  { "PublicDescription": "SVE First-fault load Operations speculatively executed.", "EventCode": "0x80BC", "EventName": "SVE_LDFF_SPEC", "BriefDescription": "SVE First-fault load Operations speculatively executed." },
  { "PublicDescription": "Scalable floating-point element Operations speculatively executed.", "EventCode": "0x80C0", "EventName": "FP_SCALE_OPS_SPEC", "BriefDescription": "Scalable floating-point element Operations speculatively executed." },
  { "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.", "EventCode": "0x80C1", "EventName": "FP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable floating-point element Operations speculatively executed." },
  { "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C2", "EventName": "FP_HP_SCALE_OPS_SPEC", "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed." },
  { "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C3", "EventName": "FP_HP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed." },
  { "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C4", "EventName": "FP_SP_SCALE_OPS_SPEC", "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed." },
  { "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C5", "EventName": "FP_SP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed." },
  { "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C6", "EventName": "FP_DP_SCALE_OPS_SPEC", "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed." },
  { "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.", "EventCode": "0x80C7", "EventName": "FP_DP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." }
]
+8
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json
[
  { "ArchStdEvent": "BR_MIS_PRED" },
  { "ArchStdEvent": "BR_PRED" }
]
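Each of these per-PMU JSON files is consumed at perf build time, where the entries become C lookup tables. As an illustrative sketch only (not the actual jevents build logic), `validate_events` below is a hypothetical helper showing the two entry shapes used by these files: an `"ArchStdEvent"` reference to the common arm64 event list, or a raw event carrying a hex `EventCode` and an `EventName`.

```python
import json
import re

# Hypothetical validator mirroring the shape of these pmu-events files:
# an entry is either an "ArchStdEvent" reference (resolved against the
# common arm64 event list) or a raw event with a hex EventCode and an
# EventName.
def validate_events(text):
    events = json.loads(text)
    for ev in events:
        if "ArchStdEvent" in ev:
            continue
        # Raw events need a hex code and a non-empty name.
        assert re.fullmatch(r"0x[0-9A-Fa-f]+", ev["EventCode"])
        assert ev["EventName"]
    return len(events)

sample = '[{"ArchStdEvent": "BR_MIS_PRED"}, {"ArchStdEvent": "BR_PRED"}]'
print(validate_events(sample))  # → 2
```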
+62
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json
[
  { "PublicDescription": "This event counts read transactions from the Tofu controller to the measured CMG.", "EventCode": "0x314", "EventName": "BUS_READ_TOTAL_TOFU", "BriefDescription": "This event counts read transactions from the Tofu controller to the measured CMG." },
  { "PublicDescription": "This event counts read transactions from the PCI controller to the measured CMG.", "EventCode": "0x315", "EventName": "BUS_READ_TOTAL_PCI", "BriefDescription": "This event counts read transactions from the PCI controller to the measured CMG." },
  { "PublicDescription": "This event counts read transactions from the measured CMG local memory to the measured CMG.", "EventCode": "0x316", "EventName": "BUS_READ_TOTAL_MEM", "BriefDescription": "This event counts read transactions from the measured CMG local memory to the measured CMG." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to CMG0, if the measured CMG is not CMG0.", "EventCode": "0x318", "EventName": "BUS_WRITE_TOTAL_CMG0", "BriefDescription": "This event counts write transactions from the measured CMG to CMG0, if the measured CMG is not CMG0." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to CMG1, if the measured CMG is not CMG1.", "EventCode": "0x319", "EventName": "BUS_WRITE_TOTAL_CMG1", "BriefDescription": "This event counts write transactions from the measured CMG to CMG1, if the measured CMG is not CMG1." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to CMG2, if the measured CMG is not CMG2.", "EventCode": "0x31A", "EventName": "BUS_WRITE_TOTAL_CMG2", "BriefDescription": "This event counts write transactions from the measured CMG to CMG2, if the measured CMG is not CMG2." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to CMG3, if the measured CMG is not CMG3.", "EventCode": "0x31B", "EventName": "BUS_WRITE_TOTAL_CMG3", "BriefDescription": "This event counts write transactions from the measured CMG to CMG3, if the measured CMG is not CMG3." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to the Tofu controller.", "EventCode": "0x31C", "EventName": "BUS_WRITE_TOTAL_TOFU", "BriefDescription": "This event counts write transactions from the measured CMG to the Tofu controller." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to the PCI controller.", "EventCode": "0x31D", "EventName": "BUS_WRITE_TOTAL_PCI", "BriefDescription": "This event counts write transactions from the measured CMG to the PCI controller." },
  { "PublicDescription": "This event counts write transactions from the measured CMG to the measured CMG local memory.", "EventCode": "0x31E", "EventName": "BUS_WRITE_TOTAL_MEM", "BriefDescription": "This event counts write transactions from the measured CMG to the measured CMG local memory." }
]
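The BUS_* events count transactions, not bytes, so turning them into bandwidth requires assuming a transaction size. A minimal sketch: the 256 bytes below matches the A64FX cache-line size, but whether every counted transaction moves a full line is an assumption to verify against Fujitsu's documentation before trusting the result.

```python
# Rough read bandwidth implied by a BUS_READ_TOTAL_MEM counter delta.
# ASSUMPTION: one counted transaction moves one 256-byte cache line;
# confirm against the A64FX microarchitecture manual.
BYTES_PER_TRANSACTION = 256

def read_bandwidth_gbs(bus_read_total_mem, elapsed_seconds):
    """GB/s implied by the counter delta over the measurement window."""
    return bus_read_total_mem * BYTES_PER_TRANSACTION / elapsed_seconds / 1e9

print(read_bandwidth_gbs(4_000_000_000, 1.0))  # → 1024.0
```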
+128
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json
[
  { "ArchStdEvent": "L1I_CACHE_REFILL" },
  { "ArchStdEvent": "L1I_TLB_REFILL" },
  { "ArchStdEvent": "L1D_CACHE_REFILL" },
  { "ArchStdEvent": "L1D_CACHE" },
  { "ArchStdEvent": "L1D_TLB_REFILL" },
  { "ArchStdEvent": "L1I_CACHE" },
  { "ArchStdEvent": "L1D_CACHE_WB" },
  { "ArchStdEvent": "L2D_CACHE" },
  { "ArchStdEvent": "L2D_CACHE_REFILL" },
  { "ArchStdEvent": "L2D_CACHE_WB" },
  { "ArchStdEvent": "L2D_TLB_REFILL" },
  { "ArchStdEvent": "L2I_TLB_REFILL" },
  { "ArchStdEvent": "L2D_TLB" },
  { "ArchStdEvent": "L2I_TLB" },
  { "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", "EventCode": "0x49", "EventName": "L1D_CACHE_REFILL_PRF", "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch." },
  { "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", "EventCode": "0x59", "EventName": "L2D_CACHE_REFILL_PRF", "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch." },
  { "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.", "EventCode": "0x200", "EventName": "L1D_CACHE_REFILL_DM", "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access." },
  { "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", "EventCode": "0x202", "EventName": "L1D_CACHE_REFILL_HWPRF", "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch." },
  { "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.", "EventCode": "0x208", "EventName": "L1_MISS_WAIT", "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle." },
  { "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.", "EventCode": "0x209", "EventName": "L1I_MISS_WAIT", "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle." },
  { "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.", "EventCode": "0x300", "EventName": "L2D_CACHE_REFILL_DM", "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access." },
  { "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", "EventCode": "0x302", "EventName": "L2D_CACHE_REFILL_HWPRF", "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch." },
  { "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.", "EventCode": "0x308", "EventName": "L2_MISS_WAIT", "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle." },
  { "PublicDescription": "This event counts the number of L2 cache misses.", "EventCode": "0x309", "EventName": "L2_MISS_COUNT", "BriefDescription": "This event counts the number of L2 cache misses." },
  { "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", "EventCode": "0x325", "EventName": "L2D_SWAP_DM", "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." },
  { "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", "EventCode": "0x326", "EventName": "L2D_CACHE_MIBMCH_PRF", "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." },
  { "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", "EventCode": "0x396", "EventName": "L2D_CACHE_SWAP_LOCAL", "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." },
  { "PublicDescription": "This event counts energy consumption per cycle of the L2 cache.", "EventCode": "0x3E0", "EventName": "EA_L2", "BriefDescription": "This event counts energy consumption per cycle of the L2 cache." }
]
+5
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json
[
  { "ArchStdEvent": "CPU_CYCLES" }
]
+29
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json
[
  { "ArchStdEvent": "EXC_TAKEN" },
  { "ArchStdEvent": "EXC_UNDEF" },
  { "ArchStdEvent": "EXC_SVC" },
  { "ArchStdEvent": "EXC_PABORT" },
  { "ArchStdEvent": "EXC_DABORT" },
  { "ArchStdEvent": "EXC_IRQ" },
  { "ArchStdEvent": "EXC_FIQ" },
  { "ArchStdEvent": "EXC_SMC" },
  { "ArchStdEvent": "EXC_HVC" }
]
+131
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json
[
  { "ArchStdEvent": "SW_INCR" },
  { "ArchStdEvent": "INST_RETIRED" },
  { "ArchStdEvent": "EXC_RETURN" },
  { "ArchStdEvent": "CID_WRITE_RETIRED" },
  { "ArchStdEvent": "INST_SPEC" },
  { "ArchStdEvent": "LDREX_SPEC" },
  { "ArchStdEvent": "STREX_SPEC" },
  { "ArchStdEvent": "LD_SPEC" },
  { "ArchStdEvent": "ST_SPEC" },
  { "ArchStdEvent": "LDST_SPEC" },
  { "ArchStdEvent": "DP_SPEC" },
  { "ArchStdEvent": "ASE_SPEC" },
  { "ArchStdEvent": "VFP_SPEC" },
  { "ArchStdEvent": "PC_WRITE_SPEC" },
  { "ArchStdEvent": "CRYPTO_SPEC" },
  { "ArchStdEvent": "BR_IMMED_SPEC" },
  { "ArchStdEvent": "BR_RETURN_SPEC" },
  { "ArchStdEvent": "BR_INDIRECT_SPEC" },
  { "ArchStdEvent": "ISB_SPEC" },
  { "ArchStdEvent": "DSB_SPEC" },
  { "ArchStdEvent": "DMB_SPEC" },
  { "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", "EventCode": "0x9F", "EventName": "DCZVA_SPEC", "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction." },
  { "PublicDescription": "This event counts architecturally executed floating-point move operations.", "EventCode": "0x105", "EventName": "FP_MV_SPEC", "BriefDescription": "This event counts architecturally executed floating-point move operations." },
  { "PublicDescription": "This event counts architecturally executed operations that use the predicate register.", "EventCode": "0x108", "EventName": "PRD_SPEC", "BriefDescription": "This event counts architecturally executed operations that use the predicate register." },
  { "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.", "EventCode": "0x109", "EventName": "IEL_SPEC", "BriefDescription": "This event counts architecturally executed inter-element manipulation operations." },
  { "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.", "EventCode": "0x10A", "EventName": "IREG_SPEC", "BriefDescription": "This event counts architecturally executed inter-register manipulation operations." },
  { "PublicDescription": "This event counts architecturally executed NOSIMD load operations that use SIMD&FP registers.", "EventCode": "0x112", "EventName": "FP_LD_SPEC", "BriefDescription": "This event counts architecturally executed NOSIMD load operations that use SIMD&FP registers." },
  { "PublicDescription": "This event counts architecturally executed NOSIMD store operations that use SIMD&FP registers.", "EventCode": "0x113", "EventName": "FP_ST_SPEC", "BriefDescription": "This event counts architecturally executed NOSIMD store operations that use SIMD&FP registers." },
  { "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.", "EventCode": "0x11A", "EventName": "BC_LD_SPEC", "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations." },
  { "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", "EventCode": "0x121", "EventName": "EFFECTIVE_INST_SPEC", "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction." },
  { "PublicDescription": "This event counts architecturally executed operations that use 'pre-index' as their addressing mode.", "EventCode": "0x123", "EventName": "PRE_INDEX_SPEC", "BriefDescription": "This event counts architecturally executed operations that use 'pre-index' as their addressing mode." },
  { "PublicDescription": "This event counts architecturally executed operations that use 'post-index' as their addressing mode.", "EventCode": "0x124", "EventName": "POST_INDEX_SPEC", "BriefDescription": "This event counts architecturally executed operations that use 'post-index' as their addressing mode." }
]
+8
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json
[
  { "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.", "EventCode": "0x3E8", "EventName": "EA_MEMORY", "BriefDescription": "This event counts energy consumption per cycle of CMG local memory." }
]
+188
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json
[
  { "PublicDescription": "This event counts the number of micro-operation splits.", "EventCode": "0x139", "EventName": "UOP_SPLIT", "BriefDescription": "This event counts the number of micro-operation splits." },
  { "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.", "EventCode": "0x180", "EventName": "LD_COMP_WAIT_L2_MISS", "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.", "EventCode": "0x181", "EventName": "LD_COMP_WAIT_L2_MISS_EX", "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.", "EventCode": "0x182", "EventName": "LD_COMP_WAIT_L1_MISS", "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.", "EventCode": "0x183", "EventName": "LD_COMP_WAIT_L1_MISS_EX", "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.", "EventCode": "0x184", "EventName": "LD_COMP_WAIT", "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.", "EventCode": "0x185", "EventName": "LD_COMP_WAIT_EX", "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.", "EventCode": "0x186", "EventName": "LD_COMP_WAIT_PFP_BUSY", "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port." },
  { "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", "EventCode": "0x187", "EventName": "LD_COMP_WAIT_PFP_BUSY_EX", "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation." },
  { "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", "EventCode": "0x188", "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF", "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.", "EventCode": "0x189", "EventName": "EU_COMP_WAIT", "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.", "EventCode": "0x18A", "EventName": "FL_COMP_WAIT", "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.", "EventCode": "0x18B", "EventName": "BR_COMP_WAIT", "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.", "EventCode": "0x18C", "EventName": "ROB_EMPTY", "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.", "EventCode": "0x18D", "EventName": "ROB_EMPTY_STQ_BUSY", "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full." },
  { "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.", "EventCode": "0x18E", "EventName": "WFE_WFI_CYCLE", "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction." },
  { "PublicDescription": "This event counts every cycle that no instruction was committed, including cycles in which only MOVPRFX is committed.", "EventCode": "0x190", "EventName": "_0INST_COMMIT", "BriefDescription": "This event counts every cycle that no instruction was committed, including cycles in which only MOVPRFX is committed." },
  { "PublicDescription": "This event counts every cycle that one instruction is committed.", "EventCode": "0x191", "EventName": "_1INST_COMMIT", "BriefDescription": "This event counts every cycle that one instruction is committed." },
  { "PublicDescription": "This event counts every cycle that two instructions are committed.", "EventCode": "0x192", "EventName": "_2INST_COMMIT", "BriefDescription": "This event counts every cycle that two instructions are committed." },
  { "PublicDescription": "This event counts every cycle that three instructions are committed.", "EventCode": "0x193", "EventName": "_3INST_COMMIT", "BriefDescription": "This event counts every cycle that three instructions are committed." },
  { "PublicDescription": "This event counts every cycle that four instructions are committed.", "EventCode": "0x194", "EventName": "_4INST_COMMIT", "BriefDescription": "This event counts every cycle that four instructions are committed." },
  { "PublicDescription": "This event counts every cycle that only micro-operations are committed.", "EventCode": "0x198", "EventName": "UOP_ONLY_COMMIT", "BriefDescription": "This event counts every cycle that only micro-operations are committed." },
  { "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.", "EventCode": "0x199", "EventName": "SINGLE_MOVPRFX_COMMIT", "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed." },
  { "PublicDescription": "This event counts energy consumption per cycle of the core.", "EventCode": "0x1E0", "EventName": "EA_CORE", "BriefDescription": "This event counts energy consumption per cycle of the core." },
  { "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by the hardware prefetcher.", "EventCode": "0x230", "EventName": "L1HWPF_STREAM_PF", "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by the hardware prefetcher.", "EventCode": "0x231", "EventName": "L1HWPF_INJ_ALLOC_PF", "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by the hardware prefetcher.", "EventCode": "0x232", "EventName": "L1HWPF_INJ_NOALLOC_PF", "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by the hardware prefetcher.", "EventCode": "0x233", "EventName": "L2HWPF_STREAM_PF", "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by the hardware prefetcher.", "EventCode": "0x234", "EventName": "L2HWPF_INJ_ALLOC_PF", "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by the hardware prefetcher.", "EventCode": "0x235", "EventName": "L2HWPF_INJ_NOALLOC_PF", "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by the hardware prefetcher." },
  { "PublicDescription": "This event counts prefetch requests to L2 cache generated by other causes.", "EventCode": "0x236", "EventName": "L2HWPF_OTHER", "BriefDescription": "This event counts prefetch requests to L2 cache generated by other causes." }
]
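The _0INST_COMMIT through _4INST_COMMIT counters partition cycles by commit width, so an average commit width falls out of them directly. A minimal sketch with made-up counter values (note the _0INST_COMMIT caveat above: MOVPRFX-only cycles land in bucket zero):

```python
# Average instructions committed per cycle, derived from the
# _0INST_COMMIT .. _4INST_COMMIT cycle counters.
def avg_commit_width(commit_counts):
    """commit_counts[i] = cycles in which exactly i instructions committed."""
    cycles = sum(commit_counts)
    insts = sum(i * c for i, c in enumerate(commit_counts))
    return insts / cycles

# Made-up values for illustration: 100 empty cycles, 50 single-commit, ...
print(avg_commit_width([100, 50, 30, 10, 10]))  # → 0.9
```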
+194
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json
[
  { "ArchStdEvent": "STALL_FRONTEND" },
  { "ArchStdEvent": "STALL_BACKEND" },
  { "PublicDescription": "This event counts valid cycles of the EAGA pipeline.", "EventCode": "0x1A0", "EventName": "EAGA_VAL", "BriefDescription": "This event counts valid cycles of the EAGA pipeline." },
  { "PublicDescription": "This event counts valid cycles of the EAGB pipeline.", "EventCode": "0x1A1", "EventName": "EAGB_VAL", "BriefDescription": "This event counts valid cycles of the EAGB pipeline." },
  { "PublicDescription": "This event counts valid cycles of the EXA pipeline.", "EventCode": "0x1A2", "EventName": "EXA_VAL", "BriefDescription": "This event counts valid cycles of the EXA pipeline." },
  { "PublicDescription": "This event counts valid cycles of the EXB pipeline.", "EventCode": "0x1A3", "EventName": "EXB_VAL", "BriefDescription": "This event counts valid cycles of the EXB pipeline." },
  { "PublicDescription": "This event counts valid cycles of the FLA pipeline.", "EventCode": "0x1A4", "EventName": "FLA_VAL", "BriefDescription": "This event counts valid cycles of the FLA pipeline." },
  { "PublicDescription": "This event counts valid cycles of the FLB pipeline.", "EventCode": "0x1A5", "EventName": "FLB_VAL", "BriefDescription": "This event counts valid cycles of the FLB pipeline." },
  { "PublicDescription": "This event counts valid cycles of the PRX pipeline.", "EventCode": "0x1A6", "EventName": "PRX_VAL", "BriefDescription": "This event counts valid cycles of the PRX pipeline." },
  { "PublicDescription": "This event counts the number of 1's in the predicate bits of requests in the FLA pipeline, corrected so that it becomes 16 when all bits are 1.", "EventCode": "0x1B4", "EventName": "FLA_VAL_PRD_CNT", "BriefDescription": "This event counts the number of 1's in the predicate bits of requests in the FLA pipeline, corrected so that it becomes 16 when all bits are 1." },
  { "PublicDescription": "This event counts the number of 1's in the predicate bits of requests in the FLB pipeline, corrected so that it becomes 16 when all bits are 1.", "EventCode": "0x1B5", "EventName": "FLB_VAL_PRD_CNT", "BriefDescription": "This event counts the number of 1's in the predicate bits of requests in the FLB pipeline, corrected so that it becomes 16 when all bits are 1." },
  { "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.", "EventCode": "0x240", "EventName": "L1_PIPE0_VAL", "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0." },
  { "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.", "EventCode": "0x241", "EventName": "L1_PIPE1_VAL", "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#0 whose sce bit of tagged address is 1.", "EventCode": "0x250", "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", "BriefDescription": "This event counts requests in L1D cache pipeline#0 whose sce bit of tagged address is 1." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#0 whose pfe bit of tagged address is 1.", "EventCode": "0x251", "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", "BriefDescription": "This event counts requests in L1D cache pipeline#0 whose pfe bit of tagged address is 1." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#1 whose sce bit of tagged address is 1.", "EventCode": "0x252", "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", "BriefDescription": "This event counts requests in L1D cache pipeline#1 whose sce bit of tagged address is 1." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#1 whose pfe bit of tagged address is 1.", "EventCode": "0x253", "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", "BriefDescription": "This event counts requests in L1D cache pipeline#1 whose pfe bit of tagged address is 1." },
  { "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.", "EventCode": "0x260", "EventName": "L1_PIPE0_COMP", "BriefDescription": "This event counts completed requests in L1D cache pipeline#0." },
  { "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.", "EventCode": "0x261", "EventName": "L1_PIPE1_COMP", "BriefDescription": "This event counts completed requests in L1D cache pipeline#1." },
  { "PublicDescription": "This event counts completed requests in the L1I cache pipeline.", "EventCode": "0x268", "EventName": "L1I_PIPE_COMP", "BriefDescription": "This event counts completed requests in the L1I cache pipeline." },
  { "PublicDescription": "This event counts valid cycles of the L1I cache pipeline.", "EventCode": "0x269", "EventName": "L1I_PIPE_VAL", "BriefDescription": "This event counts valid cycles of the L1I cache pipeline." },
  { "PublicDescription": "This event counts requests in L1D pipelines aborted due to store-load interlock.", "EventCode": "0x274", "EventName": "L1_PIPE_ABORT_STLD_INTLK", "BriefDescription": "This event counts requests in L1D pipelines aborted due to store-load interlock." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#0 whose sector cache ID is not 0.", "EventCode": "0x2A0", "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0", "BriefDescription": "This event counts requests in L1D cache pipeline#0 whose sector cache ID is not 0." },
  { "PublicDescription": "This event counts requests in L1D cache pipeline#1 whose sector cache ID is not 0.", "EventCode": "0x2A1", "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0", "BriefDescription": "This event counts requests in L1D cache pipeline#1 whose sector cache ID is not 0." },
  { "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.", "EventCode": "0x2B0", "EventName": "L1_PIPE_COMP_GATHER_2FLOW", "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined."
145 + }, 146 + { 147 + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.", 148 + "EventCode": "0x2B1", 149 + "EventName": "L1_PIPE_COMP_GATHER_1FLOW", 150 + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined." 151 + }, 152 + { 153 + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.", 154 + "EventCode": "0x2B2", 155 + "EventName": "L1_PIPE_COMP_GATHER_0FLOW", 156 + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0." 157 + }, 158 + { 159 + "PublicDescription": "This event counts the number of flows of the scatter instructions.", 160 + "EventCode": "0x2B3", 161 + "EventName": "L1_PIPE_COMP_SCATTER_1FLOW", 162 + "BriefDescription": "This event counts the number of flows of the scatter instructions." 163 + }, 164 + { 165 + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.", 166 + "EventCode": "0x2B8", 167 + "EventName": "L1_PIPE0_COMP_PRD_CNT", 168 + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1." 
169 + }, 170 + { 171 + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.", 172 + "EventCode": "0x2B9", 173 + "EventName": "L1_PIPE1_COMP_PRD_CNT", 174 + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1." 175 + }, 176 + { 177 + "PublicDescription": "This event counts valid cycles of L2 cache pipeline.", 178 + "EventCode": "0x330", 179 + "EventName": "L2_PIPE_VAL", 180 + "BriefDescription": "This event counts valid cycles of L2 cache pipeline." 181 + }, 182 + { 183 + "PublicDescription": "This event counts completed requests in L2 cache pipeline.", 184 + "EventCode": "0x350", 185 + "EventName": "L2_PIPE_COMP_ALL", 186 + "BriefDescription": "This event counts completed requests in L2 cache pipeline." 187 + }, 188 + { 189 + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", 190 + "EventCode": "0x370", 191 + "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH", 192 + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." 193 + } 194 + ]
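These JSON files are consumed by perf's jevents build step, which turns each object into a C event-table entry, so malformed entries break the build. A minimal sketch of the kind of sanity check that applies (the `check_events` helper and `sample` data are illustrative, not part of the kernel tree):

```python
import json

def check_events(text):
    """Sanity-check a pmu-events JSON file: every explicit event needs a
    hex EventCode, a unique EventName, and a BriefDescription.
    Objects with only ArchStdEvent inherit their fields from the common
    architecture JSON, so they are skipped here."""
    events = json.loads(text)
    seen = set()
    for ev in events:
        if "ArchStdEvent" in ev:
            continue
        int(ev["EventCode"], 16)          # raises ValueError if not hex
        assert ev["EventName"] not in seen, "duplicate: " + ev["EventName"]
        seen.add(ev["EventName"])
        assert ev["BriefDescription"]
    return len(events)

# Two entries copied from the a64fx pipeline.json above.
sample = '''[
  { "ArchStdEvent": "STALL_FRONTEND" },
  { "PublicDescription": "This event counts valid cycles of EAGA pipeline.",
    "EventCode": "0x1A0", "EventName": "EAGA_VAL",
    "BriefDescription": "This event counts valid cycles of EAGA pipeline." }
]'''
print(check_events(sample))  # 2
```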
+110
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json
+ [
+   { "ArchStdEvent": "SIMD_INST_RETIRED" },
+   { "ArchStdEvent": "SVE_INST_RETIRED" },
+   { "ArchStdEvent": "UOP_SPEC" },
+   { "ArchStdEvent": "SVE_MATH_SPEC" },
+   { "ArchStdEvent": "FP_SPEC" },
+   { "ArchStdEvent": "FP_FMA_SPEC" },
+   { "ArchStdEvent": "FP_RECPE_SPEC" },
+   { "ArchStdEvent": "FP_CVT_SPEC" },
+   { "ArchStdEvent": "ASE_SVE_INT_SPEC" },
+   { "ArchStdEvent": "SVE_PRED_SPEC" },
+   { "ArchStdEvent": "SVE_MOVPRFX_SPEC" },
+   { "ArchStdEvent": "SVE_MOVPRFX_U_SPEC" },
+   { "ArchStdEvent": "ASE_SVE_LD_SPEC" },
+   { "ArchStdEvent": "ASE_SVE_ST_SPEC" },
+   { "ArchStdEvent": "PRF_SPEC" },
+   { "ArchStdEvent": "BASE_LD_REG_SPEC" },
+   { "ArchStdEvent": "BASE_ST_REG_SPEC" },
+   { "ArchStdEvent": "SVE_LDR_REG_SPEC" },
+   { "ArchStdEvent": "SVE_STR_REG_SPEC" },
+   { "ArchStdEvent": "SVE_LDR_PREG_SPEC" },
+   { "ArchStdEvent": "SVE_STR_PREG_SPEC" },
+   { "ArchStdEvent": "SVE_PRF_CONTIG_SPEC" },
+   { "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC" },
+   { "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC" },
+   { "ArchStdEvent": "SVE_LD_GATHER_SPEC" },
+   { "ArchStdEvent": "SVE_ST_SCATTER_SPEC" },
+   { "ArchStdEvent": "SVE_PRF_GATHER_SPEC" },
+   { "ArchStdEvent": "SVE_LDFF_SPEC" },
+   { "ArchStdEvent": "FP_SCALE_OPS_SPEC" },
+   { "ArchStdEvent": "FP_FIXED_OPS_SPEC" },
+   { "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC" },
+   { "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC" },
+   { "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC" },
+   { "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC" },
+   { "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC" },
+   { "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC" }
+ ]
+233
tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
+ [
+   { "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", "PublicDescription": "Frontend bound L1 topdown metric", "BriefDescription": "Frontend bound L1 topdown metric", "MetricGroup": "TopDownL1", "MetricName": "frontend_bound" },
+   { "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", "PublicDescription": "Bad Speculation L1 topdown metric", "BriefDescription": "Bad Speculation L1 topdown metric", "MetricGroup": "TopDownL1", "MetricName": "bad_speculation" },
+   { "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", "PublicDescription": "Retiring L1 topdown metric", "BriefDescription": "Retiring L1 topdown metric", "MetricGroup": "TopDownL1", "MetricName": "retiring" },
+   { "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", "PublicDescription": "Backend Bound L1 topdown metric", "BriefDescription": "Backend Bound L1 topdown metric", "MetricGroup": "TopDownL1", "MetricName": "backend_bound" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", "PublicDescription": "Fetch latency bound L2 topdown metric", "BriefDescription": "Fetch latency bound L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "fetch_latency_bound" },
+   { "MetricExpr": "frontend_bound - fetch_latency_bound", "PublicDescription": "Fetch bandwidth bound L2 topdown metric", "BriefDescription": "Fetch bandwidth bound L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "fetch_bandwidth_bound" },
+   { "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", "PublicDescription": "Branch mispredicts L2 topdown metric", "BriefDescription": "Branch mispredicts L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "branch_mispredicts" },
+   { "MetricExpr": "bad_speculation - branch_mispredicts", "PublicDescription": "Machine clears L2 topdown metric", "BriefDescription": "Machine clears L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "machine_clears" },
+   { "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", "PublicDescription": "Core bound L2 topdown metric", "BriefDescription": "Core bound L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "core_bound" },
+   { "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", "PublicDescription": "Memory bound L2 topdown metric", "BriefDescription": "Memory bound L2 topdown metric", "MetricGroup": "TopDownL2", "MetricName": "memory_bound" },
+   { "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by itlb miss L3 topdown metric", "BriefDescription": "Idle by itlb miss L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "idle_by_itlb_miss" },
+   { "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by icache miss L3 topdown metric", "BriefDescription": "Idle by icache miss L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "idle_by_icache_miss" },
+   { "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", "PublicDescription": "BP misp flush L3 topdown metric", "BriefDescription": "BP misp flush L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "bp_misp_flush" },
+   { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", "PublicDescription": "OOO flush L3 topdown metric", "BriefDescription": "OOO flush L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "ooo_flush" },
+   { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", "PublicDescription": "Static predictor flush L3 topdown metric", "BriefDescription": "Static predictor flush L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "sp_flush" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", "PublicDescription": "Indirect branch L3 topdown metric", "BriefDescription": "Indirect branch L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "indirect_branch" },
+   { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1014@ + armv8_pmuv3_0@event\\=0x1018@) / BR_MIS_PRED", "PublicDescription": "Push branch L3 topdown metric", "BriefDescription": "Push branch L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "push_branch" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x100c@ / BR_MIS_PRED", "PublicDescription": "Pop branch L3 topdown metric", "BriefDescription": "Pop branch L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "pop_branch" },
+   { "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1014@ - armv8_pmuv3_0@event\\=0x1018@ - armv8_pmuv3_0@event\\=0x100c@) / BR_MIS_PRED", "PublicDescription": "Other branch L3 topdown metric", "BriefDescription": "Other branch L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "other_branch" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", "PublicDescription": "Nuke flush L3 topdown metric", "BriefDescription": "Nuke flush L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "nuke_flush" },
+   { "MetricExpr": "1 - nuke_flush", "PublicDescription": "Other flush L3 topdown metric", "BriefDescription": "Other flush L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "other_flush" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", "PublicDescription": "Sync stall L3 topdown metric", "BriefDescription": "Sync stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "sync_stall" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", "PublicDescription": "Rob stall L3 topdown metric", "BriefDescription": "Rob stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "rob_stall" },
+   { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", "PublicDescription": "Ptag stall L3 topdown metric", "BriefDescription": "Ptag stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "ptag_stall" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", "PublicDescription": "SaveOpQ stall L3 topdown metric", "BriefDescription": "SaveOpQ stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "saveopq_stall" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", "PublicDescription": "PC buffer stall L3 topdown metric", "BriefDescription": "PC buffer stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "pc_buffer_stall" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", "PublicDescription": "Divider L3 topdown metric", "BriefDescription": "Divider L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "divider" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", "PublicDescription": "FSU stall L3 topdown metric", "BriefDescription": "FSU stall L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "fsu_stall" },
+   { "MetricExpr": "core_bound - divider - fsu_stall", "PublicDescription": "EXE ports util L3 topdown metric", "BriefDescription": "EXE ports util L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "exe_ports_util" },
+   { "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", "PublicDescription": "L1 bound L3 topdown metric", "BriefDescription": "L1 bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "l1_bound" },
+   { "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", "PublicDescription": "L2 bound L3 topdown metric", "BriefDescription": "L2 bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "l2_bound" },
+   { "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", "PublicDescription": "Mem bound L3 topdown metric", "BriefDescription": "Mem bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "mem_bound" },
+   { "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", "PublicDescription": "Store bound L3 topdown metric", "BriefDescription": "Store bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "store_bound" }
+ ]
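The four TopDownL1 MetricExprs above partition the machine's issue slots (4 per cycle on this core), so they sum to 1 by construction: backend_bound is defined as the remainder. A sketch of how perf evaluates them, using made-up counter values (the `topdown_l1` helper and the numbers are illustrative; real values come from `perf stat`):

```python
# Hypothetical raw counter readings, not from real hardware.
counters = {"FETCH_BUBBLE": 1_000_000, "INST_SPEC": 9_000_000,
            "INST_RETIRED": 8_000_000, "CPU_CYCLES": 5_000_000}

def topdown_l1(c):
    """Evaluate the hip08 TopDownL1 MetricExprs (4 issue slots per cycle)."""
    slots = 4 * c["CPU_CYCLES"]
    frontend_bound = c["FETCH_BUBBLE"] / slots
    bad_speculation = (c["INST_SPEC"] - c["INST_RETIRED"]) / slots
    retiring = c["INST_RETIRED"] / slots
    # Backend bound is whatever slot fraction the other three don't explain.
    backend_bound = 1 - (frontend_bound + bad_speculation + retiring)
    return frontend_bound, bad_speculation, retiring, backend_bound

fb, bs, rt, bb = topdown_l1(counters)
print(fb, bs, rt, bb)  # 0.05 0.05 0.4 0.5
```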
+1
tools/perf/pmu-events/arch/arm64/mapfile.csv
  0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
  0x00000000420f5160,v1,cavium/thunderx2,core
  0x00000000430f0af0,v1,cavium/thunderx2,core
+ 0x00000000460f0010,v1,fujitsu/a64fx,core
  0x00000000480fd010,v1,hisilicon/hip08,core
  0x00000000500f0000,v1,ampere/emag,core
+1
tools/perf/pmu-events/arch/powerpc/mapfile.csv
  # Power8 entries
  004[bcd][[:xdigit:]]{4},1,power8,core
  004e[[:xdigit:]]{4},1,power9,core
+ 0080[[:xdigit:]]{4},1,power10,core
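The first column of mapfile.csv is matched as a regex against the running CPU's id string (the PVR on powerpc, the MIDR on arm64) to pick the event directory, which is why one `0080[[:xdigit:]]{4}` line covers every Power10 revision. A simplified sketch of that lookup (perf itself compiles these with POSIX regcomp; the `lookup` helper and the POSIX-class translation are illustrative):

```python
import re

# The powerpc mapfile rows added/kept in this merge.
MAPFILE = """004[bcd][[:xdigit:]]{4},1,power8,core
004e[[:xdigit:]]{4},1,power9,core
0080[[:xdigit:]]{4},1,power10,core"""

def lookup(cpuid):
    """Return the event directory whose pattern matches this PVR string."""
    for line in MAPFILE.splitlines():
        pattern, _version, path, _type = line.split(",")
        # Translate the POSIX character class so Python's re accepts it.
        pattern = pattern.replace("[[:xdigit:]]", "[0-9a-fA-F]")
        if re.fullmatch(pattern, cpuid):
            return path
    return None

print(lookup("00801234"))  # power10
```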
+47
tools/perf/pmu-events/arch/powerpc/power10/cache.json
+ [
+   { "EventCode": "1003C", "EventName": "PM_EXEC_STALL_DMISS_L2L3", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3." },
+   { "EventCode": "34056", "EventName": "PM_EXEC_STALL_LOAD_FINISH", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ." },
+   { "EventCode": "3006C", "EventName": "PM_RUN_CYC_SMT2_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode." },
+   { "EventCode": "300F4", "EventName": "PM_RUN_INST_CMPL_CONC", "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set." },
+   { "EventCode": "4C016", "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict." },
+   { "EventCode": "4D014", "EventName": "PM_EXEC_STALL_LOAD", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit." },
+   { "EventCode": "4D016", "EventName": "PM_EXEC_STALL_PTESYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit." },
+   { "EventCode": "401EA", "EventName": "PM_THRESH_EXC_128", "BriefDescription": "Threshold counter exceeded a value of 128." },
+   { "EventCode": "400F6", "EventName": "PM_BR_MPRED_CMPL", "BriefDescription": "A mispredicted branch completed. Includes direction and target." }
+ ]
+7
tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+ [
+   { "EventCode": "4016E", "EventName": "PM_THRESH_NOT_MET", "BriefDescription": "Threshold counter did not meet threshold." }
+ ]
+217
tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+ [
+   { "EventCode": "10004", "EventName": "PM_EXEC_STALL_TRANSLATION", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve." },
+   { "EventCode": "10010", "EventName": "PM_PMC4_OVERFLOW", "BriefDescription": "The event selected for PMC4 caused the event counter to overflow." },
+   { "EventCode": "10020", "EventName": "PM_PMC4_REWIND", "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged." },
+   { "EventCode": "10038", "EventName": "PM_DISP_STALL_TRANSLATION", "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss." },
+   { "EventCode": "1003A", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict." },
+   { "EventCode": "1E050", "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR." },
+   { "EventCode": "1F054", "EventName": "PM_DTLB_HIT", "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT." },
+   { "EventCode": "101E8", "EventName": "PM_THRESH_EXC_256", "BriefDescription": "Threshold counter exceeded a count of 256." },
+   { "EventCode": "101EC", "EventName": "PM_THRESH_MET", "BriefDescription": "Threshold exceeded." },
+   { "EventCode": "100F2", "EventName": "PM_1PLUS_PPC_CMPL", "BriefDescription": "Cycles in which at least one instruction is completed by this thread." },
+   { "EventCode": "100F6", "EventName": "PM_IERAT_MISS", "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event." },
+   { "EventCode": "100F8", "EventName": "PM_DISP_STALL_CYC", "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)." },
+   { "EventCode": "20114", "EventName": "PM_MRK_L2_RC_DISP", "BriefDescription": "Marked instruction RC dispatched in L2." },
+   { "EventCode": "2C010", "EventName": "PM_EXEC_STALL_LSU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions." },
+   { "EventCode": "2C016", "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS", "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss." },
+   { "EventCode": "2C01E", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict." },
+   { "EventCode": "2D01A", "EventName": "PM_DISP_STALL_IC_MISS", "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss." },
+   { "EventCode": "2D01C", "EventName": "PM_CMPL_STALL_STCX", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." },
+   { "EventCode": "2E018", "EventName": "PM_DISP_STALL_FETCH", "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held." },
+   { "EventCode": "2E01A", "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full." },
+   { "EventCode": "2C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC2", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
+   { "EventCode": "24050", "EventName": "PM_IOPS_DISP", "BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction." },
+   { "EventCode": "2405E", "EventName": "PM_ISSUE_CANCEL", "BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction." },
+   { "EventCode": "200FA", "EventName": "PM_BR_TAKEN_CMPL", "BriefDescription": "Branch Taken instruction completed." },
+   { "EventCode": "30012", "EventName": "PM_FLUSH_COMPLETION", "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush." },
+   { "EventCode": "30014", "EventName": "PM_EXEC_STALL_STORE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit." },
+   { "EventCode": "30018", "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together." },
+   { "EventCode": "30026", "EventName": "PM_EXEC_STALL_STORE_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1." },
+   { "EventCode": "3012A", "EventName": "PM_MRK_L2_RC_DONE", "BriefDescription": "L2 RC machine completed the transaction for the marked instruction." },
+   { "EventCode": "3F046", "EventName": "PM_ITLB_HIT_1G", "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." },
+   { "EventCode": "34058", "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS", "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss." },
+   { "EventCode": "3D05C", "EventName": "PM_DISP_STALL_HELD_RENAME_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC." },
+   { "EventCode": "3E052", "EventName": "PM_DISP_STALL_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3." },
+   { "EventCode": "3E054", "EventName": "PM_LD_MISS_L1", "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." },
+   { "EventCode": "301EA", "EventName": "PM_THRESH_EXC_1024", "BriefDescription": "Threshold counter exceeded a value of 1024." },
+   { "EventCode": "300FA", "EventName": "PM_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." },
+   { "EventCode": "40006", "EventName": "PM_ISSUE_KILL", "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group." },
+   { "EventCode": "40116", "EventName": "PM_MRK_LARX_FIN", "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." },
+   { "EventCode": "4C010", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch." },
+   { "EventCode": "4D01E", "EventName": "PM_DISP_STALL_BR_MPRED", "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch." },
+   { "EventCode": "4E010", "EventName": "PM_DISP_STALL_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3." },
+   { "EventCode": "4E01A", "EventName": "PM_DISP_STALL_HELD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason." },
+   { "EventCode": "44056", "EventName": "PM_VECTOR_ST_CMPL", "BriefDescription": "Vector store instructions completed." }
+ ]
+12
tools/perf/pmu-events/arch/powerpc/power10/locks.json
+ [
+   { "EventCode": "1E058", "EventName": "PM_STCX_FAIL_FIN", "BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." },
+   { "EventCode": "4E050", "EventName": "PM_STCX_PASS_FIN", "BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock." }
+ ]
+147
tools/perf/pmu-events/arch/powerpc/power10/marked.json
[
  { "EventCode": "1002C", "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request." },
  { "EventCode": "10132", "EventName": "PM_MRK_INST_ISSUED", "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction." },
  { "EventCode": "101E0", "EventName": "PM_MRK_INST_DISP", "BriefDescription": "The thread has dispatched a randomly sampled marked instruction." },
  { "EventCode": "101E2", "EventName": "PM_MRK_BR_TAKEN_CMPL", "BriefDescription": "Marked Branch Taken instruction completed." },
  { "EventCode": "20112", "EventName": "PM_MRK_NTF_FIN", "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch." },
  { "EventCode": "2C01C", "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip." },
  { "EventCode": "20138", "EventName": "PM_MRK_ST_NEST", "BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2." },
  { "EventCode": "2013A", "EventName": "PM_MRK_BRU_FIN", "BriefDescription": "Marked Branch instruction finished." },
  { "EventCode": "2C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]." },
  { "EventCode": "24156", "EventName": "PM_MRK_STCX_FIN", "BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." },
  { "EventCode": "24158", "EventName": "PM_MRK_INST", "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens." },
  { "EventCode": "2415C", "EventName": "PM_MRK_BR_CMPL", "BriefDescription": "A marked branch completed. All branches are included." },
  { "EventCode": "200FD", "EventName": "PM_L1_ICACHE_MISS", "BriefDescription": "Demand iCache Miss." },
  { "EventCode": "30130", "EventName": "PM_MRK_INST_FIN", "BriefDescription": "Marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU." },
  { "EventCode": "34146", "EventName": "PM_MRK_LD_CMPL", "BriefDescription": "Marked loads completed." },
  { "EventCode": "3E158", "EventName": "PM_MRK_STCX_FAIL", "BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." },
  { "EventCode": "3E15A", "EventName": "PM_MRK_ST_FIN", "BriefDescription": "The marked instruction was a store of any kind." },
  { "EventCode": "30068", "EventName": "PM_L1_ICACHE_RELOADED_PREF", "BriefDescription": "Counts all Icache prefetch reloads (includes demand turned into prefetch)." },
  { "EventCode": "301E4", "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted. Includes direction and target." },
  { "EventCode": "300F6", "EventName": "PM_LD_DEMAND_MISS_L1", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish." },
  { "EventCode": "300FE", "EventName": "PM_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." },
  { "EventCode": "40012", "EventName": "PM_L1_ICACHE_RELOADED_ALL", "BriefDescription": "Counts all Icache reloads: includes demand, prefetch, prefetch turned into demand and demand turned into prefetch." },
  { "EventCode": "40134", "EventName": "PM_MRK_INST_TIMEO", "BriefDescription": "Marked instruction finish timeout (instruction was lost)." },
  { "EventCode": "4003C", "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." },
  { "EventCode": "4505A", "EventName": "PM_SP_FLOP_CMPL", "BriefDescription": "Single Precision floating point instructions completed." },
  { "EventCode": "4D058", "EventName": "PM_VECTOR_FLOP_CMPL", "BriefDescription": "Vector floating point instructions completed." },
  { "EventCode": "4D05A", "EventName": "PM_NON_MATH_FLOP_CMPL", "BriefDescription": "Non Math instructions completed." },
  { "EventCode": "401E0", "EventName": "PM_MRK_INST_CMPL", "BriefDescription": "Marked instruction completed." },
  { "EventCode": "400FE", "EventName": "PM_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss." }
]
tools/perf/pmu-events/arch/powerpc/power10/memory.json
[
  { "EventCode": "1000A", "EventName": "PM_PMC3_REWIND", "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged." },
  { "EventCode": "1C040", "EventName": "PM_XFER_FROM_SRC_PMC1", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "1C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC1", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "1C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]." },
  { "EventCode": "1C056", "EventName": "PM_DERAT_MISS_4K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "1C058", "EventName": "PM_DTLB_MISS_16G", "BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "1C05C", "EventName": "PM_DTLB_MISS_2M", "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "1E056", "EventName": "PM_EXEC_STALL_STORE_PIPE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions." },
  { "EventCode": "1F150", "EventName": "PM_MRK_ST_L2_CYC", "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion." },
  { "EventCode": "10062", "EventName": "PM_LD_L3MISS_PEND_CYC", "BriefDescription": "Cycles L3 miss was pending for this thread." },
  { "EventCode": "20010", "EventName": "PM_PMC1_OVERFLOW", "BriefDescription": "The event selected for PMC1 caused the event counter to overflow." },
  { "EventCode": "2001A", "EventName": "PM_ITLB_HIT", "BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." },
  { "EventCode": "2003E", "EventName": "PM_PTESYNC_FIN", "BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time." },
  { "EventCode": "2C040", "EventName": "PM_XFER_FROM_SRC_PMC2", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "2C054", "EventName": "PM_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "2C056", "EventName": "PM_DTLB_MISS_4K", "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "2D154", "EventName": "PM_MRK_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "200F6", "EventName": "PM_DERAT_MISS", "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "3000A", "EventName": "PM_DISP_STALL_ITLB_MISS", "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." },
  { "EventCode": "30016", "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it to resolve." },
  { "EventCode": "3C040", "EventName": "PM_XFER_FROM_SRC_PMC3", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "3C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC3", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "3C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]." },
  { "EventCode": "3C054", "EventName": "PM_DERAT_MISS_16M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "3C056", "EventName": "PM_DTLB_MISS_64K", "BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "3C058", "EventName": "PM_LARX_FIN", "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." },
  { "EventCode": "301E2", "EventName": "PM_MRK_ST_CMPL", "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores." },
  { "EventCode": "300FC", "EventName": "PM_DTLB_MISS", "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity." },
  { "EventCode": "4D02C", "EventName": "PM_PMC1_REWIND", "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged." },
  { "EventCode": "4003E", "EventName": "PM_LD_CMPL", "BriefDescription": "Loads completed." },
  { "EventCode": "4C040", "EventName": "PM_XFER_FROM_SRC_PMC4", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "4C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC4", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." },
  { "EventCode": "4C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]." },
  { "EventCode": "4C056", "EventName": "PM_DTLB_MISS_16M", "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "4C05A", "EventName": "PM_DTLB_MISS_1G", "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "4C15E", "EventName": "PM_MRK_DTLB_MISS_64K", "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." },
  { "EventCode": "4D056", "EventName": "PM_NON_FMA_FLOP_CMPL", "BriefDescription": "Non FMA instruction completed." },
  { "EventCode": "40164", "EventName": "PM_MRK_DERAT_MISS_2M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }
]
tools/perf/pmu-events/arch/powerpc/power10/others.json
[
  { "EventCode": "10016", "EventName": "PM_VSU0_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 0." },
  { "EventCode": "1001C", "EventName": "PM_ULTRAVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state." },
  { "EventCode": "100F0", "EventName": "PM_CYC", "BriefDescription": "Processor cycles." },
  { "EventCode": "10134", "EventName": "PM_MRK_ST_DONE_L2", "BriefDescription": "Marked stores completed in L2 (RC machine done)." },
  { "EventCode": "1505E", "EventName": "PM_LD_HIT_L1", "BriefDescription": "Loads that finished without experiencing an L1 miss." },
  { "EventCode": "1D05E", "EventName": "PM_DISP_STALL_HELD_HALT_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." },
  { "EventCode": "1E054", "EventName": "PM_EXEC_STALL_DMISS_L21_L31", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." },
  { "EventCode": "1E05A", "EventName": "PM_CMPL_STALL_LWSYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." },
  { "EventCode": "1F056", "EventName": "PM_DISP_SS0_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions." },
  { "EventCode": "1F15C", "EventName": "PM_MRK_STCX_L2_CYC", "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)." },
  { "EventCode": "10066", "EventName": "PM_ADJUNCT_CYC", "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011." },
  { "EventCode": "101E4", "EventName": "PM_MRK_L1_ICACHE_MISS", "BriefDescription": "Marked Instruction suffered an icache Miss." },
  { "EventCode": "101EA", "EventName": "PM_MRK_L1_RELOAD_VALID", "BriefDescription": "Marked demand reload." },
  { "EventCode": "100F4", "EventName": "PM_FLOP_CMPL", "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops." },
  { "EventCode": "100FA", "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC", "BriefDescription": "Cycles when at least one thread has the run latch set." },
  { "EventCode": "100FC", "EventName": "PM_LD_REF_L1", "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included." },
  { "EventCode": "20006", "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." },
  { "EventCode": "2000C", "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC", "BriefDescription": "Cycles when the run latch is set for all threads." },
  { "EventCode": "2E010", "EventName": "PM_ADJUNCT_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state." },
  { "EventCode": "2E014", "EventName": "PM_STCX_FIN", "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." },
  { "EventCode": "20130", "EventName": "PM_MRK_INST_DECODED", "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only." },
  { "EventCode": "20132", "EventName": "PM_MRK_DFU_ISSUE", "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time." },
  { "EventCode": "20134", "EventName": "PM_MRK_FXU_ISSUE", "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time." },
  { "EventCode": "2505C", "EventName": "PM_VSU_ISSUE", "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations." },
  { "EventCode": "2F054", "EventName": "PM_DISP_SS1_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions." },
  { "EventCode": "2F056", "EventName": "PM_DISP_SS1_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions." },
  { "EventCode": "2006C", "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode." },
  { "EventCode": "201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load." },
  { "EventCode": "201E4", "EventName": "PM_MRK_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load." },
  { "EventCode": "201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512." },
  { "EventCode": "200F2", "EventName": "PM_INST_DISP", "BriefDescription": "PowerPC instructions dispatched." },
  { "EventCode": "30132", "EventName": "PM_MRK_VSU_FIN", "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit." },
  { "EventCode": "30038", "EventName": "PM_EXEC_STALL_DMISS_LMEM", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory." },
  { "EventCode": "3F04A", "EventName": "PM_LSU_ST5_FIN", "BriefDescription": "LSU Finished an internal operation in ST2 port." },
  { "EventCode": "34054", "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." },
  { "EventCode": "3405A", "EventName": "PM_PRIVILEGED_INST_CMPL", "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state." },
  { "EventCode": "3F150", "EventName": "PM_MRK_ST_DRAIN_CYC", "BriefDescription": "Cycles to drain stores from core to L2." },
  { "EventCode": "3F054", "EventName": "PM_DISP_SS0_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions." },
  { "EventCode": "3F056", "EventName": "PM_DISP_SS0_8_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions." },
  { "EventCode": "30162", "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD", "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill." },
  { "EventCode": "40114", "EventName": "PM_MRK_START_PROBE_NOP_DISP", "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0." },
  { "EventCode": "4001C", "EventName": "PM_VSU_FIN", "BriefDescription": "VSU instructions finished." },
  { "EventCode": "4C01A", "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip." },
  { "EventCode": "4D012", "EventName": "PM_PMC3_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged." },
  { "EventCode": "4D022", "EventName": "PM_HYPERVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state." },
  { "EventCode": "4D026", "EventName": "PM_ULTRAVISOR_CYC", "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110." },
  { "EventCode": "4D028", "EventName": "PM_PRIVILEGED_CYC", "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00." },
  { "EventCode": "40030", "EventName": "PM_INST_FIN", "BriefDescription": "Instructions finished." },
  { "EventCode": "44146", "EventName": "PM_MRK_STCX_CORE_CYC", "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2." },
  { "EventCode": "44054", "EventName": "PM_VECTOR_LD_CMPL", "BriefDescription": "Vector load instructions completed." },
  { "EventCode": "45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only." },
  { "EventCode": "45056", "EventName": "PM_SCALAR_FLOP_CMPL", "BriefDescription": "Scalar floating point instructions completed." },
  { "EventCode": "4505C", "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math floating point instructions completed." },
  { "EventCode": "4D05E", "EventName": "PM_BR_CMPL", "BriefDescription": "A branch completed. All branches are included." },
  { "EventCode": "4E15E", "EventName": "PM_MRK_INST_FLUSHED", "BriefDescription": "The marked instruction was flushed." },
  { "EventCode": "401E6", "EventName": "PM_MRK_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction." },
  { "EventCode": "401E8", "EventName": "PM_MRK_DATA_FROM_L2MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load." },
  { "EventCode": "400F0", "EventName": "PM_LD_DEMAND_MISS_L1_FIN", "BriefDescription": "Load Missed L1, counted at finish time." },
  { "EventCode": "400FA", "EventName": "PM_RUN_INST_CMPL", "BriefDescription": "Completed PowerPC instructions gated by the run latch." }
]
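These pmu-events JSON files are consumed at perf build time, which turns each entry into a compiled event table so the events can be requested by name. A minimal sketch of that name-to-code mapping, using two entries copied from the files above; the `build_lookup` helper is illustrative, not part of perf's actual tooling:

```python
import json

# Two entries copied from the POWER10 pmu-events JSON above. The helper
# below is an illustrative sketch of the name -> EventCode mapping that
# the perf build step bakes into its event tables.
SAMPLE = """
[
  { "EventCode": "100F0", "EventName": "PM_CYC",
    "BriefDescription": "Processor cycles." },
  { "EventCode": "100FE", "EventName": "PM_INST_CMPL",
    "BriefDescription": "PowerPC instructions completed." }
]
"""

def build_lookup(text):
    # Map lower-cased event name to its raw event code string, matching
    # perf's case-insensitive event-name matching.
    return {e["EventName"].lower(): e["EventCode"] for e in json.loads(text)}

lookup = build_lookup(SAMPLE)
print(lookup["pm_cyc"])        # -> 100F0
print(lookup["pm_inst_cmpl"])  # -> 100FE
```

With tables like these in place, `perf stat -e pm_cyc` can resolve the symbolic name to the hardware event code for the running CPU.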
tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
··· 1 + [ 2 + { 3 + "EventCode": "100FE", 4 + "EventName": "PM_INST_CMPL", 5 + "BriefDescription": "PowerPC instructions completed." 6 + }, 7 + { 8 + "EventCode": "10006", 9 + "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", 10 + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." 11 + }, 12 + { 13 + "EventCode": "1000C", 14 + "EventName": "PM_LSU_LD0_FIN", 15 + "BriefDescription": "LSU Finished an internal operation in LD0 port." 16 + }, 17 + { 18 + "EventCode": "1000E", 19 + "EventName": "PM_MMA_ISSUED", 20 + "BriefDescription": "MMA instructions issued." 21 + }, 22 + { 23 + "EventCode": "10012", 24 + "EventName": "PM_LSU_ST0_FIN", 25 + "BriefDescription": "LSU Finished an internal operation in ST0 port." 26 + }, 27 + { 28 + "EventCode": "10014", 29 + "EventName": "PM_LSU_ST4_FIN", 30 + "BriefDescription": "LSU Finished an internal operation in ST4 port." 31 + }, 32 + { 33 + "EventCode": "10018", 34 + "EventName": "PM_IC_DEMAND_CYC", 35 + "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss." 36 + }, 37 + { 38 + "EventCode": "10022", 39 + "EventName": "PM_PMC2_SAVED", 40 + "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged." 41 + }, 42 + { 43 + "EventCode": "10024", 44 + "EventName": "PM_PMC5_OVERFLOW", 45 + "BriefDescription": "The event selected for PMC5 caused the event counter to overflow." 46 + }, 47 + { 48 + "EventCode": "10058", 49 + "EventName": "PM_EXEC_STALL_FIN_AT_DISP", 50 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU." 51 + }, 52 + { 53 + "EventCode": "1005A", 54 + "EventName": "PM_FLUSH_MPRED", 55 + "BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction." 
56 + }, 57 + { 58 + "EventCode": "1C05A", 59 + "EventName": "PM_DERAT_MISS_2M", 60 + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." 61 + }, 62 + { 63 + "EventCode": "10064", 64 + "EventName": "PM_DISP_STALL_IC_L2", 65 + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." 66 + }, 67 + { 68 + "EventCode": "10068", 69 + "EventName": "PM_BR_FIN", 70 + "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional." 71 + }, 72 + { 73 + "EventCode": "1006A", 74 + "EventName": "PM_FX_LSU_FIN", 75 + "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time." 76 + }, 77 + { 78 + "EventCode": "1006C", 79 + "EventName": "PM_RUN_CYC_ST_MODE", 80 + "BriefDescription": "Cycles when the run latch is set and the core is in ST mode." 81 + }, 82 + { 83 + "EventCode": "20004", 84 + "EventName": "PM_ISSUE_STALL", 85 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet." 86 + }, 87 + { 88 + "EventCode": "2000A", 89 + "EventName": "PM_HYPERVISOR_CYC", 90 + "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010." 91 + }, 92 + { 93 + "EventCode": "2000E", 94 + "EventName": "PM_LSU_LD1_FIN", 95 + "BriefDescription": "LSU Finished an internal operation in LD1 port." 96 + }, 97 + { 98 + "EventCode": "2C014", 99 + "EventName": "PM_CMPL_STALL_SPECIAL", 100 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing." 
101 + }, 102 + { 103 + "EventCode": "2C018", 104 + "EventName": "PM_EXEC_STALL_DMISS_L3MISS", 105 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3." 106 + }, 107 + { 108 + "EventCode": "2D010", 109 + "EventName": "PM_LSU_ST1_FIN", 110 + "BriefDescription": "LSU Finished an internal operation in ST1 port." 111 + }, 112 + { 113 + "EventCode": "2D012", 114 + "EventName": "PM_VSU1_ISSUE", 115 + "BriefDescription": "VSU instructions issued to VSU pipe 1." 116 + }, 117 + { 118 + "EventCode": "2D018", 119 + "EventName": "PM_EXEC_STALL_VSU", 120 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)." 121 + }, 122 + { 123 + "EventCode": "2E01E", 124 + "EventName": "PM_EXEC_STALL_NTC_FLUSH", 125 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children." 126 + }, 127 + { 128 + "EventCode": "2013C", 129 + "EventName": "PM_MRK_FX_LSU_FIN", 130 + "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time." 131 + }, 132 + { 133 + "EventCode": "2405A", 134 + "EventName": "PM_NTC_FIN", 135 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status." 136 + }, 137 + { 138 + "EventCode": "201E2", 139 + "EventName": "PM_MRK_LD_MISS_L1", 140 + "BriefDescription": "Marked DL1 Demand Miss counted at finish time." 
141 + }, 142 + { 143 + "EventCode": "200F4", 144 + "EventName": "PM_RUN_CYC", 145 + "BriefDescription": "Processor cycles gated by the run latch." 146 + }, 147 + { 148 + "EventCode": "30004", 149 + "EventName": "PM_DISP_STALL_FLUSH", 150 + "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." 151 + }, 152 + { 153 + "EventCode": "30008", 154 + "EventName": "PM_EXEC_STALL", 155 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category." 156 + }, 157 + { 158 + "EventCode": "3001A", 159 + "EventName": "PM_LSU_ST2_FIN", 160 + "BriefDescription": "LSU Finished an internal operation in ST2 port." 161 + }, 162 + { 163 + "EventCode": "30020", 164 + "EventName": "PM_PMC2_REWIND", 165 + "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged." 166 + }, 167 + { 168 + "EventCode": "30022", 169 + "EventName": "PM_PMC4_SAVED", 170 + "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged." 171 + }, 172 + { 173 + "EventCode": "30024", 174 + "EventName": "PM_PMC6_OVERFLOW", 175 + "BriefDescription": "The event selected for PMC6 caused the event counter to overflow." 176 + }, 177 + { 178 + "EventCode": "30028", 179 + "EventName": "PM_CMPL_STALL_MEM_ECC", 180 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC." 
181 + }, 182 + { 183 + "EventCode": "30036", 184 + "EventName": "PM_EXEC_STALL_SIMPLE_FX", 185 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit." 186 + }, 187 + { 188 + "EventCode": "3003A", 189 + "EventName": "PM_CMPL_STALL_EXCEPTION", 190 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete." 191 + }, 192 + { 193 + "EventCode": "3F044", 194 + "EventName": "PM_VSU2_ISSUE", 195 + "BriefDescription": "VSU instructions issued to VSU pipe 2." 196 + }, 197 + { 198 + "EventCode": "30058", 199 + "EventName": "PM_TLBIE_FIN", 200 + "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted." 201 + }, 202 + { 203 + "EventCode": "3D058", 204 + "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE", 205 + "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)." 206 + }, 207 + { 208 + "EventCode": "30066", 209 + "EventName": "PM_LSU_FIN", 210 + "BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)." 211 + }, 212 + { 213 + "EventCode": "40004", 214 + "EventName": "PM_FXU_ISSUE", 215 + "BriefDescription": "A fixed point instruction was issued to the VSU." 216 + }, 217 + { 218 + "EventCode": "40008", 219 + "EventName": "PM_NTC_ALL_FIN", 220 + "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline." 221 + }, 222 + { 223 + "EventCode": "40010", 224 + "EventName": "PM_PMC3_OVERFLOW", 225 + "BriefDescription": "The event selected for PMC3 caused the event counter to overflow." 
226 + }, 227 + { 228 + "EventCode": "4C012", 229 + "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS", 230 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it to resolve." 231 + }, 232 + { 233 + "EventCode": "4C018", 234 + "EventName": "PM_CMPL_STALL", 235 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason." 236 + }, 237 + { 238 + "EventCode": "4C01E", 239 + "EventName": "PM_LSU_ST3_FIN", 240 + "BriefDescription": "LSU Finished an internal operation in ST3 port." 241 + }, 242 + { 243 + "EventCode": "4D018", 244 + "EventName": "PM_EXEC_STALL_BRU", 245 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit." 246 + }, 247 + { 248 + "EventCode": "4D01A", 249 + "EventName": "PM_CMPL_STALL_HWSYNC", 250 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing." 251 + }, 252 + { 253 + "EventCode": "4D01C", 254 + "EventName": "PM_EXEC_STALL_TLBIEL", 255 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get sent to the nest." 256 + }, 257 + { 258 + "EventCode": "4E012", 259 + "EventName": "PM_EXEC_STALL_UNKNOWN", 260 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together." 261 + }, 262 + { 263 + "EventCode": "4D020", 264 + "EventName": "PM_VSU3_ISSUE", 265 + "BriefDescription": "VSU instruction was issued to VSU pipe 3." 266 + }, 267 + { 268 + "EventCode": "40132", 269 + "EventName": "PM_MRK_LSU_FIN", 270 + "BriefDescription": "LSU marked instruction finish." 
271 + }, 272 + { 273 + "EventCode": "45058", 274 + "EventName": "PM_IC_MISS_CMPL", 275 + "BriefDescription": "Non-speculative icache miss, counted at completion." 276 + }, 277 + { 278 + "EventCode": "4D050", 279 + "EventName": "PM_VSU_NON_FLOP_CMPL", 280 + "BriefDescription": "Non-floating point VSU instructions completed." 281 + }, 282 + { 283 + "EventCode": "4D052", 284 + "EventName": "PM_2FLOP_CMPL", 285 + "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed." 286 + }, 287 + { 288 + "EventCode": "400F2", 289 + "EventName": "PM_1PLUS_PPC_DISP", 290 + "BriefDescription": "Cycles in which at least one instruction was dispatched." 291 + }, 292 + { 293 + "EventCode": "400F8", 294 + "EventName": "PM_FLUSH", 295 + "BriefDescription": "Flush (any type)." 296 + } 297 + ]
+22
tools/perf/pmu-events/arch/powerpc/power10/pmc.json
··· 1 + [ 2 + { 3 + "EventCode": "301E8", 4 + "EventName": "PM_THRESH_EXC_64", 5 + "BriefDescription": "Threshold counter exceeded a value of 64." 6 + }, 7 + { 8 + "EventCode": "45050", 9 + "EventName": "PM_1FLOP_CMPL", 10 + "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." 11 + }, 12 + { 13 + "EventCode": "45052", 14 + "EventName": "PM_4FLOP_CMPL", 15 + "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." 16 + }, 17 + { 18 + "EventCode": "4D054", 19 + "EventName": "PM_8FLOP_CMPL", 20 + "BriefDescription": "Four Double Precision vector instructions completed." 21 + } 22 + ]
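Each of these files is a flat JSON array of event objects that perf's jevents build step compiles into C event tables. A minimal sketch (a hypothetical validator, not perf's actual jevents code) of checking that every entry carries the keys these power10 files rely on:

```python
import json

# A small excerpt in the same shape as the power10 event files above.
events_json = """
[
  {
    "EventCode": "200F4",
    "EventName": "PM_RUN_CYC",
    "BriefDescription": "Processor cycles gated by the run latch."
  },
  {
    "EventCode": "400F8",
    "EventName": "PM_FLUSH",
    "BriefDescription": "Flush (any type)."
  }
]
"""

# Keys every entry in these particular files carries; other pmu-events
# files may add fields such as "UMask" or "PublicDescription".
REQUIRED_KEYS = {"EventCode", "EventName", "BriefDescription"}

def check_events(text):
    """Parse a pmu-events style JSON array and verify each entry has the
    required keys, returning the event names in file order."""
    events = json.loads(text)
    for ev in events:
        missing = REQUIRED_KEYS - ev.keys()
        if missing:
            raise ValueError(f"{ev.get('EventName', '?')} missing {missing}")
    return [ev["EventName"] for ev in events]

print(check_events(events_json))  # ['PM_RUN_CYC', 'PM_FLUSH']
```

Once built into perf, events defined this way become selectable by name, e.g. `perf stat -e pm_run_cyc`.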
+57
tools/perf/pmu-events/arch/powerpc/power10/translation.json
··· 1 + [ 2 + { 3 + "EventCode": "1F15E", 4 + "EventName": "PM_MRK_START_PROBE_NOP_CMPL", 5 + "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed." 6 + }, 7 + { 8 + "EventCode": "20016", 9 + "EventName": "PM_ST_FIN", 10 + "BriefDescription": "Store finish count. Includes speculative activity." 11 + }, 12 + { 13 + "EventCode": "20018", 14 + "EventName": "PM_ST_FWD", 15 + "BriefDescription": "Store forwards that finished." 16 + }, 17 + { 18 + "EventCode": "2011C", 19 + "EventName": "PM_MRK_NTF_CYC", 20 + "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)." 21 + }, 22 + { 23 + "EventCode": "2E01C", 24 + "EventName": "PM_EXEC_STALL_TLBIE", 25 + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit." 26 + }, 27 + { 28 + "EventCode": "201E6", 29 + "EventName": "PM_THRESH_EXC_32", 30 + "BriefDescription": "Threshold counter exceeded a value of 32." 31 + }, 32 + { 33 + "EventCode": "200F0", 34 + "EventName": "PM_ST_CMPL", 35 + "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)." 36 + }, 37 + { 38 + "EventCode": "200FE", 39 + "EventName": "PM_DATA_FROM_L2MISS", 40 + "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss." 41 + }, 42 + { 43 + "EventCode": "30010", 44 + "EventName": "PM_PMC2_OVERFLOW", 45 + "BriefDescription": "The event selected for PMC2 caused the event counter to overflow." 
46 + }, 47 + { 48 + "EventCode": "4D010", 49 + "EventName": "PM_PMC1_SAVED", 50 + "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged." 51 + }, 52 + { 53 + "EventCode": "4D05C", 54 + "EventName": "PM_DPP_FLOP_CMPL", 55 + "BriefDescription": "Double-Precision or Quad-Precision instructions completed." 56 + } 57 + ]
+6 -6
tools/perf/pmu-events/arch/powerpc/power8/metrics.json
··· 885 885 "MetricName": "flush_rate_percent" 886 886 }, 887 887 { 888 - "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid", 888 + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", 889 889 "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 890 890 "MetricGroup": "general", 891 891 "MetricName": "gct_util_11to14_slots_percent" 892 892 }, 893 893 { 894 - "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid", 894 + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", 895 895 "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 896 896 "MetricGroup": "general", 897 897 "MetricName": "gct_util_15to17_slots_percent" 898 898 }, 899 899 { 900 - "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid", 900 + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", 901 901 "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 902 902 "MetricGroup": "general", 903 903 "MetricName": "gct_util_18plus_slots_percent" 904 904 }, 905 905 { 906 - "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid", 906 + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", 907 907 "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 908 908 "MetricGroup": "general", 909 909 "MetricName": "gct_util_1to2_slots_percent" 910 910 }, 911 911 { 912 - "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid", 912 + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot 
valid", 913 913 "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 914 914 "MetricGroup": "general", 915 915 "MetricName": "gct_util_3to6_slots_percent" 916 916 }, 917 917 { 918 - "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid", 918 + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid", 919 919 "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", 920 920 "MetricGroup": "general", 921 921 "MetricName": "gct_util_7to10_slots_percent"
+1 -133
tools/perf/pmu-events/arch/powerpc/power9/metrics.json
··· 1210 1210 "MetricName": "inst_from_rmem_percent" 1211 1211 }, 1212 1212 { 1213 - "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", 1214 - "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", 1215 - "MetricGroup": "l2_stats", 1216 - "MetricName": "l2_co_m_rd_util" 1217 - }, 1218 - { 1219 - "BriefDescription": "L2 dcache invalidates per run inst (per core)", 1220 - "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", 1221 - "MetricGroup": "l2_stats", 1222 - "MetricName": "l2_dc_inv_rate_percent" 1223 - }, 1224 - { 1225 1213 "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", 1226 1214 "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", 1227 1215 "MetricGroup": "l2_stats", 1228 1216 "MetricName": "l2_dem_ld_disp_percent" 1229 - }, 1230 - { 1231 - "BriefDescription": "L2 Icache invalidates per run inst (per core)", 1232 - "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", 1233 - "MetricGroup": "l2_stats", 1234 - "MetricName": "l2_ic_inv_rate_percent" 1235 - }, 1236 - { 1237 - "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", 1238 - "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", 1239 - "MetricGroup": "l2_stats", 1240 - "MetricName": "l2_inst_miss_ratio_percent" 1241 - }, 1242 - { 1243 - "BriefDescription": "Average number of cycles between L2 Load hits", 1244 - "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", 1245 - "MetricGroup": "l2_stats", 1246 - "MetricName": "l2_ld_hit_frequency" 1247 - }, 1248 - { 1249 - "BriefDescription": "Average number of cycles between L2 Load misses", 1250 - "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", 1251 - "MetricGroup": "l2_stats", 1252 - "MetricName": "l2_ld_miss_frequency" 1253 - }, 1254 - { 1255 - "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", 1256 - "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", 1257 - "MetricGroup": "l2_stats", 1258 - 
"MetricName": "l2_ld_miss_ratio_percent" 1259 - }, 1260 - { 1261 - "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", 1262 - "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", 1263 - "MetricGroup": "l2_stats", 1264 - "MetricName": "l2_ld_rd_util" 1265 - }, 1266 - { 1267 - "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", 1268 - "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", 1269 - "MetricGroup": "l2_stats", 1270 - "MetricName": "l2_ldmiss_wr_util" 1271 - }, 1272 - { 1273 - "BriefDescription": "L2 local pump prediction success", 1274 - "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", 1275 - "MetricGroup": "l2_stats", 1276 - "MetricName": "l2_local_pred_correct_percent" 1277 - }, 1278 - { 1279 - "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", 1280 - "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", 1281 - "MetricGroup": "l2_stats", 1282 - "MetricName": "l2_mod_co_percent" 1283 - }, 1284 - { 1285 - "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", 1286 - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", 1287 - "MetricGroup": "l2_stats", 1288 - "MetricName": "l2_rc_ld_disp_addr_fail_percent" 1289 - }, 1290 - { 1291 - "BriefDescription": "% of L2 Load RC dispatch attempts that failed", 1292 - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", 1293 - "MetricGroup": "l2_stats", 1294 - "MetricName": "l2_rc_ld_disp_fail_percent" 1295 - }, 1296 - { 1297 - "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", 1298 - "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", 1299 - "MetricGroup": "l2_stats", 1300 - "MetricName": 
"l2_rc_st_disp_addr_fail_percent" 1301 - }, 1302 - { 1303 - "BriefDescription": "% of L2 Store RC dispatch attempts that failed", 1304 - "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", 1305 - "MetricGroup": "l2_stats", 1306 - "MetricName": "l2_rc_st_disp_fail_percent" 1307 - }, 1308 - { 1309 - "BriefDescription": "L2 Cache Read Utilization (per core)", 1310 - "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", 1311 - "MetricGroup": "l2_stats", 1312 - "MetricName": "l2_rd_util_percent" 1313 - }, 1314 - { 1315 - "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", 1316 - "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", 1317 - "MetricGroup": "l2_stats", 1318 - "MetricName": "l2_shr_co_percent" 1319 1217 }, 1320 1218 { 1321 1219 "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", ··· 1222 1324 "MetricName": "l2_st_miss_ratio_percent" 1223 1325 }, 1224 1326 { 1225 - "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", 1226 - "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", 1227 - "MetricGroup": "l2_stats", 1228 - "MetricName": "l2_st_rd_util" 1229 - }, 1230 - { 1231 1327 "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", 1232 1328 "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", 1233 1329 "MetricGroup": "l2_stats", 1234 1330 "MetricName": "l2_st_wr_util" 1235 - }, 1236 - { 1237 - "BriefDescription": "L2 Cache Write Utilization (per core)", 1238 - "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", 1239 - "MetricGroup": "l2_stats", 1240 - "MetricName": "l2_wr_util_percent" 1241 - }, 1242 - { 1243 - "BriefDescription": "Average number of cycles 
between L3 Load hits", 1244 - "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", 1245 - "MetricGroup": "l3_stats", 1246 - "MetricName": "l3_ld_hit_frequency" 1247 - }, 1248 - { 1249 - "BriefDescription": "Average number of cycles between L3 Load misses", 1250 - "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", 1251 - "MetricGroup": "l3_stats", 1252 - "MetricName": "l3_ld_miss_frequency" 1253 - }, 1254 - { 1255 - "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", 1256 - "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", 1257 - "MetricGroup": "l3_stats", 1258 - "MetricName": "l3_wi_usage" 1259 1331 }, 1260 1332 { 1261 1333 "BriefDescription": "Average icache miss latency", ··· 1691 1823 "MetricName": "custom_secs" 1692 1824 }, 1693 1825 { 1694 - "BriefDescription": "Percentage Cycles atleast one instruction dispatched", 1826 + "BriefDescription": "Percentage Cycles at least one instruction dispatched", 1695 1827 "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", 1696 1828 "MetricName": "cycles_atleast_one_inst_dispatched_percent" 1697 1829 },
+24 -24
tools/perf/pmu-events/arch/x86/amdzen1/cache.json
··· 38 38 "EventName": "ic_fetch_stall.ic_stall_any", 39 39 "EventCode": "0x87", 40 40 "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 41 - "UMask": "0x4" 41 + "UMask": "0x04" 42 42 }, 43 43 { 44 44 "EventName": "ic_fetch_stall.ic_stall_dq_empty", 45 45 "EventCode": "0x87", 46 46 "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 47 - "UMask": "0x2" 47 + "UMask": "0x02" 48 48 }, 49 49 { 50 50 "EventName": "ic_fetch_stall.ic_stall_back_pressure", 51 51 "EventCode": "0x87", 52 52 "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 53 - "UMask": "0x1" 53 + "UMask": "0x01" 54 54 }, 55 55 { 56 56 "EventName": "ic_cache_inval.l2_invalidating_probe", 57 57 "EventCode": "0x8c", 58 58 "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 59 - "UMask": "0x2" 59 + "UMask": "0x02" 60 60 }, 61 61 { 62 62 "EventName": "ic_cache_inval.fill_invalidated", 63 63 "EventCode": "0x8c", 64 64 "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 65 - "UMask": "0x1" 65 + "UMask": "0x01" 66 66 }, 67 67 { 68 68 "EventName": "bp_tlb_rel", ··· 97 97 "EventName": "l2_request_g1.change_to_x", 98 98 "EventCode": "0x60", 99 99 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. 
Request change to writable, check L2 for current state.", 100 - "UMask": "0x8" 100 + "UMask": "0x08" 101 101 }, 102 102 { 103 103 "EventName": "l2_request_g1.prefetch_l2_cmd", 104 104 "EventCode": "0x60", 105 105 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", 106 - "UMask": "0x4" 106 + "UMask": "0x04" 107 107 }, 108 108 { 109 109 "EventName": "l2_request_g1.l2_hw_pf", 110 110 "EventCode": "0x60", 111 111 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", 112 - "UMask": "0x2" 112 + "UMask": "0x02" 113 113 }, 114 114 { 115 115 "EventName": "l2_request_g1.group2", 116 116 "EventCode": "0x60", 117 117 "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", 118 - "UMask": "0x1" 118 + "UMask": "0x01" 119 119 }, 120 120 { 121 121 "EventName": "l2_request_g1.all_no_prefetch", ··· 150 150 "EventName": "l2_request_g2.ic_rd_sized_nc", 151 151 "EventCode": "0x61", 152 152 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", 153 - "UMask": "0x8" 153 + "UMask": "0x08" 154 154 }, 155 155 { 156 156 "EventName": "l2_request_g2.smc_inval", 157 157 "EventCode": "0x61", 158 158 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", 159 - "UMask": "0x4" 159 + "UMask": "0x04" 160 160 }, 161 161 { 162 162 "EventName": "l2_request_g2.bus_locks_originator", 163 163 "EventCode": "0x61", 164 164 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", 165 - "UMask": "0x2" 165 + "UMask": "0x02" 166 166 }, 167 167 { 168 168 "EventName": "l2_request_g2.bus_locks_responses", 169 169 "EventCode": "0x61", 170 170 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). 
Bus lock response.", 171 - "UMask": "0x1" 171 + "UMask": "0x01" 172 172 }, 173 173 { 174 174 "EventName": "l2_latency.l2_cycles_waiting_on_fills", 175 175 "EventCode": "0x62", 176 176 "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 177 - "UMask": "0x1" 177 + "UMask": "0x01" 178 178 }, 179 179 { 180 180 "EventName": "l2_wcb_req.wcb_write", ··· 192 192 "EventName": "l2_wcb_req.zero_byte_store", 193 193 "EventCode": "0x63", 194 194 "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 195 - "UMask": "0x4" 195 + "UMask": "0x04" 196 196 }, 197 197 { 198 198 "EventName": "l2_wcb_req.cl_zero", 199 199 "EventCode": "0x63", 200 200 "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 201 - "UMask": "0x1" 201 + "UMask": "0x01" 202 202 }, 203 203 { 204 204 "EventName": "l2_cache_req_stat.ls_rd_blk_cs", ··· 228 228 "EventName": "l2_cache_req_stat.ls_rd_blk_c", 229 229 "EventCode": "0x64", 230 230 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", 231 - "UMask": "0x8" 231 + "UMask": "0x08" 232 232 }, 233 233 { 234 234 "EventName": "l2_cache_req_stat.ic_fill_hit_x", 235 235 "EventCode": "0x64", 236 236 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", 237 - "UMask": "0x4" 237 + "UMask": "0x04" 238 238 }, 239 239 { 240 240 "EventName": "l2_cache_req_stat.ic_fill_hit_s", 241 241 "EventCode": "0x64", 242 242 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). 
Instruction cache hit clean line in L2.", 243 - "UMask": "0x2" 243 + "UMask": "0x02" 244 244 }, 245 245 { 246 246 "EventName": "l2_cache_req_stat.ic_fill_miss", 247 247 "EventCode": "0x64", 248 248 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", 249 - "UMask": "0x1" 249 + "UMask": "0x01" 250 250 }, 251 251 { 252 252 "EventName": "l2_cache_req_stat.ic_access_in_l2", 253 253 "EventCode": "0x64", 254 254 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", 255 - "UMask": "0x7" 255 + "UMask": "0x07" 256 256 }, 257 257 { 258 258 "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", 259 259 "EventCode": "0x64", 260 260 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", 261 - "UMask": "0x9" 261 + "UMask": "0x09" 262 262 }, 263 263 { 264 264 "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", ··· 270 270 "EventName": "l2_fill_pending.l2_fill_busy", 271 271 "EventCode": "0x6d", 272 272 "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", 273 - "UMask": "0x1" 273 + "UMask": "0x01" 274 274 }, 275 275 { 276 276 "EventName": "l2_pf_hit_l2", 277 277 "EventCode": "0x70", 278 - "BriefDescription": "L2 prefetch hit in L2.", 278 + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", 279 279 "UMask": "0xff" 280 280 }, 281 281 {
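The amdzen1 hunks above consistently rewrite single-digit UMask values ("0x4") as two-digit, zero-padded hex ("0x04") while leaving already-padded values ("0xff", "0x07") alone. A sketch of that normalization (a hypothetical helper, not the script actually used for the change):

```python
def pad_umask(umask: str) -> str:
    """Zero-pad a hex UMask string to at least two hex digits,
    e.g. '0x4' -> '0x04', while '0xff' stays '0xff'."""
    value = int(umask, 16)
    return f"0x{value:02x}"

print(pad_umask("0x4"))   # 0x04
print(pad_umask("0xff"))  # 0xff
```

The padding is cosmetic for perf itself (the strings are parsed as hex either way); it makes the JSON files uniform and diff-friendly.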
+6 -6
tools/perf/pmu-events/arch/x86/amdzen1/core.json
··· 68 68 "EventCode": "0xcb", 69 69 "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 70 70 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 71 - "UMask": "0x4" 71 + "UMask": "0x04" 72 72 }, 73 73 { 74 74 "EventName": "ex_ret_mmx_fp_instr.mmx_instr", 75 75 "EventCode": "0xcb", 76 76 "BriefDescription": "MMX instructions.", 77 77 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", 78 - "UMask": "0x2" 78 + "UMask": "0x02" 79 79 }, 80 80 { 81 81 "EventName": "ex_ret_mmx_fp_instr.x87_instr", 82 82 "EventCode": "0xcb", 83 83 "BriefDescription": "x87 instructions.", 84 84 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", 85 - "UMask": "0x1" 85 + "UMask": "0x01" 86 86 }, 87 87 { 88 88 "EventName": "ex_ret_cond", ··· 103 103 "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", 104 104 "EventCode": "0x1cf", 105 105 "BriefDescription": "Tagged IBS Ops. 
Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 106 - "UMask": "0x4" 106 + "UMask": "0x04" 107 107 }, 108 108 { 109 109 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", 110 110 "EventCode": "0x1cf", 111 111 "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 112 - "UMask": "0x2" 112 + "UMask": "0x02" 113 113 }, 114 114 { 115 115 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", 116 116 "EventCode": "0x1cf", 117 117 "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 118 - "UMask": "0x1" 118 + "UMask": "0x01" 119 119 }, 120 120 { 121 121 "EventName": "ex_ret_fus_brnch_inst",
+21 -21
tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
··· 39 39 "EventCode": "0x00", 40 40 "BriefDescription": "Total number uOps assigned to all fpu pipes.", 41 41 "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.", 42 - "UMask": "0xf" 42 + "UMask": "0x0f" 43 43 }, 44 44 { 45 45 "EventName": "fpu_pipe_assignment.total3", 46 46 "EventCode": "0x00", 47 47 "BriefDescription": "Total number of fp uOps on pipe 3.", 48 48 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", 49 - "UMask": "0x8" 49 + "UMask": "0x08" 50 50 }, 51 51 { 52 52 "EventName": "fpu_pipe_assignment.total2", 53 53 "EventCode": "0x00", 54 54 "BriefDescription": "Total number of fp uOps on pipe 2.", 55 55 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. 
Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", 56 - "UMask": "0x4" 56 + "UMask": "0x04" 57 57 }, 58 58 { 59 59 "EventName": "fpu_pipe_assignment.total1", 60 60 "EventCode": "0x00", 61 61 "BriefDescription": "Total number of fp uOps on pipe 1.", 62 62 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", 63 - "UMask": "0x2" 63 + "UMask": "0x02" 64 64 }, 65 65 { 66 66 "EventName": "fpu_pipe_assignment.total0", 67 67 "EventCode": "0x00", 68 68 "BriefDescription": "Total number of fp uOps on pipe 0.", 69 69 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", 70 - "UMask": "0x1" 70 + "UMask": "0x01" 71 71 }, 72 72 { 73 73 "EventName": "fp_sched_empty", ··· 79 79 "EventCode": "0x02", 80 80 "BriefDescription": "All Ops.", 81 81 "PublicDescription": "The number of x87 floating-point Ops that have retired. 
The number of events logged per cycle can vary from 0 to 8.", 82 - "UMask": "0x7" 82 + "UMask": "0x07" 83 83 }, 84 84 { 85 85 "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", 86 86 "EventCode": "0x02", 87 87 "BriefDescription": "Divide and square root Ops.", 88 88 "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", 89 - "UMask": "0x4" 89 + "UMask": "0x04" 90 90 }, 91 91 { 92 92 "EventName": "fp_retx87_fp_ops.mul_ops", 93 93 "EventCode": "0x02", 94 94 "BriefDescription": "Multiply Ops.", 95 95 "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", 96 - "UMask": "0x2" 96 + "UMask": "0x02" 97 97 }, 98 98 { 99 99 "EventName": "fp_retx87_fp_ops.add_sub_ops", 100 100 "EventCode": "0x02", 101 101 "BriefDescription": "Add/subtract Ops.", 102 102 "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", 103 - "UMask": "0x1" 103 + "UMask": "0x01" 104 104 }, 105 105 { 106 106 "EventName": "fp_ret_sse_avx_ops.all", ··· 142 142 "EventCode": "0x03", 143 143 "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", 144 144 "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", 145 - "UMask": "0x8" 145 + "UMask": "0x08" 146 146 }, 147 147 { 148 148 "EventName": "fp_ret_sse_avx_ops.sp_div_flops", 149 149 "EventCode": "0x03", 150 150 "BriefDescription": "Single-precision divide/square root FLOPS.", 151 151 "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. 
The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", 152 - "UMask": "0x4" 152 + "UMask": "0x04" 153 153 }, 154 154 { 155 155 "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", 156 156 "EventCode": "0x03", 157 157 "BriefDescription": "Single-precision multiply FLOPS.", 158 158 "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", 159 - "UMask": "0x2" 159 + "UMask": "0x02" 160 160 }, 161 161 { 162 162 "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", 163 163 "EventCode": "0x03", 164 164 "BriefDescription": "Single-precision add/subtract FLOPS.", 165 165 "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", 166 - "UMask": "0x1" 166 + "UMask": "0x01" 167 167 }, 168 168 { 169 169 "EventName": "fp_num_mov_elim_scal_op.optimized", 170 170 "EventCode": "0x04", 171 171 "BriefDescription": "Number of Scalar Ops optimized.", 172 172 "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", 173 - "UMask": "0x8" 173 + "UMask": "0x08" 174 174 }, 175 175 { 176 176 "EventName": "fp_num_mov_elim_scal_op.opt_potential", 177 177 "EventCode": "0x04", 178 178 "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", 179 179 "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. 
Number of Ops that are candidates for optimization (have Z-bit either set or pass).", 180 - "UMask": "0x4" 180 + "UMask": "0x04" 181 181 }, 182 182 { 183 183 "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", 184 184 "EventCode": "0x04", 185 185 "BriefDescription": "Number of SSE Move Ops eliminated.", 186 186 "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", 187 - "UMask": "0x2" 187 + "UMask": "0x02" 188 188 }, 189 189 { 190 190 "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", 191 191 "EventCode": "0x04", 192 192 "BriefDescription": "Number of SSE Move Ops.", 193 193 "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", 194 - "UMask": "0x1" 194 + "UMask": "0x01" 195 195 }, 196 196 { 197 197 "EventName": "fp_retired_ser_ops.x87_ctrl_ret", 198 198 "EventCode": "0x05", 199 199 "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", 200 200 "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", 201 - "UMask": "0x8" 201 + "UMask": "0x08" 202 202 }, 203 203 { 204 204 "EventName": "fp_retired_ser_ops.x87_bot_ret", 205 205 "EventCode": "0x05", 206 206 "BriefDescription": "x87 bottom-executing uOps retired.", 207 207 "PublicDescription": "The number of serializing Ops retired. 
x87 bottom-executing uOps retired.", 208 - "UMask": "0x4" 208 + "UMask": "0x04" 209 209 }, 210 210 { 211 211 "EventName": "fp_retired_ser_ops.sse_ctrl_ret", 212 212 "EventCode": "0x05", 213 213 "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", 214 214 "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", 215 - "UMask": "0x2" 215 + "UMask": "0x02" 216 216 }, 217 217 { 218 218 "EventName": "fp_retired_ser_ops.sse_bot_ret", 219 219 "EventCode": "0x05", 220 220 "BriefDescription": "SSE bottom-executing uOps retired.", 221 221 "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", 222 - "UMask": "0x1" 222 + "UMask": "0x01" 223 223 } 224 224 ]
+21 -21
tools/perf/pmu-events/arch/x86/amdzen1/memory.json
··· 3 3 "EventName": "ls_locks.bus_lock", 4 4 "EventCode": "0x25", 5 5 "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", 6 - "UMask": "0x1" 6 + "UMask": "0x01" 7 7 }, 8 8 { 9 9 "EventName": "ls_dispatch.ld_st_dispatch", 10 10 "EventCode": "0x29", 11 11 "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", 12 - "UMask": "0x4" 12 + "UMask": "0x04" 13 13 }, 14 14 { 15 15 "EventName": "ls_dispatch.store_dispatch", 16 16 "EventCode": "0x29", 17 17 "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", 18 - "UMask": "0x2" 18 + "UMask": "0x02" 19 19 }, 20 20 { 21 21 "EventName": "ls_dispatch.ld_dispatch", 22 22 "EventCode": "0x29", 23 23 "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", 24 - "UMask": "0x1" 24 + "UMask": "0x01" 25 25 }, 26 26 { 27 27 "EventName": "ls_stlf", ··· 37 37 "EventName": "ls_mab_alloc.dc_prefetcher", 38 38 "EventCode": "0x41", 39 39 "BriefDescription": "LS MAB allocates by type - DC prefetcher.", 40 - "UMask": "0x8" 40 + "UMask": "0x08" 41 41 }, 42 42 { 43 43 "EventName": "ls_mab_alloc.stores", 44 44 "EventCode": "0x41", 45 45 "BriefDescription": "LS MAB allocates by type - stores.", 46 - "UMask": "0x2" 46 + "UMask": "0x02" 47 47 }, 48 48 { 49 49 "EventName": "ls_mab_alloc.loads", ··· 85 85 "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 86 86 "EventCode": "0x45", 87 87 "BriefDescription": "L1 DTLB Reload of a page of 1G size.", 88 - "UMask": "0x8" 88 + "UMask": "0x08" 89 89 }, 90 90 { 91 91 "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 92 92 "EventCode": "0x45", 93 93 "BriefDescription": "L1 DTLB Reload of a page of 2M size.", 94 - "UMask": "0x4" 94 + "UMask": "0x04" 95 95 }, 96 96 { 97 97 "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", 98 98 "EventCode": "0x45", 99 99 "BriefDescription": "L1 DTLB Reload of 
a page of 32K size.", 100 - "UMask": "0x2" 100 + "UMask": "0x02" 101 101 }, 102 102 { 103 103 "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 104 104 "EventCode": "0x45", 105 105 "BriefDescription": "L1 DTLB Reload of a page of 4K size.", 106 - "UMask": "0x1" 106 + "UMask": "0x01" 107 107 }, 108 108 { 109 109 "EventName": "ls_tablewalker.iside", 110 110 "EventCode": "0x46", 111 111 "BriefDescription": "Total Page Table Walks on I-side.", 112 - "UMask": "0xc" 112 + "UMask": "0x0c" 113 113 }, 114 114 { 115 115 "EventName": "ls_tablewalker.ic_type1", 116 116 "EventCode": "0x46", 117 117 "BriefDescription": "Total Page Table Walks IC Type 1.", 118 - "UMask": "0x8" 118 + "UMask": "0x08" 119 119 }, 120 120 { 121 121 "EventName": "ls_tablewalker.ic_type0", 122 122 "EventCode": "0x46", 123 123 "BriefDescription": "Total Page Table Walks IC Type 0.", 124 - "UMask": "0x4" 124 + "UMask": "0x04" 125 125 }, 126 126 { 127 127 "EventName": "ls_tablewalker.dside", 128 128 "EventCode": "0x46", 129 129 "BriefDescription": "Total Page Table Walks on D-side.", 130 - "UMask": "0x3" 130 + "UMask": "0x03" 131 131 }, 132 132 { 133 133 "EventName": "ls_tablewalker.dc_type1", 134 134 "EventCode": "0x46", 135 135 "BriefDescription": "Total Page Table Walks DC Type 1.", 136 - "UMask": "0x2" 136 + "UMask": "0x02" 137 137 }, 138 138 { 139 139 "EventName": "ls_tablewalker.dc_type0", 140 140 "EventCode": "0x46", 141 141 "BriefDescription": "Total Page Table Walks DC Type 0.", 142 - "UMask": "0x1" 142 + "UMask": "0x01" 143 143 }, 144 144 { 145 145 "EventName": "ls_misal_accesses", ··· 150 150 "EventName": "ls_pref_instr_disp.prefetch_nta", 151 151 "EventCode": "0x4b", 152 152 "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", 153 - "UMask": "0x4" 153 + "UMask": "0x04" 154 154 }, 155 155 { 156 156 "EventName": "ls_pref_instr_disp.store_prefetch_w", 157 157 "EventCode": "0x4b", 158 158 "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW 
instruction) Dispatched.", 159 - "UMask": "0x2" 159 + "UMask": "0x02" 160 160 }, 161 161 { 162 162 "EventName": "ls_pref_instr_disp.load_prefetch_w", 163 163 "EventCode": "0x4b", 164 164 "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", 165 - "UMask": "0x1" 165 + "UMask": "0x01" 166 166 }, 167 167 { 168 168 "EventName": "ls_inef_sw_pref.mab_mch_cnt", 169 169 "EventCode": "0x52", 170 170 "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", 171 - "UMask": "0x2" 171 + "UMask": "0x02" 172 172 }, 173 173 { 174 174 "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 175 175 "EventCode": "0x52", 176 176 "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", 177 - "UMask": "0x1" 177 + "UMask": "0x01" 178 178 }, 179 179 { 180 180 "EventName": "ls_not_halted_cyc",
+6 -6
tools/perf/pmu-events/arch/x86/amdzen1/other.json
··· 3 3 "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 4 4 "EventCode": "0x28a", 5 5 "BriefDescription": "OC Mode Switch. OC to IC mode switch.", 6 - "UMask": "0x2" 6 + "UMask": "0x02" 7 7 }, 8 8 { 9 9 "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 10 10 "EventCode": "0x28a", 11 11 "BriefDescription": "OC Mode Switch. IC to OC mode switch.", 12 - "UMask": "0x1" 12 + "UMask": "0x01" 13 13 }, 14 14 { 15 15 "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", ··· 33 33 "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", 34 34 "EventCode": "0xaf", 35 35 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", 36 - "UMask": "0x8" 36 + "UMask": "0x08" 37 37 }, 38 38 { 39 39 "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", 40 40 "EventCode": "0xaf", 41 41 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", 42 - "UMask": "0x4" 42 + "UMask": "0x04" 43 43 }, 44 44 { 45 45 "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", 46 46 "EventCode": "0xaf", 47 47 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", 48 - "UMask": "0x2" 48 + "UMask": "0x02" 49 49 }, 50 50 { 51 51 "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", 52 52 "EventCode": "0xaf", 53 53 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", 54 - "UMask": "0x1" 54 + "UMask": "0x01" 55 55 } 56 56 ]
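The bulk of the hunks above are a mechanical rewrite of the `"UMask"` fields into canonical zero-padded, lowercase two-digit hex (`"0x4"` → `"0x04"`, `"0xFF"` → `"0xff"`). As a sketch, the transformation each `-`/`+` pair applies is equivalent to:

```python
def normalize_umask(umask: str) -> str:
    """Re-encode a UMask string as lowercase, zero-padded two-digit hex.

    Mirrors the mechanical rewrite in these hunks, e.g.
    "0x4" -> "0x04", "0xFF" -> "0xff", "0xc" -> "0x0c";
    already-canonical values like "0x3f" pass through unchanged.
    """
    return f"0x{int(umask, 16):02x}"
```

This is an illustrative helper, not code from the patch series; the actual commits edit the JSON files directly.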
+4 -4
tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
··· 10 10 "EventName": "all_dc_accesses", 11 11 "EventCode": "0x29", 12 12 "BriefDescription": "All L1 Data Cache Accesses", 13 - "UMask": "0x7" 13 + "UMask": "0x07" 14 14 }, 15 15 { 16 16 "MetricName": "all_l2_cache_accesses", ··· 79 79 "UMask": "0x70" 80 80 }, 81 81 { 82 - "MetricName": "l2_cache_hits_from_l2_hwpf", 82 + "EventName": "l2_cache_hits_from_l2_hwpf", 83 + "EventCode": "0x70", 83 84 "BriefDescription": "L2 Cache Hits from L2 HWPF", 84 - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 85 - "MetricGroup": "l2_cache" 85 + "UMask": "0xff" 86 86 }, 87 87 { 88 88 "EventName": "l3_accesses",
+4 -4
tools/perf/pmu-events/arch/x86/amdzen2/branch.json
··· 24 24 "EventName": "bp_l1_tlb_fetch_hit", 25 25 "EventCode": "0x94", 26 26 "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", 27 - "UMask": "0xFF" 27 + "UMask": "0xff" 28 28 }, 29 29 { 30 30 "EventName": "bp_l1_tlb_fetch_hit.if1g", 31 31 "EventCode": "0x94", 32 32 "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", 33 - "UMask": "0x4" 33 + "UMask": "0x04" 34 34 }, 35 35 { 36 36 "EventName": "bp_l1_tlb_fetch_hit.if2m", 37 37 "EventCode": "0x94", 38 38 "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", 39 - "UMask": "0x2" 39 + "UMask": "0x02" 40 40 }, 41 41 { 42 42 "EventName": "bp_l1_tlb_fetch_hit.if4k", 43 43 "EventCode": "0x94", 44 44 "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", 45 - "UMask": "0x1" 45 + "UMask": "0x01" 46 46 }, 47 47 { 48 48 "EventName": "bp_tlb_rel",
+30 -30
tools/perf/pmu-events/arch/x86/amdzen2/cache.json
··· 27 27 "EventName": "l2_request_g1.change_to_x", 28 28 "EventCode": "0x60", 29 29 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", 30 - "UMask": "0x8" 30 + "UMask": "0x08" 31 31 }, 32 32 { 33 33 "EventName": "l2_request_g1.prefetch_l2_cmd", 34 34 "EventCode": "0x60", 35 35 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", 36 - "UMask": "0x4" 36 + "UMask": "0x04" 37 37 }, 38 38 { 39 39 "EventName": "l2_request_g1.l2_hw_pf", 40 40 "EventCode": "0x60", 41 41 "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", 42 - "UMask": "0x2" 42 + "UMask": "0x02" 43 43 }, 44 44 { 45 45 "EventName": "l2_request_g1.group2", 46 46 "EventCode": "0x60", 47 47 "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", 48 - "UMask": "0x1" 48 + "UMask": "0x01" 49 49 }, 50 50 { 51 51 "EventName": "l2_request_g1.all_no_prefetch", ··· 80 80 "EventName": "l2_request_g2.ic_rd_sized_nc", 81 81 "EventCode": "0x61", 82 82 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", 83 - "UMask": "0x8" 83 + "UMask": "0x08" 84 84 }, 85 85 { 86 86 "EventName": "l2_request_g2.smc_inval", 87 87 "EventCode": "0x61", 88 88 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", 89 - "UMask": "0x4" 89 + "UMask": "0x04" 90 90 }, 91 91 { 92 92 "EventName": "l2_request_g2.bus_locks_originator", 93 93 "EventCode": "0x61", 94 94 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). 
Bus locks.", 95 - "UMask": "0x2" 95 + "UMask": "0x02" 96 96 }, 97 97 { 98 98 "EventName": "l2_request_g2.bus_locks_responses", 99 99 "EventCode": "0x61", 100 100 "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", 101 - "UMask": "0x1" 101 + "UMask": "0x01" 102 102 }, 103 103 { 104 104 "EventName": "l2_latency.l2_cycles_waiting_on_fills", 105 105 "EventCode": "0x62", 106 106 "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 107 - "UMask": "0x1" 107 + "UMask": "0x01" 108 108 }, 109 109 { 110 110 "EventName": "l2_wcb_req.wcb_write", ··· 122 122 "EventName": "l2_wcb_req.zero_byte_store", 123 123 "EventCode": "0x63", 124 124 "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 125 - "UMask": "0x4" 125 + "UMask": "0x04" 126 126 }, 127 127 { 128 128 "EventName": "l2_wcb_req.cl_zero", 129 129 "EventCode": "0x63", 130 130 "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 131 - "UMask": "0x1" 131 + "UMask": "0x01" 132 132 }, 133 133 { 134 134 "EventName": "l2_cache_req_stat.ls_rd_blk_cs", ··· 158 158 "EventName": "l2_cache_req_stat.ls_rd_blk_c", 159 159 "EventCode": "0x64", 160 160 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", 161 - "UMask": "0x8" 161 + "UMask": "0x08" 162 162 }, 163 163 { 164 164 "EventName": "l2_cache_req_stat.ic_fill_hit_x", 165 165 "EventCode": "0x64", 166 166 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). 
Instruction cache hit modifiable line in L2.", 167 - "UMask": "0x4" 167 + "UMask": "0x04" 168 168 }, 169 169 { 170 170 "EventName": "l2_cache_req_stat.ic_fill_hit_s", 171 171 "EventCode": "0x64", 172 172 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", 173 - "UMask": "0x2" 173 + "UMask": "0x02" 174 174 }, 175 175 { 176 176 "EventName": "l2_cache_req_stat.ic_fill_miss", 177 177 "EventCode": "0x64", 178 178 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", 179 - "UMask": "0x1" 179 + "UMask": "0x01" 180 180 }, 181 181 { 182 182 "EventName": "l2_cache_req_stat.ic_access_in_l2", 183 183 "EventCode": "0x64", 184 184 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", 185 - "UMask": "0x7" 185 + "UMask": "0x07" 186 186 }, 187 187 { 188 188 "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", 189 189 "EventCode": "0x64", 190 190 "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", 191 - "UMask": "0x9" 191 + "UMask": "0x09" 192 192 }, 193 193 { 194 194 "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", ··· 200 200 "EventName": "l2_fill_pending.l2_fill_busy", 201 201 "EventCode": "0x6d", 202 202 "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", 203 - "UMask": "0x1" 203 + "UMask": "0x01" 204 204 }, 205 205 { 206 206 "EventName": "l2_pf_hit_l2", 207 207 "EventCode": "0x70", 208 - "BriefDescription": "L2 prefetch hit in L2.", 208 + "BriefDescription": "L2 prefetch hit in L2. 
Use l2_cache_hits_from_l2_hwpf instead.", 209 209 "UMask": "0xff" 210 210 }, 211 211 { ··· 255 255 "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", 256 256 "EventCode": "0x85", 257 257 "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", 258 - "UMask": "0x4" 258 + "UMask": "0x04" 259 259 }, 260 260 { 261 261 "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", 262 262 "EventCode": "0x85", 263 263 "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", 264 - "UMask": "0x2" 264 + "UMask": "0x02" 265 265 }, 266 266 { 267 267 "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", 268 268 "EventCode": "0x85", 269 269 "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", 270 - "UMask": "0x1" 270 + "UMask": "0x01" 271 271 }, 272 272 { 273 273 "EventName": "bp_snp_re_sync", ··· 278 278 "EventName": "ic_fetch_stall.ic_stall_any", 279 279 "EventCode": "0x87", 280 280 "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 281 - "UMask": "0x4" 281 + "UMask": "0x04" 282 282 }, 283 283 { 284 284 "EventName": "ic_fetch_stall.ic_stall_dq_empty", 285 285 "EventCode": "0x87", 286 286 "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 287 - "UMask": "0x2" 287 + "UMask": "0x02" 288 288 }, 289 289 { 290 290 "EventName": "ic_fetch_stall.ic_stall_back_pressure", 291 291 "EventCode": "0x87", 292 292 "BriefDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 293 - "UMask": "0x1" 293 + "UMask": "0x01" 294 294 }, 295 295 { 296 296 "EventName": "ic_cache_inval.l2_invalidating_probe", 297 297 "EventCode": "0x8c", 298 298 "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 299 - "UMask": "0x2" 299 + "UMask": "0x02" 300 300 }, 301 301 { 302 302 "EventName": "ic_cache_inval.fill_invalidated", 303 303 "EventCode": "0x8c", 304 304 "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 305 - "UMask": "0x1" 305 + "UMask": "0x01" 306 306 }, 307 307 { 308 308 "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 309 309 "EventCode": "0x28a", 310 310 "BriefDescription": "OC Mode Switch. OC to IC mode switch.", 311 - "UMask": "0x2" 311 + "UMask": "0x02" 312 312 }, 313 313 { 314 314 "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 315 315 "EventCode": "0x28a", 316 316 "BriefDescription": "OC Mode Switch. IC to OC mode switch.", 317 - "UMask": "0x1" 317 + "UMask": "0x01" 318 318 }, 319 319 { 320 320 "EventName": "l3_request_g1.caching_l3_cache_accesses", ··· 353 353 }, 354 354 { 355 355 "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", 356 - "EventCode": "0x9A", 356 + "EventCode": "0x9a", 357 357 "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", 358 358 "UMask": "0x3f", 359 359 "Unit": "L3PMC"
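A pattern worth noting in the cache.json hunks above: composite events carry a UMask that is the bitwise OR of their sub-events' masks. For example, `l2_cache_req_stat.ic_access_in_l2` (`0x07`) covers `ic_fill_miss` (`0x01`), `ic_fill_hit_s` (`0x02`) and `ic_fill_hit_x` (`0x04`), and `ls_tablewalker.iside` (`0x0c`) covers the two IC walk types (`0x04`, `0x08`). A minimal check of that invariant, with a hypothetical helper name:

```python
def umask_is_union(composite: str, parts: list[str]) -> bool:
    """Return True if a composite event's UMask equals the bitwise OR
    of its sub-events' UMasks (the convention these JSON files follow)."""
    acc = 0
    for p in parts:
        acc |= int(p, 16)
    return acc == int(composite, 16)
```

The zero-padding change above is cosmetic with respect to this invariant, since the values are parsed as hex either way.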
+6 -6
tools/perf/pmu-events/arch/x86/amdzen2/core.json
··· 68 68 "EventCode": "0xcb", 69 69 "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 70 70 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 71 - "UMask": "0x4" 71 + "UMask": "0x04" 72 72 }, 73 73 { 74 74 "EventName": "ex_ret_mmx_fp_instr.mmx_instr", 75 75 "EventCode": "0xcb", 76 76 "BriefDescription": "MMX instructions.", 77 77 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", 78 - "UMask": "0x2" 78 + "UMask": "0x02" 79 79 }, 80 80 { 81 81 "EventName": "ex_ret_mmx_fp_instr.x87_instr", 82 82 "EventCode": "0xcb", 83 83 "BriefDescription": "x87 instructions.", 84 84 "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", 85 - "UMask": "0x1" 85 + "UMask": "0x01" 86 86 }, 87 87 { 88 88 "EventName": "ex_ret_cond", ··· 108 108 "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", 109 109 "EventCode": "0x1cf", 110 110 "BriefDescription": "Tagged IBS Ops. 
Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 111 - "UMask": "0x4" 111 + "UMask": "0x04" 112 112 }, 113 113 { 114 114 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", 115 115 "EventCode": "0x1cf", 116 116 "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 117 - "UMask": "0x2" 117 + "UMask": "0x02" 118 118 }, 119 119 { 120 120 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", 121 121 "EventCode": "0x1cf", 122 122 "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 123 - "UMask": "0x1" 123 + "UMask": "0x01" 124 124 }, 125 125 { 126 126 "EventName": "ex_ret_fus_brnch_inst",
+21 -21
tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
··· 4 4 "EventCode": "0x00", 5 5 "BriefDescription": "Total number of fp uOps.", 6 6 "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", 7 - "UMask": "0xf" 7 + "UMask": "0x0f" 8 8 }, 9 9 { 10 10 "EventName": "fpu_pipe_assignment.total3", 11 11 "EventCode": "0x00", 12 12 "BriefDescription": "Total number uOps assigned to pipe 3.", 13 13 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", 14 - "UMask": "0x8" 14 + "UMask": "0x08" 15 15 }, 16 16 { 17 17 "EventName": "fpu_pipe_assignment.total2", 18 18 "EventCode": "0x00", 19 19 "BriefDescription": "Total number uOps assigned to pipe 2.", 20 20 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. 
Total number uOps assigned to pipe 2.", 21 - "UMask": "0x4" 21 + "UMask": "0x04" 22 22 }, 23 23 { 24 24 "EventName": "fpu_pipe_assignment.total1", 25 25 "EventCode": "0x00", 26 26 "BriefDescription": "Total number uOps assigned to pipe 1.", 27 27 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", 28 - "UMask": "0x2" 28 + "UMask": "0x02" 29 29 }, 30 30 { 31 31 "EventName": "fpu_pipe_assignment.total0", 32 32 "EventCode": "0x00", 33 33 "BriefDescription": "Total number of fp uOps on pipe 0.", 34 34 "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", 35 - "UMask": "0x1" 35 + "UMask": "0x01" 36 36 }, 37 37 { 38 38 "EventName": "fp_ret_sse_avx_ops.all", ··· 45 45 "EventCode": "0x03", 46 46 "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. 
This event can count above 15.", 47 47 "PublicDescription": "", 48 - "UMask": "0x8" 48 + "UMask": "0x08" 49 49 }, 50 50 { 51 51 "EventName": "fp_ret_sse_avx_ops.div_flops", 52 52 "EventCode": "0x03", 53 53 "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 54 - "UMask": "0x4" 54 + "UMask": "0x04" 55 55 }, 56 56 { 57 57 "EventName": "fp_ret_sse_avx_ops.mult_flops", 58 58 "EventCode": "0x03", 59 59 "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 60 - "UMask": "0x2" 60 + "UMask": "0x02" 61 61 }, 62 62 { 63 63 "EventName": "fp_ret_sse_avx_ops.add_sub_flops", 64 64 "EventCode": "0x03", 65 65 "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 66 - "UMask": "0x1" 66 + "UMask": "0x01" 67 67 }, 68 68 { 69 69 "EventName": "fp_num_mov_elim_scal_op.optimized", 70 70 "EventCode": "0x04", 71 71 "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 72 - "UMask": "0x8" 72 + "UMask": "0x08" 73 73 }, 74 74 { 75 75 "EventName": "fp_num_mov_elim_scal_op.opt_potential", 76 76 "EventCode": "0x04", 77 77 "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). 
This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 78 - "UMask": "0x4" 78 + "UMask": "0x04" 79 79 }, 80 80 { 81 81 "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", 82 82 "EventCode": "0x04", 83 83 "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 84 - "UMask": "0x2" 84 + "UMask": "0x02" 85 85 }, 86 86 { 87 87 "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", 88 88 "EventCode": "0x04", 89 89 "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 90 - "UMask": "0x1" 90 + "UMask": "0x01" 91 91 }, 92 92 { 93 93 "EventName": "fp_retired_ser_ops.sse_bot_ret", 94 94 "EventCode": "0x05", 95 95 "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", 96 - "UMask": "0x8" 96 + "UMask": "0x08" 97 97 }, 98 98 { 99 99 "EventName": "fp_retired_ser_ops.sse_ctrl_ret", 100 100 "EventCode": "0x05", 101 101 "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", 102 - "UMask": "0x4" 102 + "UMask": "0x04" 103 103 }, 104 104 { 105 105 "EventName": "fp_retired_ser_ops.x87_bot_ret", 106 106 "EventCode": "0x05", 107 107 "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", 108 - "UMask": "0x2" 108 + "UMask": "0x02" 109 109 }, 110 110 { 111 111 "EventName": "fp_retired_ser_ops.x87_ctrl_ret", 112 112 "EventCode": "0x05", 113 113 "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. 
The number of serializing Ops retired.", 114 - "UMask": "0x1" 114 + "UMask": "0x01" 115 115 }, 116 116 { 117 117 "EventName": "fp_disp_faults.ymm_spill_fault", 118 118 "EventCode": "0x0e", 119 119 "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", 120 - "UMask": "0x8" 120 + "UMask": "0x08" 121 121 }, 122 122 { 123 123 "EventName": "fp_disp_faults.ymm_fill_fault", 124 124 "EventCode": "0x0e", 125 125 "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", 126 - "UMask": "0x4" 126 + "UMask": "0x04" 127 127 }, 128 128 { 129 129 "EventName": "fp_disp_faults.xmm_fill_fault", 130 130 "EventCode": "0x0e", 131 131 "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", 132 - "UMask": "0x2" 132 + "UMask": "0x02" 133 133 }, 134 134 { 135 135 "EventName": "fp_disp_faults.x87_fill_fault", 136 136 "EventCode": "0x0e", 137 137 "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", 138 - "UMask": "0x1" 138 + "UMask": "0x01" 139 139 } 140 140 ]
+43 -43
tools/perf/pmu-events/arch/x86/amdzen2/memory.json
··· 4 4 "EventCode": "0x24", 5 5 "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", 6 6 "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", 7 - "UMask": "0x2" 7 + "UMask": "0x02" 8 8 }, 9 9 { 10 10 "EventName": "ls_locks.spec_lock_hi_spec", 11 11 "EventCode": "0x25", 12 12 "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", 13 - "UMask": "0x8" 13 + "UMask": "0x08" 14 14 }, 15 15 { 16 16 "EventName": "ls_locks.spec_lock_lo_spec", 17 17 "EventCode": "0x25", 18 18 "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", 19 - "UMask": "0x4" 19 + "UMask": "0x04" 20 20 }, 21 21 { 22 22 "EventName": "ls_locks.non_spec_lock", 23 23 "EventCode": "0x25", 24 24 "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", 25 - "UMask": "0x2" 25 + "UMask": "0x02" 26 26 }, 27 27 { 28 28 "EventName": "ls_locks.bus_lock", 29 29 "EventCode": "0x25", 30 30 "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. 
Comparable to legacy bus lock.", 31 - "UMask": "0x1" 31 + "UMask": "0x01" 32 32 }, 33 33 { 34 34 "EventName": "ls_ret_cl_flush", ··· 44 44 "EventName": "ls_dispatch.ld_st_dispatch", 45 45 "EventCode": "0x29", 46 46 "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", 47 - "UMask": "0x4" 47 + "UMask": "0x04" 48 48 }, 49 49 { 50 50 "EventName": "ls_dispatch.store_dispatch", 51 51 "EventCode": "0x29", 52 52 "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 53 - "UMask": "0x2" 53 + "UMask": "0x02" 54 54 }, 55 55 { 56 56 "EventName": "ls_dispatch.ld_dispatch", 57 57 "EventCode": "0x29", 58 58 "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 59 - "UMask": "0x1" 59 + "UMask": "0x01" 60 60 }, 61 61 { 62 62 "EventName": "ls_smi_rx", 63 - "EventCode": "0x2B", 63 + "EventCode": "0x2b", 64 64 "BriefDescription": "Number of SMIs received." 65 65 }, 66 66 { 67 67 "EventName": "ls_int_taken", 68 - "EventCode": "0x2C", 68 + "EventCode": "0x2c", 69 69 "BriefDescription": "Number of interrupts taken." 70 70 }, 71 71 { 72 72 "EventName": "ls_rdtsc", 73 - "EventCode": "0x2D", 73 + "EventCode": "0x2d", 74 74 "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." 75 75 }, 76 76 { ··· 93 93 "EventName": "ls_mab_alloc.dc_prefetcher", 94 94 "EventCode": "0x41", 95 95 "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", 96 - "UMask": "0x8" 96 + "UMask": "0x08" 97 97 }, 98 98 { 99 99 "EventName": "ls_mab_alloc.stores", 100 100 "EventCode": "0x41", 101 101 "BriefDescription": "LS MAB Allocates by Type. 
Stores.", 102 - "UMask": "0x2" 102 + "UMask": "0x02" 103 103 }, 104 104 { 105 105 "EventName": "ls_mab_alloc.loads", 106 106 "EventCode": "0x41", 107 107 "BriefDescription": "LS MAB Allocates by Type. Loads.", 108 - "UMask": "0x1" 108 + "UMask": "0x01" 109 109 }, 110 110 { 111 111 "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", ··· 123 123 "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", 124 124 "EventCode": "0x43", 125 125 "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", 126 - "UMask": "0x8" 126 + "UMask": "0x08" 127 127 }, 128 128 { 129 129 "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", 130 130 "EventCode": "0x43", 131 131 "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", 132 - "UMask": "0x2" 132 + "UMask": "0x02" 133 133 }, 134 134 { 135 135 "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", 136 136 "EventCode": "0x43", 137 137 "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", 138 - "UMask": "0x1" 138 + "UMask": "0x01" 139 139 }, 140 140 { 141 141 "EventName": "ls_l1_d_tlb_miss.all", ··· 171 171 "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 172 172 "EventCode": "0x45", 173 173 "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", 174 - "UMask": "0x8" 174 + "UMask": "0x08" 175 175 }, 176 176 { 177 177 "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 178 178 "EventCode": "0x45", 179 179 "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", 180 - "UMask": "0x4" 180 + "UMask": "0x04" 181 181 }, 182 182 { 183 183 "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 184 184 "EventCode": "0x45", 185 185 "BriefDescription": "L1 DTLB Miss. 
DTLB reload hit a coalesced page.", 186 - "UMask": "0x2" 186 + "UMask": "0x02" 187 187 }, 188 188 { 189 189 "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 190 190 "EventCode": "0x45", 191 191 "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", 192 - "UMask": "0x1" 192 + "UMask": "0x01" 193 193 }, 194 194 { 195 195 "EventName": "ls_tablewalker.iside", 196 196 "EventCode": "0x46", 197 197 "BriefDescription": "Total Page Table Walks on I-side.", 198 - "UMask": "0xc" 198 + "UMask": "0x0c" 199 199 }, 200 200 { 201 201 "EventName": "ls_tablewalker.ic_type1", 202 202 "EventCode": "0x46", 203 203 "BriefDescription": "Total Page Table Walks IC Type 1.", 204 - "UMask": "0x8" 204 + "UMask": "0x08" 205 205 }, 206 206 { 207 207 "EventName": "ls_tablewalker.ic_type0", 208 208 "EventCode": "0x46", 209 209 "BriefDescription": "Total Page Table Walks IC Type 0.", 210 - "UMask": "0x4" 210 + "UMask": "0x04" 211 211 }, 212 212 { 213 213 "EventName": "ls_tablewalker.dside", 214 214 "EventCode": "0x46", 215 215 "BriefDescription": "Total Page Table Walks on D-side.", 216 - "UMask": "0x3" 216 + "UMask": "0x03" 217 217 }, 218 218 { 219 219 "EventName": "ls_tablewalker.dc_type1", 220 220 "EventCode": "0x46", 221 221 "BriefDescription": "Total Page Table Walks DC Type 1.", 222 - "UMask": "0x2" 222 + "UMask": "0x02" 223 223 }, 224 224 { 225 225 "EventName": "ls_tablewalker.dc_type0", 226 226 "EventCode": "0x46", 227 227 "BriefDescription": "Total Page Table Walks DC Type 0.", 228 - "UMask": "0x1" 228 + "UMask": "0x01" 229 229 }, 230 230 { 231 231 "EventName": "ls_misal_accesses", ··· 242 242 "EventName": "ls_pref_instr_disp.prefetch_nta", 243 243 "EventCode": "0x4b", 244 244 "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. 
See docAPM3 PREFETCHlevel.", 245 - "UMask": "0x4" 245 + "UMask": "0x04" 246 246 }, 247 247 { 248 248 "EventName": "ls_pref_instr_disp.prefetch_w", 249 249 "EventCode": "0x4b", 250 250 "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", 251 - "UMask": "0x2" 251 + "UMask": "0x02" 252 252 }, 253 253 { 254 254 "EventName": "ls_pref_instr_disp.prefetch", 255 255 "EventCode": "0x4b", 256 256 "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", 257 - "UMask": "0x1" 257 + "UMask": "0x01" 258 258 }, 259 259 { 260 260 "EventName": "ls_inef_sw_pref.mab_mch_cnt", 261 261 "EventCode": "0x52", 262 262 "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", 263 - "UMask": "0x2" 263 + "UMask": "0x02" 264 264 }, 265 265 { 266 266 "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 267 267 "EventCode": "0x52", 268 268 "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", 269 - "UMask": "0x1" 269 + "UMask": "0x01" 270 270 }, 271 271 { 272 272 "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", ··· 284 284 "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", 285 285 "EventCode": "0x59", 286 286 "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).", 287 - "UMask": "0x8" 287 + "UMask": "0x08" 288 288 }, 289 289 { 290 290 "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", 291 291 "EventCode": "0x59", 292 292 "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. 
From another cache (home node local).", 293 - "UMask": "0x2" 293 + "UMask": "0x02" 294 294 }, 295 295 { 296 296 "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", 297 297 "EventCode": "0x59", 298 298 "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", 299 - "UMask": "0x1" 299 + "UMask": "0x01" 300 300 }, 301 301 { 302 302 "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", 303 - "EventCode": "0x5A", 303 + "EventCode": "0x5a", 304 304 "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", 305 305 "UMask": "0x40" 306 306 }, 307 307 { 308 308 "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", 309 - "EventCode": "0x5A", 309 + "EventCode": "0x5a", 310 310 "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", 311 311 "UMask": "0x10" 312 312 }, 313 313 { 314 314 "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", 315 - "EventCode": "0x5A", 315 + "EventCode": "0x5a", 316 316 "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", 317 - "UMask": "0x8" 317 + "UMask": "0x08" 318 318 }, 319 319 { 320 320 "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", 321 - "EventCode": "0x5A", 321 + "EventCode": "0x5a", 322 322 "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", 323 - "UMask": "0x2" 323 + "UMask": "0x02" 324 324 }, 325 325 { 326 326 "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", 327 - "EventCode": "0x5A", 327 + "EventCode": "0x5a", 328 328 "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", 329 - "UMask": "0x1" 329 + "UMask": "0x01" 330 330 }, 331 331 { 332 332 "EventName": "ls_not_halted_cyc",
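The pattern running through these hunks is mechanical: `UMask` values are zero-padded to two hex digits (`0x8` → `0x08`) and `EventCode` values are lowercased (`0x5A` → `0x5a`). A change like this could be applied with a short script; the sketch below is illustrative only (the helper names and the idea of rewriting the parsed JSON in place are assumptions, not part of the kernel tree):

```python
def normalize_hex(value: str) -> str:
    """Zero-pad a hex string to at least two digits and lowercase it:
    '0x8' -> '0x08', '0x5A' -> '0x5a'. Wider values such as '0xff' or
    '0x18e' are left at their natural width, only lowercased."""
    number = int(value, 16)
    return f"0x{number:02x}"

def normalize_events(events: list) -> list:
    """Apply the normalization to every event dict in a pmu-events table."""
    for event in events:
        for key in ("UMask", "EventCode"):
            if key in event:
                event[key] = normalize_hex(event[key])
    return events
```

Since the perf jevents parser reads these fields as hex either way, the padding is presumably about consistency across the vendor files rather than a functional change.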
+10 -10
tools/perf/pmu-events/arch/x86/amdzen2/other.json
··· 14 14 "EventName": "de_dis_uops_from_decoder.opcache_dispatched", 15 15 "EventCode": "0xaa", 16 16 "BriefDescription": "Count of dispatched Ops from OpCache.", 17 - "UMask": "0x2" 17 + "UMask": "0x02" 18 18 }, 19 19 { 20 20 "EventName": "de_dis_uops_from_decoder.decoder_dispatched", 21 21 "EventCode": "0xaa", 22 22 "BriefDescription": "Count of dispatched Ops from Decoder.", 23 - "UMask": "0x1" 23 + "UMask": "0x01" 24 24 }, 25 25 { 26 26 "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", ··· 50 50 "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 51 51 "EventCode": "0xae", 52 52 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", 53 - "UMask": "0x8" 53 + "UMask": "0x08" 54 54 }, 55 55 { 56 56 "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", 57 57 "EventCode": "0xae", 58 58 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", 59 - "UMask": "0x4" 59 + "UMask": "0x04" 60 60 }, 61 61 { 62 62 "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", 63 63 "EventCode": "0xae", 64 64 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", 65 - "UMask": "0x2" 65 + "UMask": "0x02" 66 66 }, 67 67 { 68 68 "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", 69 69 "EventCode": "0xae", 70 70 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. 
Applies to all ops that have an integer destination register.", 71 - "UMask": "0x1" 71 + "UMask": "0x01" 72 72 }, 73 73 { 74 74 "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", ··· 92 92 "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", 93 93 "EventCode": "0xaf", 94 94 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", 95 - "UMask": "0x8" 95 + "UMask": "0x08" 96 96 }, 97 97 { 98 98 "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", 99 99 "EventCode": "0xaf", 100 100 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", 101 - "UMask": "0x4" 101 + "UMask": "0x04" 102 102 }, 103 103 { 104 104 "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", 105 105 "EventCode": "0xaf", 106 106 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", 107 - "UMask": "0x2" 107 + "UMask": "0x02" 108 108 }, 109 109 { 110 110 "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", 111 111 "EventCode": "0xaf", 112 112 "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", 113 - "UMask": "0x1" 113 + "UMask": "0x01" 114 114 } 115 115 ]
+4 -4
tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
··· 10 10 "EventName": "all_dc_accesses", 11 11 "EventCode": "0x29", 12 12 "BriefDescription": "All L1 Data Cache Accesses", 13 - "UMask": "0x7" 13 + "UMask": "0x07" 14 14 }, 15 15 { 16 16 "MetricName": "all_l2_cache_accesses", ··· 79 79 "UMask": "0x70" 80 80 }, 81 81 { 82 - "MetricName": "l2_cache_hits_from_l2_hwpf", 82 + "EventName": "l2_cache_hits_from_l2_hwpf", 83 + "EventCode": "0x70", 83 84 "BriefDescription": "L2 Cache Hits from L2 HWPF", 84 - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 85 - "MetricGroup": "l2_cache" 85 + "UMask": "0xff" 86 86 }, 87 87 { 88 88 "EventName": "l3_accesses",
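The recommended.json hunk above does more than reformat: it replaces the derived `l2_cache_hits_from_l2_hwpf` metric (a `MetricExpr` summing `l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3`) with a native event, `EventCode 0x70` / `UMask 0xff` — the same encoding the amdzen3 cache.json below gives for `l2_pf_hit_l2`. A sketch of the difference, using hypothetical counter readings (on real hardware these would come from `perf stat` on the component events):

```python
# Hypothetical counter readings, for illustration only.
l2_pf_hit_l2 = 120_000          # L2 prefetches that hit in L2
l2_pf_miss_l2_hit_l3 = 30_000   # L2 prefetches that missed L2 but hit L3
l2_pf_miss_l2_l3 = 5_000        # L2 prefetches that missed L2 and L3

# Old derived metric: summed all three outcomes, hits and misses alike.
old_metric = l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3

# New native event (PMCx070, UMask 0xff): counts the L2-prefetch hits directly.
new_event = l2_pf_hit_l2

print(old_metric, new_event)  # 155000 120000
```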
+53
tools/perf/pmu-events/arch/x86/amdzen3/branch.json
··· 1 + [ 2 + { 3 + "EventName": "bp_l1_btb_correct", 4 + "EventCode": "0x8a", 5 + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." 6 + }, 7 + { 8 + "EventName": "bp_l2_btb_correct", 9 + "EventCode": "0x8b", 10 + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." 11 + }, 12 + { 13 + "EventName": "bp_dyn_ind_pred", 14 + "EventCode": "0x8e", 15 + "BriefDescription": "Dynamic Indirect Predictions.", 16 + "PublicDescription": "The number of times a branch used the indirect predictor to make a prediction." 17 + }, 18 + { 19 + "EventName": "bp_de_redirect", 20 + "EventCode": "0x91", 21 + "BriefDescription": "Decode Redirects", 22 + "PublicDescription": "The number of times the instruction decoder overrides the predicted target." 23 + }, 24 + { 25 + "EventName": "bp_l1_tlb_fetch_hit", 26 + "EventCode": "0x94", 27 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", 28 + "UMask": "0xff" 29 + }, 30 + { 31 + "EventName": "bp_l1_tlb_fetch_hit.if1g", 32 + "EventCode": "0x94", 33 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (1G page size).", 34 + "UMask": "0x04" 35 + }, 36 + { 37 + "EventName": "bp_l1_tlb_fetch_hit.if2m", 38 + "EventCode": "0x94", 39 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (2M page size).", 40 + "UMask": "0x02" 41 + }, 42 + { 43 + "EventName": "bp_l1_tlb_fetch_hit.if4k", 44 + "EventCode": "0x94", 45 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (4K or 16K page size).", 46 + "UMask": "0x01" 47 + }, 48 + { 49 + "EventName": "bp_tlb_rel", 50 + "EventCode": "0x99", 51 + "BriefDescription": "The number of ITLB reload requests." 52 + } 53 + ]
+402
tools/perf/pmu-events/arch/x86/amdzen3/cache.json
··· 1 + [ 2 + { 3 + "EventName": "l2_request_g1.rd_blk_l", 4 + "EventCode": "0x60", 5 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", 6 + "UMask": "0x80" 7 + }, 8 + { 9 + "EventName": "l2_request_g1.rd_blk_x", 10 + "EventCode": "0x60", 11 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", 12 + "UMask": "0x40" 13 + }, 14 + { 15 + "EventName": "l2_request_g1.ls_rd_blk_c_s", 16 + "EventCode": "0x60", 17 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", 18 + "UMask": "0x20" 19 + }, 20 + { 21 + "EventName": "l2_request_g1.cacheable_ic_read", 22 + "EventCode": "0x60", 23 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", 24 + "UMask": "0x10" 25 + }, 26 + { 27 + "EventName": "l2_request_g1.change_to_x", 28 + "EventCode": "0x60", 29 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", 30 + "UMask": "0x08" 31 + }, 32 + { 33 + "EventName": "l2_request_g1.prefetch_l2_cmd", 34 + "EventCode": "0x60", 35 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", 36 + "UMask": "0x04" 37 + }, 38 + { 39 + "EventName": "l2_request_g1.l2_hw_pf", 40 + "EventCode": "0x60", 41 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. 
Types of PF and L2 hit/miss broken out in a separate perfmon event.", 42 + "UMask": "0x02" 43 + }, 44 + { 45 + "EventName": "l2_request_g1.group2", 46 + "EventCode": "0x60", 47 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", 48 + "UMask": "0x01" 49 + }, 50 + { 51 + "EventName": "l2_request_g1.all_no_prefetch", 52 + "EventCode": "0x60", 53 + "UMask": "0xf9" 54 + }, 55 + { 56 + "EventName": "l2_request_g2.group1", 57 + "EventCode": "0x61", 58 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", 59 + "UMask": "0x80" 60 + }, 61 + { 62 + "EventName": "l2_request_g2.ls_rd_sized", 63 + "EventCode": "0x61", 64 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", 65 + "UMask": "0x40" 66 + }, 67 + { 68 + "EventName": "l2_request_g2.ls_rd_sized_nc", 69 + "EventCode": "0x61", 70 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", 71 + "UMask": "0x20" 72 + }, 73 + { 74 + "EventName": "l2_request_g2.ic_rd_sized", 75 + "EventCode": "0x61", 76 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", 77 + "UMask": "0x10" 78 + }, 79 + { 80 + "EventName": "l2_request_g2.ic_rd_sized_nc", 81 + "EventCode": "0x61", 82 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", 83 + "UMask": "0x08" 84 + }, 85 + { 86 + "EventName": "l2_request_g2.smc_inval", 87 + "EventCode": "0x61", 88 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", 89 + "UMask": "0x04" 90 + }, 91 + { 92 + "EventName": "l2_request_g2.bus_locks_originator", 93 + "EventCode": "0x61", 94 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). 
Bus locks.", 95 + "UMask": "0x02" 96 + }, 97 + { 98 + "EventName": "l2_request_g2.bus_locks_responses", 99 + "EventCode": "0x61", 100 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", 101 + "UMask": "0x01" 102 + }, 103 + { 104 + "EventName": "l2_latency.l2_cycles_waiting_on_fills", 105 + "EventCode": "0x62", 106 + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 107 + "UMask": "0x01" 108 + }, 109 + { 110 + "EventName": "l2_wcb_req.wcb_write", 111 + "EventCode": "0x63", 112 + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", 113 + "UMask": "0x40" 114 + }, 115 + { 116 + "EventName": "l2_wcb_req.wcb_close", 117 + "EventCode": "0x63", 118 + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", 119 + "UMask": "0x20" 120 + }, 121 + { 122 + "EventName": "l2_wcb_req.zero_byte_store", 123 + "EventCode": "0x63", 124 + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 125 + "UMask": "0x04" 126 + }, 127 + { 128 + "EventName": "l2_wcb_req.cl_zero", 129 + "EventCode": "0x63", 130 + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 131 + "UMask": "0x01" 132 + }, 133 + { 134 + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", 135 + "EventCode": "0x64", 136 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). 
Data cache shared read hit in L2", 137 + "UMask": "0x80" 138 + }, 139 + { 140 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", 141 + "EventCode": "0x64", 142 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2. Modifiable.", 143 + "UMask": "0x40" 144 + }, 145 + { 146 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", 147 + "EventCode": "0x64", 148 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit non-modifiable line in L2.", 149 + "UMask": "0x20" 150 + }, 151 + { 152 + "EventName": "l2_cache_req_stat.ls_rd_blk_x", 153 + "EventCode": "0x64", 154 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", 155 + "UMask": "0x10" 156 + }, 157 + { 158 + "EventName": "l2_cache_req_stat.ls_rd_blk_c", 159 + "EventCode": "0x64", 160 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types). Use l2_cache_misses_from_dc_misses instead.", 161 + "UMask": "0x08" 162 + }, 163 + { 164 + "EventName": "l2_cache_req_stat.ic_fill_hit_x", 165 + "EventCode": "0x64", 166 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", 167 + "UMask": "0x04" 168 + }, 169 + { 170 + "EventName": "l2_cache_req_stat.ic_fill_hit_s", 171 + "EventCode": "0x64", 172 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit non-modifiable line in L2.", 173 + "UMask": "0x02" 174 + }, 175 + { 176 + "EventName": "l2_cache_req_stat.ic_fill_miss", 177 + "EventCode": "0x64", 178 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2. 
Use l2_cache_misses_from_ic_miss instead.", 179 + "UMask": "0x01" 180 + }, 181 + { 182 + "EventName": "l2_cache_req_stat.ic_access_in_l2", 183 + "EventCode": "0x64", 184 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", 185 + "UMask": "0x07" 186 + }, 187 + { 188 + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", 189 + "EventCode": "0x64", 190 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", 191 + "UMask": "0x09" 192 + }, 193 + { 194 + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", 195 + "EventCode": "0x64", 196 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", 197 + "UMask": "0xf6" 198 + }, 199 + { 200 + "EventName": "l2_fill_pending.l2_fill_busy", 201 + "EventCode": "0x6d", 202 + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", 203 + "UMask": "0x01" 204 + }, 205 + { 206 + "EventName": "l2_pf_hit_l2", 207 + "EventCode": "0x70", 208 + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", 209 + "UMask": "0xff" 210 + }, 211 + { 212 + "EventName": "l2_pf_miss_l2_hit_l3", 213 + "EventCode": "0x71", 214 + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", 215 + "UMask": "0xff" 216 + }, 217 + { 218 + "EventName": "l2_pf_miss_l2_l3", 219 + "EventCode": "0x72", 220 + "BriefDescription": "L2 prefetcher misses in L3. 
Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", 221 + "UMask": "0xff" 222 + }, 223 + { 224 + "EventName": "ic_fw32", 225 + "EventCode": "0x80", 226 + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." 227 + }, 228 + { 229 + "EventName": "ic_fw32_miss", 230 + "EventCode": "0x81", 231 + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." 232 + }, 233 + { 234 + "EventName": "ic_cache_fill_l2", 235 + "EventCode": "0x82", 236 + "BriefDescription": "Instruction Cache Refills from L2. The number of 64 byte instruction cache line was fulfilled from the L2 cache." 237 + }, 238 + { 239 + "EventName": "ic_cache_fill_sys", 240 + "EventCode": "0x83", 241 + "BriefDescription": "Instruction Cache Refills from System. The number of 64 byte instruction cache line fulfilled from system memory or another cache." 242 + }, 243 + { 244 + "EventName": "bp_l1_tlb_miss_l2_tlb_hit", 245 + "EventCode": "0x84", 246 + "BriefDescription": "L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." 247 + }, 248 + { 249 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", 250 + "EventCode": "0x85", 251 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", 252 + "UMask": "0xff" 253 + }, 254 + { 255 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", 256 + "EventCode": "0x85", 257 + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for >4K Coalesced page.", 258 + "UMask": "0x08" 259 + }, 260 + { 261 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", 262 + "EventCode": "0x85", 263 + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. 
Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 1G page.", 264 + "UMask": "0x04" 265 + }, 266 + { 267 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", 268 + "EventCode": "0x85", 269 + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 2M page.", 270 + "UMask": "0x02" 271 + }, 272 + { 273 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", 274 + "EventCode": "0x85", 275 + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk to 4K page.", 276 + "UMask": "0x01" 277 + }, 278 + { 279 + "EventName": "bp_snp_re_sync", 280 + "EventCode": "0x86", 281 + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." 282 + }, 283 + { 284 + "EventName": "ic_fetch_stall.ic_stall_any", 285 + "EventCode": "0x87", 286 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 287 + "UMask": "0x04" 288 + }, 289 + { 290 + "EventName": "ic_fetch_stall.ic_stall_dq_empty", 291 + "EventCode": "0x87", 292 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 293 + "UMask": "0x02" 294 + }, 295 + { 296 + "EventName": "ic_fetch_stall.ic_stall_back_pressure", 297 + "EventCode": "0x87", 298 + "BriefDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 299 + "UMask": "0x01" 300 + }, 301 + { 302 + "EventName": "ic_cache_inval.l2_invalidating_probe", 303 + "EventCode": "0x8c", 304 + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 305 + "UMask": "0x02" 306 + }, 307 + { 308 + "EventName": "ic_cache_inval.fill_invalidated", 309 + "EventCode": "0x8c", 310 + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 311 + "UMask": "0x01" 312 + }, 313 + { 314 + "EventName": "ic_tag_hit_miss.all_instruction_cache_accesses", 315 + "EventCode": "0x18e", 316 + "BriefDescription": "All Instruction Cache Accesses. Counts various IC tag related hit and miss events.", 317 + "UMask": "0x1f" 318 + }, 319 + { 320 + "EventName": "ic_tag_hit_miss.instruction_cache_miss", 321 + "EventCode": "0x18e", 322 + "BriefDescription": "Instruction Cache Miss. Counts various IC tag related hit and miss events.", 323 + "UMask": "0x18" 324 + }, 325 + { 326 + "EventName": "ic_tag_hit_miss.instruction_cache_hit", 327 + "EventCode": "0x18e", 328 + "BriefDescription": "Instruction Cache Hit. Counts various IC tag related hit and miss events.", 329 + "UMask": "0x07" 330 + }, 331 + { 332 + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 333 + "EventCode": "0x28a", 334 + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", 335 + "UMask": "0x02" 336 + }, 337 + { 338 + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 339 + "EventCode": "0x28a", 340 + "BriefDescription": "OC Mode Switch. 
IC to OC mode switch.", 341 + "UMask": "0x01" 342 + }, 343 + { 344 + "EventName": "op_cache_hit_miss.all_op_cache_accesses", 345 + "EventCode": "0x28f", 346 + "BriefDescription": "All Op Cache accesses. Counts Op Cache micro-tag hit/miss events", 347 + "UMask": "0x07" 348 + }, 349 + { 350 + "EventName": "op_cache_hit_miss.op_cache_miss", 351 + "EventCode": "0x28f", 352 + "BriefDescription": "Op Cache Miss. Counts Op Cache micro-tag hit/miss events", 353 + "UMask": "0x04" 354 + }, 355 + { 356 + "EventName": "op_cache_hit_miss.op_cache_hit", 357 + "EventCode": "0x28f", 358 + "BriefDescription": "Op Cache Hit. Counts Op Cache micro-tag hit/miss events", 359 + "UMask": "0x03" 360 + }, 361 + { 362 + "EventName": "l3_request_g1.caching_l3_cache_accesses", 363 + "EventCode": "0x01", 364 + "BriefDescription": "Caching: L3 cache accesses", 365 + "UMask": "0x80", 366 + "Unit": "L3PMC" 367 + }, 368 + { 369 + "EventName": "l3_lookup_state.all_l3_req_typs", 370 + "EventCode": "0x04", 371 + "BriefDescription": "All L3 Request Types. All L3 cache Requests", 372 + "UMask": "0xff", 373 + "Unit": "L3PMC" 374 + }, 375 + { 376 + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", 377 + "EventCode": "0x06", 378 + "BriefDescription": "Other L3 Miss Request Types", 379 + "UMask": "0xfe", 380 + "Unit": "L3PMC" 381 + }, 382 + { 383 + "EventName": "l3_comb_clstr_state.request_miss", 384 + "EventCode": "0x06", 385 + "BriefDescription": "L3 cache misses", 386 + "UMask": "0x01", 387 + "Unit": "L3PMC" 388 + }, 389 + { 390 + "EventName": "xi_sys_fill_latency", 391 + "EventCode": "0x90", 392 + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", 393 + "Unit": "L3PMC" 394 + }, 395 + { 396 + "EventName": "xi_ccx_sdp_req1", 397 + "EventCode": "0x9a", 398 + "BriefDescription": "L3 Misses by Request Type. Ignores SliceID, EnAllSlices, CoreID, EnAllCores and ThreadMask. 
Requires unit mask 0xFF to engage event for counting.", 399 + "UMask": "0xff", 400 + "Unit": "L3PMC" 401 + } 402 + ]
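The event files above are plain JSON arrays consumed by perf's pmu-events build, which keys entries by EventName. A minimal sketch of that structure (the two event names and their codes/masks are copied from the ic_tag_hit_miss entries above; the rest is illustrative):

```python
import json

# Index perf pmu-events entries by EventName, the way a tool inspecting
# these files might. The snippet mirrors two of the entries listed above.
events_json = '''
[
  { "EventName": "ic_tag_hit_miss.instruction_cache_miss",
    "EventCode": "0x18e", "UMask": "0x18" },
  { "EventName": "ic_tag_hit_miss.instruction_cache_hit",
    "EventCode": "0x18e", "UMask": "0x07" }
]
'''

events = {e["EventName"]: e for e in json.loads(events_json)}
miss = events["ic_tag_hit_miss.instruction_cache_miss"]
print(miss["EventCode"], miss["UMask"])  # both sub-events share EventCode 0x18e
```

Note how sub-events of one hardware event share an EventCode and differ only in UMask.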
+137
tools/perf/pmu-events/arch/x86/amdzen3/core.json
··· 1 + [ 2 + { 3 + "EventName": "ex_ret_instr", 4 + "EventCode": "0xc0", 5 + "BriefDescription": "Retired Instructions." 6 + }, 7 + { 8 + "EventName": "ex_ret_ops", 9 + "EventCode": "0xc1", 10 + "BriefDescription": "Retired Ops. Use macro_ops_retired instead.", 11 + "PublicDescription": "The number of macro-ops retired." 12 + }, 13 + { 14 + "EventName": "ex_ret_brn", 15 + "EventCode": "0xc2", 16 + "BriefDescription": "Retired Branch Instructions.", 17 + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 18 + }, 19 + { 20 + "EventName": "ex_ret_brn_misp", 21 + "EventCode": "0xc3", 22 + "BriefDescription": "Retired Branch Instructions Mispredicted.", 23 + "PublicDescription": "The number of retired branch instructions that were mispredicted." 24 + }, 25 + { 26 + "EventName": "ex_ret_brn_tkn", 27 + "EventCode": "0xc4", 28 + "BriefDescription": "Retired Taken Branch Instructions.", 29 + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 30 + }, 31 + { 32 + "EventName": "ex_ret_brn_tkn_misp", 33 + "EventCode": "0xc5", 34 + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", 35 + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." 36 + }, 37 + { 38 + "EventName": "ex_ret_brn_far", 39 + "EventCode": "0xc6", 40 + "BriefDescription": "Retired Far Control Transfers.", 41 + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." 42 + }, 43 + { 44 + "EventName": "ex_ret_brn_resync", 45 + "EventCode": "0xc7", 46 + "BriefDescription": "Retired Branch Resyncs.", 47 + "PublicDescription": "The number of resync branches. 
These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." 48 + }, 49 + { 50 + "EventName": "ex_ret_near_ret", 51 + "EventCode": "0xc8", 52 + "BriefDescription": "Retired Near Returns.", 53 + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." 54 + }, 55 + { 56 + "EventName": "ex_ret_near_ret_mispred", 57 + "EventCode": "0xc9", 58 + "BriefDescription": "Retired Near Returns Mispredicted.", 59 + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." 60 + }, 61 + { 62 + "EventName": "ex_ret_brn_ind_misp", 63 + "EventCode": "0xca", 64 + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", 65 + "PublicDescription": "The number of indirect branches retired that were not correctly predicted. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction. Note that only EX mispredicts are counted." 66 + }, 67 + { 68 + "EventName": "ex_ret_mmx_fp_instr.sse_instr", 69 + "EventCode": "0xcb", 70 + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 71 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS.", 72 + "UMask": "0x04" 73 + }, 74 + { 75 + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", 76 + "EventCode": "0xcb", 77 + "BriefDescription": "MMX instructions.", 78 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. 
The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", 79 + "UMask": "0x02" 80 + }, 81 + { 82 + "EventName": "ex_ret_mmx_fp_instr.x87_instr", 83 + "EventCode": "0xcb", 84 + "BriefDescription": "x87 instructions.", 85 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", 86 + "UMask": "0x01" 87 + }, 88 + { 89 + "EventName": "ex_ret_ind_brch_instr", 90 + "EventCode": "0xcc", 91 + "BriefDescription": "Retired Indirect Branch Instructions. The number of indirect branches retired." 92 + }, 93 + { 94 + "EventName": "ex_ret_cond", 95 + "EventCode": "0xd1", 96 + "BriefDescription": "Retired Conditional Branch Instructions." 97 + }, 98 + { 99 + "EventName": "ex_div_busy", 100 + "EventCode": "0xd3", 101 + "BriefDescription": "Div Cycles Busy count." 102 + }, 103 + { 104 + "EventName": "ex_div_count", 105 + "EventCode": "0xd4", 106 + "BriefDescription": "Div Op Count." 107 + }, 108 + { 109 + "EventName": "ex_ret_msprd_brnch_instr_dir_msmtch", 110 + "EventCode": "0x1c7", 111 + "BriefDescription": "Retired Mispredicted Branch Instructions due to Direction Mismatch", 112 + "PublicDescription": "The number of retired conditional branch instructions that were not correctly predicted because of a branch direction mismatch." 113 + }, 114 + { 115 + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", 116 + "EventCode": "0x1cf", 117 + "BriefDescription": "Tagged IBS Ops. 
Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 118 + "UMask": "0x04" 119 + }, 120 + { 121 + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", 122 + "EventCode": "0x1cf", 123 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 124 + "UMask": "0x02" 125 + }, 126 + { 127 + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", 128 + "EventCode": "0x1cf", 129 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 130 + "UMask": "0x01" 131 + }, 132 + { 133 + "EventName": "ex_ret_fused_instr", 134 + "EventCode": "0x1d0", 135 + "BriefDescription": "Counts retired Fused Instructions." 136 + } 137 + ]
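Events like the ones above can also be requested through perf's raw rNNN syntax before a perf build picks up these JSON files. A small sketch of the config packing, assuming the AMD core-PMU layout (event select bits [7:0] in config[7:0], bits [11:8] in config[35:32], unit mask in config[15:8]); that layout is stated here as an assumption, not something this patch defines:

```python
def amd_raw_config(event_code: int, umask: int = 0) -> int:
    """Pack an (EventCode, UMask) pair into a raw perf config value
    under the assumed AMD core-PMU bit layout described above."""
    return ((event_code & 0xF00) << 24) | ((umask & 0xFF) << 8) | (event_code & 0xFF)

# ex_ret_instr: EventCode 0xc0, no UMask -> perf stat -e rc0
print(hex(amd_raw_config(0xC0)))         # 0xc0
# ex_tagged_ibs_ops.ibs_tagged_ops_ret: EventCode 0x1cf, UMask 0x02
# (the extra event-select nibble lands in config bits 32-35)
print(hex(amd_raw_config(0x1CF, 0x02)))  # 0x1000002cf
```

Event codes above 0xff (0x1c7, 0x1cf, 0x1d0) are exactly the cases where the high event-select bits matter.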
+98
tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json
··· 1 + [ 2 + { 3 + "EventName": "remote_outbound_data_controller_0", 4 + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", 5 + "EventCode": "0x7c7", 6 + "UMask": "0x02", 7 + "PerPkg": "1", 8 + "Unit": "DFPMC" 9 + }, 10 + { 11 + "EventName": "remote_outbound_data_controller_1", 12 + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", 13 + "EventCode": "0x807", 14 + "UMask": "0x02", 15 + "PerPkg": "1", 16 + "Unit": "DFPMC" 17 + }, 18 + { 19 + "EventName": "remote_outbound_data_controller_2", 20 + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", 21 + "EventCode": "0x847", 22 + "UMask": "0x02", 23 + "PerPkg": "1", 24 + "Unit": "DFPMC" 25 + }, 26 + { 27 + "EventName": "remote_outbound_data_controller_3", 28 + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", 29 + "EventCode": "0x887", 30 + "UMask": "0x02", 31 + "PerPkg": "1", 32 + "Unit": "DFPMC" 33 + }, 34 + { 35 + "EventName": "dram_channel_data_controller_0", 36 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", 37 + "EventCode": "0x07", 38 + "UMask": "0x38", 39 + "PerPkg": "1", 40 + "Unit": "DFPMC" 41 + }, 42 + { 43 + "EventName": "dram_channel_data_controller_1", 44 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 1", 45 + "EventCode": "0x47", 46 + "UMask": "0x38", 47 + "PerPkg": "1", 48 + "Unit": "DFPMC" 49 + }, 50 + { 51 + "EventName": "dram_channel_data_controller_2", 52 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 2", 53 + "EventCode": "0x87", 54 + "UMask": "0x38", 55 + "PerPkg": "1", 56 + "Unit": "DFPMC" 57 + }, 58 + { 59 + "EventName": "dram_channel_data_controller_3", 60 + 
"PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 3", 61 + "EventCode": "0xc7", 62 + "UMask": "0x38", 63 + "PerPkg": "1", 64 + "Unit": "DFPMC" 65 + }, 66 + { 67 + "EventName": "dram_channel_data_controller_4", 68 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 4", 69 + "EventCode": "0x107", 70 + "UMask": "0x38", 71 + "PerPkg": "1", 72 + "Unit": "DFPMC" 73 + }, 74 + { 75 + "EventName": "dram_channel_data_controller_5", 76 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 5", 77 + "EventCode": "0x147", 78 + "UMask": "0x38", 79 + "PerPkg": "1", 80 + "Unit": "DFPMC" 81 + }, 82 + { 83 + "EventName": "dram_channel_data_controller_6", 84 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 6", 85 + "EventCode": "0x187", 86 + "UMask": "0x38", 87 + "PerPkg": "1", 88 + "Unit": "DFPMC" 89 + }, 90 + { 91 + "EventName": "dram_channel_data_controller_7", 92 + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 7", 93 + "EventCode": "0x1c7", 94 + "UMask": "0x38", 95 + "PerPkg": "1", 96 + "Unit": "DFPMC" 97 + } 98 + ]
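The dram_channel_data_controller_N event codes above are spaced 0x40 apart (0x07, 0x47, ..., 0x1c7), so the channel index can be recovered from the code. A quick sketch; the 0x40 stride is read off the listing, not a documented contract:

```python
def dram_channel(event_code: int) -> int:
    """Map a dram_channel_data_controller_N EventCode back to its
    channel index, assuming the 0x40 stride seen in the listing."""
    assert (event_code - 0x07) % 0x40 == 0, "unexpected event code"
    return (event_code - 0x07) // 0x40

codes = (0x07, 0x47, 0x87, 0xC7, 0x107, 0x147, 0x187, 0x1C7)
print([dram_channel(c) for c in codes])  # [0, 1, 2, 3, 4, 5, 6, 7]
```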
+139
tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json
··· 1 + [ 2 + { 3 + "EventName": "fpu_pipe_assignment.total", 4 + "EventCode": "0x00", 5 + "BriefDescription": "Total number of fp uOps.", 6 + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", 7 + "UMask": "0x0f" 8 + }, 9 + { 10 + "EventName": "fpu_pipe_assignment.total3", 11 + "EventCode": "0x00", 12 + "BriefDescription": "Total number uOps assigned to pipe 3.", 13 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", 14 + "UMask": "0x08" 15 + }, 16 + { 17 + "EventName": "fpu_pipe_assignment.total2", 18 + "EventCode": "0x00", 19 + "BriefDescription": "Total number uOps assigned to pipe 2.", 20 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. 
Total number uOps assigned to pipe 2.", 21 + "UMask": "0x04" 22 + }, 23 + { 24 + "EventName": "fpu_pipe_assignment.total1", 25 + "EventCode": "0x00", 26 + "BriefDescription": "Total number uOps assigned to pipe 1.", 27 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", 28 + "UMask": "0x02" 29 + }, 30 + { 31 + "EventName": "fpu_pipe_assignment.total0", 32 + "EventCode": "0x00", 33 + "BriefDescription": "Total number of fp uOps on pipe 0.", 34 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", 35 + "UMask": "0x01" 36 + }, 37 + { 38 + "EventName": "fp_ret_sse_avx_ops.all", 39 + "EventCode": "0x03", 40 + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 41 + "UMask": "0xff" 42 + }, 43 + { 44 + "EventName": "fp_ret_sse_avx_ops.mac_flops", 45 + "EventCode": "0x03", 46 + "BriefDescription": "Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS. This is a retire-based event. 
The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", 47 + "UMask": "0x08" 48 + }, 49 + { 50 + "EventName": "fp_ret_sse_avx_ops.div_flops", 51 + "EventCode": "0x03", 52 + "BriefDescription": "Divide/square root FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", 53 + "UMask": "0x04" 54 + }, 55 + { 56 + "EventName": "fp_ret_sse_avx_ops.mult_flops", 57 + "EventCode": "0x03", 58 + "BriefDescription": "Multiply FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", 59 + "UMask": "0x02" 60 + }, 61 + { 62 + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", 63 + "EventCode": "0x03", 64 + "BriefDescription": "Add/subtract FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", 65 + "UMask": "0x01" 66 + }, 67 + { 68 + "EventName": "fp_num_mov_elim_scal_op.optimized", 69 + "EventCode": "0x04", 70 + "BriefDescription": "Number of Scalar Ops optimized. 
This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 71 + "UMask": "0x08" 72 + }, 73 + { 74 + "EventName": "fp_num_mov_elim_scal_op.opt_potential", 75 + "EventCode": "0x04", 76 + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 77 + "UMask": "0x04" 78 + }, 79 + { 80 + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", 81 + "EventCode": "0x04", 82 + "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 83 + "UMask": "0x02" 84 + }, 85 + { 86 + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", 87 + "EventCode": "0x04", 88 + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 89 + "UMask": "0x01" 90 + }, 91 + { 92 + "EventName": "fp_retired_ser_ops.sse_bot_ret", 93 + "EventCode": "0x05", 94 + "BriefDescription": "SSE/AVX bottom-executing ops retired. The number of serializing Ops retired.", 95 + "UMask": "0x08" 96 + }, 97 + { 98 + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", 99 + "EventCode": "0x05", 100 + "BriefDescription": "SSE/AVX control word mispredict traps. The number of serializing Ops retired.", 101 + "UMask": "0x04" 102 + }, 103 + { 104 + "EventName": "fp_retired_ser_ops.x87_bot_ret", 105 + "EventCode": "0x05", 106 + "BriefDescription": "x87 bottom-executing ops retired. 
The number of serializing Ops retired.", 107 + "UMask": "0x02" 108 + }, 109 + { 110 + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", 111 + "EventCode": "0x05", 112 + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", 113 + "UMask": "0x01" 114 + }, 115 + { 116 + "EventName": "fp_disp_faults.ymm_spill_fault", 117 + "EventCode": "0x0e", 118 + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", 119 + "UMask": "0x08" 120 + }, 121 + { 122 + "EventName": "fp_disp_faults.ymm_fill_fault", 123 + "EventCode": "0x0e", 124 + "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", 125 + "UMask": "0x04" 126 + }, 127 + { 128 + "EventName": "fp_disp_faults.xmm_fill_fault", 129 + "EventCode": "0x0e", 130 + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", 131 + "UMask": "0x02" 132 + }, 133 + { 134 + "EventName": "fp_disp_faults.x87_fill_fault", 135 + "EventCode": "0x0e", 136 + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", 137 + "UMask": "0x01" 138 + } 139 + ]
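As the UMask values above suggest, an aggregate mask such as fpu_pipe_assignment.total (0x0f) is the OR of its per-pipe sub-event masks. A quick consistency check in that spirit:

```python
# fpu_pipe_assignment sub-event masks from the listing above:
# pipe 3 = 0x08, pipe 2 = 0x04, pipe 1 = 0x02, pipe 0 = 0x01.
pipes = [0x08, 0x04, 0x02, 0x01]

total = 0
for mask in pipes:
    total |= mask  # OR the per-pipe masks together

print(hex(total))  # 0xf, matching the UMask of fpu_pipe_assignment.total
```

The same pattern holds for fp_disp_faults (0x08|0x04|0x02|0x01), though not every aggregate mask in these files is a plain OR of the sub-events listed; fp_ret_sse_avx_ops.all (0xff) covers bits with no named sub-event here.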
+428
tools/perf/pmu-events/arch/x86/amdzen3/memory.json
··· 1 + [ 2 + { 3 + "EventName": "ls_bad_status2.stli_other", 4 + "EventCode": "0x24", 5 + "BriefDescription": "Non-forwardable conflict; used to reduce STLIs via software. All reasons. Store To Load Interlocks (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", 6 + "PublicDescription": "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", 7 + "UMask": "0x02" 8 + }, 9 + { 10 + "EventName": "ls_locks.spec_lock_hi_spec", 11 + "EventCode": "0x25", 12 + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", 13 + "UMask": "0x08" 14 + }, 15 + { 16 + "EventName": "ls_locks.spec_lock_lo_spec", 17 + "EventCode": "0x25", 18 + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", 19 + "UMask": "0x04" 20 + }, 21 + { 22 + "EventName": "ls_locks.non_spec_lock", 23 + "EventCode": "0x25", 24 + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", 25 + "UMask": "0x02" 26 + }, 27 + { 28 + "EventName": "ls_locks.bus_lock", 29 + "EventCode": "0x25", 30 + "BriefDescription": "Retired lock instructions. Comparable to legacy bus lock.", 31 + "UMask": "0x01" 32 + }, 33 + { 34 + "EventName": "ls_ret_cl_flush", 35 + "EventCode": "0x26", 36 + "BriefDescription": "The number of retired CLFLUSH instructions. 
This is a non-speculative event." 37 + }, 38 + { 39 + "EventName": "ls_ret_cpuid", 40 + "EventCode": "0x27", 41 + "BriefDescription": "The number of CPUID instructions retired." 42 + }, 43 + { 44 + "EventName": "ls_dispatch.ld_st_dispatch", 45 + "EventCode": "0x29", 46 + "BriefDescription": "Load-op-Store Dispatch. Dispatch of a single op that performs a load from and store to the same memory address. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 47 + "UMask": "0x04" 48 + }, 49 + { 50 + "EventName": "ls_dispatch.store_dispatch", 51 + "EventCode": "0x29", 52 + "BriefDescription": "Dispatch of a single op that performs a memory store. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 53 + "UMask": "0x02" 54 + }, 55 + { 56 + "EventName": "ls_dispatch.ld_dispatch", 57 + "EventCode": "0x29", 58 + "BriefDescription": "Dispatch of a single op that performs a memory load. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 59 + "UMask": "0x01" 60 + }, 61 + { 62 + "EventName": "ls_smi_rx", 63 + "EventCode": "0x2b", 64 + "BriefDescription": "Counts the number of SMIs received." 65 + }, 66 + { 67 + "EventName": "ls_int_taken", 68 + "EventCode": "0x2c", 69 + "BriefDescription": "Counts the number of interrupts taken." 70 + }, 71 + { 72 + "EventName": "ls_rdtsc", 73 + "EventCode": "0x2d", 74 + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." 75 + }, 76 + { 77 + "EventName": "ls_stlf", 78 + "EventCode": "0x35", 79 + "BriefDescription": "Number of STLF hits." 
80 + }, 81 + { 82 + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", 83 + "EventCode": "0x37", 84 + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full.", 85 + "UMask": "0x01" 86 + }, 87 + { 88 + "EventName": "ls_dc_accesses", 89 + "EventCode": "0x40", 90 + "BriefDescription": "Number of accesses to the dcache for load/store references.", 91 + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." 92 + }, 93 + { 94 + "EventName": "ls_mab_alloc.all_allocations", 95 + "EventCode": "0x41", 96 + "BriefDescription": "All Allocations. Counts when a LS pipe allocates a MAB entry.", 97 + "UMask": "0x7f" 98 + }, 99 + { 100 + "EventName": "ls_mab_alloc.hardware_prefetcher_allocations", 101 + "EventCode": "0x41", 102 + "BriefDescription": "Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB entry.", 103 + "UMask": "0x40" 104 + }, 105 + { 106 + "EventName": "ls_mab_alloc.load_store_allocations", 107 + "EventCode": "0x41", 108 + "BriefDescription": "Load Store Allocations. Counts when a LS pipe allocates a MAB entry.", 109 + "UMask": "0x3f" 110 + }, 111 + { 112 + "EventName": "ls_mab_alloc.dc_prefetcher", 113 + "EventCode": "0x41", 114 + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", 115 + "UMask": "0x08" 116 + }, 117 + { 118 + "EventName": "ls_mab_alloc.stores", 119 + "EventCode": "0x41", 120 + "BriefDescription": "LS MAB Allocates by Type. Stores.", 121 + "UMask": "0x02" 122 + }, 123 + { 124 + "EventName": "ls_mab_alloc.loads", 125 + "EventCode": "0x41", 126 + "BriefDescription": "LS MAB Allocates by Type. 
Loads.", 127 + "UMask": "0x01" 128 + }, 129 + { 130 + "EventName": "ls_dmnd_fills_from_sys.mem_io_remote", 131 + "EventCode": "0x43", 132 + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", 133 + "UMask": "0x40" 134 + }, 135 + { 136 + "EventName": "ls_dmnd_fills_from_sys.ext_cache_remote", 137 + "EventCode": "0x43", 138 + "BriefDescription": "Demand Data Cache Fills by Data Source. From CCX Cache in different Node.", 139 + "UMask": "0x10" 140 + }, 141 + { 142 + "EventName": "ls_dmnd_fills_from_sys.mem_io_local", 143 + "EventCode": "0x43", 144 + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in same node.", 145 + "UMask": "0x08" 146 + }, 147 + { 148 + "EventName": "ls_dmnd_fills_from_sys.ext_cache_local", 149 + "EventCode": "0x43", 150 + "BriefDescription": "Demand Data Cache Fills by Data Source. From cache of different CCX in same node.", 151 + "UMask": "0x04" 152 + }, 153 + { 154 + "EventName": "ls_dmnd_fills_from_sys.int_cache", 155 + "EventCode": "0x43", 156 + "BriefDescription": "Demand Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", 157 + "UMask": "0x02" 158 + }, 159 + { 160 + "EventName": "ls_dmnd_fills_from_sys.lcl_l2", 161 + "EventCode": "0x43", 162 + "BriefDescription": "Demand Data Cache Fills by Data Source. From Local L2 to the core.", 163 + "UMask": "0x01" 164 + }, 165 + { 166 + "EventName": "ls_any_fills_from_sys.mem_io_remote", 167 + "EventCode": "0x44", 168 + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", 169 + "UMask": "0x40" 170 + }, 171 + { 172 + "EventName": "ls_any_fills_from_sys.ext_cache_remote", 173 + "EventCode": "0x44", 174 + "BriefDescription": "Any Data Cache Fills by Data Source. 
From CCX Cache in different Node.", 175 + "UMask": "0x10" 176 + }, 177 + { 178 + "EventName": "ls_any_fills_from_sys.mem_io_local", 179 + "EventCode": "0x44", 180 + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in same node.", 181 + "UMask": "0x08" 182 + }, 183 + { 184 + "EventName": "ls_any_fills_from_sys.ext_cache_local", 185 + "EventCode": "0x44", 186 + "BriefDescription": "Any Data Cache Fills by Data Source. From cache of different CCX in same node.", 187 + "UMask": "0x04" 188 + }, 189 + { 190 + "EventName": "ls_any_fills_from_sys.int_cache", 191 + "EventCode": "0x44", 192 + "BriefDescription": "Any Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", 193 + "UMask": "0x02" 194 + }, 195 + { 196 + "EventName": "ls_any_fills_from_sys.lcl_l2", 197 + "EventCode": "0x44", 198 + "BriefDescription": "Any Data Cache Fills by Data Source. From Local L2 to the core.", 199 + "UMask": "0x01" 200 + }, 201 + { 202 + "EventName": "ls_l1_d_tlb_miss.all", 203 + "EventCode": "0x45", 204 + "BriefDescription": "All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead.", 205 + "UMask": "0xff" 206 + }, 207 + { 208 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 209 + "EventCode": "0x45", 210 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB.", 211 + "UMask": "0x80" 212 + }, 213 + { 214 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 215 + "EventCode": "0x45", 216 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB.", 217 + "UMask": "0x40" 218 + }, 219 + { 220 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 221 + "EventCode": "0x45", 222 + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2 TLB.", 223 + "UMask": "0x20" 224 + }, 225 + { 226 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 227 + "EventCode": "0x45", 228 + "BriefDescription": "L1 DTLB Miss. 
DTLB reload to a 4K page that missed the L2 TLB.", 229 + "UMask": "0x10" 230 + }, 231 + { 232 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 233 + "EventCode": "0x45", 234 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", 235 + "UMask": "0x08" 236 + }, 237 + { 238 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 239 + "EventCode": "0x45", 240 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", 241 + "UMask": "0x04" 242 + }, 243 + { 244 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 245 + "EventCode": "0x45", 246 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB.", 247 + "UMask": "0x02" 248 + }, 249 + { 250 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 251 + "EventCode": "0x45", 252 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", 253 + "UMask": "0x01" 254 + }, 255 + { 256 + "EventName": "ls_tablewalker.iside", 257 + "EventCode": "0x46", 258 + "BriefDescription": "Total Page Table Walks on I-side.", 259 + "UMask": "0x0c" 260 + }, 261 + { 262 + "EventName": "ls_tablewalker.ic_type1", 263 + "EventCode": "0x46", 264 + "BriefDescription": "Total Page Table Walks IC Type 1.", 265 + "UMask": "0x08" 266 + }, 267 + { 268 + "EventName": "ls_tablewalker.ic_type0", 269 + "EventCode": "0x46", 270 + "BriefDescription": "Total Page Table Walks IC Type 0.", 271 + "UMask": "0x04" 272 + }, 273 + { 274 + "EventName": "ls_tablewalker.dside", 275 + "EventCode": "0x46", 276 + "BriefDescription": "Total Page Table Walks on D-side.", 277 + "UMask": "0x03" 278 + }, 279 + { 280 + "EventName": "ls_tablewalker.dc_type1", 281 + "EventCode": "0x46", 282 + "BriefDescription": "Total Page Table Walks DC Type 1.", 283 + "UMask": "0x02" 284 + }, 285 + { 286 + "EventName": "ls_tablewalker.dc_type0", 287 + "EventCode": "0x46", 288 + "BriefDescription": "Total Page Table Walks DC Type 0.", 289 + "UMask": 
"0x01" 290 + }, 291 + { 292 + "EventName": "ls_misal_loads.ma4k", 293 + "EventCode": "0x47", 294 + "BriefDescription": "The number of 4KB misaligned (i.e., page crossing) loads.", 295 + "UMask": "0x02" 296 + }, 297 + { 298 + "EventName": "ls_misal_loads.ma64", 299 + "EventCode": "0x47", 300 + "BriefDescription": "The number of 64B misaligned (i.e., cacheline crossing) loads.", 301 + "UMask": "0x01" 302 + }, 303 + { 304 + "EventName": "ls_pref_instr_disp", 305 + "EventCode": "0x4b", 306 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", 307 + "UMask": "0xff" 308 + }, 309 + { 310 + "EventName": "ls_pref_instr_disp.prefetch_nta", 311 + "EventCode": "0x4b", 312 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", 313 + "UMask": "0x04" 314 + }, 315 + { 316 + "EventName": "ls_pref_instr_disp.prefetch_w", 317 + "EventCode": "0x4b", 318 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchW instruction. See docAPM3 PREFETCHW.", 319 + "UMask": "0x02" 320 + }, 321 + { 322 + "EventName": "ls_pref_instr_disp.prefetch", 323 + "EventCode": "0x4b", 324 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", 325 + "UMask": "0x01" 326 + }, 327 + { 328 + "EventName": "ls_inef_sw_pref.mab_mch_cnt", 329 + "EventCode": "0x52", 330 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", 331 + "UMask": "0x02" 332 + }, 333 + { 334 + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 335 + "EventCode": "0x52", 336 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. 
Software PREFETCH instruction saw a DC hit.", 337 + "UMask": "0x01" 338 + }, 339 + { 340 + "EventName": "ls_sw_pf_dc_fills.mem_io_remote", 341 + "EventCode": "0x59", 342 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", 343 + "UMask": "0x40" 344 + }, 345 + { 346 + "EventName": "ls_sw_pf_dc_fills.ext_cache_remote", 347 + "EventCode": "0x59", 348 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From CCX Cache in different Node.", 349 + "UMask": "0x10" 350 + }, 351 + { 352 + "EventName": "ls_sw_pf_dc_fills.mem_io_local", 353 + "EventCode": "0x59", 354 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", 355 + "UMask": "0x08" 356 + }, 357 + { 358 + "EventName": "ls_sw_pf_dc_fills.ext_cache_local", 359 + "EventCode": "0x59", 360 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", 361 + "UMask": "0x04" 362 + }, 363 + { 364 + "EventName": "ls_sw_pf_dc_fills.int_cache", 365 + "EventCode": "0x59", 366 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", 367 + "UMask": "0x02" 368 + }, 369 + { 370 + "EventName": "ls_sw_pf_dc_fills.lcl_l2", 371 + "EventCode": "0x59", 372 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", 373 + "UMask": "0x01" 374 + }, 375 + { 376 + "EventName": "ls_hw_pf_dc_fills.mem_io_remote", 377 + "EventCode": "0x5a", 378 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", 379 + "UMask": "0x40" 380 + }, 381 + { 382 + "EventName": "ls_hw_pf_dc_fills.ext_cache_remote", 383 + "EventCode": "0x5a", 384 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. 
From CCX Cache in different Node.", 385 + "UMask": "0x10" 386 + }, 387 + { 388 + "EventName": "ls_hw_pf_dc_fills.mem_io_local", 389 + "EventCode": "0x5a", 390 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", 391 + "UMask": "0x08" 392 + }, 393 + { 394 + "EventName": "ls_hw_pf_dc_fills.ext_cache_local", 395 + "EventCode": "0x5a", 396 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", 397 + "UMask": "0x04" 398 + }, 399 + { 400 + "EventName": "ls_hw_pf_dc_fills.int_cache", 401 + "EventCode": "0x5a", 402 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", 403 + "UMask": "0x02" 404 + }, 405 + { 406 + "EventName": "ls_hw_pf_dc_fills.lcl_l2", 407 + "EventCode": "0x5a", 408 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", 409 + "UMask": "0x01" 410 + }, 411 + { 412 + "EventName": "ls_alloc_mab_count", 413 + "EventCode": "0x5f", 414 + "BriefDescription": "Count of Allocated Mabs", 415 + "PublicDescription": "This event counts the in-flight L1 data cache misses (allocated Miss Address Buffers) divided by 4 and rounded down each cycle unless used with the MergeEvent functionality. If the MergeEvent is used, it counts the exact number of outstanding L1 data cache misses. See 2.1.17.3 [Large Increment per Cycle Events]." 416 + }, 417 + { 418 + "EventName": "ls_not_halted_cyc", 419 + "EventCode": "0x76", 420 + "BriefDescription": "Cycles not in Halt." 421 + }, 422 + { 423 + "EventName": "ls_tlb_flush.all_tlb_flushes", 424 + "EventCode": "0x78", 425 + "BriefDescription": "All TLB Flushes. Requires unit mask 0xFF to engage event for counting. Use all_tlbs_flushed instead", 426 + "UMask": "0xff" 427 + } 428 + ]
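The `UMask` fields in the JSON entries above are bitmasks: each "fills by data source" sub-event selects one bit, and an aggregate event (e.g. `ls_l1_d_tlb_miss.all` with `0xff`) is the OR of its sub-event masks. A minimal sketch of how these unit-mask bits compose (the sub-event names and masks are taken from the `ls_*_fills` entries above; this is illustrative, not perf code):

```python
# Unit-mask bit per data source, as listed in the amdzen3 JSON above.
SUB_UMASKS = {
    "mem_io_remote":    0x40,
    "ext_cache_remote": 0x10,
    "mem_io_local":     0x08,
    "ext_cache_local":  0x04,
    "int_cache":        0x02,
    "lcl_l2":           0x01,
}

def combined_umask(sources):
    """OR together the unit-mask bits of the selected data sources."""
    mask = 0
    for s in sources:
        mask |= SUB_UMASKS[s]
    return mask

# All same-node sources, excluding the two remote ones:
local = combined_umask(["mem_io_local", "ext_cache_local",
                        "int_cache", "lcl_l2"])
print(hex(local))  # 0xf
```

Counting with the OR of all source bits (`0x5f` here) is equivalent to programming one counter with the aggregate mask instead of one counter per source.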
+103
tools/perf/pmu-events/arch/x86/amdzen3/other.json
··· 1 + [ 2 + { 3 + "EventName": "de_dis_uop_queue_empty_di0", 4 + "EventCode": "0xa9", 5 + "BriefDescription": "Cycles where the Micro-Op Queue is empty." 6 + }, 7 + { 8 + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", 9 + "EventCode": "0xab", 10 + "BriefDescription": "Any Integer dispatch. Types of Oops Dispatched from Decoder.", 11 + "UMask": "0x08" 12 + }, 13 + { 14 + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", 15 + "EventCode": "0xab", 16 + "BriefDescription": "Any FP dispatch. Types of Oops Dispatched from Decoder.", 17 + "UMask": "0x04" 18 + }, 19 + { 20 + "EventName": "de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", 21 + "EventCode": "0xae", 22 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP Flush recovery stall.", 23 + "UMask": "0x80" 24 + }, 25 + { 26 + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 27 + "EventCode": "0xae", 28 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", 29 + "UMask": "0x40" 30 + }, 31 + { 32 + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 33 + "EventCode": "0xae", 34 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Floating point register file resource stall. 
Applies to all FP ops that have a destination register.", 35 + "UMask": "0x20" 36 + }, 37 + { 38 + "EventName": "de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", 39 + "EventCode": "0xae", 40 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Taken branch buffer resource stall.", 41 + "UMask": "0x10" 42 + }, 43 + { 44 + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 45 + "EventCode": "0xae", 46 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", 47 + "UMask": "0x08" 48 + }, 49 + { 50 + "EventName": "de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", 51 + "EventCode": "0xae", 52 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Store Queue resource stall. Applies to all ops with store semantics.", 53 + "UMask": "0x04" 54 + }, 55 + { 56 + "EventName": "de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", 57 + "EventCode": "0xae", 58 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Load Queue resource stall. Applies to all ops with load semantics.", 59 + "UMask": "0x02" 60 + }, 61 + { 62 + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", 63 + "EventCode": "0xae", 64 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. 
Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Integer Physical Register File resource stall. Integer Physical Register File, applies to all ops that have an integer destination register.", 65 + "UMask": "0x01" 66 + }, 67 + { 68 + "EventName": "de_dis_dispatch_token_stalls2.retire_token_stall", 69 + "EventCode": "0xaf", 70 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Insufficient Retire Queue tokens available.", 71 + "UMask": "0x20" 72 + }, 73 + { 74 + "EventName": "de_dis_dispatch_token_stalls2.agsq_token_stall", 75 + "EventCode": "0xaf", 76 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", 77 + "UMask": "0x10" 78 + }, 79 + { 80 + "EventName": "de_dis_dispatch_token_stalls2.int_sch3_token_stall", 81 + "EventCode": "0xaf", 82 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 3 available.", 83 + "UMask": "0x08" 84 + }, 85 + { 86 + "EventName": "de_dis_dispatch_token_stalls2.int_sch2_token_stall", 87 + "EventCode": "0xaf", 88 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 2 available.", 89 + "UMask": "0x04" 90 + }, 91 + { 92 + "EventName": "de_dis_dispatch_token_stalls2.int_sch1_token_stall", 93 + "EventCode": "0xaf", 94 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 1 available.", 95 + "UMask": "0x02" 96 + }, 97 + { 98 + "EventName": "de_dis_dispatch_token_stalls2.int_sch0_token_stall", 99 + "EventCode": "0xaf", 100 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
No tokens for Integer Scheduler Queue 0 available.", 101 + "UMask": "0x01" 102 + } 103 + ]
+214
tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
··· 1 + [ 2 + { 3 + "MetricName": "branch_misprediction_ratio", 4 + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", 5 + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", 6 + "MetricGroup": "branch_prediction", 7 + "ScaleUnit": "100%" 8 + }, 9 + { 10 + "EventName": "all_data_cache_accesses", 11 + "EventCode": "0x29", 12 + "BriefDescription": "All L1 Data Cache Accesses", 13 + "UMask": "0x07" 14 + }, 15 + { 16 + "MetricName": "all_l2_cache_accesses", 17 + "BriefDescription": "All L2 Cache Accesses", 18 + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 19 + "MetricGroup": "l2_cache" 20 + }, 21 + { 22 + "EventName": "l2_cache_accesses_from_ic_misses", 23 + "EventCode": "0x60", 24 + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", 25 + "UMask": "0x10" 26 + }, 27 + { 28 + "EventName": "l2_cache_accesses_from_dc_misses", 29 + "EventCode": "0x60", 30 + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", 31 + "UMask": "0xe8" 32 + }, 33 + { 34 + "MetricName": "l2_cache_accesses_from_l2_hwpf", 35 + "BriefDescription": "L2 Cache Accesses from L2 HWPF", 36 + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 37 + "MetricGroup": "l2_cache" 38 + }, 39 + { 40 + "MetricName": "all_l2_cache_misses", 41 + "BriefDescription": "All L2 Cache Misses", 42 + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 43 + "MetricGroup": "l2_cache" 44 + }, 45 + { 46 + "EventName": "l2_cache_misses_from_ic_miss", 47 + "EventCode": "0x64", 48 + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", 49 + "UMask": "0x01" 50 + }, 51 + { 52 + "EventName": "l2_cache_misses_from_dc_misses", 53 + "EventCode": "0x64", 54 + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", 55 + "UMask": "0x08" 56 + }, 57 + { 58 + "MetricName": 
"l2_cache_misses_from_l2_hwpf", 59 + "BriefDescription": "L2 Cache Misses from L2 Cache HWPF", 60 + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", 61 + "MetricGroup": "l2_cache" 62 + }, 63 + { 64 + "MetricName": "all_l2_cache_hits", 65 + "BriefDescription": "All L2 Cache Hits", 66 + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", 67 + "MetricGroup": "l2_cache" 68 + }, 69 + { 70 + "EventName": "l2_cache_hits_from_ic_misses", 71 + "EventCode": "0x64", 72 + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", 73 + "UMask": "0x06" 74 + }, 75 + { 76 + "EventName": "l2_cache_hits_from_dc_misses", 77 + "EventCode": "0x64", 78 + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", 79 + "UMask": "0xf0" 80 + }, 81 + { 82 + "EventName": "l2_cache_hits_from_l2_hwpf", 83 + "EventCode": "0x70", 84 + "BriefDescription": "L2 Cache Hits from L2 Cache HWPF", 85 + "UMask": "0xff" 86 + }, 87 + { 88 + "EventName": "l3_cache_accesses", 89 + "EventCode": "0x04", 90 + "BriefDescription": "L3 Cache Accesses", 91 + "UMask": "0xff", 92 + "Unit": "L3PMC" 93 + }, 94 + { 95 + "EventName": "l3_misses", 96 + "EventCode": "0x04", 97 + "BriefDescription": "L3 Misses (includes cacheline state change requests)", 98 + "UMask": "0x01", 99 + "Unit": "L3PMC" 100 + }, 101 + { 102 + "MetricName": "l3_read_miss_latency", 103 + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", 104 + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1", 105 + "MetricGroup": "l3_cache", 106 + "ScaleUnit": "1core clocks" 107 + }, 108 + { 109 + "MetricName": "op_cache_fetch_miss_ratio", 110 + "BriefDescription": "Op Cache (64B) Fetch Miss Ratio", 111 + "MetricExpr": "d_ratio(op_cache_hit_miss.op_cache_miss, op_cache_hit_miss.all_op_cache_accesses)", 112 + "MetricGroup": "l2_cache" 113 + }, 114 + { 115 + "MetricName": "ic_fetch_miss_ratio", 116 + "BriefDescription": "Instruction Cache (32B) Fetch Miss Ratio", 117 + "MetricExpr": 
"d_ratio(ic_tag_hit_miss.instruction_cache_miss, ic_tag_hit_miss.all_instruction_cache_accesses)", 118 + "MetricGroup": "l2_cache", 119 + "ScaleUnit": "100%" 120 + }, 121 + { 122 + "EventName": "l1_data_cache_fills_from_memory", 123 + "EventCode": "0x44", 124 + "BriefDescription": "L1 Data Cache Fills: From Memory", 125 + "UMask": "0x48" 126 + }, 127 + { 128 + "EventName": "l1_data_cache_fills_from_remote_node", 129 + "EventCode": "0x44", 130 + "BriefDescription": "L1 Data Cache Fills: From Remote Node", 131 + "UMask": "0x50" 132 + }, 133 + { 134 + "EventName": "l1_data_cache_fills_from_within_same_ccx", 135 + "EventCode": "0x44", 136 + "BriefDescription": "L1 Data Cache Fills: From within same CCX", 137 + "UMask": "0x03" 138 + }, 139 + { 140 + "EventName": "l1_data_cache_fills_from_external_ccx_cache", 141 + "EventCode": "0x44", 142 + "BriefDescription": "L1 Data Cache Fills: From External CCX Cache", 143 + "UMask": "0x14" 144 + }, 145 + { 146 + "EventName": "l1_data_cache_fills_all", 147 + "EventCode": "0x44", 148 + "BriefDescription": "L1 Data Cache Fills: All", 149 + "UMask": "0xff" 150 + }, 151 + { 152 + "MetricName": "l1_itlb_misses", 153 + "BriefDescription": "L1 ITLB Misses", 154 + "MetricExpr": "bp_l1_tlb_miss_l2_tlb_hit + bp_l1_tlb_miss_l2_tlb_miss", 155 + "MetricGroup": "tlb" 156 + }, 157 + { 158 + "EventName": "l2_itlb_misses", 159 + "EventCode": "0x85", 160 + "BriefDescription": "L2 ITLB Misses & Instruction page walks", 161 + "UMask": "0x07" 162 + }, 163 + { 164 + "EventName": "l1_dtlb_misses", 165 + "EventCode": "0x45", 166 + "BriefDescription": "L1 DTLB Misses", 167 + "UMask": "0xff" 168 + }, 169 + { 170 + "EventName": "l2_dtlb_misses", 171 + "EventCode": "0x45", 172 + "BriefDescription": "L2 DTLB Misses & Data page walks", 173 + "UMask": "0xf0" 174 + }, 175 + { 176 + "EventName": "all_tlbs_flushed", 177 + "EventCode": "0x78", 178 + "BriefDescription": "All TLBs Flushed", 179 + "UMask": "0xff" 180 + }, 181 + { 182 + "MetricName": 
"macro_ops_dispatched", 183 + "BriefDescription": "Macro-ops Dispatched", 184 + "MetricExpr": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch + de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", 185 + "MetricGroup": "decoder" 186 + }, 187 + { 188 + "EventName": "sse_avx_stalls", 189 + "EventCode": "0x0e", 190 + "BriefDescription": "Mixed SSE/AVX Stalls", 191 + "UMask": "0x0e" 192 + }, 193 + { 194 + "EventName": "macro_ops_retired", 195 + "EventCode": "0xc1", 196 + "BriefDescription": "Macro-ops Retired" 197 + }, 198 + { 199 + "MetricName": "all_remote_links_outbound", 200 + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", 201 + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", 202 + "MetricGroup": "data_fabric", 203 + "PerPkg": "1", 204 + "ScaleUnit": "3e-5MiB" 205 + }, 206 + { 207 + "MetricName": "nps1_die_to_dram", 208 + "BriefDescription": "Approximate: Combined DRAM B/W of all channels on a NPS1 node (die) (may need --metric-no-group)", 209 + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", 210 + "MetricGroup": "data_fabric", 211 + "PerPkg": "1", 212 + "ScaleUnit": "6.1e-5MiB" 213 + } 214 + ]
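The `MetricExpr` entries in recommended.json combine raw event counts with a small expression language; `d_ratio` is a divide that yields 0 instead of failing on a zero denominator, and `ScaleUnit` controls presentation. A sketch of how `branch_misprediction_ratio` could be evaluated (event names from the JSON above; the counts are invented for illustration):

```python
def d_ratio(num, den):
    """Safe divide as used in perf metric expressions: 0 when den == 0."""
    return num / den if den else 0.0

# Hypothetical retired-branch counts for one measurement interval.
counts = {"ex_ret_brn_misp": 25_000, "ex_ret_brn": 1_000_000}

ratio = d_ratio(counts["ex_ret_brn_misp"], counts["ex_ret_brn"])
# ScaleUnit "100%" tells perf to present the ratio as a percentage:
print(f"{ratio * 100:.2f}%")  # 2.50%
```

The same pattern applies to the additive metrics such as `all_l2_cache_accesses`, which simply sum their constituent event counts before scaling.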
+4 -1
tools/perf/pmu-events/arch/x86/mapfile.csv
··· 24 24 GenuineIntel-6-1A,v2,nehalemep,core 25 25 GenuineIntel-6-2E,v2,nehalemex,core 26 26 GenuineIntel-6-[4589]E,v24,skylake,core 27 + GenuineIntel-6-A[56],v24,skylake,core 27 28 GenuineIntel-6-37,v13,silvermont,core 28 29 GenuineIntel-6-4D,v13,silvermont,core 29 30 GenuineIntel-6-4C,v13,silvermont,core ··· 36 35 GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core 37 36 GenuineIntel-6-7D,v1,icelake,core 38 37 GenuineIntel-6-7E,v1,icelake,core 38 + GenuineIntel-6-8[CD],v1,icelake,core 39 + GenuineIntel-6-A7,v1,icelake,core 39 40 GenuineIntel-6-86,v1,tremontx,core 40 41 AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core 41 42 AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core 42 - AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core 43 + AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen3,core
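The first field of each mapfile.csv row is a regular expression matched against a CPUID string of the form vendor-family-model, which is how one row such as `GenuineIntel-6-A[56]` covers several models. A rough model of that lookup (the rows are from the hunk above; perf's real matcher lives in the pmu-events code and uses POSIX regexec, so the `[:xdigit:]` translation below is purely for the demo):

```python
import re

rows = [
    ("GenuineIntel-6-A[56]", "skylake"),
    ("GenuineIntel-6-8[CD]", "icelake"),
    ("AuthenticAMD-25-[[:xdigit:]]+", "amdzen3"),
]

def table_for(cpuid):
    """Return the event-table name whose CPUID regex matches, else None."""
    for pattern, table in rows:
        # Python's re lacks POSIX character classes; expand for the demo.
        pattern = pattern.replace("[:xdigit:]", "0-9a-fA-F")
        if re.fullmatch(pattern, cpuid):
            return table
    return None

print(table_for("GenuineIntel-6-A5"))   # skylake (Comet Lake row added above)
print(table_for("AuthenticAMD-25-01"))  # amdzen3 (the fixed Family 25 row)
```

This also shows why the last hunk matters: before the fix, Family 25 (Zen 3) parts incorrectly resolved to the amdzen2 tables.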
+3 -1
tools/perf/pmu-events/jevents.c
··· 285 285 { "imx8_ddr", "imx8_ddr" }, 286 286 { "L3PMC", "amd_l3" }, 287 287 { "DFPMC", "amd_df" }, 288 + { "cpu_core", "cpu_core" }, 289 + { "cpu_atom", "cpu_atom" }, 288 290 {} 289 291 }; 290 292 ··· 1151 1149 * and directory tree could result in build failure due to table 1152 1150 * names not being found. 1153 1151 * 1154 - * Atleast for now, be strict with processing JSON file names. 1152 + * At least for now, be strict with processing JSON file names. 1155 1153 * i.e. if JSON file name cannot be mapped to C-style table name, 1156 1154 * fail. 1157 1155 */
+1 -1
tools/perf/scripts/python/netdev-times.py
··· 356 356 return 357 357 rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, 358 358 'irq_list':irq_list, 'event_list':event_list} 359 - # merge information realted to a NET_RX softirq 359 + # merge information related to a NET_RX softirq 360 360 receive_hunk_list.append(rec_data) 361 361 362 362 def handle_napi_poll(event_info):
+4
tools/perf/tests/attr.c
··· 34 34 #include "event.h" 35 35 #include "util.h" 36 36 #include "tests.h" 37 + #include "pmu.h" 37 38 38 39 #define ENV "PERF_TEST_ATTR" 39 40 ··· 184 183 char path_perf[PATH_MAX]; 185 184 char path_dir[PATH_MAX]; 186 185 char *exec_path; 186 + 187 + if (perf_pmu__has_hybrid()) 188 + return TEST_SKIP; 187 189 188 190 /* First try development tree tests. */ 189 191 if (!lstat("./tests", &st))
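The new `perf_pmu__has_hybrid()` gate skips this test on hybrid machines, where the kernel registers separate `cpu_core` and `cpu_atom` PMUs instead of a single `cpu` PMU. A sketch of the kind of check involved (the sysfs path follows the usual event_source layout, assumed here; perf's C implementation walks its own PMU list rather than sysfs directly):

```python
import os

def has_hybrid(sysfs="/sys/bus/event_source/devices"):
    """True if both hybrid core-type PMUs are registered."""
    try:
        pmus = set(os.listdir(sysfs))
    except OSError:
        return False
    return "cpu_core" in pmus and "cpu_atom" in pmus
```

On an Alderlake-style system this returns True and the attr test returns TEST_SKIP, since the recorded attr expectations assume a single core PMU.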
+3 -3
tools/perf/tests/bp_signal.c
··· 225 225 * 226 226 * The test case check following error conditions: 227 227 * - we get stuck in signal handler because of debug 228 - * exception being triggered receursively due to 228 + * exception being triggered recursively due to 229 229 * the wrong RF EFLAG management 230 230 * 231 231 * - we never trigger the sig_handler breakpoint due 232 - * to the rong RF EFLAG management 232 + * to the wrong RF EFLAG management 233 233 * 234 234 */ 235 235 ··· 242 242 ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); 243 243 244 244 /* 245 - * Kick off the test by trigering 'fd1' 245 + * Kick off the test by triggering 'fd1' 246 246 * breakpoint. 247 247 */ 248 248 test_function();
+1 -1
tools/perf/tests/code-reading.c
··· 658 658 /* 659 659 * Both cpus and threads are now owned by evlist 660 660 * and will be freed by following perf_evlist__set_maps 661 - * call. Getting refference to keep them alive. 661 + * call. Getting reference to keep them alive. 662 662 */ 663 663 perf_cpu_map__get(cpus); 664 664 perf_thread_map__get(threads);
+4 -4
tools/perf/tests/demangle-ocaml-test.c
··· 19 19 { "main", 20 20 NULL }, 21 21 { "camlStdlib__array__map_154", 22 - "Stdlib.array.map" }, 22 + "Stdlib.array.map_154" }, 23 23 { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453", 24 - "Stdlib.anon_fn[stdlib.ml:334,0--54]" }, 24 + "Stdlib.anon_fn[stdlib.ml:334,0--54]_1453" }, 25 25 { "camlStdlib__bytes__$2b$2b_2205", 26 - "Stdlib.bytes.++" }, 26 + "Stdlib.bytes.++_2205" }, 27 27 }; 28 28 29 - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { 29 + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { 30 30 buf = ocaml_demangle_sym(test_cases[i].mangled); 31 31 if ((buf == NULL && test_cases[i].demangled != NULL) 32 32 || (buf != NULL && test_cases[i].demangled == NULL)
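The updated expectations keep the trailing `_NNN` unique-identifier suffix, matching the "Preserve identifier id in OCaml demangler" change. A model of the transform the fixed test cases imply: strip the `caml` prefix, turn `__` into `.`, and decode `$xx` hex escapes (this is a sketch of the mangling scheme, not perf's C implementation):

```python
import re

def demangle_ocaml(sym):
    """Demangle an OCaml symbol; None if it is not OCaml-mangled."""
    if not sym.startswith("caml"):
        return None
    body = sym[len("caml"):].replace("__", ".")
    # "$xx" encodes the character with hex code xx, e.g. $2b -> '+'.
    return re.sub(r"\$([0-9a-fA-F]{2})",
                  lambda m: chr(int(m.group(1), 16)), body)

print(demangle_ocaml("camlStdlib__array__map_154"))      # Stdlib.array.map_154
print(demangle_ocaml("camlStdlib__bytes__$2b$2b_2205"))  # Stdlib.bytes.++_2205
```

Note how the `_154` suffix now survives, whereas the pre-fix expectations dropped it.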
+12 -7
tools/perf/tests/evsel-roundtrip-name.c
··· 4 4 #include "parse-events.h" 5 5 #include "tests.h" 6 6 #include "debug.h" 7 + #include "pmu.h" 7 8 #include <errno.h> 8 9 #include <linux/kernel.h> 9 10 ··· 63 62 return ret; 64 63 } 65 64 66 - static int __perf_evsel__name_array_test(const char *names[], int nr_names) 65 + static int __perf_evsel__name_array_test(const char *names[], int nr_names, 66 + int distance) 67 67 { 68 68 int i, err; 69 69 struct evsel *evsel; ··· 84 82 85 83 err = 0; 86 84 evlist__for_each_entry(evlist, evsel) { 87 - if (strcmp(evsel__name(evsel), names[evsel->idx])) { 85 + if (strcmp(evsel__name(evsel), names[evsel->idx / distance])) { 88 86 --err; 89 - pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]); 87 + pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx / distance]); 90 88 } 91 89 } 92 90 ··· 95 93 return err; 96 94 } 97 95 98 - #define perf_evsel__name_array_test(names) \ 99 - __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) 96 + #define perf_evsel__name_array_test(names, distance) \ 97 + __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance) 100 98 101 99 int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused) 102 100 { 103 101 int err = 0, ret = 0; 104 102 105 - err = perf_evsel__name_array_test(evsel__hw_names); 103 + if (perf_pmu__has_hybrid()) 104 + return perf_evsel__name_array_test(evsel__hw_names, 2); 105 + 106 + err = perf_evsel__name_array_test(evsel__hw_names, 1); 106 107 if (err) 107 108 ret = err; 108 109 109 - err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1); 110 + err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1, 1); 110 111 if (err) 111 112 ret = err; 112 113
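The new `distance` parameter exists because on a hybrid system each generic hardware event is opened once per PMU, so evsels arrive in groups and the expected name for evsel index `idx` becomes `names[idx / distance]`. A toy illustration of that index mapping (the two names are stand-ins for entries of `evsel__hw_names`):

```python
names = ["cycles", "instructions"]

def expected_name(idx, distance):
    """Name expected for the evsel at position idx in the evlist."""
    return names[idx // distance]

# Non-hybrid: one evsel per name (distance = 1).
print([expected_name(i, 1) for i in range(2)])
# Hybrid: two evsels per name, one per core-type PMU (distance = 2).
print([expected_name(i, 2) for i in range(4)])
```

With distance 2 the evlist reads cycles, cycles, instructions, instructions, which is exactly the ordering the modified comparison loop tolerates.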
+2 -2
tools/perf/tests/hists_cumulate.c
··· 47 47 }; 48 48 49 49 /* 50 - * Will be casted to struct ip_callchain which has all 64 bit entries 50 + * Will be cast to struct ip_callchain which has all 64 bit entries 51 51 * of nr and ips[]. 52 52 */ 53 53 static u64 fake_callchains[][10] = { ··· 297 297 return err; 298 298 } 299 299 300 - /* callcain + NO children */ 300 + /* callchain + NO children */ 301 301 static int test2(struct evsel *evsel, struct machine *machine) 302 302 { 303 303 int err;
+7 -7
tools/perf/tests/hists_filter.c
··· 150 150 } 151 151 152 152 TEST_ASSERT_VAL("Invalid nr samples", 153 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 153 + hists->stats.nr_samples == 10); 154 154 TEST_ASSERT_VAL("Invalid nr hist entries", 155 155 hists->nr_entries == 9); 156 156 TEST_ASSERT_VAL("Invalid total period", 157 157 hists->stats.total_period == 1000); 158 158 TEST_ASSERT_VAL("Unmatched nr samples", 159 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 159 + hists->stats.nr_samples == 160 160 hists->stats.nr_non_filtered_samples); 161 161 TEST_ASSERT_VAL("Unmatched nr hist entries", 162 162 hists->nr_entries == hists->nr_non_filtered_entries); ··· 175 175 176 176 /* normal stats should be invariant */ 177 177 TEST_ASSERT_VAL("Invalid nr samples", 178 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 178 + hists->stats.nr_samples == 10); 179 179 TEST_ASSERT_VAL("Invalid nr hist entries", 180 180 hists->nr_entries == 9); 181 181 TEST_ASSERT_VAL("Invalid total period", ··· 204 204 205 205 /* normal stats should be invariant */ 206 206 TEST_ASSERT_VAL("Invalid nr samples", 207 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 207 + hists->stats.nr_samples == 10); 208 208 TEST_ASSERT_VAL("Invalid nr hist entries", 209 209 hists->nr_entries == 9); 210 210 TEST_ASSERT_VAL("Invalid total period", ··· 239 239 240 240 /* normal stats should be invariant */ 241 241 TEST_ASSERT_VAL("Invalid nr samples", 242 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 242 + hists->stats.nr_samples == 10); 243 243 TEST_ASSERT_VAL("Invalid nr hist entries", 244 244 hists->nr_entries == 9); 245 245 TEST_ASSERT_VAL("Invalid total period", ··· 268 268 269 269 /* normal stats should be invariant */ 270 270 TEST_ASSERT_VAL("Invalid nr samples", 271 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 271 + hists->stats.nr_samples == 10); 272 272 TEST_ASSERT_VAL("Invalid nr hist entries", 273 273 hists->nr_entries == 9); 274 274 TEST_ASSERT_VAL("Invalid total period", ··· 299 299 300 300 /* normal stats 
should be invariant */ 301 301 TEST_ASSERT_VAL("Invalid nr samples", 302 - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); 302 + hists->stats.nr_samples == 10); 303 303 TEST_ASSERT_VAL("Invalid nr hist entries", 304 304 hists->nr_entries == 9); 305 305 TEST_ASSERT_VAL("Invalid total period",
+20 -2
tools/perf/tests/make
··· 155 155 run += make_with_babeltrace 156 156 run += make_with_clangllvm 157 157 run += make_with_libpfm4 158 - run += make_with_gtk2 159 158 run += make_help 160 159 run += make_doc 161 160 run += make_perf_o ··· 171 172 # run += make_install_info 172 173 # run += make_install_pdf 173 174 run += make_minimal 174 - run += make_static 175 175 176 176 ifneq ($(call has,ctags),) 177 177 run += make_tags ··· 296 298 clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null && $(MAKE) -s $(O_OPT) -C ../build clean >/dev/null) 297 299 298 300 $(run): 301 + $(call clean) 302 + @TMP_DEST=$$(mktemp -d); \ 303 + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ 304 + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ 305 + ( eval $$cmd ) >> $@ 2>&1; \ 306 + echo " test: $(call test,$@)" >> $@ 2>&1; \ 307 + $(call test,$@) && \ 308 + rm -rf $@ $$TMP_DEST || (cat $@ ; false) 309 + 310 + make_with_gtk2: 311 + $(call clean) 312 + @TMP_DEST=$$(mktemp -d); \ 313 + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ 314 + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ 315 + ( eval $$cmd ) >> $@ 2>&1; \ 316 + echo " test: $(call test,$@)" >> $@ 2>&1; \ 317 + $(call test,$@) && \ 318 + rm -rf $@ $$TMP_DEST || (cat $@ ; false) 319 + 320 + make_static: 299 321 $(call clean) 300 322 @TMP_DEST=$$(mktemp -d); \ 301 323 cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+172 -1
tools/perf/tests/parse-events.c
··· 20 20 21 21 #if defined(__s390x__) 22 22 /* Return true if kvm module is available and loaded. Test this 23 - * and retun success when trace point kvm_s390_create_vm 23 + * and return success when trace point kvm_s390_create_vm 24 24 * exists. Otherwise this test always fails. 25 25 */ 26 26 static bool kvm_s390_create_vm_valid(void) ··· 1512 1512 return test__checkevent_tracepoint_multi(evlist); 1513 1513 } 1514 1514 1515 + static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) 1516 + { 1517 + struct evsel *evsel = evlist__first(evlist); 1518 + 1519 + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); 1520 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1521 + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); 1522 + return 0; 1523 + } 1524 + 1525 + static int test__hybrid_hw_group_event(struct evlist *evlist) 1526 + { 1527 + struct evsel *evsel, *leader; 1528 + 1529 + evsel = leader = evlist__first(evlist); 1530 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1531 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1532 + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); 1533 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1534 + 1535 + evsel = evsel__next(evsel); 1536 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1537 + TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); 1538 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1539 + return 0; 1540 + } 1541 + 1542 + static int test__hybrid_sw_hw_group_event(struct evlist *evlist) 1543 + { 1544 + struct evsel *evsel, *leader; 1545 + 1546 + evsel = leader = evlist__first(evlist); 1547 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1548 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); 1549 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1550 + 
1551 + evsel = evsel__next(evsel); 1552 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1553 + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); 1554 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1555 + return 0; 1556 + } 1557 + 1558 + static int test__hybrid_hw_sw_group_event(struct evlist *evlist) 1559 + { 1560 + struct evsel *evsel, *leader; 1561 + 1562 + evsel = leader = evlist__first(evlist); 1563 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1564 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1565 + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); 1566 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1567 + 1568 + evsel = evsel__next(evsel); 1569 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); 1570 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1571 + return 0; 1572 + } 1573 + 1574 + static int test__hybrid_group_modifier1(struct evlist *evlist) 1575 + { 1576 + struct evsel *evsel, *leader; 1577 + 1578 + evsel = leader = evlist__first(evlist); 1579 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1580 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1581 + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); 1582 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1583 + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); 1584 + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); 1585 + 1586 + evsel = evsel__next(evsel); 1587 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1588 + TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); 1589 + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); 1590 + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); 1591 + TEST_ASSERT_VAL("wrong exclude_kernel", 
evsel->core.attr.exclude_kernel); 1592 + return 0; 1593 + } 1594 + 1595 + static int test__hybrid_raw1(struct evlist *evlist) 1596 + { 1597 + struct evsel *evsel = evlist__first(evlist); 1598 + 1599 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1600 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1601 + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); 1602 + 1603 + /* The type of the second event is a random value */ 1604 + evsel = evsel__next(evsel); 1605 + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); 1606 + return 0; 1607 + } 1608 + 1609 + static int test__hybrid_raw2(struct evlist *evlist) 1610 + { 1611 + struct evsel *evsel = evlist__first(evlist); 1612 + 1613 + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); 1614 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 1615 + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); 1616 + return 0; 1617 + } 1618 + 1619 + static int test__hybrid_cache_event(struct evlist *evlist) 1620 + { 1621 + struct evsel *evsel = evlist__first(evlist); 1622 + 1623 + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); 1624 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); 1625 + TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); 1626 + 1627 + evsel = evsel__next(evsel); 1628 + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); 1629 + TEST_ASSERT_VAL("wrong config", 0x10002 == (evsel->core.attr.config & 0xffffffff)); 1630 + return 0; 1631 + } 1632 + 1515 1633 struct evlist_test { 1516 1634 const char *name; 1517 1635 __u32 type; ··· 1986 1868 }, 1987 1869 }; 1988 1870 1871 + static struct evlist_test test__hybrid_events[] = { 1872 + { 1873 + .name = "cpu_core/cpu-cycles/", 1874 + .check = test__hybrid_hw_event_with_pmu, 1875 + .id = 0, 1876 + }, 1877 + { 1878 + .name = 
"{cpu_core/cpu-cycles/,cpu_core/instructions/}", 1879 + .check = test__hybrid_hw_group_event, 1880 + .id = 1, 1881 + }, 1882 + { 1883 + .name = "{cpu-clock,cpu_core/cpu-cycles/}", 1884 + .check = test__hybrid_sw_hw_group_event, 1885 + .id = 2, 1886 + }, 1887 + { 1888 + .name = "{cpu_core/cpu-cycles/,cpu-clock}", 1889 + .check = test__hybrid_hw_sw_group_event, 1890 + .id = 3, 1891 + }, 1892 + { 1893 + .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", 1894 + .check = test__hybrid_group_modifier1, 1895 + .id = 4, 1896 + }, 1897 + { 1898 + .name = "r1a", 1899 + .check = test__hybrid_raw1, 1900 + .id = 5, 1901 + }, 1902 + { 1903 + .name = "cpu_core/r1a/", 1904 + .check = test__hybrid_raw2, 1905 + .id = 6, 1906 + }, 1907 + { 1908 + .name = "cpu_core/config=10,config1,config2=3,period=1000/u", 1909 + .check = test__checkevent_pmu, 1910 + .id = 7, 1911 + }, 1912 + { 1913 + .name = "cpu_core/LLC-loads/,cpu_atom/LLC-load-misses/", 1914 + .check = test__hybrid_cache_event, 1915 + .id = 8, 1916 + }, 1917 + }; 1918 + 1989 1919 static int test_event(struct evlist_test *e) 1990 1920 { 1991 1921 struct parse_events_error err; ··· 2200 2034 if (!ret2) \ 2201 2035 ret2 = ret1; \ 2202 2036 } while (0) 2037 + 2038 + if (perf_pmu__has_hybrid()) { 2039 + TEST_EVENTS(test__hybrid_events); 2040 + return ret2; 2041 + } 2203 2042 2204 2043 TEST_EVENTS(test__events); 2205 2044
+8 -4
tools/perf/tests/parse-metric.c
··· 11 11 #include "debug.h" 12 12 #include "expr.h" 13 13 #include "stat.h" 14 + #include "pmu.h" 14 15 15 16 static struct pmu_event pme_test[] = { 16 17 { ··· 99 98 if (!strcmp(name, v->event)) 100 99 return v->val; 101 100 v++; 102 - }; 101 + } 103 102 return 0; 104 103 } 105 104 ··· 187 186 *ratio2 = compute_single(&metric_events, evlist, &st, name2); 188 187 189 188 out: 190 - /* ... clenup. */ 189 + /* ... cleanup. */ 191 190 metricgroup__rblist_exit(&metric_events); 192 191 runtime_stat__exit(&st); 193 192 evlist__free_stats(evlist); ··· 373 372 { 374 373 TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); 375 374 TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); 376 - TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); 377 375 TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); 378 376 TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); 379 - TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); 380 377 TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0); 378 + 379 + if (!perf_pmu__has_hybrid()) { 380 + TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); 381 + TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); 382 + } 381 383 return 0; 382 384 }
+12
tools/perf/tests/perf-time-to-tsc.c
··· 20 20 #include "tsc.h" 21 21 #include "mmap.h" 22 22 #include "tests.h" 23 + #include "pmu.h" 23 24 24 25 #define CHECK__(x) { \ 25 26 while ((x) < 0) { \ ··· 88 87 evsel->core.attr.comm = 1; 89 88 evsel->core.attr.disabled = 1; 90 89 evsel->core.attr.enable_on_exec = 0; 90 + 91 + /* 92 + * For hybrid "cycles:u", it creates two events. 93 + * Init the second evsel here. 94 + */ 95 + if (perf_pmu__has_hybrid()) { 96 + evsel = evsel__next(evsel); 97 + evsel->core.attr.comm = 1; 98 + evsel->core.attr.disabled = 1; 99 + evsel->core.attr.enable_on_exec = 0; 100 + } 91 101 92 102 CHECK__(evlist__open(evlist)); 93 103
+82 -1
tools/perf/tests/pmu-events.c
··· 12 12 #include "util/evlist.h" 13 13 #include "util/expr.h" 14 14 #include "util/parse-events.h" 15 + #include "metricgroup.h" 15 16 16 17 struct perf_pmu_test_event { 17 18 /* used for matching against events from generated pmu-events.c */ ··· 472 471 pr_debug("On expression %s\n", pe->metric_expr); 473 472 } 474 473 474 + struct metric { 475 + struct list_head list; 476 + struct metric_ref metric_ref; 477 + }; 478 + 479 + static int resolve_metric_simple(struct expr_parse_ctx *pctx, 480 + struct list_head *compound_list, 481 + struct pmu_events_map *map, 482 + const char *metric_name) 483 + { 484 + struct hashmap_entry *cur, *cur_tmp; 485 + struct metric *metric, *tmp; 486 + size_t bkt; 487 + bool all; 488 + int rc; 489 + 490 + do { 491 + all = true; 492 + hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { 493 + struct metric_ref *ref; 494 + struct pmu_event *pe; 495 + 496 + pe = metricgroup__find_metric(cur->key, map); 497 + if (!pe) 498 + continue; 499 + 500 + if (!strcmp(metric_name, (char *)cur->key)) { 501 + pr_warning("Recursion detected for metric %s\n", metric_name); 502 + rc = -1; 503 + goto out_err; 504 + } 505 + 506 + all = false; 507 + 508 + /* The metric key itself needs to go out.. 
*/ 509 + expr__del_id(pctx, cur->key); 510 + 511 + metric = malloc(sizeof(*metric)); 512 + if (!metric) { 513 + rc = -ENOMEM; 514 + goto out_err; 515 + } 516 + 517 + ref = &metric->metric_ref; 518 + ref->metric_name = pe->metric_name; 519 + ref->metric_expr = pe->metric_expr; 520 + list_add_tail(&metric->list, compound_list); 521 + 522 + rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); 523 + if (rc) 524 + goto out_err; 525 + break; /* The hashmap has been modified, so restart */ 526 + } 527 + } while (!all); 528 + 529 + return 0; 530 + 531 + out_err: 532 + list_for_each_entry_safe(metric, tmp, compound_list, list) 533 + free(metric); 534 + 535 + return rc; 536 + 537 + } 538 + 475 539 static int test_parsing(void) 476 540 { 477 - struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); 541 + struct pmu_events_map *cpus_map = pmu_events_map__find(); 478 542 struct pmu_events_map *map; 479 543 struct pmu_event *pe; 480 544 int i, j, k; ··· 554 488 break; 555 489 j = 0; 556 490 for (;;) { 491 + struct metric *metric, *tmp; 557 492 struct hashmap_entry *cur; 493 + LIST_HEAD(compound_list); 558 494 size_t bkt; 559 495 560 496 pe = &map->table[j++]; ··· 570 502 expr_failure("Parse other failed", map, pe); 571 503 ret++; 572 504 continue; 505 + } 506 + 507 + if (resolve_metric_simple(&ctx, &compound_list, map, 508 + pe->metric_name)) { 509 + expr_failure("Could not resolve metrics", map, pe); 510 + ret++; 511 + goto exit; /* Don't tolerate errors due to severity */ 573 512 } 574 513 575 514 /* ··· 594 519 ret++; 595 520 } 596 521 522 + list_for_each_entry_safe(metric, tmp, &compound_list, list) { 523 + expr__add_ref(&ctx, &metric->metric_ref); 524 + free(metric); 525 + } 526 + 597 527 if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { 598 528 expr_failure("Parse failed", map, pe); 599 529 ret++; ··· 607 527 } 608 528 } 609 529 /* TODO: fail when not ok */ 530 + exit: 610 531 return ret == 0 ? TEST_OK : TEST_SKIP; 611 532 } 612 533
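The resolve_metric_simple() hunk above repeatedly scans the expression's id hashmap, replaces any id that names another metric with a metric_ref, and restarts the scan whenever the id set changes, bailing out when a metric refers to itself. A minimal Python sketch of that loop, where resolve_metric_refs() and extract_ids() are illustrative names only (extract_ids() stands in for expr__find_other(), which does the real tokenizing):

```python
import re

def extract_ids(expr: str) -> set:
    # Crude identifier tokenizer, standing in for expr__find_other().
    return set(re.findall(r"[A-Za-z_][\w.]*", expr))

def resolve_metric_refs(expr_ids: set, metrics: dict, metric_name: str) -> list:
    # Replace ids that name other metrics with (name, expr) references,
    # restarting the scan whenever the id set is modified, as the C loop does.
    refs = []
    changed = True
    while changed:
        changed = False
        for key in sorted(expr_ids):
            if key not in metrics:
                continue
            if key == metric_name:
                # Mirrors the pr_warning("Recursion detected ...") error path.
                raise ValueError("recursion detected for metric " + metric_name)
            expr_ids.discard(key)            # the metric key itself goes out
            refs.append((key, metrics[key]))
            expr_ids |= extract_ids(metrics[key])
            changed = True
            break                            # id set modified, so restart
    return refs
```

The break-and-restart matches the C comment "The hashmap has been modified, so restart": mutating a container mid-iteration is only safe if the scan begins again.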
+58 -7
tools/perf/tests/shell/buildid.sh
··· 14 14 exit 2 15 15 fi 16 16 17 + # check what we need to test windows binaries 18 + add_pe=1 19 + run_pe=1 20 + if ! perf version --build-options | grep -q 'libbfd: .* on '; then 21 + echo "WARNING: perf not built with libbfd. PE binaries will not be tested." 22 + add_pe=0 23 + run_pe=0 24 + fi 25 + if ! which wine > /dev/null; then 26 + echo "WARNING: wine not found. PE binaries will not be run." 27 + run_pe=0 28 + fi 29 + 30 + # set up wine 31 + if [ ${run_pe} -eq 1 ]; then 32 + wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX) 33 + export WINEPREFIX=${wineprefix} 34 + # clear display variables to prevent wine from popping up dialogs 35 + unset DISPLAY 36 + unset WAYLAND_DISPLAY 37 + fi 38 + 17 39 ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) 18 40 ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) 41 + ex_pe=$(dirname $0)/../pe-file.exe 19 42 20 43 echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - 21 44 echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - 22 45 23 - echo "test binaries: ${ex_sha1} ${ex_md5}" 46 + echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}" 24 47 25 48 check() 26 49 { 27 - id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` 28 - 50 + case $1 in 51 + *.exe) 52 + # We don't have a tool that can pull a nicely formatted build-id out of 53 + # a PE file, but we can extract the whole section with objcopy and 54 + # format it ourselves. The .buildid section is a Debug Directory 55 + # containing a CodeView entry: 56 + # https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only 57 + # https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md 58 + # The build-id starts at byte 33 and must be rearranged into a GUID. 
59 + id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \ 60 + cut -c 33-48 | hexdump -ve '/1 "%02x"' | \ 61 + sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'` 62 + ;; 63 + *) 64 + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` 65 + ;; 66 + esac 29 67 echo "build id: ${id}" 30 68 31 69 link=${build_id_dir}/.build-id/${id:0:2}/${id:2} ··· 88 50 exit 1 89 51 fi 90 52 91 - ${perf} buildid-cache -l | grep $id 53 + ${perf} buildid-cache -l | grep ${id} 92 54 if [ $? -ne 0 ]; then 93 55 echo "failed: ${id} is not reported by \"perf buildid-cache -l\"" 94 56 exit 1 ··· 117 79 { 118 80 data=$(mktemp /tmp/perf.data.XXX) 119 81 build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) 82 + log=$(mktemp /tmp/perf.log.XXX) 120 83 perf="perf --buildid-dir ${build_id_dir}" 121 84 122 - ${perf} record --buildid-all -o ${data} ${1} 85 + echo "running: perf record $@" 86 + ${perf} record --buildid-all -o ${data} $@ &> ${log} 123 87 if [ $? -ne 0 ]; then 124 - echo "failed: record ${1}" 88 + echo "failed: record $@" 89 + echo "see log: ${log}" 125 90 exit 1 126 91 fi 127 92 128 - check ${1} 93 + check ${@: -1} 129 94 95 + rm -f ${log} 130 96 rm -rf ${build_id_dir} 131 97 rm -rf ${data} 132 98 } ··· 138 96 # add binaries manual via perf buildid-cache -a 139 97 test_add ${ex_sha1} 140 98 test_add ${ex_md5} 99 + if [ ${add_pe} -eq 1 ]; then 100 + test_add ${ex_pe} 101 + fi 141 102 142 103 # add binaries via perf record post processing 143 104 test_record ${ex_sha1} 144 105 test_record ${ex_md5} 106 + if [ ${run_pe} -eq 1 ]; then 107 + test_record wine ${ex_pe} 108 + fi 145 109 146 110 # cleanup 147 111 rm ${ex_sha1} ${ex_md5} 112 + if [ ${run_pe} -eq 1 ]; then 113 + rm -r ${wineprefix} 114 + fi 148 115 149 116 exit ${err}
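The objcopy/cut/hexdump/sed pipeline above reorders the CodeView GUID bytes into the textual build-id: the leading 4-byte field and the two following 2-byte fields are stored little-endian in the PE file, while the trailing 8 bytes keep their order. A minimal Python sketch of the same shuffle, assuming the 16 GUID bytes have already been extracted (pe_buildid() is an illustrative name, not a perf helper):

```python
def pe_buildid(guid: bytes) -> str:
    # The first three GUID fields (u32, u16, u16) are little-endian on disk,
    # so their bytes are reversed before hex-printing; the trailing 8 bytes
    # keep their order -- the same shuffle the sed back-references
    # \4\3\2\1\6\5\8\7\9 perform on hex pairs.
    assert len(guid) == 16
    return (guid[3::-1] + guid[5:3:-1] + guid[7:5:-1] + guid[8:]).hex()
```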
+33 -21
tools/perf/tests/shell/daemon.sh
··· 98 98 fi 99 99 } 100 100 101 + daemon_exit() 102 + { 103 + local config=$1 104 + 105 + local line=`perf daemon --config ${config} -x: | head -1` 106 + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` 107 + 108 + # Reset trap handler. 109 + trap - SIGINT SIGTERM 110 + 111 + # stop daemon 112 + perf daemon stop --config ${config} 113 + 114 + # ... and wait for the pid to go away 115 + tail --pid=${pid} -f /dev/null 116 + } 117 + 101 118 daemon_start() 102 119 { 103 120 local config=$1 ··· 122 105 123 106 perf daemon start --config ${config} 124 107 108 + # Clean up daemon if interrupted. 109 + trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM 110 + 125 111 # wait for the session to ping 126 112 local state="FAIL" 113 + local retries=0 127 114 while [ "${state}" != "OK" ]; do 128 115 state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'` 129 116 sleep 0.05 117 + retries=$((${retries} +1)) 118 + if [ ${retries} -ge 600 ]; then 119 + echo "FAILED: Timeout waiting for daemon to ping" 120 + daemon_exit ${config} 121 + exit 1 122 + fi 130 123 done 131 - } 132 - 133 - daemon_exit() 134 - { 135 - local base=$1 136 - local config=$2 137 - 138 - local line=`perf daemon --config ${config} -x: | head -1` 139 - local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` 140 - 141 - # stop daemon 142 - perf daemon stop --config ${config} 143 - 144 - # ... 
and wait for the pid to go away 145 - tail --pid=${pid} -f /dev/null 146 124 } 147 125 148 126 test_list() ··· 183 171 ${base}/session-time/ack "0" 184 172 185 173 # stop daemon 186 - daemon_exit ${base} ${config} 174 + daemon_exit ${config} 187 175 188 176 rm -rf ${base} 189 177 rm -f ${config} ··· 300 288 done 301 289 302 290 # stop daemon 303 - daemon_exit ${base} ${config} 291 + daemon_exit ${config} 304 292 305 293 rm -rf ${base} 306 294 rm -f ${config} ··· 345 333 fi 346 334 347 335 # stop daemon 348 - daemon_exit ${base} ${config} 336 + daemon_exit ${config} 349 337 350 338 # check that sessions are gone 351 339 if [ -d "/proc/${pid_size}" ]; then ··· 386 374 perf daemon signal --config ${config} 387 375 388 376 # stop daemon 389 - daemon_exit ${base} ${config} 377 + daemon_exit ${config} 390 378 391 379 # count is 2 perf.data for signals and 1 for perf record finished 392 380 count=`ls ${base}/session-test/ | grep perf.data | wc -l` ··· 432 420 fi 433 421 434 422 # stop daemon 435 - daemon_exit ${base} ${config} 423 + daemon_exit ${config} 436 424 437 425 rm -rf ${base} 438 426 rm -f ${config} ··· 469 457 fi 470 458 471 459 # stop daemon 472 - daemon_exit ${base} ${config} 460 + daemon_exit ${config} 473 461 474 462 rm -rf ${base} 475 463 rm -f ${config}
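The retry counter added to daemon_start() turns the ping loop into a poll-until-ready loop with a roughly 30-second budget (600 retries at 0.05s each). The same pattern as a generic Python sketch, with wait_until() as an illustrative name:

```python
import time

def wait_until(check, timeout_s=30.0, interval_s=0.05):
    # Poll check() until it reports success or the time budget runs out,
    # like the ping loop bounded by 600 retries of 0.05s each.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if check():
            return True
        time.sleep(interval_s)
    return False
```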
+31
tools/perf/tests/shell/stat+csv_summary.sh
··· 1 + #!/bin/sh 2 + # perf stat csv summary test 3 + # SPDX-License-Identifier: GPL-2.0 4 + 5 + set -e 6 + 7 + # 8 + # 1.001364330 9224197 cycles 8012885033 100.00 9 + # summary 9224197 cycles 8012885033 100.00 10 + # 11 + perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \ 12 + grep -e summary | \ 13 + while read summary num event run pct 14 + do 15 + if [ $summary != "summary" ]; then 16 + exit 1 17 + fi 18 + done 19 + 20 + # 21 + # 1.001360298 9148534 cycles 8012853854 100.00 22 + #9148534 cycles 8012853854 100.00 23 + # 24 + perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \ 25 + grep -e summary | \ 26 + while read num event run pct 27 + do 28 + exit 1 29 + done 30 + 31 + exit 0
+3
tools/perf/tests/shell/stat+shadow_stat.sh
··· 7 7 # skip if system-wide mode is forbidden 8 8 perf stat -a true > /dev/null 2>&1 || exit 2 9 9 10 + # skip if on hybrid platform 11 + perf stat -a -e cycles sleep 1 2>&1 | grep -e cpu_core && exit 2 12 + 10 13 test_global_aggr() 11 14 { 12 15 perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \
+31
tools/perf/tests/shell/stat_bpf_counters.sh
··· 1 + #!/bin/sh 2 + # perf stat --bpf-counters test 3 + # SPDX-License-Identifier: GPL-2.0 4 + 5 + set -e 6 + 7 + # check whether $2 is within +/- 10% of $1 8 + compare_number() 9 + { 10 + first_num=$1 11 + second_num=$2 12 + 13 + # upper bound is first_num * 110% 14 + upper=$(( $first_num + $first_num / 10 )) 15 + # lower bound is first_num * 90% 16 + lower=$(( $first_num - $first_num / 10 )) 17 + 18 + if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then 19 + echo "The difference between $first_num and $second_num is greater than 10%." 20 + exit 1 21 + fi 22 + } 23 + 24 + # skip if --bpf-counters is not supported 25 + perf stat --bpf-counters true > /dev/null 2>&1 || exit 2 26 + 27 + base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') 28 + bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') 29 + 30 + compare_number $base_cycles $bpf_cycles 31 + exit 0
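compare_number() accepts the BPF-counted cycles when they fall within +/- 10% of the baseline, with the bounds computed in integer arithmetic. The same check as a Python sketch (within_ten_percent() is an illustrative name):

```python
def within_ten_percent(base: int, other: int) -> bool:
    # Same integer arithmetic as the shell: bounds are base +/- base/10,
    # so the tolerance rounds down exactly like $(( ... / 10 )).
    upper = base + base // 10
    lower = base - base // 10
    return lower <= other <= upper
```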
+5 -1
tools/perf/tests/switch-tracking.c
··· 18 18 #include "record.h" 19 19 #include "tests.h" 20 20 #include "util/mmap.h" 21 + #include "pmu.h" 21 22 22 23 static int spin_sleep(void) 23 24 { ··· 372 371 cpu_clocks_evsel = evlist__last(evlist); 373 372 374 373 /* Second event */ 375 - err = parse_events(evlist, "cycles:u", NULL); 374 + if (perf_pmu__has_hybrid()) 375 + err = parse_events(evlist, "cpu_core/cycles/u", NULL); 376 + else 377 + err = parse_events(evlist, "cycles:u", NULL); 376 378 if (err) { 377 379 pr_debug("Failed to parse event cycles:u\n"); 378 380 goto out_err;
+12 -3
tools/perf/tests/topology.c
··· 8 8 #include "session.h" 9 9 #include "evlist.h" 10 10 #include "debug.h" 11 + #include "pmu.h" 11 12 #include <linux/err.h> 12 13 13 14 #define TEMPL "/tmp/perf-test-XXXXXX" ··· 41 40 session = perf_session__new(&data, false, NULL); 42 41 TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); 43 42 44 - session->evlist = evlist__new_default(); 45 - TEST_ASSERT_VAL("can't get evlist", session->evlist); 43 + if (!perf_pmu__has_hybrid()) { 44 + session->evlist = evlist__new_default(); 45 + TEST_ASSERT_VAL("can't get evlist", session->evlist); 46 + } else { 47 + struct parse_events_error err; 48 + 49 + session->evlist = evlist__new(); 50 + TEST_ASSERT_VAL("can't get evlist", session->evlist); 51 + parse_events(session->evlist, "cpu_core/cycles/", &err); 52 + } 46 53 47 54 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); 48 55 perf_header__set_feat(&session->header, HEADER_NRCPUS); ··· 89 80 * CPU 1 is on core_id 1 and physical_package_id 3 90 81 * 91 82 * Core_id and physical_package_id are platform and architecture 92 - * dependend and might have higher numbers than the CPU id. 83 + * dependent and might have higher numbers than the CPU id. 93 84 * This actually depends on the configuration. 94 85 * 95 86 * In this case process_cpu_topology() prints error message:
+3 -4
tools/perf/trace/beauty/fsconfig.sh
··· 10 10 linux_mount=${linux_header_dir}/mount.h 11 11 12 12 printf "static const char *fsconfig_cmds[] = {\n" 13 - regex='^[[:space:]]*+FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*' 14 - egrep $regex ${linux_mount} | \ 15 - sed -r "s/$regex/\2 \1/g" | \ 16 - xargs printf "\t[%s] = \"%s\",\n" 13 + ms='[[:space:]]*' 14 + sed -nr "s/^${ms}FSCONFIG_([[:alnum:]_]+)${ms}=${ms}([[:digit:]]+)${ms},.*/\t[\2] = \"\1\",/p" \ 15 + ${linux_mount} 17 16 printf "};\n"
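The rewritten fsconfig.sh collapses the egrep/sed/xargs pipeline into one sed program that turns each FSCONFIG_* enumerator into a C table entry. A Python sketch of the equivalent extraction, with gen_cmds_table() as an illustrative name:

```python
import re

# One capture for the enumerator suffix, one for its numeric value.
FSCONFIG_RE = re.compile(r"\s*FSCONFIG_(\w+)\s*=\s*(\d+)\s*,")

def gen_cmds_table(header_text: str) -> str:
    # Emit one "[value] = \"NAME\"," entry per matching enumerator line.
    entries = [
        "\t[%s] = \"%s\"," % (m.group(2), m.group(1))
        for m in map(FSCONFIG_RE.match, header_text.splitlines())
        if m
    ]
    return "static const char *fsconfig_cmds[] = {\n" + "\n".join(entries) + "\n};\n"
```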
+1 -1
tools/perf/trace/beauty/include/linux/socket.h
··· 85 85 86 86 /* 87 87 * POSIX 1003.1g - ancillary data object information 88 - * Ancillary data consits of a sequence of pairs of 88 + * Ancillary data consists of a sequence of pairs of 89 89 * (cmsghdr, cmsg_data[]) 90 90 */ 91 91
+24 -3
tools/perf/ui/browsers/annotate.c
··· 381 381 return true; 382 382 } 383 383 384 + #define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) 385 + 386 + static void annotate_browser__show_full_location(struct ui_browser *browser) 387 + { 388 + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 389 + struct disasm_line *cursor = disasm_line(ab->selection); 390 + struct annotation_line *al = &cursor->al; 391 + 392 + if (al->offset != -1) 393 + ui_helpline__puts("Only available for source code lines."); 394 + else if (al->fileloc == NULL) 395 + ui_helpline__puts("No source file location."); 396 + else { 397 + char help_line[SYM_TITLE_MAX_SIZE]; 398 + sprintf (help_line, "Source file location: %s", al->fileloc); 399 + ui_helpline__puts(help_line); 400 + } 401 + } 402 + 384 403 static void ui_browser__init_asm_mode(struct ui_browser *browser) 385 404 { 386 405 struct annotation *notes = browser__annotation(browser); 387 406 ui_browser__reset_index(browser); 388 407 browser->nr_entries = notes->nr_asm_entries; 389 408 } 390 - 391 - #define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) 392 409 393 410 static int sym_title(struct symbol *sym, struct map *map, char *title, 394 411 size_t sz, int percent_type) ··· 415 398 } 416 399 417 400 /* 418 - * This can be called from external jumps, i.e. jumps from one functon 401 + * This can be called from external jumps, i.e. jumps from one function 419 402 * to another, like from the kernel's entry_SYSCALL_64 function to the 420 403 * swapgs_restore_regs_and_return_to_usermode() function. 
421 404 * ··· 764 747 "c Show min/max cycle\n" 765 748 "/ Search string\n" 766 749 "k Toggle line numbers\n" 750 + "l Show full source file location\n" 767 751 "P Print to [symbol_name].annotation file.\n" 768 752 "r Run available scripts\n" 769 753 "p Toggle percent type [local/global]\n" ··· 777 759 continue; 778 760 case 'k': 779 761 notes->options->show_linenr = !notes->options->show_linenr; 762 + continue; 763 + case 'l': 764 + annotate_browser__show_full_location (&browser->b); 780 765 continue; 781 766 case 'H': 782 767 nd = browser->curr_hot;
+10 -9
tools/perf/ui/browsers/hists.c
··· 117 117 browser->rows -= browser->extra_title_lines; 118 118 /* 119 119 * Verify if we were at the last line and that line isn't 120 - * visibe because we now show the header line(s). 120 + * visible because we now show the header line(s). 121 121 */ 122 122 index_row = browser->index - browser->top_idx; 123 123 if (index_row >= browser->rows) ··· 682 682 switch (key) { 683 683 case K_TIMER: { 684 684 struct hist_browser_timer *hbt = browser->hbt; 685 + struct evsel *evsel = hists_to_evsel(browser->hists); 685 686 u64 nr_entries; 686 687 687 688 WARN_ON_ONCE(!hbt); ··· 697 696 ui_browser__update_nr_entries(&browser->b, nr_entries); 698 697 699 698 if (warn_lost_event && 700 - (browser->hists->stats.nr_lost_warned != 701 - browser->hists->stats.nr_events[PERF_RECORD_LOST])) { 702 - browser->hists->stats.nr_lost_warned = 703 - browser->hists->stats.nr_events[PERF_RECORD_LOST]; 699 + (evsel->evlist->stats.nr_lost_warned != 700 + evsel->evlist->stats.nr_events[PERF_RECORD_LOST])) { 701 + evsel->evlist->stats.nr_lost_warned = 702 + evsel->evlist->stats.nr_events[PERF_RECORD_LOST]; 704 703 ui_browser__warn_lost_events(&browser->b); 705 704 } 706 705 ··· 3417 3416 struct evsel *evsel = list_entry(entry, struct evsel, core.node); 3418 3417 struct hists *hists = evsel__hists(evsel); 3419 3418 bool current_entry = ui_browser__is_current_entry(browser, row); 3420 - unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 3419 + unsigned long nr_events = hists->stats.nr_samples; 3421 3420 const char *ev_name = evsel__name(evsel); 3422 3421 char bf[256], unit; 3423 3422 const char *warn = " "; ··· 3433 3432 3434 3433 for_each_group_member(pos, evsel) { 3435 3434 struct hists *pos_hists = evsel__hists(pos); 3436 - nr_events += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; 3435 + nr_events += pos_hists->stats.nr_samples; 3437 3436 } 3438 3437 } 3439 3438 ··· 3442 3441 unit, unit == ' ' ? 
"" : " ", ev_name); 3443 3442 ui_browser__printf(browser, "%s", bf); 3444 3443 3445 - nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; 3444 + nr_events = evsel->evlist->stats.nr_events[PERF_RECORD_LOST]; 3446 3445 if (nr_events != 0) { 3447 3446 menu->lost_events = true; 3448 3447 if (!current_entry) ··· 3648 3647 { 3649 3648 struct hists *hists = evsel__hists(browser->block_evsel); 3650 3649 const char *evname = evsel__name(browser->block_evsel); 3651 - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 3650 + unsigned long nr_samples = hists->stats.nr_samples; 3652 3651 int ret; 3653 3652 3654 3653 ret = scnprintf(bf, size, "# Samples: %lu", nr_samples);
+13 -2
tools/perf/ui/stdio/hist.c
··· 897 897 return ret; 898 898 } 899 899 900 - size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) 900 + size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, 901 + bool skip_empty) 901 902 { 902 903 int i; 903 904 size_t ret = 0; 905 + u32 total = stats->nr_events[0]; 904 906 905 907 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 906 908 const char *name; ··· 910 908 name = perf_event__name(i); 911 909 if (!strcmp(name, "UNKNOWN")) 912 910 continue; 911 + if (skip_empty && !stats->nr_events[i]) 912 + continue; 913 913 914 - ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]); 914 + if (i && total) { 915 + ret += fprintf(fp, "%16s events: %10d (%4.1f%%)\n", 916 + name, stats->nr_events[i], 917 + 100.0 * stats->nr_events[i] / total); 918 + } else { 919 + ret += fprintf(fp, "%16s events: %10d\n", 920 + name, stats->nr_events[i]); 921 + } 915 922 } 916 923 917 924 return ret;
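events_stats__fprintf() now optionally skips all-zero rows and, when nr_events[0] (used here as the running total) is non-zero, appends each event's share of that total. A Python sketch of the same formatting, where render_event_stats() and its flat name/count lists are illustrative (the C code additionally skips names reported as UNKNOWN):

```python
def render_event_stats(names, counts, skip_empty=True):
    # counts[0] is treated as the total; later rows optionally get a
    # percentage of it, and all-zero rows can be skipped entirely.
    total = counts[0]
    rows = []
    for i, (name, n) in enumerate(zip(names, counts)):
        if skip_empty and i and not n:
            continue
        if i and total:
            rows.append("%16s events: %10d (%4.1f%%)" % (name, n, 100.0 * n / total))
        else:
            rows.append("%16s events: %10d" % (name, n))
    return rows
```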
+5
tools/perf/util/Build
··· 10 10 perf-y += env.o 11 11 perf-y += event.o 12 12 perf-y += evlist.o 13 + perf-y += evlist-hybrid.o 13 14 perf-y += sideband_evlist.o 14 15 perf-y += evsel.o 15 16 perf-y += evsel_fprintf.o ··· 24 23 perf-y += mmap.o 25 24 perf-y += memswap.o 26 25 perf-y += parse-events.o 26 + perf-y += parse-events-hybrid.o 27 27 perf-y += perf_regs.o 28 28 perf-y += path.o 29 29 perf-y += print_binary.o ··· 71 69 perf-y += pmu.o 72 70 perf-y += pmu-flex.o 73 71 perf-y += pmu-bison.o 72 + perf-y += pmu-hybrid.o 74 73 perf-y += trace-event-read.o 75 74 perf-y += trace-event-info.o 76 75 perf-y += trace-event-scripting.o ··· 105 102 perf-y += thread-stack.o 106 103 perf-y += spark.o 107 104 perf-y += topdown.o 105 + perf-y += iostat.o 108 106 perf-y += stream.o 109 107 perf-$(CONFIG_AUXTRACE) += auxtrace.o 110 108 perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ ··· 168 164 perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o 169 165 170 166 perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o 167 + perf-y += data-convert-json.o 171 168 172 169 perf-y += scripting-engines/ 173 170
+28 -18
tools/perf/util/annotate.c
··· 1161 1161 s64 offset; 1162 1162 char *line; 1163 1163 int line_nr; 1164 + char *fileloc; 1164 1165 }; 1165 1166 1166 1167 static void annotation_line__init(struct annotation_line *al, ··· 1171 1170 al->offset = args->offset; 1172 1171 al->line = strdup(args->line); 1173 1172 al->line_nr = args->line_nr; 1173 + al->fileloc = args->fileloc; 1174 1174 al->data_nr = nr; 1175 1175 } 1176 1176 ··· 1368 1366 { 1369 1367 struct disasm_line *dl = container_of(al, struct disasm_line, al); 1370 1368 static const char *prev_line; 1371 - static const char *prev_color; 1372 1369 1373 1370 if (al->offset != -1) { 1374 1371 double max_percent = 0.0; ··· 1406 1405 1407 1406 color = get_percent_color(max_percent); 1408 1407 1409 - /* 1410 - * Also color the filename and line if needed, with 1411 - * the same color than the percentage. Don't print it 1412 - * twice for close colored addr with the same filename:line 1413 - */ 1414 - if (al->path) { 1415 - if (!prev_line || strcmp(prev_line, al->path) 1416 - || color != prev_color) { 1417 - color_fprintf(stdout, color, " %s", al->path); 1418 - prev_line = al->path; 1419 - prev_color = color; 1420 - } 1421 - } 1422 - 1423 1408 for (i = 0; i < nr_percent; i++) { 1424 1409 struct annotation_data *data = &al->data[i]; 1425 1410 double percent; ··· 1426 1439 printf(" : "); 1427 1440 1428 1441 disasm_line__print(dl, start, addr_fmt_width); 1442 + 1443 + /* 1444 + * Also color the filename and line if needed, with 1445 + * the same color than the percentage. 
Don't print it 1446 + * twice for close colored addr with the same filename:line 1447 + */ 1448 + if (al->path) { 1449 + if (!prev_line || strcmp(prev_line, al->path)) { 1450 + color_fprintf(stdout, color, " // %s", al->path); 1451 + prev_line = al->path; 1452 + } 1453 + } 1454 + 1429 1455 printf("\n"); 1430 1456 } else if (max_lines && printed >= max_lines) 1431 1457 return 1; ··· 1454 1454 if (!*al->line) 1455 1455 printf(" %*s:\n", width, " "); 1456 1456 else 1457 - printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); 1457 + printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line); 1458 1458 } 1459 1459 1460 1460 return 0; ··· 1482 1482 */ 1483 1483 static int symbol__parse_objdump_line(struct symbol *sym, 1484 1484 struct annotate_args *args, 1485 - char *parsed_line, int *line_nr) 1485 + char *parsed_line, int *line_nr, char **fileloc) 1486 1486 { 1487 1487 struct map *map = args->ms.map; 1488 1488 struct annotation *notes = symbol__annotation(sym); ··· 1494 1494 /* /filename:linenr ? Save line number and ignore. 
*/ 1495 1495 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1496 1496 *line_nr = atoi(parsed_line + match[1].rm_so); 1497 + *fileloc = strdup(parsed_line); 1497 1498 return 0; 1498 1499 } 1499 1500 ··· 1514 1513 args->offset = offset; 1515 1514 args->line = parsed_line; 1516 1515 args->line_nr = *line_nr; 1516 + args->fileloc = *fileloc; 1517 1517 args->ms.sym = sym; 1518 1518 1519 1519 dl = disasm_line__new(args); ··· 1809 1807 args->offset = -1; 1810 1808 args->line = strdup(srcline); 1811 1809 args->line_nr = 0; 1810 + args->fileloc = NULL; 1812 1811 args->ms.sym = sym; 1813 1812 dl = disasm_line__new(args); 1814 1813 if (dl) { ··· 1821 1818 args->offset = pc; 1822 1819 args->line = buf + prev_buf_size; 1823 1820 args->line_nr = 0; 1821 + args->fileloc = NULL; 1824 1822 args->ms.sym = sym; 1825 1823 dl = disasm_line__new(args); 1826 1824 if (dl) ··· 1856 1852 args->offset = -1; 1857 1853 args->line = strdup("to be implemented"); 1858 1854 args->line_nr = 0; 1855 + args->fileloc = NULL; 1859 1856 dl = disasm_line__new(args); 1860 1857 if (dl) 1861 1858 annotation_line__add(&dl->al, &notes->src->source); ··· 1938 1933 bool delete_extract = false; 1939 1934 bool decomp = false; 1940 1935 int lineno = 0; 1936 + char *fileloc = NULL; 1941 1937 int nline; 1942 1938 char *line; 1943 1939 size_t line_len; ··· 2066 2060 * See disasm_line__new() and struct disasm_line::line_nr. 
2067 2061 */ 2068 2062 if (symbol__parse_objdump_line(sym, args, expanded_line, 2069 - &lineno) < 0) 2063 + &lineno, &fileloc) < 0) 2070 2064 break; 2071 2065 nline++; 2072 2066 } ··· 3150 3144 opt->use_offset = perf_config_bool("use_offset", value); 3151 3145 } else if (!strcmp(var, "annotate.disassembler_style")) { 3152 3146 opt->disassembler_style = value; 3147 + } else if (!strcmp(var, "annotate.demangle")) { 3148 + symbol_conf.demangle = perf_config_bool("demangle", value); 3149 + } else if (!strcmp(var, "annotate.demangle_kernel")) { 3150 + symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value); 3153 3151 } else { 3154 3152 pr_debug("%s variable unknown, ignoring...", var); 3155 3153 }
+2
tools/perf/util/annotate.h
··· 84 84 print_lines, 85 85 full_path, 86 86 show_linenr, 87 + show_fileloc, 87 88 show_nr_jumps, 88 89 show_minmax_cycle, 89 90 show_asm_raw, ··· 137 136 s64 offset; 138 137 char *line; 139 138 int line_nr; 139 + char *fileloc; 140 140 int jump_sources; 141 141 float ipc; 142 142 u64 cycles;
+1 -1
tools/perf/util/bpf-loader.c
··· 671 671 * After probing, let's consider prologue, which 672 672 * adds program fetcher to BPF programs. 673 673 * 674 - * hook_load_preprocessorr() hooks pre-processor 674 + * hook_load_preprocessor() hooks pre-processor 675 675 * to bpf_program, let it generate prologue 676 676 * dynamically during loading. 677 677 */
+539 -5
tools/perf/util/bpf_counter.c
··· 5 5 #include <assert.h> 6 6 #include <limits.h> 7 7 #include <unistd.h> 8 + #include <sys/file.h> 8 9 #include <sys/time.h> 9 10 #include <sys/resource.h> 10 11 #include <linux/err.h> ··· 13 12 #include <bpf/bpf.h> 14 13 #include <bpf/btf.h> 15 14 #include <bpf/libbpf.h> 15 + #include <api/fs/fs.h> 16 + #include <perf/bpf_perf.h> 16 17 17 18 #include "bpf_counter.h" 18 19 #include "counts.h" 19 20 #include "debug.h" 20 21 #include "evsel.h" 22 + #include "evlist.h" 21 23 #include "target.h" 24 + #include "cpumap.h" 25 + #include "thread_map.h" 22 26 23 27 #include "bpf_skel/bpf_prog_profiler.skel.h" 28 + #include "bpf_skel/bperf_u.h" 29 + #include "bpf_skel/bperf_leader.skel.h" 30 + #include "bpf_skel/bperf_follower.skel.h" 31 + 32 + #define ATTR_MAP_SIZE 16 24 33 25 34 static inline void *u64_to_ptr(__u64 ptr) 26 35 { ··· 215 204 return 0; 216 205 } 217 206 207 + static int bpf_program_profiler__disable(struct evsel *evsel) 208 + { 209 + struct bpf_counter *counter; 210 + 211 + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { 212 + assert(counter->skel != NULL); 213 + bpf_prog_profiler_bpf__detach(counter->skel); 214 + } 215 + return 0; 216 + } 217 + 218 218 static int bpf_program_profiler__read(struct evsel *evsel) 219 219 { 220 220 // perf_cpu_map uses /sys/devices/system/cpu/online ··· 291 269 struct bpf_counter_ops bpf_program_profiler_ops = { 292 270 .load = bpf_program_profiler__load, 293 271 .enable = bpf_program_profiler__enable, 272 + .disable = bpf_program_profiler__disable, 294 273 .read = bpf_program_profiler__read, 295 274 .destroy = bpf_program_profiler__destroy, 296 275 .install_pe = bpf_program_profiler__install_pe, 297 276 }; 298 277 278 + static __u32 bpf_link_get_id(int fd) 279 + { 280 + struct bpf_link_info link_info = {0}; 281 + __u32 link_info_len = sizeof(link_info); 282 + 283 + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); 284 + return link_info.id; 285 + } 286 + 287 + static __u32 bpf_link_get_prog_id(int fd) 
288 + { 289 + struct bpf_link_info link_info = {0}; 290 + __u32 link_info_len = sizeof(link_info); 291 + 292 + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); 293 + return link_info.prog_id; 294 + } 295 + 296 + static __u32 bpf_map_get_id(int fd) 297 + { 298 + struct bpf_map_info map_info = {0}; 299 + __u32 map_info_len = sizeof(map_info); 300 + 301 + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); 302 + return map_info.id; 303 + } 304 + 305 + static bool bperf_attr_map_compatible(int attr_map_fd) 306 + { 307 + struct bpf_map_info map_info = {0}; 308 + __u32 map_info_len = sizeof(map_info); 309 + int err; 310 + 311 + err = bpf_obj_get_info_by_fd(attr_map_fd, &map_info, &map_info_len); 312 + 313 + if (err) 314 + return false; 315 + return (map_info.key_size == sizeof(struct perf_event_attr)) && 316 + (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); 317 + } 318 + 319 + static int bperf_lock_attr_map(struct target *target) 320 + { 321 + char path[PATH_MAX]; 322 + int map_fd, err; 323 + 324 + if (target->attr_map) { 325 + scnprintf(path, PATH_MAX, "%s", target->attr_map); 326 + } else { 327 + scnprintf(path, PATH_MAX, "%s/fs/bpf/%s", sysfs__mountpoint(), 328 + BPF_PERF_DEFAULT_ATTR_MAP_PATH); 329 + } 330 + 331 + if (access(path, F_OK)) { 332 + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, 333 + sizeof(struct perf_event_attr), 334 + sizeof(struct perf_event_attr_map_entry), 335 + ATTR_MAP_SIZE, 0); 336 + if (map_fd < 0) 337 + return -1; 338 + 339 + err = bpf_obj_pin(map_fd, path); 340 + if (err) { 341 + /* someone pinned the map in parallel? 
*/ 342 + close(map_fd); 343 + map_fd = bpf_obj_get(path); 344 + if (map_fd < 0) 345 + return -1; 346 + } 347 + } else { 348 + map_fd = bpf_obj_get(path); 349 + if (map_fd < 0) 350 + return -1; 351 + } 352 + 353 + if (!bperf_attr_map_compatible(map_fd)) { 354 + close(map_fd); 355 + return -1; 356 + 357 + } 358 + err = flock(map_fd, LOCK_EX); 359 + if (err) { 360 + close(map_fd); 361 + return -1; 362 + } 363 + return map_fd; 364 + } 365 + 366 + /* trigger the leader program on a cpu */ 367 + static int bperf_trigger_reading(int prog_fd, int cpu) 368 + { 369 + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 370 + .ctx_in = NULL, 371 + .ctx_size_in = 0, 372 + .flags = BPF_F_TEST_RUN_ON_CPU, 373 + .cpu = cpu, 374 + .retval = 0, 375 + ); 376 + 377 + return bpf_prog_test_run_opts(prog_fd, &opts); 378 + } 379 + 380 + static int bperf_check_target(struct evsel *evsel, 381 + struct target *target, 382 + enum bperf_filter_type *filter_type, 383 + __u32 *filter_entry_cnt) 384 + { 385 + if (evsel->leader->core.nr_members > 1) { 386 + pr_err("bpf managed perf events do not yet support groups.\n"); 387 + return -1; 388 + } 389 + 390 + /* determine filter type based on target */ 391 + if (target->system_wide) { 392 + *filter_type = BPERF_FILTER_GLOBAL; 393 + *filter_entry_cnt = 1; 394 + } else if (target->cpu_list) { 395 + *filter_type = BPERF_FILTER_CPU; 396 + *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel)); 397 + } else if (target->tid) { 398 + *filter_type = BPERF_FILTER_PID; 399 + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); 400 + } else if (target->pid || evsel->evlist->workload.pid != -1) { 401 + *filter_type = BPERF_FILTER_TGID; 402 + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); 403 + } else { 404 + pr_err("bpf managed perf events do not yet support these targets.\n"); 405 + return -1; 406 + } 407 + 408 + return 0; 409 + } 410 + 411 + static struct perf_cpu_map *all_cpu_map; 412 + 413 + static int bperf_reload_leader_program(struct 
evsel *evsel, int attr_map_fd, 414 + struct perf_event_attr_map_entry *entry) 415 + { 416 + struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); 417 + int link_fd, diff_map_fd, err; 418 + struct bpf_link *link = NULL; 419 + 420 + if (!skel) { 421 + pr_err("Failed to open leader skeleton\n"); 422 + return -1; 423 + } 424 + 425 + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); 426 + err = bperf_leader_bpf__load(skel); 427 + if (err) { 428 + pr_err("Failed to load leader skeleton\n"); 429 + goto out; 430 + } 431 + 432 + err = -1; 433 + link = bpf_program__attach(skel->progs.on_switch); 434 + if (!link) { 435 + pr_err("Failed to attach leader program\n"); 436 + goto out; 437 + } 438 + 439 + link_fd = bpf_link__fd(link); 440 + diff_map_fd = bpf_map__fd(skel->maps.diff_readings); 441 + entry->link_id = bpf_link_get_id(link_fd); 442 + entry->diff_map_id = bpf_map_get_id(diff_map_fd); 443 + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY); 444 + assert(err == 0); 445 + 446 + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id); 447 + assert(evsel->bperf_leader_link_fd >= 0); 448 + 449 + /* 450 + * save leader_skel for install_pe, which is called within 451 + * following evsel__open_per_cpu call 452 + */ 453 + evsel->leader_skel = skel; 454 + evsel__open_per_cpu(evsel, all_cpu_map, -1); 455 + 456 + out: 457 + bperf_leader_bpf__destroy(skel); 458 + bpf_link__destroy(link); 459 + return err; 460 + } 461 + 462 + static int bperf__load(struct evsel *evsel, struct target *target) 463 + { 464 + struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; 465 + int attr_map_fd, diff_map_fd = -1, err; 466 + enum bperf_filter_type filter_type; 467 + __u32 filter_entry_cnt, i; 468 + 469 + if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) 470 + return -1; 471 + 472 + if (!all_cpu_map) { 473 + all_cpu_map = perf_cpu_map__new(NULL); 474 + if (!all_cpu_map) 475 + return -1; 476 + } 477 + 478 + 
evsel->bperf_leader_prog_fd = -1; 479 + evsel->bperf_leader_link_fd = -1; 480 + 481 + /* 482 + * Step 1: hold a fd on the leader program and the bpf_link, if 483 + * the program is not already gone, reload the program. 484 + * Use flock() to ensure exclusive access to the perf_event_attr 485 + * map. 486 + */ 487 + attr_map_fd = bperf_lock_attr_map(target); 488 + if (attr_map_fd < 0) { 489 + pr_err("Failed to lock perf_event_attr map\n"); 490 + return -1; 491 + } 492 + 493 + err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry); 494 + if (err) { 495 + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY); 496 + if (err) 497 + goto out; 498 + } 499 + 500 + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); 501 + if (evsel->bperf_leader_link_fd < 0 && 502 + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) 503 + goto out; 504 + 505 + /* 506 + * The bpf_link holds reference to the leader program, and the 507 + * leader program holds reference to the maps. Therefore, if 508 + * link_id is valid, diff_map_id should also be valid. 509 + */ 510 + evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id( 511 + bpf_link_get_prog_id(evsel->bperf_leader_link_fd)); 512 + assert(evsel->bperf_leader_prog_fd >= 0); 513 + 514 + diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id); 515 + assert(diff_map_fd >= 0); 516 + 517 + /* 518 + * bperf uses BPF_PROG_TEST_RUN to get accurate reading. 
Check 519 + * whether the kernel support it 520 + */ 521 + err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0); 522 + if (err) { 523 + pr_err("The kernel does not support test_run for raw_tp BPF programs.\n" 524 + "Therefore, --use-bpf might show inaccurate readings\n"); 525 + goto out; 526 + } 527 + 528 + /* Step 2: load the follower skeleton */ 529 + evsel->follower_skel = bperf_follower_bpf__open(); 530 + if (!evsel->follower_skel) { 531 + pr_err("Failed to open follower skeleton\n"); 532 + goto out; 533 + } 534 + 535 + /* attach fexit program to the leader program */ 536 + bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX, 537 + evsel->bperf_leader_prog_fd, "on_switch"); 538 + 539 + /* connect to leader diff_reading map */ 540 + bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd); 541 + 542 + /* set up reading map */ 543 + bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, 544 + filter_entry_cnt); 545 + /* set up follower filter based on target */ 546 + bpf_map__set_max_entries(evsel->follower_skel->maps.filter, 547 + filter_entry_cnt); 548 + err = bperf_follower_bpf__load(evsel->follower_skel); 549 + if (err) { 550 + pr_err("Failed to load follower skeleton\n"); 551 + bperf_follower_bpf__destroy(evsel->follower_skel); 552 + evsel->follower_skel = NULL; 553 + goto out; 554 + } 555 + 556 + for (i = 0; i < filter_entry_cnt; i++) { 557 + int filter_map_fd; 558 + __u32 key; 559 + 560 + if (filter_type == BPERF_FILTER_PID || 561 + filter_type == BPERF_FILTER_TGID) 562 + key = evsel->core.threads->map[i].pid; 563 + else if (filter_type == BPERF_FILTER_CPU) 564 + key = evsel->core.cpus->map[i]; 565 + else 566 + break; 567 + 568 + filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); 569 + bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); 570 + } 571 + 572 + evsel->follower_skel->bss->type = filter_type; 573 + 574 + err = bperf_follower_bpf__attach(evsel->follower_skel); 575 + 576 + out: 
577 + if (err && evsel->bperf_leader_link_fd >= 0) 578 + close(evsel->bperf_leader_link_fd); 579 + if (err && evsel->bperf_leader_prog_fd >= 0) 580 + close(evsel->bperf_leader_prog_fd); 581 + if (diff_map_fd >= 0) 582 + close(diff_map_fd); 583 + 584 + flock(attr_map_fd, LOCK_UN); 585 + close(attr_map_fd); 586 + 587 + return err; 588 + } 589 + 590 + static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) 591 + { 592 + struct bperf_leader_bpf *skel = evsel->leader_skel; 593 + 594 + return bpf_map_update_elem(bpf_map__fd(skel->maps.events), 595 + &cpu, &fd, BPF_ANY); 596 + } 597 + 598 + /* 599 + * trigger the leader prog on each cpu, so the accum_reading map could get 600 + * the latest readings. 601 + */ 602 + static int bperf_sync_counters(struct evsel *evsel) 603 + { 604 + int num_cpu, i, cpu; 605 + 606 + num_cpu = all_cpu_map->nr; 607 + for (i = 0; i < num_cpu; i++) { 608 + cpu = all_cpu_map->map[i]; 609 + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); 610 + } 611 + return 0; 612 + } 613 + 614 + static int bperf__enable(struct evsel *evsel) 615 + { 616 + evsel->follower_skel->bss->enabled = 1; 617 + return 0; 618 + } 619 + 620 + static int bperf__disable(struct evsel *evsel) 621 + { 622 + evsel->follower_skel->bss->enabled = 0; 623 + return 0; 624 + } 625 + 626 + static int bperf__read(struct evsel *evsel) 627 + { 628 + struct bperf_follower_bpf *skel = evsel->follower_skel; 629 + __u32 num_cpu_bpf = cpu__max_cpu(); 630 + struct bpf_perf_event_value values[num_cpu_bpf]; 631 + int reading_map_fd, err = 0; 632 + __u32 i, j, num_cpu; 633 + 634 + bperf_sync_counters(evsel); 635 + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); 636 + 637 + for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { 638 + __u32 cpu; 639 + 640 + err = bpf_map_lookup_elem(reading_map_fd, &i, values); 641 + if (err) 642 + goto out; 643 + switch (evsel->follower_skel->bss->type) { 644 + case BPERF_FILTER_GLOBAL: 645 + assert(i == 0); 646 + 647 + 
num_cpu = all_cpu_map->nr; 648 + for (j = 0; j < num_cpu; j++) { 649 + cpu = all_cpu_map->map[j]; 650 + perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; 651 + perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; 652 + perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; 653 + } 654 + break; 655 + case BPERF_FILTER_CPU: 656 + cpu = evsel->core.cpus->map[i]; 657 + perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; 658 + perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; 659 + perf_counts(evsel->counts, i, 0)->run = values[cpu].running; 660 + break; 661 + case BPERF_FILTER_PID: 662 + case BPERF_FILTER_TGID: 663 + perf_counts(evsel->counts, 0, i)->val = 0; 664 + perf_counts(evsel->counts, 0, i)->ena = 0; 665 + perf_counts(evsel->counts, 0, i)->run = 0; 666 + 667 + for (cpu = 0; cpu < num_cpu_bpf; cpu++) { 668 + perf_counts(evsel->counts, 0, i)->val += values[cpu].counter; 669 + perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled; 670 + perf_counts(evsel->counts, 0, i)->run += values[cpu].running; 671 + } 672 + break; 673 + default: 674 + break; 675 + } 676 + } 677 + out: 678 + return err; 679 + } 680 + 681 + static int bperf__destroy(struct evsel *evsel) 682 + { 683 + bperf_follower_bpf__destroy(evsel->follower_skel); 684 + close(evsel->bperf_leader_prog_fd); 685 + close(evsel->bperf_leader_link_fd); 686 + return 0; 687 + } 688 + 689 + /* 690 + * bperf: share hardware PMCs with BPF 691 + * 692 + * perf uses performance monitoring counters (PMC) to monitor system 693 + * performance. The PMCs are limited hardware resources. For example, 694 + * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. 695 + * 696 + * Modern data center systems use these PMCs in many different ways: 697 + * system level monitoring, (maybe nested) container level monitoring, per 698 + * process monitoring, profiling (in sample mode), etc. 
In some cases, 699 + * there are more active perf_events than available hardware PMCs. To allow 700 + * all perf_events to have a chance to run, it is necessary to do expensive 701 + * time multiplexing of events. 702 + * 703 + * On the other hand, many monitoring tools count the common metrics 704 + * (cycles, instructions). It is a waste to have multiple tools create 705 + * multiple perf_events of "cycles" and occupy multiple PMCs. 706 + * 707 + * bperf tries to reduce such wastes by allowing multiple perf_events of 708 + * "cycles" or "instructions" (at different scopes) to share PMUs. Instead 709 + * of having each perf-stat session to read its own perf_events, bperf uses 710 + * BPF programs to read the perf_events and aggregate readings to BPF maps. 711 + * Then, the perf-stat session(s) reads the values from these BPF maps. 712 + * 713 + * || 714 + * shared progs and maps <- || -> per session progs and maps 715 + * || 716 + * --------------- || 717 + * | perf_events | || 718 + * --------------- fexit || ----------------- 719 + * | --------||----> | follower prog | 720 + * --------------- / || --- ----------------- 721 + * cs -> | leader prog |/ ||/ | | 722 + * --> --------------- /|| -------------- ------------------ 723 + * / | | / || | filter map | | accum_readings | 724 + * / ------------ ------------ || -------------- ------------------ 725 + * | | prev map | | diff map | || | 726 + * | ------------ ------------ || | 727 + * \ || | 728 + * = \ ==================================================== | ============ 729 + * \ / user space 730 + * \ / 731 + * \ / 732 + * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM 733 + * \ / 734 + * \ / 735 + * \------ perf-stat ----------------------/ 736 + * 737 + * The figure above shows the architecture of bperf. Note that the figure 738 + * is divided into 3 regions: shared progs and maps (top left), per session 739 + * progs and maps (top right), and user space (bottom). 
740 + * 741 + * The leader prog is triggered on each context switch (cs). The leader 742 + * prog reads perf_events and stores the difference (current_reading - 743 + * previous_reading) to the diff map. For the same metric, e.g. "cycles", 744 + * multiple perf-stat sessions share the same leader prog. 745 + * 746 + * Each perf-stat session creates a follower prog as fexit program to the 747 + * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38) 748 + * follower progs to the same leader prog. The follower prog checks current 749 + * task and processor ID to decide whether to add the value from the diff 750 + * map to its accumulated reading map (accum_readings). 751 + * 752 + * Finally, perf-stat user space reads the value from accum_reading map. 753 + * 754 + * Besides context switch, it is also necessary to trigger the leader prog 755 + * before perf-stat reads the value. Otherwise, the accum_reading map may 756 + * not have the latest reading from the perf_events. This is achieved by 757 + * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU. 758 + * 759 + * Comment before the definition of struct perf_event_attr_map_entry 760 + * describes how different sessions of perf-stat share information about 761 + * the leader prog. 
762 + */ 763 + 764 + struct bpf_counter_ops bperf_ops = { 765 + .load = bperf__load, 766 + .enable = bperf__enable, 767 + .disable = bperf__disable, 768 + .read = bperf__read, 769 + .install_pe = bperf__install_pe, 770 + .destroy = bperf__destroy, 771 + }; 772 + 773 + static inline bool bpf_counter_skip(struct evsel *evsel) 774 + { 775 + return list_empty(&evsel->bpf_counter_list) && 776 + evsel->follower_skel == NULL; 777 + } 778 + 299 779 int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) 300 780 { 301 - if (list_empty(&evsel->bpf_counter_list)) 781 + if (bpf_counter_skip(evsel)) 302 782 return 0; 303 783 return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); 304 784 } 305 785 306 786 int bpf_counter__load(struct evsel *evsel, struct target *target) 307 787 { 308 - if (target__has_bpf(target)) 788 + if (target->bpf_str) 309 789 evsel->bpf_counter_ops = &bpf_program_profiler_ops; 790 + else if (target->use_bpf || evsel->bpf_counter || 791 + evsel__match_bpf_counter_events(evsel->name)) 792 + evsel->bpf_counter_ops = &bperf_ops; 310 793 311 794 if (evsel->bpf_counter_ops) 312 795 return evsel->bpf_counter_ops->load(evsel, target); ··· 820 293 821 294 int bpf_counter__enable(struct evsel *evsel) 822 295 { 823 - if (list_empty(&evsel->bpf_counter_list)) 296 + if (bpf_counter_skip(evsel)) 824 297 return 0; 825 298 return evsel->bpf_counter_ops->enable(evsel); 826 299 } 827 300 301 + int bpf_counter__disable(struct evsel *evsel) 302 + { 303 + if (bpf_counter_skip(evsel)) 304 + return 0; 305 + return evsel->bpf_counter_ops->disable(evsel); 306 + } 307 + 828 308 int bpf_counter__read(struct evsel *evsel) 829 309 { 830 - if (list_empty(&evsel->bpf_counter_list)) 310 + if (bpf_counter_skip(evsel)) 831 311 return -EAGAIN; 832 312 return evsel->bpf_counter_ops->read(evsel); 833 313 } 834 314 835 315 void bpf_counter__destroy(struct evsel *evsel) 836 316 { 837 - if (list_empty(&evsel->bpf_counter_list)) 317 + if (bpf_counter_skip(evsel)) 838 318 return; 839 
319 evsel->bpf_counter_ops->destroy(evsel); 840 320 evsel->bpf_counter_ops = NULL;
+8 -1
tools/perf/util/bpf_counter.h
··· 18 18 struct bpf_counter_ops { 19 19 bpf_counter_evsel_target_op load; 20 20 bpf_counter_evsel_op enable; 21 + bpf_counter_evsel_op disable; 21 22 bpf_counter_evsel_op read; 22 23 bpf_counter_evsel_op destroy; 23 24 bpf_counter_evsel_install_pe_op install_pe; ··· 33 32 34 33 int bpf_counter__load(struct evsel *evsel, struct target *target); 35 34 int bpf_counter__enable(struct evsel *evsel); 35 + int bpf_counter__disable(struct evsel *evsel); 36 36 int bpf_counter__read(struct evsel *evsel); 37 37 void bpf_counter__destroy(struct evsel *evsel); 38 38 int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); 39 39 40 40 #else /* HAVE_BPF_SKEL */ 41 41 42 - #include<linux/err.h> 42 + #include <linux/err.h> 43 43 44 44 static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, 45 45 struct target *target __maybe_unused) ··· 49 47 } 50 48 51 49 static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused) 50 + { 51 + return 0; 52 + } 53 + 54 + static inline int bpf_counter__disable(struct evsel *evsel __maybe_unused) 52 55 { 53 56 return 0; 54 57 }
+14
tools/perf/util/bpf_skel/bperf.h
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2021 Facebook 3 + 4 + #ifndef __BPERF_STAT_H 5 + #define __BPERF_STAT_H 6 + 7 + typedef struct { 8 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 9 + __uint(key_size, sizeof(__u32)); 10 + __uint(value_size, sizeof(struct bpf_perf_event_value)); 11 + __uint(max_entries, 1); 12 + } reading_map; 13 + 14 + #endif /* __BPERF_STAT_H */
+69
tools/perf/util/bpf_skel/bperf_follower.bpf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2021 Facebook 3 + #include <linux/bpf.h> 4 + #include <linux/perf_event.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + #include "bperf.h" 8 + #include "bperf_u.h" 9 + 10 + reading_map diff_readings SEC(".maps"); 11 + reading_map accum_readings SEC(".maps"); 12 + 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_HASH); 15 + __uint(key_size, sizeof(__u32)); 16 + __uint(value_size, sizeof(__u32)); 17 + } filter SEC(".maps"); 18 + 19 + enum bperf_filter_type type = 0; 20 + int enabled = 0; 21 + 22 + SEC("fexit/XXX") 23 + int BPF_PROG(fexit_XXX) 24 + { 25 + struct bpf_perf_event_value *diff_val, *accum_val; 26 + __u32 filter_key, zero = 0; 27 + __u32 *accum_key; 28 + 29 + if (!enabled) 30 + return 0; 31 + 32 + switch (type) { 33 + case BPERF_FILTER_GLOBAL: 34 + accum_key = &zero; 35 + goto do_add; 36 + case BPERF_FILTER_CPU: 37 + filter_key = bpf_get_smp_processor_id(); 38 + break; 39 + case BPERF_FILTER_PID: 40 + filter_key = bpf_get_current_pid_tgid() & 0xffffffff; 41 + break; 42 + case BPERF_FILTER_TGID: 43 + filter_key = bpf_get_current_pid_tgid() >> 32; 44 + break; 45 + default: 46 + return 0; 47 + } 48 + 49 + accum_key = bpf_map_lookup_elem(&filter, &filter_key); 50 + if (!accum_key) 51 + return 0; 52 + 53 + do_add: 54 + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); 55 + if (!diff_val) 56 + return 0; 57 + 58 + accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); 59 + if (!accum_val) 60 + return 0; 61 + 62 + accum_val->counter += diff_val->counter; 63 + accum_val->enabled += diff_val->enabled; 64 + accum_val->running += diff_val->running; 65 + 66 + return 0; 67 + } 68 + 69 + char LICENSE[] SEC("license") = "Dual BSD/GPL";
+46
tools/perf/util/bpf_skel/bperf_leader.bpf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2021 Facebook 3 + #include <linux/bpf.h> 4 + #include <linux/perf_event.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + #include "bperf.h" 8 + 9 + struct { 10 + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 11 + __uint(key_size, sizeof(__u32)); 12 + __uint(value_size, sizeof(int)); 13 + __uint(map_flags, BPF_F_PRESERVE_ELEMS); 14 + } events SEC(".maps"); 15 + 16 + reading_map prev_readings SEC(".maps"); 17 + reading_map diff_readings SEC(".maps"); 18 + 19 + SEC("raw_tp/sched_switch") 20 + int BPF_PROG(on_switch) 21 + { 22 + struct bpf_perf_event_value val, *prev_val, *diff_val; 23 + __u32 key = bpf_get_smp_processor_id(); 24 + __u32 zero = 0; 25 + long err; 26 + 27 + prev_val = bpf_map_lookup_elem(&prev_readings, &zero); 28 + if (!prev_val) 29 + return 0; 30 + 31 + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); 32 + if (!diff_val) 33 + return 0; 34 + 35 + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); 36 + if (err) 37 + return 0; 38 + 39 + diff_val->counter = val.counter - prev_val->counter; 40 + diff_val->enabled = val.enabled - prev_val->enabled; 41 + diff_val->running = val.running - prev_val->running; 42 + *prev_val = val; 43 + return 0; 44 + } 45 + 46 + char LICENSE[] SEC("license") = "Dual BSD/GPL";
+14
tools/perf/util/bpf_skel/bperf_u.h
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2021 Facebook 3 + 4 + #ifndef __BPERF_STAT_U_H 5 + #define __BPERF_STAT_U_H 6 + 7 + enum bperf_filter_type { 8 + BPERF_FILTER_GLOBAL = 1, 9 + BPERF_FILTER_CPU, 10 + BPERF_FILTER_PID, 11 + BPERF_FILTER_TGID, 12 + }; 13 + 14 + #endif /* __BPERF_STAT_U_H */
+1 -2
tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
··· 52 52 static inline void 53 53 fexit_update_maps(struct bpf_perf_event_value *after) 54 54 { 55 - struct bpf_perf_event_value *before, diff, *accum; 55 + struct bpf_perf_event_value *before, diff; 56 56 __u32 zero = 0; 57 57 58 58 before = bpf_map_lookup_elem(&fentry_readings, &zero); ··· 78 78 { 79 79 struct bpf_perf_event_value reading; 80 80 __u32 cpu = bpf_get_smp_processor_id(); 81 - __u32 one = 1, zero = 0; 82 81 int err; 83 82 84 83 /* read all events before updating the maps, to reduce error */
+1 -1
tools/perf/util/call-path.h
··· 23 23 * @children: tree of call paths of functions called 24 24 * 25 25 * In combination with the call_return structure, the call_path structure 26 - * defines a context-sensitve call-graph. 26 + * defines a context-sensitive call-graph. 27 27 */ 28 28 struct call_path { 29 29 struct call_path *parent;
+1 -1
tools/perf/util/callchain.c
··· 877 877 if (!node) 878 878 return -1; 879 879 880 - /* lookup in childrens */ 880 + /* lookup in children */ 881 881 while (*p) { 882 882 enum match_result ret; 883 883
+8 -1
tools/perf/util/config.c
··· 18 18 #include "util/hist.h" /* perf_hist_config */ 19 19 #include "util/llvm-utils.h" /* perf_llvm_config */ 20 20 #include "util/stat.h" /* perf_stat__set_big_num */ 21 + #include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ 21 22 #include "build-id.h" 22 23 #include "debug.h" 23 24 #include "config.h" ··· 458 457 if (!strcmp(var, "stat.big-num")) 459 458 perf_stat__set_big_num(perf_config_bool(var, value)); 460 459 460 + if (!strcmp(var, "stat.no-csv-summary")) 461 + perf_stat__set_no_csv_summary(perf_config_bool(var, value)); 462 + 463 + if (!strcmp(var, "stat.bpf-counter-events")) 464 + evsel__bpf_counter_events = strdup(value); 465 + 461 466 /* Add other config variables here. */ 462 467 return 0; 463 468 } ··· 706 699 /* perf_config_set can contain both user and system config items. 707 700 * So we should know where each value is from. 708 701 * The classification would be needed when a particular config file 709 - * is overwrited by setting feature i.e. set_config(). 702 + * is overwritten by setting feature i.e. set_config(). 710 703 */ 711 704 if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { 712 705 section->from_system_config = true;
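With the stat_config() additions above, the new stat knobs can also be set persistently in perfconfig; a sketch of such a fragment (key names from the strcmp() calls in the hunk, values illustrative):

```ini
# assumed ~/.perfconfig fragment
[stat]
	no-csv-summary = true
	bpf-counter-events = cycles,instructions
```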
+35 -5
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
··· 6 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 7 */ 8 8 9 + #include <linux/coresight-pmu.h> 9 10 #include <linux/err.h> 10 11 #include <linux/list.h> 11 12 #include <linux/zalloc.h> ··· 317 316 * This is the first timestamp we've seen since the beginning of traces 318 317 * or a discontinuity. Since timestamps packets are generated *after* 319 318 * range packets have been generated, we need to estimate the time at 320 - * which instructions started by substracting the number of instructions 319 + * which instructions started by subtracting the number of instructions 321 320 * executed to the timestamp. 322 321 */ 323 322 packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; ··· 492 491 const ocsd_generic_trace_elem *elem, 493 492 const uint8_t trace_chan_id) 494 493 { 495 - pid_t tid; 494 + pid_t tid = -1; 495 + static u64 pid_fmt; 496 + int ret; 496 497 497 - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ 498 - if (!elem->context.ctxt_id_valid) 498 + /* 499 + * As all the ETMs run at the same exception level, the system should 500 + * have the same PID format crossing CPUs. So cache the PID format 501 + * and reuse it for sequential decoding. 502 + */ 503 + if (!pid_fmt) { 504 + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); 505 + if (ret) 506 + return OCSD_RESP_FATAL_SYS_ERR; 507 + } 508 + 509 + /* 510 + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. 511 + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 512 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. 
513 + */ 514 + switch (pid_fmt) { 515 + case BIT(ETM_OPT_CTXTID): 516 + if (elem->context.ctxt_id_valid) 517 + tid = elem->context.context_id; 518 + break; 519 + case BIT(ETM_OPT_CTXTID2): 520 + if (elem->context.vmid_valid) 521 + tid = elem->context.vmid; 522 + break; 523 + default: 524 + break; 525 + } 526 + 527 + if (tid == -1) 499 528 return OCSD_RESP_CONT; 500 529 501 - tid = elem->context.context_id; 502 530 if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) 503 531 return OCSD_RESP_FATAL_SYS_ERR; 504 532
+239 -46
tools/perf/util/cs-etm.c
··· 7 7 */ 8 8 9 9 #include <linux/bitops.h> 10 + #include <linux/coresight-pmu.h> 10 11 #include <linux/err.h> 11 12 #include <linux/kernel.h> 12 13 #include <linux/log2.h> ··· 157 156 return 0; 158 157 } 159 158 159 + /* 160 + * The returned PID format is presented by two bits: 161 + * 162 + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; 163 + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. 164 + * 165 + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 166 + * are enabled at the same time when the session runs on an EL2 kernel. 167 + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 168 + * recorded in the trace data, the tool will selectively use 169 + * CONTEXTIDR_EL2 as PID. 170 + */ 171 + int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) 172 + { 173 + struct int_node *inode; 174 + u64 *metadata, val; 175 + 176 + inode = intlist__find(traceid_list, trace_chan_id); 177 + if (!inode) 178 + return -EINVAL; 179 + 180 + metadata = inode->priv; 181 + 182 + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 183 + val = metadata[CS_ETM_ETMCR]; 184 + /* CONTEXTIDR is traced */ 185 + if (val & BIT(ETM_OPT_CTXTID)) 186 + *pid_fmt = BIT(ETM_OPT_CTXTID); 187 + } else { 188 + val = metadata[CS_ETMV4_TRCCONFIGR]; 189 + /* CONTEXTIDR_EL2 is traced */ 190 + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 191 + *pid_fmt = BIT(ETM_OPT_CTXTID2); 192 + /* CONTEXTIDR_EL1 is traced */ 193 + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 194 + *pid_fmt = BIT(ETM_OPT_CTXTID); 195 + } 196 + 197 + return 0; 198 + } 199 + 160 200 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 161 201 u8 trace_chan_id) 162 202 { 163 203 /* 164 - * Wnen a timestamp packet is encountered the backend code 204 + * When a timestamp packet is encountered the backend code 165 205 * is stopped so that the front end has time to process packets 166 206 * that were accumulated in the traceID queue. 
Since there can 167 207 * be more than one channel per cs_etm_queue, we need to specify ··· 1697 1655 * | 1 1 0 1 1 1 1 1 | imm8 | 1698 1656 * +-----------------+--------+ 1699 1657 * 1700 - * According to the specifiction, it only defines SVC for T32 1658 + * According to the specification, it only defines SVC for T32 1701 1659 * with 16 bits instruction and has no definition for 32bits; 1702 1660 * so below only read 2 bytes as instruction size for T32. 1703 1661 */ ··· 1929 1887 1930 1888 /* 1931 1889 * If the previous packet is an exception return packet 1932 - * and the return address just follows SVC instuction, 1890 + * and the return address just follows SVC instruction, 1933 1891 * it needs to calibrate the previous packet sample flags 1934 1892 * as PERF_IP_FLAG_SYSCALLRET. 1935 1893 */ ··· 2003 1961 * contain exception type related info so we cannot decide 2004 1962 * the exception type purely based on exception return packet. 2005 1963 * If we record the exception number from exception packet and 2006 - * reuse it for excpetion return packet, this is not reliable 1964 + * reuse it for exception return packet, this is not reliable 2007 1965 * due the trace can be discontinuity or the interrupt can 2008 1966 * be nested, thus the recorded exception number cannot be 2009 1967 * used for exception return packet for these two cases. 
··· 2477 2435 } 2478 2436 2479 2437 static const char * const cs_etm_global_header_fmts[] = { 2480 - [CS_HEADER_VERSION_0] = " Header version %llx\n", 2438 + [CS_HEADER_VERSION] = " Header version %llx\n", 2481 2439 [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", 2482 2440 [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", 2483 2441 }; ··· 2485 2443 static const char * const cs_etm_priv_fmts[] = { 2486 2444 [CS_ETM_MAGIC] = " Magic number %llx\n", 2487 2445 [CS_ETM_CPU] = " CPU %lld\n", 2446 + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", 2488 2447 [CS_ETM_ETMCR] = " ETMCR %llx\n", 2489 2448 [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", 2490 2449 [CS_ETM_ETMCCER] = " ETMCCER %llx\n", ··· 2495 2452 static const char * const cs_etmv4_priv_fmts[] = { 2496 2453 [CS_ETM_MAGIC] = " Magic number %llx\n", 2497 2454 [CS_ETM_CPU] = " CPU %lld\n", 2455 + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", 2498 2456 [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", 2499 2457 [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", 2500 2458 [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", ··· 2505 2461 [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", 2506 2462 }; 2507 2463 2464 + static const char * const param_unk_fmt = 2465 + " Unknown parameter [%d] %llx\n"; 2466 + static const char * const magic_unk_fmt = 2467 + " Magic number Unknown %llx\n"; 2468 + 2469 + static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) 2470 + { 2471 + int i = *offset, j, nr_params = 0, fmt_offset; 2472 + __u64 magic; 2473 + 2474 + /* check magic value */ 2475 + magic = val[i + CS_ETM_MAGIC]; 2476 + if ((magic != __perf_cs_etmv3_magic) && 2477 + (magic != __perf_cs_etmv4_magic)) { 2478 + /* failure - note bad magic value */ 2479 + fprintf(stdout, magic_unk_fmt, magic); 2480 + return -EINVAL; 2481 + } 2482 + 2483 + /* print common header block */ 2484 + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); 2485 + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); 2486 + 2487 + if (magic == 
__perf_cs_etmv3_magic) { 2488 + nr_params = CS_ETM_NR_TRC_PARAMS_V0; 2489 + fmt_offset = CS_ETM_ETMCR; 2490 + /* after common block, offset format index past NR_PARAMS */ 2491 + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) 2492 + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); 2493 + } else if (magic == __perf_cs_etmv4_magic) { 2494 + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; 2495 + fmt_offset = CS_ETMV4_TRCCONFIGR; 2496 + /* after common block, offset format index past NR_PARAMS */ 2497 + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) 2498 + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); 2499 + } 2500 + *offset = i; 2501 + return 0; 2502 + } 2503 + 2504 + static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) 2505 + { 2506 + int i = *offset, j, total_params = 0; 2507 + __u64 magic; 2508 + 2509 + magic = val[i + CS_ETM_MAGIC]; 2510 + /* total params to print is NR_PARAMS + common block size for v1 */ 2511 + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; 2512 + 2513 + if (magic == __perf_cs_etmv3_magic) { 2514 + for (j = 0; j < total_params; j++, i++) { 2515 + /* if newer record - could be excess params */ 2516 + if (j >= CS_ETM_PRIV_MAX) 2517 + fprintf(stdout, param_unk_fmt, j, val[i]); 2518 + else 2519 + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); 2520 + } 2521 + } else if (magic == __perf_cs_etmv4_magic) { 2522 + for (j = 0; j < total_params; j++, i++) { 2523 + /* if newer record - could be excess params */ 2524 + if (j >= CS_ETMV4_PRIV_MAX) 2525 + fprintf(stdout, param_unk_fmt, j, val[i]); 2526 + else 2527 + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); 2528 + } 2529 + } else { 2530 + /* failure - note bad magic value and error out */ 2531 + fprintf(stdout, magic_unk_fmt, magic); 2532 + return -EINVAL; 2533 + } 2534 + *offset = i; 2535 + return 0; 2536 + } 2537 + 2508 2538 static void cs_etm__print_auxtrace_info(__u64 *val, int num) 2509 2539 { 2510 - int i, j, cpu = 0; 2540 + int i, cpu = 0, 
version, err; 2511 2541 2512 - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) 2542 + /* bail out early on bad header version */ 2543 + version = val[0]; 2544 + if (version > CS_HEADER_CURRENT_VERSION) { 2545 + /* failure.. return */ 2546 + fprintf(stdout, " Unknown Header Version = %x, ", version); 2547 + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); 2548 + return; 2549 + } 2550 + 2551 + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) 2513 2552 fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); 2514 2553 2515 - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { 2516 - if (val[i] == __perf_cs_etmv3_magic) 2517 - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) 2518 - fprintf(stdout, cs_etm_priv_fmts[j], val[i]); 2519 - else if (val[i] == __perf_cs_etmv4_magic) 2520 - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) 2521 - fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); 2522 - else 2523 - /* failure.. return */ 2554 + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { 2555 + if (version == 0) 2556 + err = cs_etm__print_cpu_metadata_v0(val, &i); 2557 + else if (version == 1) 2558 + err = cs_etm__print_cpu_metadata_v1(val, &i); 2559 + if (err) 2524 2560 return; 2525 2561 } 2562 + } 2563 + 2564 + /* 2565 + * Read a single cpu parameter block from the auxtrace_info priv block. 2566 + * 2567 + * For version 1 there is a per cpu nr_params entry. If we are handling 2568 + * version 1 file, then there may be less, the same, or more params 2569 + * indicated by this value than the compile time number we understand. 2570 + * 2571 + * For a version 0 info block, there are a fixed number, and we need to 2572 + * fill out the nr_param value in the metadata we create. 
2573 + */ 2574 + static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 2575 + int out_blk_size, int nr_params_v0) 2576 + { 2577 + u64 *metadata = NULL; 2578 + int hdr_version; 2579 + int nr_in_params, nr_out_params, nr_cmn_params; 2580 + int i, k; 2581 + 2582 + metadata = zalloc(sizeof(*metadata) * out_blk_size); 2583 + if (!metadata) 2584 + return NULL; 2585 + 2586 + /* read block current index & version */ 2587 + i = *buff_in_offset; 2588 + hdr_version = buff_in[CS_HEADER_VERSION]; 2589 + 2590 + if (!hdr_version) { 2591 + /* read version 0 info block into a version 1 metadata block */ 2592 + nr_in_params = nr_params_v0; 2593 + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 2594 + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 2595 + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 2596 + /* remaining block params at offset +1 from source */ 2597 + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 2598 + metadata[k + 1] = buff_in[i + k]; 2599 + /* version 0 has 2 common params */ 2600 + nr_cmn_params = 2; 2601 + } else { 2602 + /* read version 1 info block - input and output nr_params may differ */ 2603 + /* version 1 has 3 common params */ 2604 + nr_cmn_params = 3; 2605 + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 2606 + 2607 + /* if input has more params than output - skip excess */ 2608 + nr_out_params = nr_in_params + nr_cmn_params; 2609 + if (nr_out_params > out_blk_size) 2610 + nr_out_params = out_blk_size; 2611 + 2612 + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 2613 + metadata[k] = buff_in[i + k]; 2614 + 2615 + /* record the actual nr params we copied */ 2616 + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 2617 + } 2618 + 2619 + /* adjust in offset by number of in params used */ 2620 + i += nr_in_params + nr_cmn_params; 2621 + *buff_in_offset = i; 2622 + return metadata; 2526 2623 } 2527 2624 2528 2625 int cs_etm__process_auxtrace_info(union perf_event *event, ··· 2677 2492 int info_header_size; 
2678 2493 int total_size = auxtrace_info->header.size; 2679 2494 int priv_size = 0; 2680 - int num_cpu; 2681 - int err = 0, idx = -1; 2682 - int i, j, k; 2495 + int num_cpu, trcidr_idx; 2496 + int err = 0; 2497 + int i, j; 2683 2498 u64 *ptr, *hdr = NULL; 2684 2499 u64 **metadata = NULL; 2500 + u64 hdr_version; 2685 2501 2686 2502 /* 2687 2503 * sizeof(auxtrace_info_event::type) + ··· 2698 2512 /* First the global part */ 2699 2513 ptr = (u64 *) auxtrace_info->priv; 2700 2514 2701 - /* Look for version '0' of the header */ 2702 - if (ptr[0] != 0) 2515 + /* Look for version of the header */ 2516 + hdr_version = ptr[0]; 2517 + if (hdr_version > CS_HEADER_CURRENT_VERSION) { 2518 + /* print routine will print an error on bad version */ 2519 + if (dump_trace) 2520 + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); 2703 2521 return -EINVAL; 2522 + } 2704 2523 2705 - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); 2524 + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); 2706 2525 if (!hdr) 2707 2526 return -ENOMEM; 2708 2527 2709 2528 /* Extract header information - see cs-etm.h for format */ 2710 - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) 2529 + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) 2711 2530 hdr[i] = ptr[i]; 2712 2531 num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; 2713 2532 pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & ··· 2743 2552 */ 2744 2553 for (j = 0; j < num_cpu; j++) { 2745 2554 if (ptr[i] == __perf_cs_etmv3_magic) { 2746 - metadata[j] = zalloc(sizeof(*metadata[j]) * 2747 - CS_ETM_PRIV_MAX); 2748 - if (!metadata[j]) { 2749 - err = -ENOMEM; 2750 - goto err_free_metadata; 2751 - } 2752 - for (k = 0; k < CS_ETM_PRIV_MAX; k++) 2753 - metadata[j][k] = ptr[i + k]; 2555 + metadata[j] = 2556 + cs_etm__create_meta_blk(ptr, &i, 2557 + CS_ETM_PRIV_MAX, 2558 + CS_ETM_NR_TRC_PARAMS_V0); 2754 2559 2755 2560 /* The traceID is our handle */ 2756 - idx = metadata[j][CS_ETM_ETMTRACEIDR]; 2757 - i += CS_ETM_PRIV_MAX; 2561 + trcidr_idx = 
CS_ETM_ETMTRACEIDR; 2562 + 2758 2563 } else if (ptr[i] == __perf_cs_etmv4_magic) { 2759 - metadata[j] = zalloc(sizeof(*metadata[j]) * 2760 - CS_ETMV4_PRIV_MAX); 2761 - if (!metadata[j]) { 2762 - err = -ENOMEM; 2763 - goto err_free_metadata; 2764 - } 2765 - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) 2766 - metadata[j][k] = ptr[i + k]; 2564 + metadata[j] = 2565 + cs_etm__create_meta_blk(ptr, &i, 2566 + CS_ETMV4_PRIV_MAX, 2567 + CS_ETMV4_NR_TRC_PARAMS_V0); 2767 2568 2768 2569 /* The traceID is our handle */ 2769 - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; 2770 - i += CS_ETMV4_PRIV_MAX; 2570 + trcidr_idx = CS_ETMV4_TRCTRACEIDR; 2571 + } 2572 + 2573 + if (!metadata[j]) { 2574 + err = -ENOMEM; 2575 + goto err_free_metadata; 2771 2576 } 2772 2577 2773 2578 /* Get an RB node for this CPU */ 2774 - inode = intlist__findnew(traceid_list, idx); 2579 + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); 2775 2580 2776 2581 /* Something went wrong, no need to continue */ 2777 2582 if (!inode) { ··· 2788 2601 } 2789 2602 2790 2603 /* 2791 - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and 2604 + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 2792 2605 * CS_ETMV4_PRIV_MAX mark how many double words are in the 2793 2606 * global metadata, and each cpu's metadata respectively. 2794 2607 * The following tests if the correct number of double words was ··· 2890 2703 intlist__delete(traceid_list); 2891 2704 err_free_hdr: 2892 2705 zfree(&hdr); 2893 - 2706 + /* 2707 + * At this point, as a minimum we have valid header. Dump the rest of 2708 + * the info section - the print routines will error out on structural 2709 + * issues. 2710 + */ 2711 + if (dump_trace) 2712 + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); 2894 2713 return err; 2895 2714 }
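The version-0 to version-1 metadata upgrade performed by cs_etm__create_meta_blk above reduces to one move: insert an explicit parameter-count word after the common MAGIC/CPU pair so readers no longer depend on a compile-time length. A minimal stand-alone sketch of that layout change, with toy names (BLK_*, upgrade_v0_block) standing in for the perf structures:

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* v1 common block: magic, cpu, then an explicit parameter count */
enum { BLK_MAGIC, BLK_CPU, BLK_NR_PARAMS, BLK_COMMON_MAX };

/*
 * A v0 block is MAGIC, CPU, then exactly nr_params_v0 parameters.
 * Build a v1 block that carries the count explicitly; the parameters
 * shift up by one slot to make room for it.
 */
static uint64_t *upgrade_v0_block(const uint64_t *v0, int nr_params_v0)
{
	uint64_t *v1 = calloc(BLK_COMMON_MAX + nr_params_v0, sizeof(*v1));

	if (!v1)
		return NULL;

	v1[BLK_MAGIC] = v0[0];
	v1[BLK_CPU] = v0[1];
	v1[BLK_NR_PARAMS] = nr_params_v0;
	/* v0 parameters start right after MAGIC and CPU */
	memcpy(&v1[BLK_COMMON_MAX], &v0[2], nr_params_v0 * sizeof(*v1));
	return v1;
}
```

A v1 reader can then trust BLK_NR_PARAMS instead of hard-coding how many words follow, which is what lets newer and older perf.data files interoperate.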
+29 -7
tools/perf/util/cs-etm.h
···
 struct perf_session;

-/* Versionning header in case things need tro change in the future. That way
+/*
+ * Versioning header in case things need to change in the future. That way
  * decoding of old snapshot is still possible.
  */
 enum {
 	/* Starting with 0x0 */
-	CS_HEADER_VERSION_0,
+	CS_HEADER_VERSION,
 	/* PMU->type (32 bit), total # of CPUs (32 bit) */
 	CS_PMU_TYPE_CPUS,
 	CS_ETM_SNAPSHOT,
-	CS_HEADER_VERSION_0_MAX,
+	CS_HEADER_VERSION_MAX,
 };
+
+/*
+ * Update the version for new format.
+ *
+ * New version 1 format adds a param count to the per cpu metadata.
+ * This allows easy adding of new metadata parameters.
+ * Requires that new params always added after current ones.
+ * Also allows client reader to handle file versions that are different by
+ * checking the number of params in the file vs the number expected.
+ */
+#define CS_HEADER_CURRENT_VERSION 1

 /* Beginning of header common to both ETMv3 and V4 */
 enum {
 	CS_ETM_MAGIC,
 	CS_ETM_CPU,
+	/* Number of trace config params in following ETM specific block */
+	CS_ETM_NR_TRC_PARAMS,
+	CS_ETM_COMMON_BLK_MAX_V1,
 };

 /* ETMv3/PTM metadata */
 enum {
 	/* Dynamic, configurable parameters */
-	CS_ETM_ETMCR = CS_ETM_CPU + 1,
+	CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1,
 	CS_ETM_ETMTRACEIDR,
 	/* RO, taken from sysFS */
 	CS_ETM_ETMCCER,
···
 	CS_ETM_PRIV_MAX,
 };

+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1)
+
 /* ETMv4 metadata */
 enum {
 	/* Dynamic, configurable parameters */
-	CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+	CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
 	CS_ETMV4_TRCTRACEIDR,
 	/* RO, taken from sysFS */
 	CS_ETMV4_TRCIDR0,
···
 	CS_ETMV4_PRIV_MAX,
 };

+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
+
 /*
  * ETMv3 exception encoding number:
- * See Embedded Trace Macrocell spcification (ARM IHI 0014Q)
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
  * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
  */
 enum {
···

 #define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)

-#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64))

 #define __perf_cs_etmv3_magic 0x3030303030303030ULL
 #define __perf_cs_etmv4_magic 0x4040404040404040ULL
···
 int cs_etm__process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session);
 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
 			 pid_t tid, u8 trace_chan_id);
 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
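The two `*_NR_TRC_PARAMS_V0` macros above use the enum-range idiom `LAST - FIRST + 1` to recover the fixed version-0 parameter count from the existing enumerators. A toy illustration of the same idiom (the DEMO_* names are invented, not the cs-etm layout):

```c
#include <assert.h>

/* toy layout, not the real cs-etm metadata */
enum {
	DEMO_MAGIC,
	DEMO_CPU,
	DEMO_NR_PARAMS,
	DEMO_COMMON_BLK_MAX,	/* common block ends here */
	DEMO_PARAM_A = DEMO_COMMON_BLK_MAX,
	DEMO_PARAM_B,
	DEMO_PARAM_C,
	DEMO_PRIV_MAX,
};

/* fixed v0 length: first through last v0 parameter, inclusive */
#define DEMO_NR_TRC_PARAMS_V0 (DEMO_PARAM_C - DEMO_PARAM_A + 1)

static int demo_nr_params_v0(void)
{
	return DEMO_NR_TRC_PARAMS_V0;
}
```

Because the count is derived from the enum itself, adding a new parameter at the end of the block (before the *_PRIV_MAX sentinel) never breaks the old-file length.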
+2 -2
tools/perf/util/data-convert-bt.c
···
 #include <babeltrace/ctf/events.h>
 #include <traceevent/event-parse.h>
 #include "asm/bug.h"
-#include "data-convert-bt.h"
+#include "data-convert.h"
 #include "session.h"
 #include "debug.h"
 #include "tool.h"
···
 	/*
 	 * Add '_' prefix to potential keywork. According to
 	 * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
-	 * futher CTF spec updating may require us to use '$'.
+	 * further CTF spec updating may require us to use '$'.
 	 */
 	if (dup < 0)
 		len = strlen(name) + sizeof("_");
-11
tools/perf/util/data-convert-bt.h
···
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DATA_CONVERT_BT_H
-#define __DATA_CONVERT_BT_H
-#include "data-convert.h"
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
-
-int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
-			 struct perf_data_convert_opts *opts);
-
-#endif /* HAVE_LIBBABELTRACE_SUPPORT */
-#endif /* __DATA_CONVERT_BT_H */
+384
tools/perf/util/data-convert-json.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * JSON export. 4 + * 5 + * Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com> 6 + */ 7 + 8 + #include "data-convert.h" 9 + 10 + #include <fcntl.h> 11 + #include <inttypes.h> 12 + #include <sys/stat.h> 13 + #include <unistd.h> 14 + 15 + #include "linux/compiler.h" 16 + #include "linux/err.h" 17 + #include "util/auxtrace.h" 18 + #include "util/debug.h" 19 + #include "util/dso.h" 20 + #include "util/event.h" 21 + #include "util/evsel.h" 22 + #include "util/evlist.h" 23 + #include "util/header.h" 24 + #include "util/map.h" 25 + #include "util/session.h" 26 + #include "util/symbol.h" 27 + #include "util/thread.h" 28 + #include "util/tool.h" 29 + 30 + struct convert_json { 31 + struct perf_tool tool; 32 + FILE *out; 33 + bool first; 34 + u64 events_count; 35 + }; 36 + 37 + // Outputs a JSON-encoded string surrounded by quotes with characters escaped. 38 + static void output_json_string(FILE *out, const char *s) 39 + { 40 + fputc('"', out); 41 + while (*s) { 42 + switch (*s) { 43 + 44 + // required escapes with special forms as per RFC 8259 45 + case '"': fputs("\\\"", out); break; 46 + case '\\': fputs("\\\\", out); break; 47 + case '\b': fputs("\\b", out); break; 48 + case '\f': fputs("\\f", out); break; 49 + case '\n': fputs("\\n", out); break; 50 + case '\r': fputs("\\r", out); break; 51 + case '\t': fputs("\\t", out); break; 52 + 53 + default: 54 + // all other control characters must be escaped by hex code 55 + if (*s <= 0x1f) 56 + fprintf(out, "\\u%04x", *s); 57 + else 58 + fputc(*s, out); 59 + break; 60 + } 61 + 62 + ++s; 63 + } 64 + fputc('"', out); 65 + } 66 + 67 + // Outputs an optional comma, newline and indentation to delimit a new value 68 + // from the previous one in a JSON object or array. 
69 + static void output_json_delimiters(FILE *out, bool comma, int depth) 70 + { 71 + int i; 72 + 73 + if (comma) 74 + fputc(',', out); 75 + fputc('\n', out); 76 + for (i = 0; i < depth; ++i) 77 + fputc('\t', out); 78 + } 79 + 80 + // Outputs a printf format string (with delimiter) as a JSON value. 81 + __printf(4, 5) 82 + static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...) 83 + { 84 + va_list args; 85 + 86 + output_json_delimiters(out, comma, depth); 87 + va_start(args, format); 88 + vfprintf(out, format, args); 89 + va_end(args); 90 + } 91 + 92 + // Outputs a JSON key-value pair where the value is a string. 93 + static void output_json_key_string(FILE *out, bool comma, int depth, 94 + const char *key, const char *value) 95 + { 96 + output_json_delimiters(out, comma, depth); 97 + output_json_string(out, key); 98 + fputs(": ", out); 99 + output_json_string(out, value); 100 + } 101 + 102 + // Outputs a JSON key-value pair where the value is a printf format string. 103 + __printf(5, 6) 104 + static void output_json_key_format(FILE *out, bool comma, int depth, 105 + const char *key, const char *format, ...) 
106 + { 107 + va_list args; 108 + 109 + output_json_delimiters(out, comma, depth); 110 + output_json_string(out, key); 111 + fputs(": ", out); 112 + va_start(args, format); 113 + vfprintf(out, format, args); 114 + va_end(args); 115 + } 116 + 117 + static void output_sample_callchain_entry(struct perf_tool *tool, 118 + u64 ip, struct addr_location *al) 119 + { 120 + struct convert_json *c = container_of(tool, struct convert_json, tool); 121 + FILE *out = c->out; 122 + 123 + output_json_format(out, false, 4, "{"); 124 + output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip); 125 + 126 + if (al && al->sym && al->sym->namelen) { 127 + fputc(',', out); 128 + output_json_key_string(out, false, 5, "symbol", al->sym->name); 129 + 130 + if (al->map && al->map->dso) { 131 + const char *dso = al->map->dso->short_name; 132 + 133 + if (dso && strlen(dso) > 0) { 134 + fputc(',', out); 135 + output_json_key_string(out, false, 5, "dso", dso); 136 + } 137 + } 138 + } 139 + 140 + output_json_format(out, false, 4, "}"); 141 + } 142 + 143 + static int process_sample_event(struct perf_tool *tool, 144 + union perf_event *event __maybe_unused, 145 + struct perf_sample *sample, 146 + struct evsel *evsel __maybe_unused, 147 + struct machine *machine) 148 + { 149 + struct convert_json *c = container_of(tool, struct convert_json, tool); 150 + FILE *out = c->out; 151 + struct addr_location al, tal; 152 + u8 cpumode = PERF_RECORD_MISC_USER; 153 + 154 + if (machine__resolve(machine, &al, sample) < 0) { 155 + pr_err("Sample resolution failed!\n"); 156 + return -1; 157 + } 158 + 159 + ++c->events_count; 160 + 161 + if (c->first) 162 + c->first = false; 163 + else 164 + fputc(',', out); 165 + output_json_format(out, false, 2, "{"); 166 + 167 + output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time); 168 + output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_); 169 + output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid); 170 + 171 + if 
(al.thread->cpu >= 0) 172 + output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu); 173 + 174 + output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread)); 175 + 176 + output_json_key_format(out, true, 3, "callchain", "["); 177 + if (sample->callchain) { 178 + unsigned int i; 179 + bool ok; 180 + bool first_callchain = true; 181 + 182 + for (i = 0; i < sample->callchain->nr; ++i) { 183 + u64 ip = sample->callchain->ips[i]; 184 + 185 + if (ip >= PERF_CONTEXT_MAX) { 186 + switch (ip) { 187 + case PERF_CONTEXT_HV: 188 + cpumode = PERF_RECORD_MISC_HYPERVISOR; 189 + break; 190 + case PERF_CONTEXT_KERNEL: 191 + cpumode = PERF_RECORD_MISC_KERNEL; 192 + break; 193 + case PERF_CONTEXT_USER: 194 + cpumode = PERF_RECORD_MISC_USER; 195 + break; 196 + default: 197 + pr_debug("invalid callchain context: %" 198 + PRId64 "\n", (s64) ip); 199 + break; 200 + } 201 + continue; 202 + } 203 + 204 + if (first_callchain) 205 + first_callchain = false; 206 + else 207 + fputc(',', out); 208 + 209 + ok = thread__find_symbol(al.thread, cpumode, ip, &tal); 210 + output_sample_callchain_entry(tool, ip, ok ? 
&tal : NULL); 211 + } 212 + } else { 213 + output_sample_callchain_entry(tool, sample->ip, &al); 214 + } 215 + output_json_format(out, false, 3, "]"); 216 + 217 + output_json_format(out, false, 2, "}"); 218 + return 0; 219 + } 220 + 221 + static void output_headers(struct perf_session *session, struct convert_json *c) 222 + { 223 + struct stat st; 224 + struct perf_header *header = &session->header; 225 + int ret; 226 + int fd = perf_data__fd(session->data); 227 + int i; 228 + FILE *out = c->out; 229 + 230 + output_json_key_format(out, false, 2, "header-version", "%u", header->version); 231 + 232 + ret = fstat(fd, &st); 233 + if (ret >= 0) { 234 + time_t stctime = st.st_mtime; 235 + char buf[256]; 236 + 237 + strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime)); 238 + output_json_key_string(out, true, 2, "captured-on", buf); 239 + } else { 240 + pr_debug("Failed to get mtime of source file, not writing captured-on"); 241 + } 242 + 243 + output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset); 244 + output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); 245 + output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); 246 + 247 + output_json_key_string(out, true, 2, "hostname", header->env.hostname); 248 + output_json_key_string(out, true, 2, "os-release", header->env.os_release); 249 + output_json_key_string(out, true, 2, "arch", header->env.arch); 250 + 251 + output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); 252 + output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); 253 + output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); 254 + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); 255 + 256 + if (header->env.clock.enabled) { 257 + output_json_key_format(out, true, 2, "clockid", 258 + "%u", header->env.clock.clockid); 259 + output_json_key_format(out, true, 2, "clock-time", 260 
+ "%" PRIu64, header->env.clock.clockid_ns); 261 + output_json_key_format(out, true, 2, "real-time", 262 + "%" PRIu64, header->env.clock.tod_ns); 263 + } 264 + 265 + output_json_key_string(out, true, 2, "perf-version", header->env.version); 266 + 267 + output_json_key_format(out, true, 2, "cmdline", "["); 268 + for (i = 0; i < header->env.nr_cmdline; i++) { 269 + output_json_delimiters(out, i != 0, 3); 270 + output_json_string(c->out, header->env.cmdline_argv[i]); 271 + } 272 + output_json_format(out, false, 2, "]"); 273 + } 274 + 275 + int bt_convert__perf2json(const char *input_name, const char *output_name, 276 + struct perf_data_convert_opts *opts __maybe_unused) 277 + { 278 + struct perf_session *session; 279 + int fd; 280 + int ret = -1; 281 + 282 + struct convert_json c = { 283 + .tool = { 284 + .sample = process_sample_event, 285 + .mmap = perf_event__process_mmap, 286 + .mmap2 = perf_event__process_mmap2, 287 + .comm = perf_event__process_comm, 288 + .namespaces = perf_event__process_namespaces, 289 + .cgroup = perf_event__process_cgroup, 290 + .exit = perf_event__process_exit, 291 + .fork = perf_event__process_fork, 292 + .lost = perf_event__process_lost, 293 + .tracing_data = perf_event__process_tracing_data, 294 + .build_id = perf_event__process_build_id, 295 + .id_index = perf_event__process_id_index, 296 + .auxtrace_info = perf_event__process_auxtrace_info, 297 + .auxtrace = perf_event__process_auxtrace, 298 + .event_update = perf_event__process_event_update, 299 + .ordered_events = true, 300 + .ordering_requires_timestamps = true, 301 + }, 302 + .first = true, 303 + .events_count = 0, 304 + }; 305 + 306 + struct perf_data data = { 307 + .mode = PERF_DATA_MODE_READ, 308 + .path = input_name, 309 + .force = opts->force, 310 + }; 311 + 312 + if (opts->all) { 313 + pr_err("--all is currently unsupported for JSON output.\n"); 314 + goto err; 315 + } 316 + if (opts->tod) { 317 + pr_err("--tod is currently unsupported for JSON output.\n"); 318 + goto err; 
319 + } 320 + 321 + fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666); 322 + if (fd == -1) { 323 + if (errno == EEXIST) 324 + pr_err("Output file exists. Use --force to overwrite it.\n"); 325 + else 326 + pr_err("Error opening output file!\n"); 327 + goto err; 328 + } 329 + 330 + c.out = fdopen(fd, "w"); 331 + if (!c.out) { 332 + fprintf(stderr, "Error opening output file!\n"); 333 + close(fd); 334 + goto err; 335 + } 336 + 337 + session = perf_session__new(&data, false, &c.tool); 338 + if (IS_ERR(session)) { 339 + fprintf(stderr, "Error creating perf session!\n"); 340 + goto err_fclose; 341 + } 342 + 343 + if (symbol__init(&session->header.env) < 0) { 344 + fprintf(stderr, "Symbol init error!\n"); 345 + goto err_session_delete; 346 + } 347 + 348 + // The opening brace is printed manually because it isn't delimited from a 349 + // previous value (i.e. we don't want a leading newline) 350 + fputc('{', c.out); 351 + 352 + // Version number for future-proofing. Most additions should be able to be 353 + // done in a backwards-compatible way so this should only need to be bumped 354 + // if some major breaking change must be made. 
355 + output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1"); 356 + 357 + // Output headers 358 + output_json_format(c.out, true, 1, "\"headers\": {"); 359 + output_headers(session, &c); 360 + output_json_format(c.out, false, 1, "}"); 361 + 362 + // Output samples 363 + output_json_format(c.out, true, 1, "\"samples\": ["); 364 + perf_session__process_events(session); 365 + output_json_format(c.out, false, 1, "]"); 366 + output_json_format(c.out, false, 0, "}"); 367 + fputc('\n', c.out); 368 + 369 + fprintf(stderr, 370 + "[ perf data convert: Converted '%s' into JSON data '%s' ]\n", 371 + data.path, output_name); 372 + 373 + fprintf(stderr, 374 + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", 375 + (ftell(c.out)) / 1024.0 / 1024.0, c.events_count); 376 + 377 + ret = 0; 378 + err_session_delete: 379 + perf_session__delete(session); 380 + err_fclose: 381 + fclose(c.out); 382 + err: 383 + return ret; 384 + }
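output_json_string above implements the RFC 8259 escaping rules: the short-form escapes for quote, backslash and the common control characters, with \u00XX as the fallback for any other control character. The same logic, sketched against a caller-supplied buffer instead of a FILE so the result is easy to inspect (json_escape is a hypothetical helper, not perf API):

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Write s as a quoted, RFC 8259-escaped JSON string into out. */
static void json_escape(const char *s, char *out, size_t outsz)
{
	size_t n = 0;

	n += snprintf(out + n, outsz - n, "\"");
	for (; *s; s++) {
		switch (*s) {
		/* required escapes with special short forms */
		case '"':  n += snprintf(out + n, outsz - n, "\\\""); break;
		case '\\': n += snprintf(out + n, outsz - n, "\\\\"); break;
		case '\b': n += snprintf(out + n, outsz - n, "\\b"); break;
		case '\f': n += snprintf(out + n, outsz - n, "\\f"); break;
		case '\n': n += snprintf(out + n, outsz - n, "\\n"); break;
		case '\r': n += snprintf(out + n, outsz - n, "\\r"); break;
		case '\t': n += snprintf(out + n, outsz - n, "\\t"); break;
		default:
			/* remaining control characters need hex escapes */
			if ((unsigned char)*s <= 0x1f)
				n += snprintf(out + n, outsz - n, "\\u%04x", *s);
			else
				n += snprintf(out + n, outsz - n, "%c", *s);
		}
	}
	snprintf(out + n, outsz - n, "\"");
}
```

Non-ASCII bytes pass through untouched, which keeps valid UTF-8 input valid in the output.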
+10
tools/perf/util/data-convert.h
···
 #ifndef __DATA_CONVERT_H
 #define __DATA_CONVERT_H

+#include <stdbool.h>
+
 struct perf_data_convert_opts {
 	bool force;
 	bool all;
 	bool tod;
 };
+
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+			  struct perf_data_convert_opts *opts);

 #endif /* __DATA_CONVERT_H */
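Moving both prototypes into data-convert.h lets callers choose an output format in one place, with CTF gated on babeltrace support while JSON is always built. A rough sketch of that dispatch decision (pick_converter is invented for illustration, not a perf function):

```c
#include <assert.h>
#include <stddef.h>
#include <string.h>

/*
 * pick_converter: JSON output is unconditional; CTF output is only
 * available when babeltrace support was compiled in.
 */
static const char *pick_converter(const char *fmt, int have_babeltrace)
{
	if (!strcmp(fmt, "json"))
		return "json";
	if (!strcmp(fmt, "ctf") && have_babeltrace)
		return "ctf";
	return NULL;	/* unknown or unavailable format */
}
```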
+2 -2
tools/perf/util/demangle-java.c
···
  * Demangle Java function signature (openJDK, not GCJ)
  * input:
  *	str: string to parse. String is not modified
- *	flags: comobination of JAVA_DEMANGLE_* flags to modify demangling
+ *	flags: combination of JAVA_DEMANGLE_* flags to modify demangling
  * return:
  *	if input can be demangled, then a newly allocated string is returned.
  *	if input cannot be demangled, then NULL is returned
···
 	if (!str)
 		return NULL;

-	/* find start of retunr type */
+	/* find start of return type */
 	p = strrchr(str, ')');
 	if (!p)
 		return NULL;
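The demangler can jump straight to the return type because a JVM method descriptor is always "(argument types)return type", so strrchr() for the last ')' finds the boundary. A tiny sketch of that parsing step (jvm_return_type is a hypothetical helper):

```c
#include <assert.h>
#include <string.h>

/*
 * Return a pointer to the return-type portion of a JVM method
 * descriptor, or NULL if the descriptor has no ')'.
 */
static const char *jvm_return_type(const char *descriptor)
{
	const char *p = strrchr(descriptor, ')');

	return p ? p + 1 : NULL;
}
```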
-12
tools/perf/util/demangle-ocaml.c
···
 	}
 	result[j] = '\0';

-	/* scan backwards to remove an "_" followed by decimal digits */
-	if (j != 0 && isdigit(result[j - 1])) {
-		while (--j) {
-			if (!isdigit(result[j])) {
-				break;
-			}
-		}
-		if (result[j] == '_') {
-			result[j] = '\0';
-		}
-	}
-
 	return result;
 }
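For reference, the deleted block implemented exactly this suffix-stripping; keeping compiler-generated numeric ids (e.g. foo_123) visible is the point of removing it. A stand-alone version of the removed behavior, so the change is easy to see (a sketch, not the perf code itself):

```c
#include <assert.h>
#include <ctype.h>
#include <string.h>

/* The removed behavior: strip a trailing "_<digits>" from name in place. */
static void strip_numeric_suffix(char *name)
{
	size_t j = strlen(name);

	if (j && isdigit((unsigned char)name[j - 1])) {
		/* scan backwards over the digits */
		while (--j) {
			if (!isdigit((unsigned char)name[j]))
				break;
		}
		/* only strip when an '_' introduces the digit run */
		if (name[j] == '_')
			name[j] = '\0';
	}
}
```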
+1 -1
tools/perf/util/dso.h
···

 /* dso__for_each_symbol - iterate over the symbols of given type
  *
- * @dso: the 'struct dso *' in which symbols itereated
+ * @dso: the 'struct dso *' in which symbols are iterated
  * @pos: the 'struct symbol *' to use as a loop cursor
  * @n: the 'struct rb_node *' to use as a temporary storage
  */
+3 -3
tools/perf/util/dwarf-aux.c
···
 			return NULL;
 	} while (laddr == addr);
 	l++;
-	/* Going foward to find the statement line */
+	/* Going forward to find the statement line */
 	do {
 		line = dwarf_onesrcline(lines, l++);
 		if (!line || dwarf_lineaddr(line, &laddr) != 0 ||
···
  * die_get_linkage_name - Get the linkage name of the object
  * @dw_die: A DIE of the object
  *
- * Get the linkage name attiribute of given @dw_die.
+ * Get the linkage name attribute of given @dw_die.
  * For C++ binary, the linkage name will be the mangled symbol.
  */
 const char *die_get_linkage_name(Dwarf_Die *dw_die)
···
  * @data: user data
  *
  * Walk on the instances of give @in_die. @in_die must be an inlined function
- * declartion. This returns the return value of @callback if it returns
+ * declaration. This returns the return value of @callback if it returns
  * non-zero value, or -ENOENT if there is no instance.
  */
 int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+1 -1
tools/perf/util/dwarf-aux.h
··· 22 22 int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, 23 23 const char **fname, int *lineno); 24 24 25 - /* Walk on funcitons at given address */ 25 + /* Walk on functions at given address */ 26 26 int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, 27 27 int (*callback)(Dwarf_Die *, void *), void *data); 28 28
+3
tools/perf/util/dwarf-regs.c
··· 24 24 #include "../arch/s390/include/dwarf-regs-table.h" 25 25 #include "../arch/sparc/include/dwarf-regs-table.h" 26 26 #include "../arch/xtensa/include/dwarf-regs-table.h" 27 + #include "../arch/mips/include/dwarf-regs-table.h" 27 28 28 29 #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) 29 30 ··· 54 53 return __get_dwarf_regstr(sparc_regstr_tbl, n); 55 54 case EM_XTENSA: 56 55 return __get_dwarf_regstr(xtensa_regstr_tbl, n); 56 + case EM_MIPS: 57 + return __get_dwarf_regstr(mips_regstr_tbl, n); 57 58 default: 58 59 pr_err("ELF MACHINE %x is not supported.\n", machine); 59 60 }
+3
tools/perf/util/event.h
··· 147 147 u8 cpumode; 148 148 u16 misc; 149 149 u16 ins_lat; 150 + u16 p_stage_cyc; 150 151 bool no_hw_idx; /* No hw_idx collected in branch_stack */ 151 152 char insn[MAX_INSN]; 152 153 void *raw_data; ··· 428 427 429 428 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); 430 429 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); 430 + const char *arch_perf_header_entry(const char *se_header); 431 + int arch_support_sort_key(const char *sort_key); 431 432 432 433 #endif /* __PERF_RECORD_H */
+10 -5
tools/perf/util/events_stats.h
··· 21 21 * all struct perf_record_lost_samples.lost fields reported. 22 22 * 23 23 * The total_period is needed because by default auto-freq is used, so 24 - * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get 24 + * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get 25 25 * the total number of low level events, it is necessary to to sum all struct 26 26 * perf_record_sample.period and stash the result in total_period. 27 27 */ 28 28 struct events_stats { 29 - u64 total_period; 30 - u64 total_non_filtered_period; 31 29 u64 total_lost; 32 30 u64 total_lost_samples; 33 31 u64 total_aux_lost; 34 32 u64 total_aux_partial; 35 33 u64 total_invalid_chains; 36 34 u32 nr_events[PERF_RECORD_HEADER_MAX]; 37 - u32 nr_non_filtered_samples; 38 35 u32 nr_lost_warned; 39 36 u32 nr_unknown_events; 40 37 u32 nr_invalid_chains; ··· 41 44 u32 nr_proc_map_timeout; 42 45 }; 43 46 47 + struct hists_stats { 48 + u64 total_period; 49 + u64 total_non_filtered_period; 50 + u32 nr_samples; 51 + u32 nr_non_filtered_samples; 52 + }; 53 + 44 54 void events_stats__inc(struct events_stats *stats, u32 type); 45 55 46 - size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); 56 + size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, 57 + bool skip_empty); 47 58 48 59 #endif /* __PERF_EVENTS_STATS_ */
+88
tools/perf/util/evlist-hybrid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <errno.h> 3 + #include <inttypes.h> 4 + #include "cpumap.h" 5 + #include "evlist.h" 6 + #include "evsel.h" 7 + #include "../perf.h" 8 + #include "util/pmu-hybrid.h" 9 + #include "util/evlist-hybrid.h" 10 + #include "debug.h" 11 + #include <unistd.h> 12 + #include <stdlib.h> 13 + #include <linux/err.h> 14 + #include <linux/string.h> 15 + #include <perf/evlist.h> 16 + #include <perf/evsel.h> 17 + #include <perf/cpumap.h> 18 + 19 + int evlist__add_default_hybrid(struct evlist *evlist, bool precise) 20 + { 21 + struct evsel *evsel; 22 + struct perf_pmu *pmu; 23 + __u64 config; 24 + struct perf_cpu_map *cpus; 25 + 26 + perf_pmu__for_each_hybrid_pmu(pmu) { 27 + config = PERF_COUNT_HW_CPU_CYCLES | 28 + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT); 29 + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, 30 + config); 31 + if (!evsel) 32 + return -ENOMEM; 33 + 34 + cpus = perf_cpu_map__get(pmu->cpus); 35 + evsel->core.cpus = cpus; 36 + evsel->core.own_cpus = perf_cpu_map__get(cpus); 37 + evsel->pmu_name = strdup(pmu->name); 38 + evlist__add(evlist, evsel); 39 + } 40 + 41 + return 0; 42 + } 43 + 44 + static bool group_hybrid_conflict(struct evsel *leader) 45 + { 46 + struct evsel *pos, *prev = NULL; 47 + 48 + for_each_group_evsel(pos, leader) { 49 + if (!evsel__is_hybrid(pos)) 50 + continue; 51 + 52 + if (prev && strcmp(prev->pmu_name, pos->pmu_name)) 53 + return true; 54 + 55 + prev = pos; 56 + } 57 + 58 + return false; 59 + } 60 + 61 + void evlist__warn_hybrid_group(struct evlist *evlist) 62 + { 63 + struct evsel *evsel; 64 + 65 + evlist__for_each_entry(evlist, evsel) { 66 + if (evsel__is_group_leader(evsel) && 67 + evsel->core.nr_members > 1 && 68 + group_hybrid_conflict(evsel)) { 69 + pr_warning("WARNING: events in group from " 70 + "different hybrid PMUs!\n"); 71 + return; 72 + } 73 + } 74 + } 75 + 76 + bool evlist__has_hybrid(struct evlist *evlist) 77 + { 78 + struct evsel *evsel; 79 + 80 + evlist__for_each_entry(evlist, evsel) { 81 + if (evsel->pmu_name && 82 + perf_pmu__is_hybrid(evsel->pmu_name)) { 83 + return true; 84 + } 85 + } 86 + 87 + return false; 88 + }
+14
tools/perf/util/evlist-hybrid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __PERF_EVLIST_HYBRID_H 3 + #define __PERF_EVLIST_HYBRID_H 4 + 5 + #include <linux/compiler.h> 6 + #include <linux/kernel.h> 7 + #include "evlist.h" 8 + #include <unistd.h> 9 + 10 + int evlist__add_default_hybrid(struct evlist *evlist, bool precise); 11 + void evlist__warn_hybrid_group(struct evlist *evlist); 12 + bool evlist__has_hybrid(struct evlist *evlist); 13 + 14 + #endif /* __PERF_EVLIST_HYBRID_H */
+36 -2
tools/perf/util/evlist.c
··· 17 17 #include "evsel.h" 18 18 #include "debug.h" 19 19 #include "units.h" 20 + #include "bpf_counter.h" 20 21 #include <internal/lib.h> // page_size 21 22 #include "affinity.h" 22 23 #include "../perf.h" ··· 26 25 #include "util/string2.h" 27 26 #include "util/perf_api_probe.h" 28 27 #include "util/evsel_fprintf.h" 28 + #include "util/evlist-hybrid.h" 29 29 #include <signal.h> 30 30 #include <unistd.h> 31 31 #include <sched.h> ··· 38 36 #include <fcntl.h> 39 37 #include <sys/ioctl.h> 40 38 #include <sys/mman.h> 39 + #include <sys/prctl.h> 41 40 42 41 #include <linux/bitops.h> 43 42 #include <linux/hash.h> ··· 249 246 250 247 int __evlist__add_default(struct evlist *evlist, bool precise) 251 248 { 252 - struct evsel *evsel = evsel__new_cycles(precise); 249 + struct evsel *evsel; 253 250 251 + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, 252 + PERF_COUNT_HW_CPU_CYCLES); 254 253 if (evsel == NULL) 255 254 return -ENOMEM; 256 255 ··· 424 419 425 420 if (affinity__setup(&affinity) < 0) 426 421 return; 422 + 423 + evlist__for_each_entry(evlist, pos) 424 + bpf_counter__disable(pos); 427 425 428 426 /* Disable 'immediate' events last */ 429 427 for (imm = 0; imm <= 1; imm++) { ··· 1217 1209 } 1218 1210 } 1219 1211 1220 - /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1212 + /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */ 1221 1213 if ((sample_type & PERF_SAMPLE_READ) && 1222 1214 !(read_format & PERF_FORMAT_ID)) { 1223 1215 return false; ··· 1412 1404 close(child_ready_pipe[0]); 1413 1405 close(go_pipe[1]); 1414 1406 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1407 + 1408 + /* 1409 + * Change the name of this process not to confuse --exclude-perf users 1410 + * that sees 'perf' in the window up to the execvp() and thinks that 1411 + * perf samples are not being excluded. 1412 + */ 1413 + prctl(PR_SET_NAME, "perf-exec"); 1415 1414 1416 1415 /* 1417 1416 * Tell the parent we're ready to go ··· 2144 2129 return evsel; 2145 2130 } 2146 2131 return NULL; 2132 + } 2133 + 2134 + int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) 2135 + { 2136 + struct evsel *evsel; 2137 + int printed = 0; 2138 + 2139 + evlist__for_each_entry(evlist, evsel) { 2140 + if (evsel__is_dummy_event(evsel)) 2141 + continue; 2142 + if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { 2143 + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); 2144 + } else { 2145 + printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : ""); 2146 + break; 2147 + } 2148 + } 2149 + 2150 + return printed; 2147 2151 }
+2
tools/perf/util/evlist.h
··· 365 365 #define EVLIST_DISABLED_MSG "Events disabled\n" 366 366 367 367 struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); 368 + 369 + int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); 368 370 #endif /* __PERF_EVLIST_H */
+33 -5
tools/perf/util/evsel.c
··· 47 47 #include "memswap.h" 48 48 #include "util.h" 49 49 #include "hashmap.h" 50 + #include "pmu-hybrid.h" 50 51 #include "../perf-sys.h" 51 52 #include "util/parse-branch-options.h" 52 53 #include <internal/xyarray.h> ··· 296 295 return perf_event_paranoid_check(1); 297 296 } 298 297 299 - struct evsel *evsel__new_cycles(bool precise) 298 + struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) 300 299 { 301 300 struct perf_event_attr attr = { 302 - .type = PERF_TYPE_HARDWARE, 303 - .config = PERF_COUNT_HW_CPU_CYCLES, 301 + .type = type, 302 + .config = config, 304 303 .exclude_kernel = !perf_event_can_profile_kernel(), 305 304 }; 306 305 struct evsel *evsel; ··· 493 492 "ref-cycles", 494 493 }; 495 494 495 + char *evsel__bpf_counter_events; 496 + 497 + bool evsel__match_bpf_counter_events(const char *name) 498 + { 499 + int name_len; 500 + bool match; 501 + char *ptr; 502 + 503 + if (!evsel__bpf_counter_events) 504 + return false; 505 + 506 + ptr = strstr(evsel__bpf_counter_events, name); 507 + name_len = strlen(name); 508 + 509 + /* check name matches a full token in evsel__bpf_counter_events */ 510 + match = (ptr != NULL) && 511 + ((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) && 512 + ((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0')); 513 + 514 + return match; 515 + } 516 + 496 517 static const char *__evsel__hw_name(u64 config) 497 518 { 498 519 if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config]) ··· 644 621 #define COP(x) (1 << x) 645 622 646 623 /* 647 - * cache operartion stat 624 + * cache operation stat 648 625 * L1I : Read and prefetch only 649 626 * ITLB and BPU : Read-only 650 627 */ ··· 2298 2275 /* 2299 2276 * Undo swap of u64, then swap on individual u32s, 2300 2277 * get the size of the raw area and undo all of the 2301 - * swap. The pevent interface handles endianity by 2278 + * swap. The pevent interface handles endianness by 2302 2279 * itself. 2303 2280 */ 2304 2281 if (swapped) { ··· 2819 2796 2820 2797 hashmap__clear(evsel->per_pkg_mask); 2821 2798 } 2799 + } 2800 + 2801 + bool evsel__is_hybrid(struct evsel *evsel) 2802 + { 2803 + return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); 2822 2804 }
+31 -3
tools/perf/util/evsel.h
··· 20 20 struct bpf_counter_ops; 21 21 struct target; 22 22 struct hashmap; 23 + struct bperf_leader_bpf; 24 + struct bperf_follower_bpf; 23 25 24 26 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); 25 27 ··· 82 80 bool auto_merge_stats; 83 81 bool collect_stat; 84 82 bool weak_group; 83 + bool bpf_counter; 85 84 int bpf_fd; 86 85 struct bpf_object *bpf_obj; 87 86 }; ··· 116 113 bool merged_stat; 117 114 bool reset_group; 118 115 bool errored; 116 + bool use_config_name; 119 117 struct hashmap *per_pkg_mask; 120 118 struct evsel *leader; 121 119 struct list_head config_terms; ··· 134 130 * See also evsel__has_callchain(). 135 131 */ 136 132 __u64 synth_sample_type; 137 - struct list_head bpf_counter_list; 133 + 134 + /* 135 + * bpf_counter_ops serves two use cases: 136 + * 1. perf-stat -b counting events used byBPF programs 137 + * 2. perf-stat --use-bpf use BPF programs to aggregate counts 138 + */ 138 139 struct bpf_counter_ops *bpf_counter_ops; 140 + 141 + /* for perf-stat -b */ 142 + struct list_head bpf_counter_list; 143 + 144 + /* for perf-stat --use-bpf */ 145 + int bperf_leader_prog_fd; 146 + int bperf_leader_link_fd; 147 + union { 148 + struct bperf_leader_bpf *leader_skel; 149 + struct bperf_follower_bpf *follower_skel; 150 + }; 139 151 }; 140 152 141 153 struct perf_missing_features { ··· 177 157 extern struct perf_missing_features perf_missing_features; 178 158 179 159 struct perf_cpu_map; 180 - struct target; 181 160 struct thread_map; 182 161 struct record_opts; 183 162 ··· 221 202 return evsel__newtp_idx(sys, name, 0); 222 203 } 223 204 224 - struct evsel *evsel__new_cycles(bool precise); 205 + struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); 225 206 226 207 struct tep_event *event_format__new(const char *sys, const char *name); 227 208 ··· 241 222 242 223 bool evsel__is_cache_op_valid(u8 type, u8 op); 243 224 225 + static inline bool evsel__is_bpf(struct evsel *evsel) 226 + { 227 + return evsel->bpf_counter_ops != NULL; 228 + } 229 + 244 230 #define EVSEL__MAX_ALIASES 8 245 231 246 232 extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; ··· 253 229 extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; 254 230 extern const char *evsel__hw_names[PERF_COUNT_HW_MAX]; 255 231 extern const char *evsel__sw_names[PERF_COUNT_SW_MAX]; 232 + extern char *evsel__bpf_counter_events; 233 + bool evsel__match_bpf_counter_events(const char *name); 234 + 256 235 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); 257 236 const char *evsel__name(struct evsel *evsel); 258 237 ··· 462 435 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); 463 436 464 437 void evsel__zero_per_pkg(struct evsel *evsel); 438 + bool evsel__is_hybrid(struct evsel *evsel); 465 439 #endif /* __PERF_EVSEL_H */
+1 -1
tools/perf/util/expr.h
··· 3 3 #define PARSE_CTX_H 1 4 4 5 5 // There are fixes that need to land upstream before we can use libbpf's headers, 6 - // for now use our copy uncoditionally, since the data structures at this point 6 + // for now use our copy unconditionally, since the data structures at this point 7 7 // are exactly the same, no problem. 8 8 //#ifdef HAVE_LIBBPF_SUPPORT 9 9 //#include <bpf/hashmap.h>
+9 -9
tools/perf/util/header.c
··· 127 127 return 0; 128 128 } 129 129 130 - /* Return: 0 if succeded, -ERR if failed. */ 130 + /* Return: 0 if succeeded, -ERR if failed. */ 131 131 int do_write(struct feat_fd *ff, const void *buf, size_t size) 132 132 { 133 133 if (!ff->buf) ··· 135 135 return __do_write_buf(ff, buf, size); 136 136 } 137 137 138 - /* Return: 0 if succeded, -ERR if failed. */ 138 + /* Return: 0 if succeeded, -ERR if failed. */ 139 139 static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) 140 140 { 141 141 u64 *p = (u64 *) set; ··· 154 154 return 0; 155 155 } 156 156 157 - /* Return: 0 if succeded, -ERR if failed. */ 157 + /* Return: 0 if succeeded, -ERR if failed. */ 158 158 int write_padded(struct feat_fd *ff, const void *bf, 159 159 size_t count, size_t count_aligned) 160 160 { ··· 170 170 #define string_size(str) \ 171 171 (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) 172 172 173 - /* Return: 0 if succeded, -ERR if failed. */ 173 + /* Return: 0 if succeeded, -ERR if failed. */ 174 174 static int do_write_string(struct feat_fd *ff, const char *str) 175 175 { 176 176 u32 len, olen; ··· 266 266 return NULL; 267 267 } 268 268 269 - /* Return: 0 if succeded, -ERR if failed. */ 269 + /* Return: 0 if succeeded, -ERR if failed. */ 270 270 static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) 271 271 { 272 272 unsigned long *set; ··· 2874 2874 int err = -1; 2875 2875 2876 2876 if (ff->ph->needs_swap) { 2877 - pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); 2877 + pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n"); 2878 2878 return 0; 2879 2879 } 2880 2880 ··· 2942 2942 int err = -1; 2943 2943 2944 2944 if (ff->ph->needs_swap) { 2945 - pr_warning("interpreting btf from systems with endianity is not yet supported\n"); 2945 + pr_warning("interpreting btf from systems with endianness is not yet supported\n"); 2946 2946 return 0; 2947 2947 } 2948 2948 ··· 3481 3481 }; 3482 3482 3483 3483 /* 3484 - * In the legacy pipe format, there is an implicit assumption that endiannesss 3484 + * In the legacy pipe format, there is an implicit assumption that endianness 3485 3485 * between host recording the samples, and host parsing the samples is the 3486 3486 * same. This is not always the case given that the pipe output may always be 3487 3487 * redirected into a file and analyzed on a different machine with possibly a 3488 - * different endianness and perf_event ABI revsions in the perf tool itself. 3488 + * different endianness and perf_event ABI revisions in the perf tool itself. 3489 3489 */ 3490 3490 static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) 3491 3491 {
+27 -10
tools/perf/util/hist.c
··· 211 211 hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); 212 212 hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); 213 213 hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); 214 + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); 214 215 if (symbol_conf.nanosecs) 215 216 hists__new_col_len(hists, HISTC_TIME, 16); 216 217 else ··· 290 289 } 291 290 292 291 static void he_stat__add_period(struct he_stat *he_stat, u64 period, 293 - u64 weight, u64 ins_lat) 292 + u64 weight, u64 ins_lat, u64 p_stage_cyc) 294 293 { 295 294 296 295 he_stat->period += period; 297 296 he_stat->weight += weight; 298 297 he_stat->nr_events += 1; 299 298 he_stat->ins_lat += ins_lat; 299 + he_stat->p_stage_cyc += p_stage_cyc; 300 300 } 301 301 302 302 static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) ··· 310 308 dest->nr_events += src->nr_events; 311 309 dest->weight += src->weight; 312 310 dest->ins_lat += src->ins_lat; 311 + dest->p_stage_cyc += src->p_stage_cyc; 313 312 } 314 313 315 314 static void he_stat__decay(struct he_stat *he_stat) ··· 600 597 u64 period = entry->stat.period; 601 598 u64 weight = entry->stat.weight; 602 599 u64 ins_lat = entry->stat.ins_lat; 600 + u64 p_stage_cyc = entry->stat.p_stage_cyc; 603 601 bool leftmost = true; 604 602 605 603 p = &hists->entries_in->rb_root.rb_node; ··· 619 615 620 616 if (!cmp) { 621 617 if (sample_self) { 622 - he_stat__add_period(&he->stat, period, weight, ins_lat); 618 + he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); 623 619 hist_entry__add_callchain_period(he, period); 624 620 } 625 621 if (symbol_conf.cumulate_callchain) 626 - he_stat__add_period(he->stat_acc, period, weight, ins_lat); 622 + he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); 627 623 628 624 /* 629 625 * This mem info was allocated from sample__resolve_mem ··· 735 731 .period = sample->period, 736 732 .weight = sample->weight, 737 733 .ins_lat = sample->ins_lat, 734 + .p_stage_cyc = sample->p_stage_cyc, 738 735 }, 739 736 .parent = sym_parent, 740 737 .filtered = symbol__parent_filter(sym_parent) | al->filtered, ··· 2325 2320 ++stats->nr_events[type]; 2326 2321 } 2327 2322 2328 - void hists__inc_nr_events(struct hists *hists, u32 type) 2323 + static void hists_stats__inc(struct hists_stats *stats) 2329 2324 { 2330 - events_stats__inc(&hists->stats, type); 2325 + ++stats->nr_samples; 2326 + } 2327 + 2328 + void hists__inc_nr_events(struct hists *hists) 2329 + { 2330 + hists_stats__inc(&hists->stats); 2331 2331 } 2332 2332 2333 2333 void hists__inc_nr_samples(struct hists *hists, bool filtered) 2334 2334 { 2335 - events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); 2335 + hists_stats__inc(&hists->stats); 2336 2336 if (!filtered) 2337 2337 hists->stats.nr_non_filtered_samples++; 2338 2338 } ··· 2676 2666 } 2677 2667 } 2678 2668 2679 - size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) 2669 + size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, 2670 + bool skip_empty) 2680 2671 { 2681 2672 struct evsel *pos; 2682 2673 size_t ret = 0; 2683 2674 2684 2675 evlist__for_each_entry(evlist, pos) { 2676 + struct hists *hists = evsel__hists(pos); 2677 + 2678 + if (skip_empty && !hists->stats.nr_samples) 2679 + continue; 2680 + 2685 2681 ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); 2686 - ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); 2682 + ret += fprintf(fp, "%16s events: %10d\n", 2683 + "SAMPLE", hists->stats.nr_samples); 2687 2684 } 2688 2685 2689 2686 return ret; ··· 2710 2693 const struct dso *dso = hists->dso_filter; 2711 2694 struct thread *thread = hists->thread_filter; 2712 2695 int socket_id = hists->socket_filter; 2713 - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 2696 + unsigned long nr_samples = hists->stats.nr_samples; 2714 2697 u64 nr_events = hists->stats.total_period; 2715 2698 struct evsel *evsel = hists_to_evsel(hists); 2716 2699 const char *ev_name = evsel__name(evsel); ··· 2737 2720 nr_samples += pos_hists->stats.nr_non_filtered_samples; 2738 2721 nr_events += pos_hists->stats.total_non_filtered_period; 2739 2722 } else { 2740 - nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; 2723 + nr_samples += pos_hists->stats.nr_samples; 2741 2724 nr_events += pos_hists->stats.total_period; 2742 2725 } 2743 2726 }
+5 -3
tools/perf/util/hist.h
··· 75 75 HISTC_MEM_BLOCKED, 76 76 HISTC_LOCAL_INS_LAT, 77 77 HISTC_GLOBAL_INS_LAT, 78 + HISTC_P_STAGE_CYC, 78 79 HISTC_NR_COLS, /* Last entry */ 79 80 }; 80 81 ··· 96 95 const char *uid_filter_str; 97 96 const char *symbol_filter_str; 98 97 pthread_mutex_t lock; 99 - struct events_stats stats; 98 + struct hists_stats stats; 100 99 u64 event_stream; 101 100 u16 col_len[HISTC_NR_COLS]; 102 101 bool has_callchains; ··· 196 195 u64 hists__total_period(struct hists *hists); 197 196 void hists__reset_stats(struct hists *hists); 198 197 void hists__inc_stats(struct hists *hists, struct hist_entry *h); 199 - void hists__inc_nr_events(struct hists *hists, u32 type); 198 + void hists__inc_nr_events(struct hists *hists); 200 199 void hists__inc_nr_samples(struct hists *hists, bool filtered); 201 200 202 201 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, 203 202 int max_cols, float min_pcnt, FILE *fp, 204 203 bool ignore_callchains); 205 - size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); 204 + size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, 205 + bool skip_empty); 206 206 207 207 void hists__filter_by_dso(struct hists *hists); 208 208 void hists__filter_by_thread(struct hists *hists);
+1 -1
tools/perf/util/intel-pt.c
··· 3569 3569 /* 3570 3570 * Since this thread will not be kept in any rbtree not in a 3571 3571 * list, initialize its list node so that at thread__put() the 3572 - * current thread lifetime assuption is kept and we don't segfault 3572 + * current thread lifetime assumption is kept and we don't segfault 3573 3573 * at list_del_init(). 3574 3574 */ 3575 3575 INIT_LIST_HEAD(&pt->unknown_thread->node);
+53
tools/perf/util/iostat.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "util/iostat.h" 3 + #include "util/debug.h" 4 + 5 + enum iostat_mode_t iostat_mode = IOSTAT_NONE; 6 + 7 + __weak int iostat_prepare(struct evlist *evlist __maybe_unused, 8 + struct perf_stat_config *config __maybe_unused) 9 + { 10 + return -1; 11 + } 12 + 13 + __weak int iostat_parse(const struct option *opt __maybe_unused, 14 + const char *str __maybe_unused, 15 + int unset __maybe_unused) 16 + { 17 + pr_err("iostat mode is not supported on current platform\n"); 18 + return -1; 19 + } 20 + 21 + __weak void iostat_list(struct evlist *evlist __maybe_unused, 22 + struct perf_stat_config *config __maybe_unused) 23 + { 24 + } 25 + 26 + __weak void iostat_release(struct evlist *evlist __maybe_unused) 27 + { 28 + } 29 + 30 + __weak void iostat_print_header_prefix(struct perf_stat_config *config __maybe_unused) 31 + { 32 + } 33 + 34 + __weak void iostat_print_metric(struct perf_stat_config *config __maybe_unused, 35 + struct evsel *evsel __maybe_unused, 36 + struct perf_stat_output_ctx *out __maybe_unused) 37 + { 38 + } 39 + 40 + __weak void iostat_prefix(struct evlist *evlist __maybe_unused, 41 + struct perf_stat_config *config __maybe_unused, 42 + char *prefix __maybe_unused, 43 + struct timespec *ts __maybe_unused) 44 + { 45 + } 46 + 47 + __weak void iostat_print_counters(struct evlist *evlist __maybe_unused, 48 + struct perf_stat_config *config __maybe_unused, 49 + struct timespec *ts __maybe_unused, 50 + char *prefix __maybe_unused, 51 + iostat_print_counter_t print_cnt_cb __maybe_unused) 52 + { 53 + }
+47
tools/perf/util/iostat.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * perf iostat 4 + * 5 + * Copyright (C) 2020, Intel Corporation 6 + * 7 + * Authors: Alexander Antonov <alexander.antonov@linux.intel.com> 8 + */ 9 + 10 + #ifndef _IOSTAT_H 11 + #define _IOSTAT_H 12 + 13 + #include <subcmd/parse-options.h> 14 + #include "util/stat.h" 15 + #include "util/parse-events.h" 16 + #include "util/evlist.h" 17 + 18 + struct option; 19 + struct perf_stat_config; 20 + struct evlist; 21 + struct timespec; 22 + 23 + enum iostat_mode_t { 24 + IOSTAT_NONE = -1, 25 + IOSTAT_RUN = 0, 26 + IOSTAT_LIST = 1 27 + }; 28 + 29 + extern enum iostat_mode_t iostat_mode; 30 + 31 + typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *); 32 + 33 + int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); 34 + int iostat_parse(const struct option *opt, const char *str, 35 + int unset __maybe_unused); 36 + void iostat_list(struct evlist *evlist, struct perf_stat_config *config); 37 + void iostat_release(struct evlist *evlist); 38 + void iostat_prefix(struct evlist *evlist, struct perf_stat_config *config, 39 + char *prefix, struct timespec *ts); 40 + void iostat_print_header_prefix(struct perf_stat_config *config); 41 + void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, 42 + struct perf_stat_output_ctx *out); 43 + void iostat_print_counters(struct evlist *evlist, 44 + struct perf_stat_config *config, struct timespec *ts, 45 + char *prefix, iostat_print_counter_t print_cnt_cb); 46 + 47 + #endif /* _IOSTAT_H */
+20 -10
tools/perf/util/jitdump.c
··· 396 396 397 397 static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) 398 398 { 399 - struct perf_tsc_conversion tc; 399 + struct perf_tsc_conversion tc = { .time_shift = 0, }; 400 + struct perf_record_time_conv *time_conv = &jd->session->time_conv; 400 401 401 402 if (!jd->use_arch_timestamp) 402 403 return timestamp; 403 404 404 - tc.time_shift = jd->session->time_conv.time_shift; 405 - tc.time_mult = jd->session->time_conv.time_mult; 406 - tc.time_zero = jd->session->time_conv.time_zero; 407 - tc.time_cycles = jd->session->time_conv.time_cycles; 408 - tc.time_mask = jd->session->time_conv.time_mask; 409 - tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; 410 - tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; 405 + tc.time_shift = time_conv->time_shift; 406 + tc.time_mult = time_conv->time_mult; 407 + tc.time_zero = time_conv->time_zero; 411 408 412 - if (!tc.cap_user_time_zero) 413 - return 0; 409 + /* 410 + * The event TIME_CONV was extended for the fields from "time_cycles" 411 + * when supported cap_user_time_short, for backward compatibility, 412 + * checks the event size and assigns these extended fields if these 413 + * fields are contained in the event. 414 + */ 415 + if (event_contains(*time_conv, time_cycles)) { 416 + tc.time_cycles = time_conv->time_cycles; 417 + tc.time_mask = time_conv->time_mask; 418 + tc.cap_user_time_zero = time_conv->cap_user_time_zero; 419 + tc.cap_user_time_short = time_conv->cap_user_time_short; 420 + 421 + if (!tc.cap_user_time_zero) 422 + return 0; 423 + } 414 424 415 425 return tsc_to_perf_time(timestamp, &tc); 416 426 }
+1 -1
tools/perf/util/levenshtein.c
··· 30 30 * 31 31 * It does so by calculating the costs of the path ending in characters 32 32 * i (in string1) and j (in string2), respectively, given that the last 33 - * operation is a substition, a swap, a deletion, or an insertion. 33 + * operation is a substitution, a swap, a deletion, or an insertion. 34 34 * 35 35 * This implementation allows the costs to be weighted: 36 36 *
+1 -1
tools/perf/util/libunwind/arm64.c
··· 4 4 * generic one. 5 5 * 6 6 * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch 7 - * name and the defination of this function is included directly from 7 + * name and the definition of this function is included directly from 8 8 * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function 9 9 * is defined no matter what arch the host is. 10 10 *
+1 -1
tools/perf/util/libunwind/x86_32.c
··· 4 4 * generic one. 5 5 * 6 6 * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch 7 - * name and the defination of this function is included directly from 7 + * name and the definition of this function is included directly from 8 8 * 'arch/x86/util/unwind-libunwind.c', to make sure that this function 9 9 * is defined no matter what arch the host is. 10 10 *
+1 -1
tools/perf/util/llvm-utils.c
··· 471 471 472 472 /* 473 473 * This is an optional work. Even it fail we can continue our 474 - * work. Needn't to check error return. 474 + * work. Needn't check error return. 475 475 */ 476 476 llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); 477 477
+6 -6
tools/perf/util/machine.c
··· 905 905 906 906 maps__insert(&machine->kmaps, map); 907 907 908 - /* Put the map here because maps__insert alread got it */ 908 + /* Put the map here because maps__insert already got it */ 909 909 map__put(map); 910 910 out: 911 911 /* put the dso here, corresponding to machine__findnew_module_dso */ ··· 1952 1952 * maps because that is what the kernel just did. 1953 1953 * 1954 1954 * But when synthesizing, this should not be done. If we do, we end up 1955 - * with overlapping maps as we process the sythesized MMAP2 events that 1955 + * with overlapping maps as we process the synthesized MMAP2 events that 1956 1956 * get delivered shortly thereafter. 1957 1957 * 1958 1958 * Use the FORK event misc flags in an internal way to signal this ··· 2038 2038 static bool symbol__match_regex(struct symbol *sym, regex_t *regex) 2039 2039 { 2040 2040 if (!regexec(regex, sym->name, 0, NULL, 0)) 2041 - return 1; 2042 - return 0; 2041 + return true; 2042 + return false; 2043 2043 } 2044 2044 2045 2045 static void ip__resolve_ams(struct thread *thread, ··· 2518 2518 2519 2519 /* 2520 2520 * Check if there are identical LBRs between two samples. 2521 - * Identicall LBRs must have same from, to and flags values. Also, 2521 + * Identical LBRs must have same from, to and flags values. Also, 2522 2522 * they have to be saved in the same LBR registers (same physical 2523 2523 * index). 2524 2524 * ··· 2588 2588 } 2589 2589 2590 2590 /* 2591 - * Recolve LBR callstack chain sample 2591 + * Resolve LBR callstack chain sample 2592 2592 * Return: 2593 2593 * 1 on success get LBR callchain information 2594 2594 * 0 no available LBR callchain information, should try fp
+2 -2
tools/perf/util/map.h
··· 75 75 76 76 /* map__for_each_symbol - iterate over the symbols in the given map 77 77 * 78 - * @map: the 'struct map *' in which symbols itereated 78 + * @map: the 'struct map *' in which symbols are iterated 79 79 * @pos: the 'struct symbol *' to use as a loop cursor 80 80 * @n: the 'struct rb_node *' to use as a temporary storage 81 81 * Note: caller must ensure map->dso is not NULL (map is loaded). ··· 86 86 /* map__for_each_symbol_with_name - iterate over the symbols in the given map 87 87 * that have the given name 88 88 * 89 - * @map: the 'struct map *' in which symbols itereated 89 + * @map: the 'struct map *' in which symbols are iterated 90 90 * @sym_name: the symbol name 91 91 * @pos: the 'struct symbol *' to use as a loop cursor 92 92 */
+1 -2
tools/perf/util/mem-events.h
··· 44 44 45 45 void perf_mem_events__list(void); 46 46 47 - struct mem_info; 48 47 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 49 48 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 50 49 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); ··· 80 81 u32 rmt_dram; /* count of loads miss to remote DRAM */ 81 82 u32 blk_data; /* count of loads blocked by data */ 82 83 u32 blk_addr; /* count of loads blocked by address conflict */ 83 - u32 nomap; /* count of load/stores with no phys adrs */ 84 + u32 nomap; /* count of load/stores with no phys addrs */ 84 85 u32 noparse; /* count of unparsable data sources */ 85 86 }; 86 87
+7 -7
tools/perf/util/metricgroup.c
··· 181 181 * @pctx: the parse context for the metric expression. 182 182 * @metric_no_merge: don't attempt to share events for the metric with other 183 183 * metrics. 184 - * @has_constraint: is there a contraint on the group of events? In which case 184 + * @has_constraint: is there a constraint on the group of events? In which case 185 185 * the events won't be grouped. 186 186 * @metric_events: out argument, null terminated array of evsel's associated 187 187 * with the metric. ··· 618 618 void metricgroup__print(bool metrics, bool metricgroups, char *filter, 619 619 bool raw, bool details) 620 620 { 621 - struct pmu_events_map *map = perf_pmu__find_map(NULL); 621 + struct pmu_events_map *map = pmu_events_map__find(); 622 622 struct pmu_event *pe; 623 623 int i; 624 624 struct rblist groups; ··· 900 900 (match_metric(__pe->metric_group, __metric) || \ 901 901 match_metric(__pe->metric_name, __metric))) 902 902 903 - static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map) 903 + struct pmu_event *metricgroup__find_metric(const char *metric, 904 + struct pmu_events_map *map) 904 905 { 905 906 struct pmu_event *pe; 906 907 int i; ··· 986 985 struct expr_id *parent; 987 986 struct pmu_event *pe; 988 987 989 - pe = find_metric(cur->key, map); 988 + pe = metricgroup__find_metric(cur->key, map); 990 989 if (!pe) 991 990 continue; 992 991 ··· 1254 1253 struct rblist *metric_events) 1255 1254 { 1256 1255 struct evlist *perf_evlist = *(struct evlist **)opt->value; 1257 - struct pmu_events_map *map = perf_pmu__find_map(NULL); 1258 - 1256 + struct pmu_events_map *map = pmu_events_map__find(); 1259 1257 1260 1258 return parse_groups(perf_evlist, str, metric_no_group, 1261 1259 metric_no_merge, NULL, metric_events, map); ··· 1273 1273 1274 1274 bool metricgroup__has_metric(const char *metric) 1275 1275 { 1276 - struct pmu_events_map *map = perf_pmu__find_map(NULL); 1276 + struct pmu_events_map *map = pmu_events_map__find(); 1277 1277 struct 
pmu_event *pe; 1278 1278 int i; 1279 1279
+2 -2
tools/perf/util/metricgroup.h
··· 9 9 10 10 struct evlist; 11 11 struct evsel; 12 - struct evlist; 13 12 struct option; 14 13 struct rblist; 15 14 struct pmu_events_map; ··· 43 44 bool metric_no_group, 44 45 bool metric_no_merge, 45 46 struct rblist *metric_events); 46 - 47 + struct pmu_event *metricgroup__find_metric(const char *metric, 48 + struct pmu_events_map *map); 47 49 int metricgroup__parse_groups_test(struct evlist *evlist, 48 50 struct pmu_events_map *map, 49 51 const char *str,
+178
tools/perf/util/parse-events-hybrid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/err.h> 3 + #include <linux/zalloc.h> 4 + #include <errno.h> 5 + #include <sys/types.h> 6 + #include <sys/stat.h> 7 + #include <fcntl.h> 8 + #include <sys/param.h> 9 + #include "evlist.h" 10 + #include "evsel.h" 11 + #include "parse-events.h" 12 + #include "parse-events-hybrid.h" 13 + #include "debug.h" 14 + #include "pmu.h" 15 + #include "pmu-hybrid.h" 16 + #include "perf.h" 17 + 18 + static void config_hybrid_attr(struct perf_event_attr *attr, 19 + int type, int pmu_type) 20 + { 21 + /* 22 + * attr.config layout for type PERF_TYPE_HARDWARE and 23 + * PERF_TYPE_HW_CACHE 24 + * 25 + * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA 26 + * AA: hardware event ID 27 + * EEEEEEEE: PMU type ID 28 + * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB 29 + * BB: hardware cache ID 30 + * CC: hardware cache op ID 31 + * DD: hardware cache op result ID 32 + * EEEEEEEE: PMU type ID 33 + * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. 34 + */ 35 + attr->type = type; 36 + attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); 37 + } 38 + 39 + static int create_event_hybrid(__u32 config_type, int *idx, 40 + struct list_head *list, 41 + struct perf_event_attr *attr, char *name, 42 + struct list_head *config_terms, 43 + struct perf_pmu *pmu) 44 + { 45 + struct evsel *evsel; 46 + __u32 type = attr->type; 47 + __u64 config = attr->config; 48 + 49 + config_hybrid_attr(attr, config_type, pmu->type); 50 + evsel = parse_events__add_event_hybrid(list, idx, attr, name, 51 + pmu, config_terms); 52 + if (evsel) 53 + evsel->pmu_name = strdup(pmu->name); 54 + else 55 + return -ENOMEM; 56 + 57 + attr->type = type; 58 + attr->config = config; 59 + return 0; 60 + } 61 + 62 + static int pmu_cmp(struct parse_events_state *parse_state, 63 + struct perf_pmu *pmu) 64 + { 65 + if (!parse_state->hybrid_pmu_name) 66 + return 0; 67 + 68 + return strcmp(parse_state->hybrid_pmu_name, pmu->name); 69 + } 70 + 71 + static int 
add_hw_hybrid(struct parse_events_state *parse_state, 72 + struct list_head *list, struct perf_event_attr *attr, 73 + char *name, struct list_head *config_terms) 74 + { 75 + struct perf_pmu *pmu; 76 + int ret; 77 + 78 + perf_pmu__for_each_hybrid_pmu(pmu) { 79 + if (pmu_cmp(parse_state, pmu)) 80 + continue; 81 + 82 + ret = create_event_hybrid(PERF_TYPE_HARDWARE, 83 + &parse_state->idx, list, attr, name, 84 + config_terms, pmu); 85 + if (ret) 86 + return ret; 87 + } 88 + 89 + return 0; 90 + } 91 + 92 + static int create_raw_event_hybrid(int *idx, struct list_head *list, 93 + struct perf_event_attr *attr, char *name, 94 + struct list_head *config_terms, 95 + struct perf_pmu *pmu) 96 + { 97 + struct evsel *evsel; 98 + 99 + attr->type = pmu->type; 100 + evsel = parse_events__add_event_hybrid(list, idx, attr, name, 101 + pmu, config_terms); 102 + if (evsel) 103 + evsel->pmu_name = strdup(pmu->name); 104 + else 105 + return -ENOMEM; 106 + 107 + return 0; 108 + } 109 + 110 + static int add_raw_hybrid(struct parse_events_state *parse_state, 111 + struct list_head *list, struct perf_event_attr *attr, 112 + char *name, struct list_head *config_terms) 113 + { 114 + struct perf_pmu *pmu; 115 + int ret; 116 + 117 + perf_pmu__for_each_hybrid_pmu(pmu) { 118 + if (pmu_cmp(parse_state, pmu)) 119 + continue; 120 + 121 + ret = create_raw_event_hybrid(&parse_state->idx, list, attr, 122 + name, config_terms, pmu); 123 + if (ret) 124 + return ret; 125 + } 126 + 127 + return 0; 128 + } 129 + 130 + int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, 131 + struct list_head *list, 132 + struct perf_event_attr *attr, 133 + char *name, struct list_head *config_terms, 134 + bool *hybrid) 135 + { 136 + *hybrid = false; 137 + if (attr->type == PERF_TYPE_SOFTWARE) 138 + return 0; 139 + 140 + if (!perf_pmu__has_hybrid()) 141 + return 0; 142 + 143 + *hybrid = true; 144 + if (attr->type != PERF_TYPE_RAW) { 145 + return add_hw_hybrid(parse_state, list, attr, name, 146 + 
config_terms); 147 + } 148 + 149 + return add_raw_hybrid(parse_state, list, attr, name, 150 + config_terms); 151 + } 152 + 153 + int parse_events__add_cache_hybrid(struct list_head *list, int *idx, 154 + struct perf_event_attr *attr, char *name, 155 + struct list_head *config_terms, 156 + bool *hybrid, 157 + struct parse_events_state *parse_state) 158 + { 159 + struct perf_pmu *pmu; 160 + int ret; 161 + 162 + *hybrid = false; 163 + if (!perf_pmu__has_hybrid()) 164 + return 0; 165 + 166 + *hybrid = true; 167 + perf_pmu__for_each_hybrid_pmu(pmu) { 168 + if (pmu_cmp(parse_state, pmu)) 169 + continue; 170 + 171 + ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, 172 + attr, name, config_terms, pmu); 173 + if (ret) 174 + return ret; 175 + } 176 + 177 + return 0; 178 + }
+23
tools/perf/util/parse-events-hybrid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __PERF_PARSE_EVENTS_HYBRID_H 3 + #define __PERF_PARSE_EVENTS_HYBRID_H 4 + 5 + #include <linux/list.h> 6 + #include <stdbool.h> 7 + #include <linux/types.h> 8 + #include <linux/perf_event.h> 9 + #include <string.h> 10 + 11 + int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, 12 + struct list_head *list, 13 + struct perf_event_attr *attr, 14 + char *name, struct list_head *config_terms, 15 + bool *hybrid); 16 + 17 + int parse_events__add_cache_hybrid(struct list_head *list, int *idx, 18 + struct perf_event_attr *attr, char *name, 19 + struct list_head *config_terms, 20 + bool *hybrid, 21 + struct parse_events_state *parse_state); 22 + 23 + #endif /* __PERF_PARSE_EVENTS_HYBRID_H */
+107 -8
tools/perf/util/parse-events.c
··· 37 37 #include "util/evsel_config.h" 38 38 #include "util/event.h" 39 39 #include "util/pfm.h" 40 + #include "util/parse-events-hybrid.h" 41 + #include "util/pmu-hybrid.h" 40 42 #include "perf.h" 41 43 42 44 #define MAX_NAME_LEN 100 ··· 49 47 int parse_events_parse(void *parse_state, void *scanner); 50 48 static int get_config_terms(struct list_head *head_config, 51 49 struct list_head *head_terms __maybe_unused); 50 + static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, 51 + const char *str, char *pmu_name, 52 + struct list_head *list); 52 53 53 54 static struct perf_pmu_event_symbol *perf_pmu_events_list; 54 55 /* ··· 457 452 int parse_events_add_cache(struct list_head *list, int *idx, 458 453 char *type, char *op_result1, char *op_result2, 459 454 struct parse_events_error *err, 460 - struct list_head *head_config) 455 + struct list_head *head_config, 456 + struct parse_events_state *parse_state) 461 457 { 462 458 struct perf_event_attr attr; 463 459 LIST_HEAD(config_terms); 464 460 char name[MAX_NAME_LEN], *config_name; 465 461 int cache_type = -1, cache_op = -1, cache_result = -1; 466 462 char *op_result[2] = { op_result1, op_result2 }; 467 - int i, n; 463 + int i, n, ret; 464 + bool hybrid; 468 465 469 466 /* 470 467 * No fallback - if we cannot get a clear cache type ··· 526 519 if (get_config_terms(head_config, &config_terms)) 527 520 return -ENOMEM; 528 521 } 522 + 523 + ret = parse_events__add_cache_hybrid(list, idx, &attr, 524 + config_name ? : name, &config_terms, 525 + &hybrid, parse_state); 526 + if (hybrid) 527 + return ret; 528 + 529 529 return add_event(list, idx, &attr, config_name ? : name, &config_terms); 530 530 } 531 531 ··· 860 846 struct parse_events_term *term, *temp; 861 847 862 848 /* 863 - * Currectly, all possible user config term 849 + * Currently, all possible user config term 864 850 * belong to bpf object. parse_events__is_hardcoded_term() 865 - * happends to be a good flag. 
851 + * happens to be a good flag. 866 852 * 867 853 * See parse_events_config_bpf() and 868 854 * config_term_tracepoint(). ··· 912 898 913 899 /* 914 900 * Caller doesn't know anything about obj_head_config, 915 - * so combine them together again before returnning. 901 + * so combine them together again before returning. 916 902 */ 917 903 if (head_config) 918 904 list_splice_tail(&obj_head_config, head_config); ··· 1199 1185 } 1200 1186 1201 1187 /* 1202 - * Check term availbility after basic checking so 1188 + * Check term availability after basic checking so 1203 1189 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. 1204 1190 * 1205 - * If check availbility at the entry of this function, 1191 + * If check availability at the entry of this function, 1206 1192 * user will see "'<sysfs term>' is not usable in 'perf stat'" 1207 1193 * if an invalid config term is provided for legacy events 1208 1194 * (for example, instructions/badterm/...), which is confusing. ··· 1433 1419 { 1434 1420 struct perf_event_attr attr; 1435 1421 LIST_HEAD(config_terms); 1422 + bool hybrid; 1423 + int ret; 1436 1424 1437 1425 memset(&attr, 0, sizeof(attr)); 1438 1426 attr.type = type; ··· 1448 1432 if (get_config_terms(head_config, &config_terms)) 1449 1433 return -ENOMEM; 1450 1434 } 1435 + 1436 + ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, 1437 + get_config_name(head_config), 1438 + &config_terms, &hybrid); 1439 + if (hybrid) 1440 + return ret; 1451 1441 1452 1442 return add_event(list, &parse_state->idx, &attr, 1453 1443 get_config_name(head_config), &config_terms); ··· 1476 1454 } 1477 1455 1478 1456 return false; 1457 + } 1458 + 1459 + static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state, 1460 + struct list_head *list, char *name, 1461 + struct list_head *head_config) 1462 + { 1463 + struct parse_events_term *term; 1464 + int ret = -1; 1465 + 1466 + if (parse_state->fake_pmu || !head_config || list_empty(head_config) || 
1467 + !perf_pmu__is_hybrid(name)) { 1468 + return -1; 1469 + } 1470 + 1471 + /* 1472 + * More than one term in list. 1473 + */ 1474 + if (head_config->next && head_config->next->next != head_config) 1475 + return -1; 1476 + 1477 + term = list_first_entry(head_config, struct parse_events_term, list); 1478 + if (term && term->config && strcmp(term->config, "event")) { 1479 + ret = parse_events__with_hybrid_pmu(parse_state, term->config, 1480 + name, list); 1481 + } 1482 + 1483 + return ret; 1479 1484 } 1480 1485 1481 1486 int parse_events_add_pmu(struct parse_events_state *parse_state, ··· 1598 1549 if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) 1599 1550 return -ENOMEM; 1600 1551 1552 + if (!parse_events__inside_hybrid_pmu(parse_state, list, name, 1553 + head_config)) { 1554 + return 0; 1555 + } 1556 + 1601 1557 if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { 1602 1558 struct evsel_config_term *pos, *tmp; 1603 1559 ··· 1620 1566 &config_terms, auto_merge_stats, NULL); 1621 1567 if (!evsel) 1622 1568 return -ENOMEM; 1569 + 1570 + if (evsel->name) 1571 + evsel->use_config_name = true; 1623 1572 1624 1573 evsel->pmu_name = name ? strdup(name) : NULL; 1625 1574 evsel->use_uncore_alias = use_uncore_alias; ··· 1861 1804 int pinned; 1862 1805 int weak; 1863 1806 int exclusive; 1807 + int bpf_counter; 1864 1808 }; 1865 1809 1866 1810 static int get_event_modifier(struct event_modifier *mod, char *str, ··· 1882 1824 int exclude = eu | ek | eh; 1883 1825 int exclude_GH = evsel ? 
evsel->exclude_GH : 0; 1884 1826 int weak = 0; 1827 + int bpf_counter = 0; 1885 1828 1886 1829 memset(mod, 0, sizeof(*mod)); 1887 1830 ··· 1926 1867 exclusive = 1; 1927 1868 } else if (*str == 'W') { 1928 1869 weak = 1; 1870 + } else if (*str == 'b') { 1871 + bpf_counter = 1; 1929 1872 } else 1930 1873 break; 1931 1874 ··· 1959 1898 mod->sample_read = sample_read; 1960 1899 mod->pinned = pinned; 1961 1900 mod->weak = weak; 1901 + mod->bpf_counter = bpf_counter; 1962 1902 mod->exclusive = exclusive; 1963 1903 1964 1904 return 0; ··· 1974 1912 char *p = str; 1975 1913 1976 1914 /* The sizeof includes 0 byte as well. */ 1977 - if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1)) 1915 + if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1)) 1978 1916 return -1; 1979 1917 1980 1918 while (*p) { ··· 2015 1953 evsel->sample_read = mod.sample_read; 2016 1954 evsel->precise_max = mod.precise_max; 2017 1955 evsel->weak_group = mod.weak; 1956 + evsel->bpf_counter = mod.bpf_counter; 2018 1957 2019 1958 if (evsel__is_group_leader(evsel)) { 2020 1959 evsel->core.attr.pinned = mod.pinned; ··· 2222 2159 } 2223 2160 2224 2161 parse_events_terms__delete(parse_state.terms); 2162 + return ret; 2163 + } 2164 + 2165 + static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, 2166 + const char *str, char *pmu_name, 2167 + struct list_head *list) 2168 + { 2169 + struct parse_events_state ps = { 2170 + .list = LIST_HEAD_INIT(ps.list), 2171 + .stoken = PE_START_EVENTS, 2172 + .hybrid_pmu_name = pmu_name, 2173 + .idx = parse_state->idx, 2174 + }; 2175 + int ret; 2176 + 2177 + ret = parse_events__scanner(str, &ps); 2178 + perf_pmu__parse_cleanup(); 2179 + 2180 + if (!ret) { 2181 + if (!list_empty(&ps.list)) { 2182 + list_splice(&ps.list, list); 2183 + parse_state->idx = ps.idx; 2184 + return 0; 2185 + } else 2186 + return -1; 2187 + } 2188 + 2225 2189 return ret; 2226 2190 } 2227 2191 ··· 3274 3184 3275 3185 fail: 3276 3186 return NULL; 3187 + } 3188 + 3189 + struct evsel 
*parse_events__add_event_hybrid(struct list_head *list, int *idx, 3190 + struct perf_event_attr *attr, 3191 + char *name, struct perf_pmu *pmu, 3192 + struct list_head *config_terms) 3193 + { 3194 + return __add_event(list, idx, attr, true, name, pmu, 3195 + config_terms, false, NULL); 3277 3196 }
+8 -1
tools/perf/util/parse-events.h
··· 138 138 struct list_head *terms; 139 139 int stoken; 140 140 struct perf_pmu *fake_pmu; 141 + char *hybrid_pmu_name; 141 142 }; 142 143 143 144 void parse_events__handle_error(struct parse_events_error *err, int idx, ··· 189 188 int parse_events_add_cache(struct list_head *list, int *idx, 190 189 char *type, char *op_result1, char *op_result2, 191 190 struct parse_events_error *error, 192 - struct list_head *head_config); 191 + struct list_head *head_config, 192 + struct parse_events_state *parse_state); 193 193 int parse_events_add_breakpoint(struct list_head *list, int *idx, 194 194 u64 addr, char *type, u64 len); 195 195 int parse_events_add_pmu(struct parse_events_state *parse_state, ··· 264 262 #endif /* HAVE_LIBELF_SUPPORT */ 265 263 266 264 int perf_pmu__test_parse_init(void); 265 + 266 + struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, 267 + struct perf_event_attr *attr, 268 + char *name, struct perf_pmu *pmu, 269 + struct list_head *config_terms); 267 270 268 271 #endif /* __PERF_PARSE_EVENTS_H */
+1 -1
tools/perf/util/parse-events.l
··· 210 210 name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* 211 211 drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 212 212 /* If you add a modifier you need to update check_modifier() */ 213 - modifier_event [ukhpPGHSDIWe]+ 213 + modifier_event [ukhpPGHSDIWeb]+ 214 214 modifier_bp [rwx]{1,3} 215 215 216 216 %%
+6 -3
tools/perf/util/parse-events.y
··· 454 454 455 455 list = alloc_list(); 456 456 ABORT_ON(!list); 457 - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6); 457 + err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6, 458 + parse_state); 458 459 parse_events_terms__delete($6); 459 460 free($1); 460 461 free($3); ··· 476 475 477 476 list = alloc_list(); 478 477 ABORT_ON(!list); 479 - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4); 478 + err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4, 479 + parse_state); 480 480 parse_events_terms__delete($4); 481 481 free($1); 482 482 free($3); ··· 497 495 498 496 list = alloc_list(); 499 497 ABORT_ON(!list); 500 - err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2); 498 + err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2, 499 + parse_state); 501 500 parse_events_terms__delete($2); 502 501 free($1); 503 502 if (err) {
+89
tools/perf/util/pmu-hybrid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/list.h> 3 + #include <linux/compiler.h> 4 + #include <linux/string.h> 5 + #include <linux/zalloc.h> 6 + #include <sys/types.h> 7 + #include <errno.h> 8 + #include <fcntl.h> 9 + #include <sys/stat.h> 10 + #include <unistd.h> 11 + #include <stdio.h> 12 + #include <stdbool.h> 13 + #include <stdarg.h> 14 + #include <locale.h> 15 + #include <api/fs/fs.h> 16 + #include "fncache.h" 17 + #include "pmu-hybrid.h" 18 + 19 + LIST_HEAD(perf_pmu__hybrid_pmus); 20 + 21 + bool perf_pmu__hybrid_mounted(const char *name) 22 + { 23 + char path[PATH_MAX]; 24 + const char *sysfs; 25 + FILE *file; 26 + int n, cpu; 27 + 28 + if (strncmp(name, "cpu_", 4)) 29 + return false; 30 + 31 + sysfs = sysfs__mountpoint(); 32 + if (!sysfs) 33 + return false; 34 + 35 + snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name); 36 + if (!file_available(path)) 37 + return false; 38 + 39 + file = fopen(path, "r"); 40 + if (!file) 41 + return false; 42 + 43 + n = fscanf(file, "%u", &cpu); 44 + fclose(file); 45 + if (n <= 0) 46 + return false; 47 + 48 + return true; 49 + } 50 + 51 + struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) 52 + { 53 + struct perf_pmu *pmu; 54 + 55 + if (!name) 56 + return NULL; 57 + 58 + perf_pmu__for_each_hybrid_pmu(pmu) { 59 + if (!strcmp(name, pmu->name)) 60 + return pmu; 61 + } 62 + 63 + return NULL; 64 + } 65 + 66 + bool perf_pmu__is_hybrid(const char *name) 67 + { 68 + return perf_pmu__find_hybrid_pmu(name) != NULL; 69 + } 70 + 71 + char *perf_pmu__hybrid_type_to_pmu(const char *type) 72 + { 73 + char *pmu_name = NULL; 74 + 75 + if (asprintf(&pmu_name, "cpu_%s", type) < 0) 76 + return NULL; 77 + 78 + if (perf_pmu__is_hybrid(pmu_name)) 79 + return pmu_name; 80 + 81 + /* 82 + * pmu may be not scanned, check the sysfs. 83 + */ 84 + if (perf_pmu__hybrid_mounted(pmu_name)) 85 + return pmu_name; 86 + 87 + free(pmu_name); 88 + return NULL; 89 + }
+22
tools/perf/util/pmu-hybrid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __PMU_HYBRID_H 3 + #define __PMU_HYBRID_H 4 + 5 + #include <linux/perf_event.h> 6 + #include <linux/compiler.h> 7 + #include <linux/list.h> 8 + #include <stdbool.h> 9 + #include "pmu.h" 10 + 11 + extern struct list_head perf_pmu__hybrid_pmus; 12 + 13 + #define perf_pmu__for_each_hybrid_pmu(pmu) \ 14 + list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list) 15 + 16 + bool perf_pmu__hybrid_mounted(const char *name); 17 + 18 + struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); 19 + bool perf_pmu__is_hybrid(const char *name); 20 + char *perf_pmu__hybrid_type_to_pmu(const char *type); 21 + 22 + #endif /* __PMU_HYBRID_H */
+50 -23
tools/perf/util/pmu.c
··· 25 25 #include "string2.h" 26 26 #include "strbuf.h" 27 27 #include "fncache.h" 28 + #include "pmu-hybrid.h" 28 29 29 30 struct perf_pmu perf_pmu__fake; 30 31 ··· 40 39 extern FILE *perf_pmu_in; 41 40 42 41 static LIST_HEAD(pmus); 42 + static bool hybrid_scanned; 43 43 44 44 /* 45 45 * Parse & process all the sysfs attributes located under ··· 285 283 zfree(&newalias->str); 286 284 zfree(&newalias->metric_expr); 287 285 zfree(&newalias->metric_name); 286 + zfree(&newalias->pmu_name); 288 287 parse_events_terms__purge(&newalias->terms); 289 288 free(newalias); 290 289 } ··· 300 297 301 298 list_for_each_entry(a, alist, list) { 302 299 if (!strcasecmp(newalias->name, a->name)) { 300 + if (newalias->pmu_name && a->pmu_name && 301 + !strcasecmp(newalias->pmu_name, a->pmu_name)) { 302 + continue; 303 + } 303 304 perf_pmu_update_alias(a, newalias); 304 305 perf_pmu_free_alias(newalias); 305 306 return true; ··· 313 306 } 314 307 315 308 static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, 316 - char *desc, char *val, 317 - char *long_desc, char *topic, 318 - char *unit, char *perpkg, 319 - char *metric_expr, 320 - char *metric_name, 321 - char *deprecated) 309 + char *desc, char *val, struct pmu_event *pe) 322 310 { 323 311 struct parse_events_term *term; 324 312 struct perf_pmu_alias *alias; 325 313 int ret; 326 314 int num; 327 315 char newval[256]; 316 + char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL, 317 + *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL, 318 + *pmu_name = NULL; 319 + 320 + if (pe) { 321 + long_desc = (char *)pe->long_desc; 322 + topic = (char *)pe->topic; 323 + unit = (char *)pe->unit; 324 + perpkg = (char *)pe->perpkg; 325 + metric_expr = (char *)pe->metric_expr; 326 + metric_name = (char *)pe->metric_name; 327 + deprecated = (char *)pe->deprecated; 328 + pmu_name = (char *)pe->pmu; 329 + } 328 330 329 331 alias = malloc(sizeof(*alias)); 330 332 if (!alias) ··· 398 382 } 399 383 
alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; 400 384 alias->str = strdup(newval); 385 + alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL; 401 386 402 387 if (deprecated) 403 388 alias->deprecated = true; ··· 423 406 /* Remove trailing newline from sysfs file */ 424 407 strim(buf); 425 408 426 - return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, 427 - NULL, NULL, NULL, NULL); 409 + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); 428 410 } 429 411 430 412 static inline bool pmu_alias_info_file(char *name) ··· 615 599 */ 616 600 #define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" 617 601 #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" 618 - #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" 619 602 620 603 static struct perf_cpu_map *pmu_cpumask(const char *name) 621 604 { ··· 645 630 { 646 631 char path[PATH_MAX]; 647 632 const char *sysfs; 633 + 634 + if (perf_pmu__hybrid_mounted(name)) 635 + return false; 648 636 649 637 sysfs = sysfs__mountpoint(); 650 638 snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); ··· 735 717 return map; 736 718 } 737 719 720 + struct pmu_events_map *__weak pmu_events_map__find(void) 721 + { 722 + return perf_pmu__find_map(NULL); 723 + } 724 + 738 725 bool pmu_uncore_alias_match(const char *pmu_name, const char *name) 739 726 { 740 727 char *tmp = NULL, *tok, *str; ··· 816 793 /* need type casts to override 'const' */ 817 794 __perf_pmu__new_alias(head, NULL, (char *)pe->name, 818 795 (char *)pe->desc, (char *)pe->event, 819 - (char *)pe->long_desc, (char *)pe->topic, 820 - (char *)pe->unit, (char *)pe->perpkg, 821 - (char *)pe->metric_expr, 822 - (char *)pe->metric_name, 823 - (char *)pe->deprecated); 796 + pe); 824 797 } 825 798 } 826 799 ··· 883 864 (char *)pe->name, 884 865 (char *)pe->desc, 885 866 (char *)pe->event, 886 - (char *)pe->long_desc, 887 - (char *)pe->topic, 888 - (char *)pe->unit, 889 - (char 
*)pe->perpkg, 890 - (char *)pe->metric_expr, 891 - (char *)pe->metric_name, 892 - (char *)pe->deprecated); 867 + pe); 893 868 } 894 869 895 870 return 0; ··· 955 942 pmu->is_uncore = pmu_is_uncore(name); 956 943 if (pmu->is_uncore) 957 944 pmu->id = pmu_id(name); 945 + pmu->is_hybrid = perf_pmu__hybrid_mounted(name); 958 946 pmu->max_precise = pmu_max_precise(name); 959 947 pmu_add_cpu_aliases(&aliases, pmu); 960 948 pmu_add_sys_aliases(&aliases, pmu); ··· 966 952 list_splice(&format, &pmu->format); 967 953 list_splice(&aliases, &pmu->aliases); 968 954 list_add_tail(&pmu->list, &pmus); 955 + 956 + if (pmu->is_hybrid) 957 + list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); 969 958 970 959 pmu->default_config = perf_pmu__get_default_config(pmu); 971 960 ··· 1086 1069 1087 1070 /* 1088 1071 * Sets value based on the format definition (format parameter) 1089 - * and unformated value (value parameter). 1072 + * and unformatted value (value parameter). 1090 1073 */ 1091 1074 static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, 1092 1075 bool zero) ··· 1425 1408 } 1426 1409 1427 1410 /* 1428 - * if no unit or scale foundin aliases, then 1411 + * if no unit or scale found in aliases, then 1429 1412 * set defaults as for evsel 1430 1413 * unit cannot left to NULL 1431 1414 */ ··· 1861 1844 pr_warning("WARNING: event '%s' not valid (bits %s of config " 1862 1845 "'%llx' not supported by kernel)!\n", 1863 1846 name ?: "N/A", buf, config); 1847 + } 1848 + 1849 + bool perf_pmu__has_hybrid(void) 1850 + { 1851 + if (!hybrid_scanned) { 1852 + hybrid_scanned = true; 1853 + perf_pmu__scan(NULL); 1854 + } 1855 + 1856 + return !list_empty(&perf_pmu__hybrid_pmus); 1864 1857 }
+8
tools/perf/util/pmu.h
··· 5 5 #include <linux/bitmap.h> 6 6 #include <linux/compiler.h> 7 7 #include <linux/perf_event.h> 8 + #include <linux/list.h> 8 9 #include <stdbool.h> 9 10 #include "parse-events.h" 10 11 #include "pmu-events/pmu-events.h" ··· 20 19 21 20 #define PERF_PMU_FORMAT_BITS 64 22 21 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" 22 + #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" 23 23 24 24 struct perf_event_attr; 25 25 ··· 36 34 __u32 type; 37 35 bool selectable; 38 36 bool is_uncore; 37 + bool is_hybrid; 39 38 bool auxtrace; 40 39 int max_precise; 41 40 struct perf_event_attr *default_config; ··· 45 42 struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ 46 43 struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ 47 44 struct list_head list; /* ELEM */ 45 + struct list_head hybrid_list; 48 46 }; 49 47 50 48 extern struct perf_pmu perf_pmu__fake; ··· 76 72 bool deprecated; 77 73 char *metric_expr; 78 74 char *metric_name; 75 + char *pmu_name; 79 76 }; 80 77 81 78 struct perf_pmu *perf_pmu__find(const char *name); ··· 119 114 struct pmu_events_map *map); 120 115 121 116 struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); 117 + struct pmu_events_map *pmu_events_map__find(void); 122 118 bool pmu_uncore_alias_match(const char *pmu_name, const char *name); 123 119 void perf_pmu_free_alias(struct perf_pmu_alias *alias); 124 120 ··· 131 125 132 126 void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, 133 127 char *name); 128 + 129 + bool perf_pmu__has_hybrid(void); 134 130 135 131 #endif /* __PMU_H */
+2 -2
tools/perf/util/probe-event.c
··· 3228 3228 return err; 3229 3229 } 3230 3230 3231 - /* Concatinate two arrays */ 3231 + /* Concatenate two arrays */ 3232 3232 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) 3233 3233 { 3234 3234 void *ret; ··· 3258 3258 if (*ntevs + ntevs2 > probe_conf.max_probes) 3259 3259 ret = -E2BIG; 3260 3260 else { 3261 - /* Concatinate the array of probe_trace_event */ 3261 + /* Concatenate the array of probe_trace_event */ 3262 3262 new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs), 3263 3263 *tevs2, ntevs2 * sizeof(**tevs2)); 3264 3264 if (!new_tevs)
+3 -3
tools/perf/util/probe-finder.c
··· 164 164 /* 165 165 * Convert a location into trace_arg. 166 166 * If tvar == NULL, this just checks variable can be converted. 167 - * If fentry == true and vr_die is a parameter, do huristic search 167 + * If fentry == true and vr_die is a parameter, do heuristic search 168 168 * for the location fuzzed by function entry mcount. 169 169 */ 170 170 static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, ··· 498 498 " nor array.\n", varname); 499 499 return -EINVAL; 500 500 } 501 - /* While prcessing unnamed field, we don't care about this */ 501 + /* While processing unnamed field, we don't care about this */ 502 502 if (field->ref && dwarf_diename(vr_die)) { 503 503 pr_err("Semantic error: %s must be referred by '.'\n", 504 504 field->name); ··· 1832 1832 (lf->lno_s > lineno || lf->lno_e < lineno)) 1833 1833 return 0; 1834 1834 1835 - /* Make sure this line can be reversable */ 1835 + /* Make sure this line can be reversible */ 1836 1836 if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0 1837 1837 && (lineno != __lineno || strcmp(fname, __fname))) 1838 1838 return 0;
+2
tools/perf/util/python-ext-sources
··· 37 37 util/affinity.c 38 38 util/rwsem.c 39 39 util/hashmap.c 40 + util/pmu-hybrid.c 41 + util/fncache.c
+6
tools/perf/util/python.c
··· 90 90 */ 91 91 void bpf_counter__destroy(struct evsel *evsel); 92 92 int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); 93 + int bpf_counter__disable(struct evsel *evsel); 93 94 94 95 void bpf_counter__destroy(struct evsel *evsel __maybe_unused) 95 96 { 96 97 } 97 98 98 99 int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) 100 + { 101 + return 0; 102 + } 103 + 104 + int bpf_counter__disable(struct evsel *evsel __maybe_unused) 99 105 { 100 106 return 0; 101 107 }
+5 -5
tools/perf/util/s390-cpumsf.c
··· 45 45 * the data portion is mmap()'ed. 46 46 * 47 47 * To sort the queues in chronological order, all queue access is controlled 48 - * by the auxtrace_heap. This is basicly a stack, each stack element has two 48 + * by the auxtrace_heap. This is basically a stack, each stack element has two 49 49 * entries, the queue number and a time stamp. However the stack is sorted by 50 50 * the time stamps. The highest time stamp is at the bottom the lowest 51 51 * (nearest) time stamp is at the top. That sort order is maintained at all ··· 65 65 * stamp of the last processed entry of the auxtrace_buffer replaces the 66 66 * current auxtrace_heap top. 67 67 * 68 - * 3. Auxtrace_queues might run of out data and are feeded by the 68 + * 3. Auxtrace_queues might run of out data and are fed by the 69 69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). 70 70 * 71 71 * Event Generation 72 - * Each sampling-data entry in the auxilary trace data generates a perf sample. 72 + * Each sampling-data entry in the auxiliary trace data generates a perf sample. 73 73 * This sample is filled 74 74 * with data from the auxtrace such as PID/TID, instruction address, CPU state, 75 75 * etc. This sample is processed with perf_session__deliver_synth_event() to ··· 575 575 * pointer to the queue, the second parameter is the time stamp. This 576 576 * is the time stamp: 577 577 * - of the event that triggered this processing. 578 - * - or the time stamp when the last proccesing of this queue stopped. 578 + * - or the time stamp when the last processing of this queue stopped. 579 579 * In this case it stopped at a 4KB page boundary and record the 580 580 * position on where to continue processing on the next invocation 581 581 * (see buffer->use_data and buffer->use_size). 
··· 640 640 goto out; 641 641 } 642 642 643 - pos += dsdes; /* Skip diagnositic entry */ 643 + pos += dsdes; /* Skip diagnostic entry */ 644 644 645 645 /* Check for trailer entry */ 646 646 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
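The auxtrace_heap described in the header comment above keeps (queue number, time stamp) pairs ordered so the lowest (nearest) time stamp sits on top and is popped first. A minimal standalone sketch of that idea, with hypothetical names and a fixed-size array rather than the kernel's actual auxtrace_heap implementation:

```c
#include <assert.h>

/* One heap element: a queue number and the time stamp of its next event. */
struct heap_elem {
	unsigned int queue_nr;
	unsigned long long timestamp;
};

struct ts_heap {
	struct heap_elem elems[16];
	int nr;
};

/*
 * Insert keeping the array sorted in descending time-stamp order, so the
 * highest time stamp is at the bottom and the lowest (nearest) one is at
 * the top (the last element), as the comment above describes.
 */
static void heap_add(struct ts_heap *h, unsigned int q, unsigned long long ts)
{
	int i = h->nr++;

	while (i > 0 && h->elems[i - 1].timestamp < ts) {
		h->elems[i] = h->elems[i - 1];
		i--;
	}
	h->elems[i].queue_nr = q;
	h->elems[i].timestamp = ts;
}

/* Pop the entry with the lowest time stamp: the top of the stack. */
static struct heap_elem heap_pop(struct ts_heap *h)
{
	return h->elems[--h->nr];
}
```

Processing then loops: pop the nearest queue, consume its buffer up to some limit, and re-add it with the time stamp where processing stopped, which is exactly the replace-the-top step described in point 2 of the comment.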
+1 -3
tools/perf/util/s390-sample-raw.c
··· 160 160 const char *color = PERF_COLOR_BLUE; 161 161 struct cf_ctrset_entry *cep, ce; 162 162 struct pmu_events_map *map; 163 - struct perf_pmu pmu; 164 163 u64 *p; 165 164 166 - memset(&pmu, 0, sizeof(pmu)); 167 - map = perf_pmu__find_map(&pmu); 165 + map = pmu_events_map__find(); 168 166 while (offset < len) { 169 167 cep = (struct cf_ctrset_entry *)(buf + offset); 170 168
+1 -1
tools/perf/util/scripting-engines/trace-event-python.c
··· 1531 1531 * Attempt to use the call path root from the call return 1532 1532 * processor, if the call return processor is in use. Otherwise, 1533 1533 * we allocate a new call path root. This prevents exporting 1534 - * duplicate call path ids when both are in use simultaniously. 1534 + * duplicate call path ids when both are in use simultaneously. 1535 1535 */ 1536 1536 if (tables->dbe.crp) 1537 1537 tables->dbe.cpr = tables->dbe.crp->cpr;
+34 -7
tools/perf/util/session.c
··· 29 29 #include "thread-stack.h" 30 30 #include "sample-raw.h" 31 31 #include "stat.h" 32 + #include "tsc.h" 32 33 #include "ui/progress.h" 33 34 #include "../perf.h" 34 35 #include "arch/common.h" ··· 452 451 return 0; 453 452 } 454 453 454 + static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, 455 + union perf_event *event) 456 + { 457 + if (dump_trace) 458 + perf_event__fprintf_time_conv(event, stdout); 459 + 460 + dump_printf(": unhandled!\n"); 461 + return 0; 462 + } 463 + 455 464 static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, 456 465 union perf_event *event __maybe_unused, 457 466 u64 file_offset __maybe_unused) ··· 543 532 if (tool->stat_round == NULL) 544 533 tool->stat_round = process_stat_round_stub; 545 534 if (tool->time_conv == NULL) 546 - tool->time_conv = process_event_op2_stub; 535 + tool->time_conv = process_event_time_conv_stub; 547 536 if (tool->feature == NULL) 548 537 tool->feature = process_event_op2_stub; 549 538 if (tool->compressed == NULL) ··· 960 949 event->stat_round.time = bswap_64(event->stat_round.time); 961 950 } 962 951 952 + static void perf_event__time_conv_swap(union perf_event *event, 953 + bool sample_id_all __maybe_unused) 954 + { 955 + event->time_conv.time_shift = bswap_64(event->time_conv.time_shift); 956 + event->time_conv.time_mult = bswap_64(event->time_conv.time_mult); 957 + event->time_conv.time_zero = bswap_64(event->time_conv.time_zero); 958 + 959 + if (event_contains(event->time_conv, time_cycles)) { 960 + event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles); 961 + event->time_conv.time_mask = bswap_64(event->time_conv.time_mask); 962 + } 963 + } 964 + 963 965 typedef void (*perf_event__swap_op)(union perf_event *event, 964 966 bool sample_id_all); 965 967 ··· 1009 985 [PERF_RECORD_STAT] = perf_event__stat_swap, 1010 986 [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, 1011 987 
[PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, 1012 - [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, 988 + [PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap, 1013 989 [PERF_RECORD_HEADER_MAX] = NULL, 1014 990 }; 1015 991 ··· 1093 1069 * in "to" register. 1094 1070 * For example, there is a call stack 1095 1071 * "A"->"B"->"C"->"D". 1096 - * The LBR registers will recorde like 1072 + * The LBR registers will be recorded like 1097 1073 * "C"->"D", "B"->"C", "A"->"B". 1098 1074 * So only the first "to" register and all "from" 1099 1075 * registers are needed to construct the whole stack. ··· 1326 1302 1327 1303 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { 1328 1304 printf("... weight: %" PRIu64 "", sample->weight); 1329 - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) 1305 + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { 1330 1306 printf(",0x%"PRIx16"", sample->ins_lat); 1307 + printf(",0x%"PRIx16"", sample->p_stage_cyc); 1308 + } 1331 1309 printf("\n"); 1332 1310 } 1333 1311 ··· 1610 1584 return tool->event_update(tool, event, &session->evlist); 1611 1585 case PERF_RECORD_HEADER_EVENT_TYPE: 1612 1586 /* 1613 - * Depreceated, but we need to handle it for sake 1587 + * Deprecated, but we need to handle it for sake 1614 1588 * of old data files create in pipe mode. 1615 1589 */ 1616 1590 return 0; ··· 2376 2350 return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); 2377 2351 } 2378 2352 2379 - size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) 2353 + size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, 2354 + bool skip_empty) 2380 2355 { 2381 2356 size_t ret; 2382 2357 const char *msg = ""; ··· 2387 2360 2388 2361 ret = fprintf(fp, "\nAggregated stats:%s\n", msg); 2389 2362 2390 - ret += events_stats__fprintf(&session->evlist->stats, fp); 2363 + ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty); 2391 2364 return ret; 2392 2365 } 2393 2366
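The new perf_event__time_conv_swap() above byte-swaps each 64-bit field individually, and the extended fields only when present, instead of blindly swapping the whole record as perf_event__all64_swap did. A self-contained stand-in for the bswap_64() it relies on (illustrative only; the perf tools use the system byteswap helpers):

```c
#include <assert.h>
#include <stdint.h>

/*
 * Reverse the byte order of a 64-bit value, as done for each field of
 * PERF_RECORD_TIME_CONV when the perf.data file was written on a machine
 * of the opposite endianness: swap 32-bit halves, then 16-bit pairs
 * within each half, then bytes within each pair.
 */
static uint64_t bswap64(uint64_t x)
{
	x = ((x & 0x00000000ffffffffULL) << 32) | (x >> 32);
	x = ((x & 0x0000ffff0000ffffULL) << 16) | ((x >> 16) & 0x0000ffff0000ffffULL);
	x = ((x & 0x00ff00ff00ff00ffULL) << 8)  | ((x >> 8)  & 0x00ff00ff00ff00ffULL);
	return x;
}
```

The per-field approach matters because the record's trailing members may not exist in files produced by older tools, so a fixed all-64-bit sweep could swap past the end of the written data.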
+2 -1
tools/perf/util/session.h
··· 113 113 size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, 114 114 bool (fn)(struct dso *dso, int parm), int parm); 115 115 116 - size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); 116 + size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, 117 + bool skip_empty); 117 118 118 119 struct evsel *perf_session__find_first_evtype(struct perf_session *session, 119 120 unsigned int type);
+58 -2
tools/perf/util/sort.c
··· 25 25 #include <traceevent/event-parse.h> 26 26 #include "mem-events.h" 27 27 #include "annotate.h" 28 + #include "event.h" 28 29 #include "time-utils.h" 29 30 #include "cgroup.h" 30 31 #include "machine.h" ··· 37 36 const char *parent_pattern = default_parent_pattern; 38 37 const char *default_sort_order = "comm,dso,symbol"; 39 38 const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; 40 - const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; 39 + const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; 41 40 const char default_top_sort_order[] = "dso,symbol"; 42 41 const char default_diff_sort_order[] = "dso,symbol"; 43 42 const char default_tracepoint_sort_order[] = "trace"; ··· 46 45 regex_t ignore_callees_regex; 47 46 int have_ignore_callees = 0; 48 47 enum sort_mode sort__mode = SORT_MODE__NORMAL; 48 + const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; 49 + const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; 49 50 50 51 /* 51 52 * Replaces all occurrences of a char used with the: ··· 1411 1408 .se_width_idx = HISTC_GLOBAL_INS_LAT, 1412 1409 }; 1413 1410 1411 + static int64_t 1412 + sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) 1413 + { 1414 + return left->stat.p_stage_cyc - right->stat.p_stage_cyc; 1415 + } 1416 + 1417 + static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, 1418 + size_t size, unsigned int width) 1419 + { 1420 + return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); 1421 + } 1422 + 1423 + struct sort_entry sort_p_stage_cyc = { 1424 + .se_header = "Pipeline Stage Cycle", 1425 + .se_cmp = sort__global_p_stage_cyc_cmp, 1426 + .se_snprintf = hist_entry__p_stage_cyc_snprintf, 1427 + .se_width_idx = HISTC_P_STAGE_CYC, 1428 + }; 1429 + 1414 1430 struct sort_entry 
sort_mem_daddr_sym = { 1415 1431 .se_header = "Data Symbol", 1416 1432 .se_cmp = sort__daddr_cmp, ··· 1838 1816 int taken; 1839 1817 }; 1840 1818 1819 + int __weak arch_support_sort_key(const char *sort_key __maybe_unused) 1820 + { 1821 + return 0; 1822 + } 1823 + 1824 + const char * __weak arch_perf_header_entry(const char *se_header) 1825 + { 1826 + return se_header; 1827 + } 1828 + 1829 + static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) 1830 + { 1831 + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); 1832 + } 1833 + 1841 1834 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } 1842 1835 1843 1836 static struct sort_dimension common_sort_dimensions[] = { ··· 1878 1841 DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), 1879 1842 DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), 1880 1843 DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), 1844 + DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), 1881 1845 }; 1882 1846 1883 1847 #undef DIM ··· 2777 2739 struct evlist *evlist, 2778 2740 int level) 2779 2741 { 2780 - unsigned int i; 2742 + unsigned int i, j; 2743 + 2744 + /* 2745 + * Check to see if there are any arch specific 2746 + * sort dimensions not applicable for the current 2747 + * architecture. If so, Skip that sort key since 2748 + * we don't want to display it in the output fields. 
2749 + */ 2750 + for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { 2751 + if (!strcmp(arch_specific_sort_keys[j], tok) && 2752 + !arch_support_sort_key(tok)) { 2753 + return 0; 2754 + } 2755 + } 2781 2756 2782 2757 for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { 2783 2758 struct sort_dimension *sd = &common_sort_dimensions[i]; 2784 2759 2785 2760 if (strncasecmp(tok, sd->name, strlen(tok))) 2786 2761 continue; 2762 + 2763 + for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { 2764 + if (!strcmp(dynamic_headers[j], sd->name)) 2765 + sort_dimension_add_dynamic_header(sd); 2766 + } 2787 2767 2788 2768 if (sd->entry == &sort_parent) { 2789 2769 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
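The `__weak` definitions added above (arch_support_sort_key(), arch_perf_header_entry()) let the generic sort code call arch hooks unconditionally: the weak default answers "not supported" / "unchanged", and an architecture directory can ship a strong definition of the same symbol that the linker prefers. A compact sketch of that pattern; keep_sort_key() is a hypothetical wrapper for illustration, not a perf function:

```c
#include <assert.h>
#include <string.h>

/*
 * Weak default, as in sort.c: generic code returns 0 ("not supported").
 * A strong definition with the same name in another object file, e.g.
 * under an arch/ directory, would override this at link time.
 */
__attribute__((weak)) int arch_support_sort_key(const char *sort_key)
{
	(void)sort_key;
	return 0;
}

/*
 * Generic code can then filter arch-specific keys without any #ifdefs,
 * mirroring the arch_specific_sort_keys[] check added above.
 */
static int keep_sort_key(const char *tok)
{
	if (!strcmp(tok, "p_stage_cyc"))
		return arch_support_sort_key(tok);
	return 1;
}
```

Since no strong override is linked here, the weak default wins and the arch-specific key is dropped while common keys pass through.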
+2
tools/perf/util/sort.h
··· 51 51 u64 period_guest_us; 52 52 u64 weight; 53 53 u64 ins_lat; 54 + u64 p_stage_cyc; 54 55 u32 nr_events; 55 56 }; 56 57 ··· 235 234 SORT_CODE_PAGE_SIZE, 236 235 SORT_LOCAL_INS_LAT, 237 236 SORT_GLOBAL_INS_LAT, 237 + SORT_PIPELINE_STAGE_CYC, 238 238 239 239 /* branch stack specific sort keys */ 240 240 __SORT_BRANCH_STACK,
+52 -12
tools/perf/util/stat-display.c
··· 17 17 #include "cgroup.h" 18 18 #include <api/fs/fs.h> 19 19 #include "util.h" 20 + #include "iostat.h" 21 + #include "pmu-hybrid.h" 20 22 21 23 #define CNTR_NOT_SUPPORTED "<not supported>" 22 24 #define CNTR_NOT_COUNTED "<not counted>" ··· 312 310 struct outstate *os = ctx; 313 311 char tbuf[1024]; 314 312 313 + /* In case of iostat, print metric header for first root port only */ 314 + if (config->iostat_run && 315 + os->evsel->priv != os->evsel->evlist->selected->priv) 316 + return; 317 + 315 318 if (!valid_only_metric(unit)) 316 319 return; 317 320 unit = fixunit(tbuf, os->evsel, unit); ··· 446 439 if (counter->cgrp) 447 440 os.nfields++; 448 441 } 442 + 443 + if (!config->no_csv_summary && config->csv_output && 444 + config->summary && !config->interval) { 445 + fprintf(config->output, "%16s%s", "summary", config->csv_sep); 446 + } 447 + 449 448 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 450 449 if (config->metric_only) { 451 450 pm(config, &os, NULL, "", "", 0); ··· 539 526 { 540 527 char *new_name; 541 528 char *config; 529 + int ret = 0; 542 530 543 531 if (counter->uniquified_name || 544 532 !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, ··· 554 540 counter->name = new_name; 555 541 } 556 542 } else { 557 - if (asprintf(&new_name, 558 - "%s [%s]", counter->name, counter->pmu_name) > 0) { 543 + if (perf_pmu__has_hybrid()) { 544 + if (!counter->use_config_name) { 545 + ret = asprintf(&new_name, "%s/%s/", 546 + counter->pmu_name, counter->name); 547 + } 548 + } else { 549 + ret = asprintf(&new_name, "%s [%s]", 550 + counter->name, counter->pmu_name); 551 + } 552 + 553 + if (ret) { 559 554 free(counter->name); 560 555 counter->name = new_name; 561 556 } ··· 665 642 ad.val = ad.ena = ad.run = 0; 666 643 ad.nr = 0; 667 644 if (!collect_data(config, counter, aggr_cb, &ad)) 645 + return; 646 + 647 + if (perf_pmu__has_hybrid() && ad.ena == 0) 668 648 return; 669 649 670 650 nr = ad.nr; ··· 978 952 if (config->csv_output) { 
979 953 if (config->interval) 980 954 fputs("time,", config->output); 981 - fputs(aggr_header_csv[config->aggr_mode], config->output); 955 + if (!config->iostat_run) 956 + fputs(aggr_header_csv[config->aggr_mode], config->output); 982 957 } 958 + if (config->iostat_run) 959 + iostat_print_header_prefix(config); 983 960 984 961 /* Print metrics headers only */ 985 962 evlist__for_each_entry(evlist, counter) { ··· 1012 983 if (config->interval_clear) 1013 984 puts(CONSOLE_CLEAR); 1014 985 1015 - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); 986 + if (!config->iostat_run) 987 + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); 1016 988 1017 989 if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { 1018 990 switch (config->aggr_mode) { ··· 1049 1019 break; 1050 1020 case AGGR_GLOBAL: 1051 1021 default: 1052 - fprintf(output, "# time"); 1053 - if (!metric_only) 1054 - fprintf(output, " counts %*s events\n", unit_width, "unit"); 1022 + if (!config->iostat_run) { 1023 + fprintf(output, "# time"); 1024 + if (!metric_only) 1025 + fprintf(output, " counts %*s events\n", unit_width, "unit"); 1026 + } 1055 1027 case AGGR_UNSET: 1056 1028 break; 1057 1029 } ··· 1246 1214 struct evsel *counter; 1247 1215 char buf[64], *prefix = NULL; 1248 1216 1217 + if (config->iostat_run) 1218 + evlist->selected = evlist__first(evlist); 1219 + 1249 1220 if (interval) 1250 1221 print_interval(config, evlist, prefix = buf, ts); 1251 1222 else ··· 1261 1226 print_metric_headers(config, evlist, prefix, false); 1262 1227 if (num_print_iv++ == 25) 1263 1228 num_print_iv = 0; 1264 - if (config->aggr_mode == AGGR_GLOBAL && prefix) 1229 + if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) 1265 1230 fprintf(config->output, "%s", prefix); 1266 1231 } 1267 1232 ··· 1278 1243 } 1279 1244 break; 1280 1245 case AGGR_GLOBAL: 1281 - evlist__for_each_entry(evlist, counter) { 1282 
- print_counter_aggr(config, counter, prefix); 1246 + if (config->iostat_run) 1247 + iostat_print_counters(evlist, config, ts, prefix = buf, 1248 + print_counter_aggr); 1249 + else { 1250 + evlist__for_each_entry(evlist, counter) { 1251 + print_counter_aggr(config, counter, prefix); 1252 + } 1253 + if (metric_only) 1254 + fputc('\n', config->output); 1283 1255 } 1284 - if (metric_only) 1285 - fputc('\n', config->output); 1286 1256 break; 1287 1257 case AGGR_NONE: 1288 1258 if (metric_only)
+11 -10
tools/perf/util/stat-shadow.c
··· 9 9 #include "expr.h" 10 10 #include "metricgroup.h" 11 11 #include "cgroup.h" 12 + #include "units.h" 12 13 #include <linux/zalloc.h> 14 + #include "iostat.h" 13 15 14 16 /* 15 17 * AGGR_GLOBAL: Use CPU 0 ··· 963 961 struct metric_event *me; 964 962 int num = 1; 965 963 966 - if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 964 + if (config->iostat_run) { 965 + iostat_print_metric(config, evsel, out); 966 + } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 967 967 total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); 968 968 969 969 if (total) { ··· 1274 1270 generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, 1275 1271 evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); 1276 1272 } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { 1277 - char unit = 'M'; 1278 - char unit_buf[10]; 1273 + char unit = ' '; 1274 + char unit_buf[10] = "/sec"; 1279 1275 1280 1276 total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); 1281 - 1282 1277 if (total) 1283 - ratio = 1000.0 * avg / total; 1284 - if (ratio < 0.001) { 1285 - ratio *= 1000; 1286 - unit = 'K'; 1287 - } 1288 - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1278 + ratio = convert_unit_double(1000000000.0 * avg / total, &unit); 1279 + 1280 + if (unit != ' ') 1281 + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1289 1282 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 1290 1283 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 1291 1284 print_smi_cost(config, cpu, out, st, &rsd);
+1 -2
tools/perf/util/stat.c
··· 76 76 return pct; 77 77 } 78 78 79 - bool __perf_evsel_stat__is(struct evsel *evsel, 80 - enum perf_stat_evsel_id id) 79 + bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id) 81 80 { 82 81 struct perf_stat_evsel *ps = evsel->stats; 83 82
+5 -3
tools/perf/util/stat.h
··· 128 128 bool all_user; 129 129 bool percore_show_thread; 130 130 bool summary; 131 + bool no_csv_summary; 131 132 bool metric_no_group; 132 133 bool metric_no_merge; 133 134 bool stop_read_counter; 134 135 bool quiet; 136 + bool iostat_run; 135 137 FILE *output; 136 138 unsigned int interval; 137 139 unsigned int timeout; ··· 162 160 }; 163 161 164 162 void perf_stat__set_big_num(int set); 163 + void perf_stat__set_no_csv_summary(int set); 165 164 166 165 void update_stats(struct stats *stats, u64 val); 167 166 double avg_stats(struct stats *stats); ··· 190 187 u64 ena; 191 188 }; 192 189 193 - bool __perf_evsel_stat__is(struct evsel *evsel, 194 - enum perf_stat_evsel_id id); 190 + bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); 195 191 196 192 #define perf_stat_evsel__is(evsel, id) \ 197 - __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) 193 + __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id) 198 194 199 195 extern struct runtime_stat rt_stat; 200 196 extern struct stats walltime_nsecs_stats;
+1 -1
tools/perf/util/strbuf.h
··· 12 12 * build complex strings/buffers whose final size isn't easily known. 13 13 * 14 14 * It is NOT legal to copy the ->buf pointer away. 15 - * `strbuf_detach' is the operation that detachs a buffer from its shell 15 + * `strbuf_detach' is the operation that detaches a buffer from its shell 16 16 * while keeping the shell valid wrt its invariants. 17 17 * 18 18 * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+2 -2
tools/perf/util/strfilter.h
··· 8 8 9 9 /* A node of string filter */ 10 10 struct strfilter_node { 11 - struct strfilter_node *l; /* Tree left branche (for &,|) */ 12 - struct strfilter_node *r; /* Tree right branche (for !,&,|) */ 11 + struct strfilter_node *l; /* Tree left branch (for &,|) */ 12 + struct strfilter_node *r; /* Tree right branch (for !,&,|) */ 13 13 const char *p; /* Operator or rule */ 14 14 }; 15 15
+1 -1
tools/perf/util/symbol-elf.c
··· 1058 1058 curr_dso->symtab_type = dso->symtab_type; 1059 1059 maps__insert(kmaps, curr_map); 1060 1060 /* 1061 - * Add it before we drop the referece to curr_map, i.e. while 1061 + * Add it before we drop the reference to curr_map, i.e. while 1062 1062 * we still are sure to have a reference to this DSO via 1063 1063 * *curr_map->dso. 1064 1064 */
+1 -1
tools/perf/util/symbol_fprintf.c
··· 68 68 69 69 for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { 70 70 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); 71 - fprintf(fp, "%s\n", pos->sym.name); 71 + ret += fprintf(fp, "%s\n", pos->sym.name); 72 72 } 73 73 74 74 return ret;
+2 -2
tools/perf/util/synthetic-events.c
··· 1211 1211 *max = 0; 1212 1212 1213 1213 for (i = 0; i < map->nr; i++) { 1214 - /* bit possition of the cpu is + 1 */ 1214 + /* bit position of the cpu is + 1 */ 1215 1215 int bit = map->map[i] + 1; 1216 1216 1217 1217 if (bit > *max) ··· 1237 1237 * mask = size of 'struct perf_record_record_cpu_map' + 1238 1238 * maximum cpu bit converted to size of longs 1239 1239 * 1240 - * and finaly + the size of 'struct perf_record_cpu_map_data'. 1240 + * and finally + the size of 'struct perf_record_cpu_map_data'. 1241 1241 */ 1242 1242 size_cpus = cpus_size(map); 1243 1243 size_mask = mask_size(map, max);
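The sizing comments in the hunk above ("bit position of the cpu is + 1", "maximum cpu bit converted to size of longs") amount to rounding a highest bit index up to a whole number of longs for the mask encoding. A sketch of that arithmetic; longs_for_max_cpu() is an illustrative helper, not a perf symbol:

```c
#include <assert.h>
#include <stddef.h>

#define BITS_PER_LONG (8 * sizeof(long))

/*
 * CPU N occupies bit N + 1 in the mask (bit position of the cpu is + 1),
 * so the mask for CPUs 0..max_cpu needs max_cpu + 1 bits, rounded up to
 * whole longs.
 */
static size_t longs_for_max_cpu(size_t max_cpu)
{
	size_t max_bit = max_cpu + 1;

	return (max_bit + BITS_PER_LONG - 1) / BITS_PER_LONG;
}
```

The record size is then this many longs plus the fixed header, which is the size_mask + size of 'struct perf_record_cpu_map_data' sum computed above.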
+4
tools/perf/util/syscalltbl.c
··· 34 34 #include <asm/syscalls.c> 35 35 const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; 36 36 static const char **syscalltbl_native = syscalltbl_arm64; 37 + #elif defined(__mips__) 38 + #include <asm/syscalls_n64.c> 39 + const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; 40 + static const char **syscalltbl_native = syscalltbl_mips_n64; 37 41 #endif 38 42 39 43 struct syscall {
+2 -5
tools/perf/util/target.h
··· 16 16 bool uses_mmap; 17 17 bool default_per_cpu; 18 18 bool per_thread; 19 + bool use_bpf; 20 + const char *attr_map; 19 21 }; 20 22 21 23 enum target_errno { ··· 64 62 static inline bool target__has_cpu(struct target *target) 65 63 { 66 64 return target->system_wide || target->cpu_list; 67 - } 68 - 69 - static inline bool target__has_bpf(struct target *target) 70 - { 71 - return target->bpf_str; 72 65 } 73 66 74 67 static inline bool target__none(struct target *target)
-1
tools/perf/util/thread-stack.h
··· 16 16 struct ip_callchain; 17 17 struct symbol; 18 18 struct dso; 19 - struct comm; 20 19 struct perf_sample; 21 20 struct addr_location; 22 21 struct call_path;
+30
tools/perf/util/tsc.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <errno.h> 3 + #include <inttypes.h> 4 + #include <string.h> 3 5 4 6 #include <linux/compiler.h> 5 7 #include <linux/perf_event.h> ··· 111 109 u64 __weak rdtsc(void) 112 110 { 113 111 return 0; 112 + } 113 + 114 + size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) 115 + { 116 + struct perf_record_time_conv *tc = (struct perf_record_time_conv *)event; 117 + size_t ret; 118 + 119 + ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); 120 + ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult); 121 + ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); 122 + 123 + /* 124 + * The event TIME_CONV was extended with the fields from "time_cycles" 125 + * onward when cap_user_time_short became supported; for backward 126 + * compatibility, print the extended fields only if present in the event. 127 + */ 128 + if (event_contains(*tc, time_cycles)) { 129 + ret += fprintf(fp, "... Time Cycles %" PRI_lu64 "\n", 130 + tc->time_cycles); 131 + ret += fprintf(fp, "... Time Mask %#" PRI_lx64 "\n", 132 + tc->time_mask); 133 + ret += fprintf(fp, "... Cap Time Zero %" PRId32 "\n", 134 + tc->cap_user_time_zero); 135 + ret += fprintf(fp, "... Cap Time Short %" PRId32 "\n", 136 + tc->cap_user_time_short); 137 + } 138 + 139 + return ret; 114 140 }
+4
tools/perf/util/tsc.h
··· 4 4 5 5 #include <linux/types.h> 6 6 7 + #include "event.h" 8 + 7 9 struct perf_tsc_conversion { 8 10 u16 time_shift; 9 11 u32 time_mult; ··· 25 23 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); 26 24 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); 27 25 u64 rdtsc(void); 26 + 27 + size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); 28 28 29 29 #endif // __PERF_TSC_H
+14 -7
tools/perf/util/units.c
··· 33 33 return (unsigned long) -1; 34 34 } 35 35 36 - unsigned long convert_unit(unsigned long value, char *unit) 36 + double convert_unit_double(double value, char *unit) 37 37 { 38 38 *unit = ' '; 39 39 40 - if (value > 1000) { 41 - value /= 1000; 40 + if (value > 1000.0) { 41 + value /= 1000.0; 42 42 *unit = 'K'; 43 43 } 44 44 45 - if (value > 1000) { 46 - value /= 1000; 45 + if (value > 1000.0) { 46 + value /= 1000.0; 47 47 *unit = 'M'; 48 48 } 49 49 50 - if (value > 1000) { 51 - value /= 1000; 50 + if (value > 1000.0) { 51 + value /= 1000.0; 52 52 *unit = 'G'; 53 53 } 54 54 55 55 return value; 56 + } 57 + 58 + unsigned long convert_unit(unsigned long value, char *unit) 59 + { 60 + double v = convert_unit_double((double)value, unit); 61 + 62 + return (unsigned long)v; 56 63 } 57 64 58 65 int unit_number__scnprintf(char *buf, size_t size, u64 n)
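The convert_unit_double() added above keeps fractional precision when scaling into K/M/G, which is what lets stat-shadow.c print rates like "1.234 M/sec" instead of a truncated integer. A standalone copy of the scaling rule, renamed scale_si() to avoid implying it is the perf symbol:

```c
#include <assert.h>

/*
 * Divide by 1000 up to three times and report the matching SI suffix,
 * mirroring the convert_unit_double() logic in the hunk above. Values
 * at or below 1000 keep a blank unit.
 */
static double scale_si(double value, char *unit)
{
	*unit = ' ';

	if (value > 1000.0) {
		value /= 1000.0;
		*unit = 'K';
	}
	if (value > 1000.0) {
		value /= 1000.0;
		*unit = 'M';
	}
	if (value > 1000.0) {
		value /= 1000.0;
		*unit = 'G';
	}
	return value;
}
```

The integer convert_unit() above then becomes a thin wrapper that truncates this double result, so both callers share one set of thresholds.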
+1
tools/perf/util/units.h
··· 12 12 13 13 unsigned long parse_tag_value(const char *str, struct parse_tag *tags); 14 14 15 + double convert_unit_double(double value, char *unit); 15 16 unsigned long convert_unit(unsigned long value, char *unit); 16 17 int unit_number__scnprintf(char *buf, size_t size, u64 n); 17 18
+1 -1
tools/perf/util/unwind-libunwind-local.c
··· 82 82 #define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ 83 83 #define DW_EH_PE_aligned 0x50 /* aligned pointer */ 84 84 85 - /* Flags intentionaly not handled, since they're not needed: 85 + /* Flags intentionally not handled, since they're not needed: 86 86 * #define DW_EH_PE_indirect 0x80 87 87 * #define DW_EH_PE_uleb128 0x01 88 88 * #define DW_EH_PE_udata2 0x02