Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more perf updates from Thomas Gleixner:
"Perf updates all over the place:

core:

- Support for cgroup tracking in samples to allow cgroup based
analysis

tools:

- Support for cgroup analysis

- Commandline option and hotkey for perf top to change the sort order

- A set of fixes all over the place

- Various build system related improvements

- Updates of the X86 pmu event JSON data

- Documentation updates"

* tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
perf python: Fix clang detection to strip out options passed in $CC
perf tools: Support Python 3.8+ in Makefile
perf script: Fix invalid read of directory entry after closedir()
perf script report: Fix SEGFAULT when using DWARF mode
perf script: add -S/--symbols documentation
perf pmu-events x86: Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric
perf events parser: Add missing Intel CPU events to parser
perf script: Allow --symbol to accept hexadecimal addresses
perf report/top TUI: Fix title line formatting
perf top: Support hotkey to change sort order
perf top: Support --group-sort-idx to change the sort order
perf symbols: Fix arm64 gap between kernel start and module end
perf build-test: Honour JOBS to override detection of number of cores
perf script: Add --show-cgroup-events option
perf top: Add --all-cgroups option
perf record: Add --all-cgroups option
perf record: Support synthesizing cgroup events
perf report: Add 'cgroup' sort key
perf cgroup: Maintain cgroup hierarchy
perf tools: Basic support for CGROUP event
...

+3181 -622
+1
include/linux/perf_event.h
··· 1020 1020 u64 stack_user_size; 1021 1021 1022 1022 u64 phys_addr; 1023 + u64 cgroup; 1023 1024 } ____cacheline_aligned; 1024 1025 1025 1026 /* default value for data source */
+14 -2
include/uapi/linux/perf_event.h
··· 142 142 PERF_SAMPLE_REGS_INTR = 1U << 18, 143 143 PERF_SAMPLE_PHYS_ADDR = 1U << 19, 144 144 PERF_SAMPLE_AUX = 1U << 20, 145 + PERF_SAMPLE_CGROUP = 1U << 21, 145 146 146 - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ 147 + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ 147 148 148 149 __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ 149 150 }; ··· 382 381 ksymbol : 1, /* include ksymbol events */ 383 382 bpf_event : 1, /* include bpf events */ 384 383 aux_output : 1, /* generate AUX records instead of events */ 385 - __reserved_1 : 32; 384 + cgroup : 1, /* include cgroup events */ 385 + __reserved_1 : 31; 386 386 387 387 union { 388 388 __u32 wakeup_events; /* wakeup every n events */ ··· 1013 1011 * }; 1014 1012 */ 1015 1013 PERF_RECORD_BPF_EVENT = 18, 1014 + 1015 + /* 1016 + * struct { 1017 + * struct perf_event_header header; 1018 + * u64 id; 1019 + * char path[]; 1020 + * struct sample_id sample_id; 1021 + * }; 1022 + */ 1023 + PERF_RECORD_CGROUP = 19, 1016 1024 1017 1025 PERF_RECORD_MAX, /* non-ABI */ 1018 1026 };
+2 -1
init/Kconfig
··· 1029 1029 help 1030 1030 This option extends the perf per-cpu mode to restrict monitoring 1031 1031 to threads which belong to the cgroup specified and run on the 1032 - designated cpu. 1032 + designated cpu. Or this can be used to have cgroup ID in samples 1033 + so that it can monitor performance events among cgroups. 1033 1034 1034 1035 Say N if unsure. 1035 1036
+133
kernel/events/core.c
··· 387 387 static atomic_t nr_switch_events __read_mostly; 388 388 static atomic_t nr_ksymbol_events __read_mostly; 389 389 static atomic_t nr_bpf_events __read_mostly; 390 + static atomic_t nr_cgroup_events __read_mostly; 390 391 391 392 static LIST_HEAD(pmus); 392 393 static DEFINE_MUTEX(pmus_lock); ··· 1861 1860 1862 1861 if (sample_type & PERF_SAMPLE_PHYS_ADDR) 1863 1862 size += sizeof(data->phys_addr); 1863 + 1864 + if (sample_type & PERF_SAMPLE_CGROUP) 1865 + size += sizeof(data->cgroup); 1864 1866 1865 1867 event->header_size = size; 1866 1868 } ··· 4612 4608 atomic_dec(&nr_comm_events); 4613 4609 if (event->attr.namespaces) 4614 4610 atomic_dec(&nr_namespaces_events); 4611 + if (event->attr.cgroup) 4612 + atomic_dec(&nr_cgroup_events); 4615 4613 if (event->attr.task) 4616 4614 atomic_dec(&nr_task_events); 4617 4615 if (event->attr.freq) ··· 6870 6864 if (sample_type & PERF_SAMPLE_PHYS_ADDR) 6871 6865 perf_output_put(handle, data->phys_addr); 6872 6866 6867 + if (sample_type & PERF_SAMPLE_CGROUP) 6868 + perf_output_put(handle, data->cgroup); 6869 + 6873 6870 if (sample_type & PERF_SAMPLE_AUX) { 6874 6871 perf_output_put(handle, data->aux_size); 6875 6872 ··· 7071 7062 7072 7063 if (sample_type & PERF_SAMPLE_PHYS_ADDR) 7073 7064 data->phys_addr = perf_virt_to_phys(data->addr); 7065 + 7066 + #ifdef CONFIG_CGROUP_PERF 7067 + if (sample_type & PERF_SAMPLE_CGROUP) { 7068 + struct cgroup *cgrp; 7069 + 7070 + /* protected by RCU */ 7071 + cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; 7072 + data->cgroup = cgroup_id(cgrp); 7073 + } 7074 + #endif 7074 7075 7075 7076 if (sample_type & PERF_SAMPLE_AUX) { 7076 7077 u64 size; ··· 7753 7734 &namespaces_event, 7754 7735 NULL); 7755 7736 } 7737 + 7738 + /* 7739 + * cgroup tracking 7740 + */ 7741 + #ifdef CONFIG_CGROUP_PERF 7742 + 7743 + struct perf_cgroup_event { 7744 + char *path; 7745 + int path_size; 7746 + struct { 7747 + struct perf_event_header header; 7748 + u64 id; 7749 + char path[]; 7750 + } 
event_id; 7751 + }; 7752 + 7753 + static int perf_event_cgroup_match(struct perf_event *event) 7754 + { 7755 + return event->attr.cgroup; 7756 + } 7757 + 7758 + static void perf_event_cgroup_output(struct perf_event *event, void *data) 7759 + { 7760 + struct perf_cgroup_event *cgroup_event = data; 7761 + struct perf_output_handle handle; 7762 + struct perf_sample_data sample; 7763 + u16 header_size = cgroup_event->event_id.header.size; 7764 + int ret; 7765 + 7766 + if (!perf_event_cgroup_match(event)) 7767 + return; 7768 + 7769 + perf_event_header__init_id(&cgroup_event->event_id.header, 7770 + &sample, event); 7771 + ret = perf_output_begin(&handle, event, 7772 + cgroup_event->event_id.header.size); 7773 + if (ret) 7774 + goto out; 7775 + 7776 + perf_output_put(&handle, cgroup_event->event_id); 7777 + __output_copy(&handle, cgroup_event->path, cgroup_event->path_size); 7778 + 7779 + perf_event__output_id_sample(event, &handle, &sample); 7780 + 7781 + perf_output_end(&handle); 7782 + out: 7783 + cgroup_event->event_id.header.size = header_size; 7784 + } 7785 + 7786 + static void perf_event_cgroup(struct cgroup *cgrp) 7787 + { 7788 + struct perf_cgroup_event cgroup_event; 7789 + char path_enomem[16] = "//enomem"; 7790 + char *pathname; 7791 + size_t size; 7792 + 7793 + if (!atomic_read(&nr_cgroup_events)) 7794 + return; 7795 + 7796 + cgroup_event = (struct perf_cgroup_event){ 7797 + .event_id = { 7798 + .header = { 7799 + .type = PERF_RECORD_CGROUP, 7800 + .misc = 0, 7801 + .size = sizeof(cgroup_event.event_id), 7802 + }, 7803 + .id = cgroup_id(cgrp), 7804 + }, 7805 + }; 7806 + 7807 + pathname = kmalloc(PATH_MAX, GFP_KERNEL); 7808 + if (pathname == NULL) { 7809 + cgroup_event.path = path_enomem; 7810 + } else { 7811 + /* just to be sure to have enough space for alignment */ 7812 + cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64)); 7813 + cgroup_event.path = pathname; 7814 + } 7815 + 7816 + /* 7817 + * Since our buffer works in 8 byte units we need to align our 
string 7818 + * size to a multiple of 8. However, we must guarantee the tail end is 7819 + * zero'd out to avoid leaking random bits to userspace. 7820 + */ 7821 + size = strlen(cgroup_event.path) + 1; 7822 + while (!IS_ALIGNED(size, sizeof(u64))) 7823 + cgroup_event.path[size++] = '\0'; 7824 + 7825 + cgroup_event.event_id.header.size += size; 7826 + cgroup_event.path_size = size; 7827 + 7828 + perf_iterate_sb(perf_event_cgroup_output, 7829 + &cgroup_event, 7830 + NULL); 7831 + 7832 + kfree(pathname); 7833 + } 7834 + 7835 + #endif 7756 7836 7757 7837 /* 7758 7838 * mmap tracking ··· 10896 10778 atomic_inc(&nr_comm_events); 10897 10779 if (event->attr.namespaces) 10898 10780 atomic_inc(&nr_namespaces_events); 10781 + if (event->attr.cgroup) 10782 + atomic_inc(&nr_cgroup_events); 10899 10783 if (event->attr.task) 10900 10784 atomic_inc(&nr_task_events); 10901 10785 if (event->attr.freq) ··· 11277 11157 11278 11158 if (attr->sample_type & PERF_SAMPLE_REGS_INTR) 11279 11159 ret = perf_reg_validate(attr->sample_regs_intr); 11160 + 11161 + #ifndef CONFIG_CGROUP_PERF 11162 + if (attr->sample_type & PERF_SAMPLE_CGROUP) 11163 + return -EINVAL; 11164 + #endif 11165 + 11280 11166 out: 11281 11167 return ret; 11282 11168 ··· 12880 12754 kfree(jc); 12881 12755 } 12882 12756 12757 + static int perf_cgroup_css_online(struct cgroup_subsys_state *css) 12758 + { 12759 + perf_event_cgroup(css->cgroup); 12760 + return 0; 12761 + } 12762 + 12883 12763 static int __perf_cgroup_move(void *info) 12884 12764 { 12885 12765 struct task_struct *task = info; ··· 12907 12775 struct cgroup_subsys perf_event_cgrp_subsys = { 12908 12776 .css_alloc = perf_cgroup_css_alloc, 12909 12777 .css_free = perf_cgroup_css_free, 12778 + .css_online = perf_cgroup_css_online, 12910 12779 .attach = perf_cgroup_attach, 12911 12780 /* 12912 12781 * Implicitly enable on dfl hierarchy so that perf events can
+2 -1
tools/build/Makefile.feature
··· 72 72 setns \ 73 73 libaio \ 74 74 libzstd \ 75 - disassembler-four-args 75 + disassembler-four-args \ 76 + file-handle 76 77 77 78 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list 78 79 # of all feature tests
+4 -1
tools/build/feature/Makefile
··· 68 68 test-llvm-version.bin \ 69 69 test-libaio.bin \ 70 70 test-libzstd.bin \ 71 - test-clang-bpf-global-var.bin 71 + test-clang-bpf-global-var.bin \ 72 + test-file-handle.bin 72 73 73 74 FILES := $(addprefix $(OUTPUT),$(FILES)) 74 75 ··· 328 327 $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ 329 328 grep BTF_KIND_VAR 330 329 330 + $(OUTPUT)test-file-handle.bin: 331 + $(BUILD) 331 332 332 333 ############################### 333 334
+17
tools/build/feature/test-file-handle.c
··· 1 + #define _GNU_SOURCE 2 + #include <sys/types.h> 3 + #include <sys/stat.h> 4 + #include <fcntl.h> 5 + #include <inttypes.h> 6 + 7 + int main(void) 8 + { 9 + struct { 10 + struct file_handle fh; 11 + uint64_t cgroup_id; 12 + } handle; 13 + int mount_id; 14 + 15 + name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0); 16 + return 0; 17 + }
+14 -2
tools/include/uapi/linux/perf_event.h
··· 142 142 PERF_SAMPLE_REGS_INTR = 1U << 18, 143 143 PERF_SAMPLE_PHYS_ADDR = 1U << 19, 144 144 PERF_SAMPLE_AUX = 1U << 20, 145 + PERF_SAMPLE_CGROUP = 1U << 21, 145 146 146 - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ 147 + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ 147 148 148 149 __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ 149 150 }; ··· 382 381 ksymbol : 1, /* include ksymbol events */ 383 382 bpf_event : 1, /* include bpf events */ 384 383 aux_output : 1, /* generate AUX records instead of events */ 385 - __reserved_1 : 32; 384 + cgroup : 1, /* include cgroup events */ 385 + __reserved_1 : 31; 386 386 387 387 union { 388 388 __u32 wakeup_events; /* wakeup every n events */ ··· 1013 1011 * }; 1014 1012 */ 1015 1013 PERF_RECORD_BPF_EVENT = 18, 1014 + 1015 + /* 1016 + * struct { 1017 + * struct perf_event_header header; 1018 + * u64 id; 1019 + * char path[]; 1020 + * struct sample_id sample_id; 1021 + * }; 1022 + */ 1023 + PERF_RECORD_CGROUP = 19, 1016 1024 1017 1025 PERF_RECORD_MAX, /* non-ABI */ 1018 1026 };
+7
tools/lib/perf/include/perf/event.h
··· 105 105 __u8 tag[BPF_TAG_SIZE]; // prog tag 106 106 }; 107 107 108 + struct perf_record_cgroup { 109 + struct perf_event_header header; 110 + __u64 id; 111 + char path[PATH_MAX]; 112 + }; 113 + 108 114 struct perf_record_sample { 109 115 struct perf_event_header header; 110 116 __u64 array[]; ··· 358 352 struct perf_record_mmap2 mmap2; 359 353 struct perf_record_comm comm; 360 354 struct perf_record_namespaces namespaces; 355 + struct perf_record_cgroup cgroup; 361 356 struct perf_record_fork fork; 362 357 struct perf_record_lost lost; 363 358 struct perf_record_lost_samples lost_samples;
+8 -6
tools/perf/Documentation/perf-config.txt
··· 405 405 This option is only applied to TUI. 406 406 407 407 call-graph.*:: 408 - When sub-commands 'top' and 'report' work with -g/--children 409 - there're options in control of call-graph. 408 + The following controls the handling of call-graphs (obtained via the 409 + -g/--call-graph options). 410 410 411 411 call-graph.record-mode:: 412 - The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. 413 - The value of 'dwarf' is effective only if perf detect needed library 414 - (libunwind or a recent version of libdw). 415 - 'lbr' only work for cpus that support it. 412 + The mode for user space can be 'fp' (frame pointer), 'dwarf' 413 + and 'lbr'. The value 'dwarf' is effective only if libunwind 414 + (or a recent version of libdw) is present on the system; 415 + the value 'lbr' only works for certain cpus. The method for 416 + kernel space is controlled not by this option but by the 417 + kernel config (CONFIG_UNWINDER_*). 416 418 417 419 call-graph.dump-size:: 418 420 The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
+16 -7
tools/perf/Documentation/perf-record.txt
··· 237 237 option and remains only for backward compatibility. See --event. 238 238 239 239 -g:: 240 - Enables call-graph (stack chain/backtrace) recording. 240 + Enables call-graph (stack chain/backtrace) recording for both 241 + kernel space and user space. 241 242 242 243 --call-graph:: 243 244 Setup and enable call-graph (stack chain/backtrace) recording, 244 - implies -g. Default is "fp". 245 + implies -g. Default is "fp" (for user space). 245 246 246 - Allows specifying "fp" (frame pointer) or "dwarf" 247 - (DWARF's CFI - Call Frame Information) or "lbr" 248 - (Hardware Last Branch Record facility) as the method to collect 249 - the information used to show the call graphs. 247 + The unwinding method used for kernel space is dependent on the 248 + unwinder used by the active kernel configuration, i.e. 249 + CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc) 250 + 251 + Any option specified here controls the method used for user space. 252 + 253 + Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI - 254 + Call Frame Information) or "lbr" (Hardware Last Branch Record 255 + facility). 250 256 251 257 In some systems, where binaries are built with gcc 252 258 --fomit-frame-pointer, using the "fp" method will produce bogus ··· 391 385 abort events and some memory events in precise mode on modern Intel CPUs. 392 386 393 387 --namespaces:: 394 - Record events of type PERF_RECORD_NAMESPACES. 388 + Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key. 389 + 390 + --all-cgroups:: 391 + Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key. 395 392 396 393 --transaction:: 397 394 Record transaction flags for transaction related events.
+6
tools/perf/Documentation/perf-report.txt
··· 95 95 abort cost. This is the global weight. 96 96 - local_weight: Local weight version of the weight above. 97 97 - cgroup_id: ID derived from cgroup namespace device and inode numbers. 98 + - cgroup: cgroup pathname in the cgroupfs. 98 99 - transaction: Transaction abort flags. 99 100 - overhead: Overhead percentage of sample 100 101 - overhead_sys: Overhead percentage of sample running in system mode ··· 377 376 --group:: 378 377 Show event group information together. It forces group output also 379 378 if there are no groups defined in data file. 379 + 380 + --group-sort-idx:: 381 + Sort the output by the event at the index n in group. If n is invalid, 382 + sort by the first event. It can support multiple groups with different 383 + amount of events. WARNING: This should be used on grouped events. 380 384 381 385 --demangle:: 382 386 Demangle symbol names to human readable form. It's enabled by default,
+14
tools/perf/Documentation/perf-script.txt
··· 319 319 --show-bpf-events 320 320 Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT. 321 321 322 + --show-cgroup-events 323 + Display cgroup events i.e. events of type PERF_RECORD_CGROUP. 324 + 322 325 --demangle:: 323 326 Demangle symbol names to human readable form. It's enabled by default, 324 327 disable with --no-demangle. ··· 393 390 --reltime:: 394 391 Print time stamps relative to trace start. 395 392 393 + --deltatime:: 394 + Print time stamps relative to previous event. 395 + 396 396 --per-event-dump:: 397 397 Create per event files with a "perf.data.EVENT.dump" name instead of 398 398 printing to stdout, useful, for instance, for generating flamegraphs. ··· 411 405 412 406 --xed:: 413 407 Run xed disassembler on output. Requires installing the xed disassembler. 408 + 409 + -S:: 410 + --symbols=symbol[,symbol...]:: 411 + Only consider the listed symbols. Symbols are typically a name 412 + but they may also be hexadecimal address. 413 + 414 + For example, to select the symbol noploop or the address 0x4007a0: 415 + perf script --symbols=noploop,0x4007a0 414 416 415 417 --call-trace:: 416 418 Show call stream for intel_pt traces. The CPUs are interleaved, but
+9
tools/perf/Documentation/perf-top.txt
··· 53 53 --group:: 54 54 Put the counters into a counter group. 55 55 56 + --group-sort-idx:: 57 + Sort the output by the event at the index n in group. If n is invalid, 58 + sort by the first event. It can support multiple groups with different 59 + amount of events. WARNING: This should be used on grouped events. 60 + 56 61 -F <freq>:: 57 62 --freq=<freq>:: 58 63 Profile at this frequency. Use 'max' to use the currently maximum ··· 276 271 --namespaces:: 277 272 Record events of type PERF_RECORD_NAMESPACES and display it with the 278 273 'cgroup_id' sort key. 274 + 275 + --all-cgroups:: 276 + Record events of type PERF_RECORD_CGROUP and display it with the 277 + 'cgroup' sort key. 279 278 280 279 --switch-on EVENT_NAME:: 281 280 Only consider events after this event is found.
+14 -1
tools/perf/Makefile.config
··· 228 228 229 229 PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) 230 230 231 + # Python 3.8 changed the output of `python-config --ldflags` to not include the 232 + # '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for 233 + # libpython fails if that flag is not included in LDFLAGS 234 + ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0) 235 + PYTHON_CONFIG_LDFLAGS := --ldflags --embed 236 + else 237 + PYTHON_CONFIG_LDFLAGS := --ldflags 238 + endif 239 + 231 240 ifdef PYTHON_CONFIG 232 - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) 241 + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) 233 242 PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) 234 243 PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil 235 244 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) ··· 355 346 356 347 ifeq ($(feature-gettid), 1) 357 348 CFLAGS += -DHAVE_GETTID 349 + endif 350 + 351 + ifeq ($(feature-file-handle), 1) 352 + CFLAGS += -DHAVE_FILE_HANDLE 358 353 endif 359 354 360 355 ifdef NO_LIBELF
+6 -5
tools/perf/Makefile.perf
··· 231 231 BPF_DIR = $(srctree)/tools/lib/bpf/ 232 232 SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ 233 233 LIBPERF_DIR = $(srctree)/tools/lib/perf/ 234 + DOC_DIR = $(srctree)/tools/perf/Documentation/ 234 235 235 236 # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. 236 237 # Without this setting the output feature dump file misses some features, for ··· 574 573 arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh 575 574 576 575 $(arch_errno_name_array): $(arch_errno_tbl) 577 - $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ 576 + $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ 578 577 579 578 sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c 580 579 sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh ··· 793 792 $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a 794 793 795 794 $(LIBSUBCMD)-clean: 796 - $(call QUIET_CLEAN, libsubcmd) 797 795 $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean 798 796 799 797 help: ··· 832 832 833 833 # 'make doc' should call 'make -C Documentation all' 834 834 $(DOC_TARGETS): 835 - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) 835 + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) 836 836 837 837 TAG_FOLDERS= . 
../lib ../include 838 838 TAG_FILES= ../../include/uapi/linux/perf_event.h ··· 959 959 960 960 # 'make install-doc' should call 'make -C Documentation install' 961 961 $(INSTALL_DOC_TARGETS): 962 - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=) 962 + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) 963 963 964 964 ### Cleaning rules 965 965 ··· 1008 1008 $(OUTPUT)$(rename_flags_array) \ 1009 1009 $(OUTPUT)$(arch_errno_name_array) \ 1010 1010 $(OUTPUT)$(sync_file_range_arrays) 1011 - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean 1011 + $(call QUIET_CLEAN, Documentation) \ 1012 + $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null 1012 1013 1013 1014 # 1014 1015 # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+1 -1
tools/perf/arch/arm64/util/Build
··· 1 1 perf-y += header.o 2 + perf-y += machine.o 2 3 perf-y += perf_regs.o 3 - perf-y += sym-handling.o 4 4 perf-$(CONFIG_DWARF) += dwarf-regs.o 5 5 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o 6 6 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+27
tools/perf/arch/arm64/util/machine.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <stdio.h> 4 + #include <string.h> 5 + #include "debug.h" 6 + #include "symbol.h" 7 + 8 + /* On arm64, kernel text segment starts at high memory address, 9 + * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory 10 + * address, like 0xffff 0000 00ax xxxx. When only a small amount of 11 + * memory is used by modules, the gap between the end of a module's text segment 12 + * and the start of the kernel text segment may reach 2G. 13 + * Therefore do not fill this gap and do not assign it to the kernel dso map. 14 + */ 15 + 16 + #define SYMBOL_LIMIT (1 << 12) /* 4K */ 17 + 18 + void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) 19 + { 20 + if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) || 21 + (strchr(p->name, '[') == NULL && strchr(c->name, '['))) 22 + /* Limit range of last symbol in module and kernel */ 23 + p->end += SYMBOL_LIMIT; 24 + else 25 + p->end = c->start; 26 + pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); 27 + }
-19
tools/perf/arch/arm64/util/sym-handling.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * 4 - * Copyright (C) 2015 Naveen N. Rao, IBM Corporation 5 - */ 6 - 7 - #include "symbol.h" // for the elf__needs_adjust_symbols() prototype 8 - #include <stdbool.h> 9 - 10 - #ifdef HAVE_LIBELF_SUPPORT 11 - #include <gelf.h> 12 - 13 - bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) 14 - { 15 - return ehdr.e_type == ET_EXEC || 16 - ehdr.e_type == ET_REL || 17 - ehdr.e_type == ET_DYN; 18 - } 19 - #endif
-1
tools/perf/arch/powerpc/util/Build
··· 1 1 perf-y += header.o 2 - perf-y += sym-handling.o 3 2 perf-y += kvm-stat.o 4 3 perf-y += perf_regs.o 5 4 perf-y += mem-events.o
-10
tools/perf/arch/powerpc/util/sym-handling.c
··· 10 10 #include "probe-event.h" 11 11 #include "probe-file.h" 12 12 13 - #ifdef HAVE_LIBELF_SUPPORT 14 - bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) 15 - { 16 - return ehdr.e_type == ET_EXEC || 17 - ehdr.e_type == ET_REL || 18 - ehdr.e_type == ET_DYN; 19 - } 20 - 21 - #endif 22 - 23 13 int arch__choose_best_symbol(struct symbol *syma, 24 14 struct symbol *symb __maybe_unused) 25 15 {
+1
tools/perf/builtin-diff.c
··· 455 455 .fork = perf_event__process_fork, 456 456 .lost = perf_event__process_lost, 457 457 .namespaces = perf_event__process_namespaces, 458 + .cgroup = perf_event__process_cgroup, 458 459 .ordered_events = true, 459 460 .ordering_requires_timestamps = true, 460 461 },
+16
tools/perf/builtin-record.c
··· 1397 1397 if (err < 0) 1398 1398 pr_warning("Couldn't synthesize bpf events.\n"); 1399 1399 1400 + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 1401 + machine); 1402 + if (err < 0) 1403 + pr_warning("Couldn't synthesize cgroup events.\n"); 1404 + 1400 1405 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, 1401 1406 process_synthesized_event, opts->sample_address, 1402 1407 1); ··· 1432 1427 1433 1428 if (rec->opts.record_namespaces) 1434 1429 tool->namespace_events = true; 1430 + 1431 + if (rec->opts.record_cgroup) { 1432 + #ifdef HAVE_FILE_HANDLE 1433 + tool->cgroup_events = true; 1434 + #else 1435 + pr_err("cgroup tracking is not supported\n"); 1436 + return -1; 1437 + #endif 1438 + } 1435 1439 1436 1440 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1437 1441 signal(SIGUSR2, snapshot_sig_handler); ··· 2372 2358 "per thread proc mmap processing timeout in ms"), 2373 2359 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2374 2360 "Record namespaces events"), 2361 + OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 2362 + "Record cgroup events"), 2375 2363 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2376 2364 "Record context switch events"), 2377 2365 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+14 -3
tools/perf/builtin-report.c
··· 635 635 * Usually "ret" is the last pressed key, and we only 636 636 * care if the key notifies us to switch data file. 637 637 */ 638 - if (ret != K_SWITCH_INPUT_DATA) 638 + if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD) 639 639 ret = 0; 640 640 break; 641 641 case 2: ··· 1105 1105 .mmap2 = perf_event__process_mmap2, 1106 1106 .comm = perf_event__process_comm, 1107 1107 .namespaces = perf_event__process_namespaces, 1108 + .cgroup = perf_event__process_cgroup, 1108 1109 .exit = perf_event__process_exit, 1109 1110 .fork = perf_event__process_fork, 1110 1111 .lost = perf_event__process_lost, ··· 1228 1227 "Show a column with the sum of periods"), 1229 1228 OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, 1230 1229 "Show event group information together"), 1230 + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, 1231 + "Sort the output by the event at the index n in group. " 1232 + "If n is invalid, sort by the first event. " 1233 + "WARNING: should be used on grouped events."), 1231 1234 OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", 1232 1235 "use branch records for per branch histogram filling", 1233 1236 parse_branch_mode), ··· 1374 1369 1375 1370 setup_forced_leader(&report, session->evlist); 1376 1371 1372 + if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) { 1373 + parse_options_usage(NULL, options, "group-sort-idx", 0); 1374 + ret = -EINVAL; 1375 + goto error; 1376 + } 1377 + 1377 1378 if (itrace_synth_opts.last_branch) 1378 1379 has_br_stack = true; 1379 1380 ··· 1481 1470 sort_order = sort_tmp; 1482 1471 } 1483 1472 1484 - if ((last_key != K_SWITCH_INPUT_DATA) && 1473 + if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) && 1485 1474 (setup_sorting(session->evlist) < 0)) { 1486 1475 if (sort_order) 1487 1476 parse_options_usage(report_usage, options, "s", 1); ··· 1560 1549 sort__setup_elide(stdout); 1561 1550 1562 1551 ret = __cmd_report(&report); 1563 - if (ret == K_SWITCH_INPUT_DATA) { 1552 + if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) { 1564 1553 perf_session__delete(session); 1565 1554 last_key = K_SWITCH_INPUT_DATA; 1566 1555 goto repeat;
+59 -1
tools/perf/builtin-script.c
··· 63 63 static char const *script_name; 64 64 static char const *generate_script_lang; 65 65 static bool reltime; 66 + static bool deltatime; 66 67 static u64 initial_time; 68 + static u64 previous_time; 67 69 static bool debug_mode; 68 70 static u64 last_timestamp; 69 71 static u64 nr_unordered; ··· 706 704 if (!initial_time) 707 705 initial_time = sample->time; 708 706 t = sample->time - initial_time; 707 + } else if (deltatime) { 708 + if (previous_time) 709 + t = sample->time - previous_time; 710 + else { 711 + t = 0; 712 + } 713 + previous_time = sample->time; 709 714 } 710 715 nsecs = t; 711 716 secs = nsecs / NSEC_PER_SEC; ··· 1694 1685 bool show_lost_events; 1695 1686 bool show_round_events; 1696 1687 bool show_bpf_events; 1688 + bool show_cgroup_events; 1697 1689 bool allocated; 1698 1690 bool per_event_dump; 1699 1691 struct evswitch evswitch; ··· 2213 2203 return ret; 2214 2204 } 2215 2205 2206 + static int process_cgroup_event(struct perf_tool *tool, 2207 + union perf_event *event, 2208 + struct perf_sample *sample, 2209 + struct machine *machine) 2210 + { 2211 + struct thread *thread; 2212 + struct perf_script *script = container_of(tool, struct perf_script, tool); 2213 + struct perf_session *session = script->session; 2214 + struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); 2215 + int ret = -1; 2216 + 2217 + thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2218 + if (thread == NULL) { 2219 + pr_debug("problem processing CGROUP event, skipping it.\n"); 2220 + return -1; 2221 + } 2222 + 2223 + if (perf_event__process_cgroup(tool, event, sample, machine) < 0) 2224 + goto out; 2225 + 2226 + if (!evsel->core.attr.sample_id_all) { 2227 + sample->cpu = 0; 2228 + sample->time = 0; 2229 + } 2230 + if (!filter_cpu(sample)) { 2231 + perf_sample__fprintf_start(sample, thread, evsel, 2232 + PERF_RECORD_CGROUP, stdout); 2233 + perf_event__fprintf(event, stdout); 2234 + } 2235 + ret = 0; 2236 + out: 2237 + 
thread__put(thread); 2238 + return ret; 2239 + } 2240 + 2216 2241 static int process_fork_event(struct perf_tool *tool, 2217 2242 union perf_event *event, 2218 2243 struct perf_sample *sample, ··· 2587 2542 script->tool.context_switch = process_switch_event; 2588 2543 if (script->show_namespace_events) 2589 2544 script->tool.namespaces = process_namespaces_event; 2545 + if (script->show_cgroup_events) 2546 + script->tool.cgroup = process_cgroup_event; 2590 2547 if (script->show_lost_events) 2591 2548 script->tool.lost = process_lost_event; 2592 2549 if (script->show_round_events) { ··· 3265 3218 __script_root = get_script_root(script_dirent, suffix); 3266 3219 if (__script_root && !strcmp(script_root, __script_root)) { 3267 3220 free(__script_root); 3268 - closedir(lang_dir); 3269 3221 closedir(scripts_dir); 3270 3222 scnprintf(script_path, MAXPATHLEN, "%s/%s", 3271 3223 lang_path, script_dirent->d_name); 3224 + closedir(lang_dir); 3272 3225 return strdup(script_path); 3273 3226 } 3274 3227 free(__script_root); ··· 3514 3467 .mmap2 = perf_event__process_mmap2, 3515 3468 .comm = perf_event__process_comm, 3516 3469 .namespaces = perf_event__process_namespaces, 3470 + .cgroup = perf_event__process_cgroup, 3517 3471 .exit = perf_event__process_exit, 3518 3472 .fork = perf_event__process_fork, 3519 3473 .attr = process_attr, ··· 3603 3555 "anything beyond the specified depth will be ignored. 
" 3604 3556 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 3605 3557 OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"), 3558 + OPT_BOOLEAN(0, "deltatime", &deltatime, "Show time stamps relative to previous event"), 3606 3559 OPT_BOOLEAN('I', "show-info", &show_full_info, 3607 3560 "display extended information from perf.data file"), 3608 3561 OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, ··· 3616 3567 "Show context switch events (if recorded)"), 3617 3568 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, 3618 3569 "Show namespace events (if recorded)"), 3570 + OPT_BOOLEAN('\0', "show-cgroup-events", &script.show_cgroup_events, 3571 + "Show cgroup events (if recorded)"), 3619 3572 OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, 3620 3573 "Show lost events (if recorded)"), 3621 3574 OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, ··· 3700 3649 "(see 'perf script -l' for listing)\n"); 3701 3650 return -1; 3702 3651 } 3652 + } 3653 + 3654 + if (reltime && deltatime) { 3655 + fprintf(stderr, 3656 + "reltime and deltatime - the two don't get along well. " 3657 + "Please limit to --reltime or --deltatime.\n"); 3658 + return -1; 3703 3659 } 3704 3660 3705 3661 if (itrace_synth_opts.callchain &&
+28 -2
tools/perf/builtin-top.c
··· 616 616 .arg = top, 617 617 .refresh = top->delay_secs, 618 618 }; 619 + int ret; 619 620 620 621 /* In order to read symbols from other namespaces perf to needs to call 621 622 * setns(2). This isn't permitted if the struct_fs has multiple users. ··· 627 626 628 627 prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0); 629 628 629 + repeat: 630 630 perf_top__sort_new_samples(top); 631 631 632 632 /* ··· 640 638 hists->uid_filter_str = top->record_opts.target.uid_str; 641 639 } 642 640 643 - perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 641 + ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 644 642 top->min_percent, 645 643 &top->session->header.env, 646 644 !top->record_opts.overwrite, 647 645 &top->annotation_opts); 648 646 649 - stop_top(); 647 + if (ret == K_RELOAD) { 648 + top->zero = true; 649 + goto repeat; 650 + } else 651 + stop_top(); 652 + 650 653 return NULL; 651 654 } 652 655 ··· 1253 1246 1254 1247 if (opts->record_namespaces) 1255 1248 top->tool.namespace_events = true; 1249 + if (opts->record_cgroup) { 1250 + #ifdef HAVE_FILE_HANDLE 1251 + top->tool.cgroup_events = true; 1252 + #else 1253 + pr_err("cgroup tracking is not supported.\n"); 1254 + return -1; 1255 + #endif 1256 + } 1256 1257 1257 1258 ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, 1258 1259 &top->session->machines.host, 1259 1260 &top->record_opts); 1260 1261 if (ret < 0) 1261 1262 pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n"); 1263 + 1264 + ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process, 1265 + &top->session->machines.host); 1266 + if (ret < 0) 1267 + pr_debug("Couldn't synthesize cgroup events.\n"); 1262 1268 1263 1269 machine__synthesize_threads(&top->session->machines.host, &opts->target, 1264 1270 top->evlist->core.threads, false, ··· 1565 1545 "number of thread to run event synthesize"), 1566 1546 OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces, 1567 1547 
"Record namespaces events"), 1548 + OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup, 1549 + "Record cgroup events"), 1550 + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, 1551 + "Sort the output by the event at the index n in group. " 1552 + "If n is invalid, sort by the first event. " 1553 + "WARNING: should be used on grouped events."), 1568 1554 OPTS_EVSWITCH(&top.evswitch), 1569 1555 OPT_END() 1570 1556 };
+26
tools/perf/pmu-events/arch/test/test_cpu/other.json
··· 1 + [ 2 + { 3 + "EventCode": "0x6", 4 + "Counter": "0,1", 5 + "UMask": "0x80", 6 + "EventName": "SEGMENT_REG_LOADS.ANY", 7 + "SampleAfterValue": "200000", 8 + "BriefDescription": "Number of segment register loads." 9 + }, 10 + { 11 + "EventCode": "0x9", 12 + "Counter": "0,1", 13 + "UMask": "0x20", 14 + "EventName": "DISPATCH_BLOCKED.ANY", 15 + "SampleAfterValue": "200000", 16 + "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason" 17 + }, 18 + { 19 + "EventCode": "0x3A", 20 + "Counter": "0,1", 21 + "UMask": "0x0", 22 + "EventName": "EIST_TRANS", 23 + "SampleAfterValue": "200000", 24 + "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions" 25 + } 26 + ]
+21
tools/perf/pmu-events/arch/test/test_cpu/uncore.json
··· 1 + [ 2 + { 3 + "EventCode": "0x02", 4 + "EventName": "uncore_hisi_ddrc.flux_wcmd", 5 + "BriefDescription": "DDRC write commands", 6 + "PublicDescription": "DDRC write commands", 7 + "Unit": "hisi_sccl,ddrc" 8 + }, 9 + { 10 + "Unit": "CBO", 11 + "EventCode": "0x22", 12 + "UMask": "0x81", 13 + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", 14 + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", 15 + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", 16 + "Counter": "0,1", 17 + "CounterMask": "0", 18 + "Invert": "0", 19 + "EdgeDetect": "0" 20 + } 21 + ]
tools/perf/pmu-events/arch/x86/amdfam17h/branch.json tools/perf/pmu-events/arch/test/test_cpu/branch.json
-329
tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
··· 1 - [ 2 - { 3 - "EventName": "ic_fw32", 4 - "EventCode": "0x80", 5 - "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." 6 - }, 7 - { 8 - "EventName": "ic_fw32_miss", 9 - "EventCode": "0x81", 10 - "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." 11 - }, 12 - { 13 - "EventName": "ic_cache_fill_l2", 14 - "EventCode": "0x82", 15 - "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." 16 - }, 17 - { 18 - "EventName": "ic_cache_fill_sys", 19 - "EventCode": "0x83", 20 - "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." 21 - }, 22 - { 23 - "EventName": "bp_l1_tlb_miss_l2_hit", 24 - "EventCode": "0x84", 25 - "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." 26 - }, 27 - { 28 - "EventName": "bp_l1_tlb_miss_l2_miss", 29 - "EventCode": "0x85", 30 - "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." 31 - }, 32 - { 33 - "EventName": "bp_snp_re_sync", 34 - "EventCode": "0x86", 35 - "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." 36 - }, 37 - { 38 - "EventName": "ic_fetch_stall.ic_stall_any", 39 - "EventCode": "0x87", 40 - "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 41 - "PublicDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 42 - "UMask": "0x4" 43 - }, 44 - { 45 - "EventName": "ic_fetch_stall.ic_stall_dq_empty", 46 - "EventCode": "0x87", 47 - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 48 - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 49 - "UMask": "0x2" 50 - }, 51 - { 52 - "EventName": "ic_fetch_stall.ic_stall_back_pressure", 53 - "EventCode": "0x87", 54 - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 55 - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 56 - "UMask": "0x1" 57 - }, 58 - { 59 - "EventName": "ic_cache_inval.l2_invalidating_probe", 60 - "EventCode": "0x8c", 61 - "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", 62 - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", 63 - "UMask": "0x2" 64 - }, 65 - { 66 - "EventName": "ic_cache_inval.fill_invalidated", 67 - "EventCode": "0x8c", 68 - "BriefDescription": "IC line invalidated due to overwriting fill response.", 69 - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", 70 - "UMask": "0x1" 71 - }, 72 - { 73 - "EventName": "bp_tlb_rel", 74 - "EventCode": "0x99", 75 - "BriefDescription": "The number of ITLB reload requests." 
76 - }, 77 - { 78 - "EventName": "l2_request_g1.rd_blk_l", 79 - "EventCode": "0x60", 80 - "BriefDescription": "Requests to L2 Group1.", 81 - "PublicDescription": "Requests to L2 Group1.", 82 - "UMask": "0x80" 83 - }, 84 - { 85 - "EventName": "l2_request_g1.rd_blk_x", 86 - "EventCode": "0x60", 87 - "BriefDescription": "Requests to L2 Group1.", 88 - "PublicDescription": "Requests to L2 Group1.", 89 - "UMask": "0x40" 90 - }, 91 - { 92 - "EventName": "l2_request_g1.ls_rd_blk_c_s", 93 - "EventCode": "0x60", 94 - "BriefDescription": "Requests to L2 Group1.", 95 - "PublicDescription": "Requests to L2 Group1.", 96 - "UMask": "0x20" 97 - }, 98 - { 99 - "EventName": "l2_request_g1.cacheable_ic_read", 100 - "EventCode": "0x60", 101 - "BriefDescription": "Requests to L2 Group1.", 102 - "PublicDescription": "Requests to L2 Group1.", 103 - "UMask": "0x10" 104 - }, 105 - { 106 - "EventName": "l2_request_g1.change_to_x", 107 - "EventCode": "0x60", 108 - "BriefDescription": "Requests to L2 Group1.", 109 - "PublicDescription": "Requests to L2 Group1.", 110 - "UMask": "0x8" 111 - }, 112 - { 113 - "EventName": "l2_request_g1.prefetch_l2", 114 - "EventCode": "0x60", 115 - "BriefDescription": "Requests to L2 Group1.", 116 - "PublicDescription": "Requests to L2 Group1.", 117 - "UMask": "0x4" 118 - }, 119 - { 120 - "EventName": "l2_request_g1.l2_hw_pf", 121 - "EventCode": "0x60", 122 - "BriefDescription": "Requests to L2 Group1.", 123 - "PublicDescription": "Requests to L2 Group1.", 124 - "UMask": "0x2" 125 - }, 126 - { 127 - "EventName": "l2_request_g1.other_requests", 128 - "EventCode": "0x60", 129 - "BriefDescription": "Events covered by l2_request_g2.", 130 - "PublicDescription": "Requests to L2 Group1. 
Events covered by l2_request_g2.", 131 - "UMask": "0x1" 132 - }, 133 - { 134 - "EventName": "l2_request_g2.group1", 135 - "EventCode": "0x61", 136 - "BriefDescription": "All Group 1 commands not in unit0.", 137 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", 138 - "UMask": "0x80" 139 - }, 140 - { 141 - "EventName": "l2_request_g2.ls_rd_sized", 142 - "EventCode": "0x61", 143 - "BriefDescription": "RdSized, RdSized32, RdSized64.", 144 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", 145 - "UMask": "0x40" 146 - }, 147 - { 148 - "EventName": "l2_request_g2.ls_rd_sized_nc", 149 - "EventCode": "0x61", 150 - "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", 151 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", 152 - "UMask": "0x20" 153 - }, 154 - { 155 - "EventName": "l2_request_g2.ic_rd_sized", 156 - "EventCode": "0x61", 157 - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 158 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 159 - "UMask": "0x10" 160 - }, 161 - { 162 - "EventName": "l2_request_g2.ic_rd_sized_nc", 163 - "EventCode": "0x61", 164 - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 165 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 166 - "UMask": "0x8" 167 - }, 168 - { 169 - "EventName": "l2_request_g2.smc_inval", 170 - "EventCode": "0x61", 171 - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 172 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 173 - "UMask": "0x4" 174 - }, 175 - { 176 - "EventName": 
"l2_request_g2.bus_locks_originator", 177 - "EventCode": "0x61", 178 - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 179 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 180 - "UMask": "0x2" 181 - }, 182 - { 183 - "EventName": "l2_request_g2.bus_locks_responses", 184 - "EventCode": "0x61", 185 - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 186 - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", 187 - "UMask": "0x1" 188 - }, 189 - { 190 - "EventName": "l2_latency.l2_cycles_waiting_on_fills", 191 - "EventCode": "0x62", 192 - "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 193 - "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. 
To calculate average latency, the number of fills from both threads must be used.", 194 - "UMask": "0x1" 195 - }, 196 - { 197 - "EventName": "l2_wcb_req.wcb_write", 198 - "EventCode": "0x63", 199 - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", 200 - "BriefDescription": "LS to L2 WCB write requests.", 201 - "UMask": "0x40" 202 - }, 203 - { 204 - "EventName": "l2_wcb_req.wcb_close", 205 - "EventCode": "0x63", 206 - "BriefDescription": "LS to L2 WCB close requests.", 207 - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", 208 - "UMask": "0x20" 209 - }, 210 - { 211 - "EventName": "l2_wcb_req.zero_byte_store", 212 - "EventCode": "0x63", 213 - "BriefDescription": "LS to L2 WCB zero byte store requests.", 214 - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 215 - "UMask": "0x4" 216 - }, 217 - { 218 - "EventName": "l2_wcb_req.cl_zero", 219 - "EventCode": "0x63", 220 - "PublicDescription": "LS to L2 WCB cache line zeroing requests.", 221 - "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 222 - "UMask": "0x1" 223 - }, 224 - { 225 - "EventName": "l2_cache_req_stat.ls_rd_blk_cs", 226 - "EventCode": "0x64", 227 - "BriefDescription": "LS ReadBlock C/S Hit.", 228 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", 229 - "UMask": "0x80" 230 - }, 231 - { 232 - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", 233 - "EventCode": "0x64", 234 - "BriefDescription": "LS Read Block L Hit X.", 235 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. 
LS Read Block L Hit X.", 236 - "UMask": "0x40" 237 - }, 238 - { 239 - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", 240 - "EventCode": "0x64", 241 - "BriefDescription": "LsRdBlkL Hit Shared.", 242 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", 243 - "UMask": "0x20" 244 - }, 245 - { 246 - "EventName": "l2_cache_req_stat.ls_rd_blk_x", 247 - "EventCode": "0x64", 248 - "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", 249 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", 250 - "UMask": "0x10" 251 - }, 252 - { 253 - "EventName": "l2_cache_req_stat.ls_rd_blk_c", 254 - "EventCode": "0x64", 255 - "BriefDescription": "LS Read Block C S L X Change to X Miss.", 256 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", 257 - "UMask": "0x8" 258 - }, 259 - { 260 - "EventName": "l2_cache_req_stat.ic_fill_hit_x", 261 - "EventCode": "0x64", 262 - "BriefDescription": "IC Fill Hit Exclusive Stale.", 263 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", 264 - "UMask": "0x4" 265 - }, 266 - { 267 - "EventName": "l2_cache_req_stat.ic_fill_hit_s", 268 - "EventCode": "0x64", 269 - "BriefDescription": "IC Fill Hit Shared.", 270 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. 
IC Fill Hit Shared.", 271 - "UMask": "0x2" 272 - }, 273 - { 274 - "EventName": "l2_cache_req_stat.ic_fill_miss", 275 - "EventCode": "0x64", 276 - "BriefDescription": "IC Fill Miss.", 277 - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", 278 - "UMask": "0x1" 279 - }, 280 - { 281 - "EventName": "l2_fill_pending.l2_fill_busy", 282 - "EventCode": "0x6d", 283 - "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", 284 - "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", 285 - "UMask": "0x1" 286 - }, 287 - { 288 - "EventName": "l3_request_g1.caching_l3_cache_accesses", 289 - "EventCode": "0x01", 290 - "BriefDescription": "Caching: L3 cache accesses", 291 - "UMask": "0x80", 292 - "Unit": "L3PMC" 293 - }, 294 - { 295 - "EventName": "l3_lookup_state.all_l3_req_typs", 296 - "EventCode": "0x04", 297 - "BriefDescription": "All L3 Request Types", 298 - "UMask": "0xff", 299 - "Unit": "L3PMC" 300 - }, 301 - { 302 - "EventName": "l3_comb_clstr_state.other_l3_miss_typs", 303 - "EventCode": "0x06", 304 - "BriefDescription": "Other L3 Miss Request Types", 305 - "UMask": "0xfe", 306 - "Unit": "L3PMC" 307 - }, 308 - { 309 - "EventName": "l3_comb_clstr_state.request_miss", 310 - "EventCode": "0x06", 311 - "BriefDescription": "L3 cache misses", 312 - "UMask": "0x01", 313 - "Unit": "L3PMC" 314 - }, 315 - { 316 - "EventName": "xi_sys_fill_latency", 317 - "EventCode": "0x90", 318 - "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", 319 - "UMask": "0x00", 320 - "Unit": "L3PMC" 321 - }, 322 - { 323 - "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", 324 - "EventCode": "0x9a", 325 - "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", 326 - "UMask": "0x3f", 327 - "Unit": "L3PMC" 328 - } 329 - ]
+3 -12
tools/perf/pmu-events/arch/x86/amdfam17h/core.json tools/perf/pmu-events/arch/x86/amdzen1/core.json
··· 62 62 "EventName": "ex_ret_brn_ind_misp", 63 63 "EventCode": "0xca", 64 64 "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", 65 - "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." 66 65 }, 67 66 { 68 67 "EventName": "ex_ret_mmx_fp_instr.sse_instr", ··· 90 91 "BriefDescription": "Retired Conditional Branch Instructions." 91 92 }, 92 93 { 93 - "EventName": "ex_ret_cond_misp", 94 - "EventCode": "0xd2", 95 - "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." 96 - }, 97 - { 98 94 "EventName": "ex_div_busy", 99 95 "EventCode": "0xd3", 100 96 "BriefDescription": "Div Cycles Busy count." ··· 102 108 { 103 109 "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", 104 110 "EventCode": "0x1cf", 105 - "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 106 - "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 111 + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 107 112 "UMask": "0x4" 108 113 }, 109 114 { 110 115 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", 111 116 "EventCode": "0x1cf", 112 - "BriefDescription": "Number of Ops tagged by IBS that retired.", 113 - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 117 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 114 118 "UMask": "0x2" 115 119 }, 116 120 { 117 121 "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", 118 122 "EventCode": "0x1cf", 119 - "BriefDescription": "Number of Ops tagged by IBS.", 120 - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 123 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 121 124 "UMask": "0x1" 122 125 }, 123 126 {
+60 -4
tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
··· 2 2 { 3 3 "EventName": "fpu_pipe_assignment.dual", 4 4 "EventCode": "0x00", 5 - "BriefDescription": "Total number multi-pipe uOps.", 6 - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", 5 + "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.", 6 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.", 7 7 "UMask": "0xf0" 8 + }, 9 + { 10 + "EventName": "fpu_pipe_assignment.dual3", 11 + "EventCode": "0x00", 12 + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.", 13 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. 
Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.", 14 + "UMask": "0x80" 15 + }, 16 + { 17 + "EventName": "fpu_pipe_assignment.dual2", 18 + "EventCode": "0x00", 19 + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.", 20 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.", 21 + "UMask": "0x40" 22 + }, 23 + { 24 + "EventName": "fpu_pipe_assignment.dual1", 25 + "EventCode": "0x00", 26 + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.", 27 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.", 28 + "UMask": "0x20" 29 + }, 30 + { 31 + "EventName": "fpu_pipe_assignment.dual0", 32 + "EventCode": "0x00", 33 + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.", 34 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. 
This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.", 35 + "UMask": "0x10" 8 36 }, 9 37 { 10 38 "EventName": "fpu_pipe_assignment.total", 11 39 "EventCode": "0x00", 12 - "BriefDescription": "Total number uOps.", 13 - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", 40 + "BriefDescription": "Total number uOps assigned to all fpu pipes.", 41 + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. 
Total number uOps assigned to all pipes.", 14 42 "UMask": "0xf" 43 + }, 44 + { 45 + "EventName": "fpu_pipe_assignment.total3", 46 + "EventCode": "0x00", 47 + "BriefDescription": "Total number of fp uOps on pipe 3.", 48 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", 49 + "UMask": "0x8" 50 + }, 51 + { 52 + "EventName": "fpu_pipe_assignment.total2", 53 + "EventCode": "0x00", 54 + "BriefDescription": "Total number of fp uOps on pipe 2.", 55 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", 56 + "UMask": "0x4" 57 + }, 58 + { 59 + "EventName": "fpu_pipe_assignment.total1", 60 + "EventCode": "0x00", 61 + "BriefDescription": "Total number of fp uOps on pipe 1.", 62 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. 
Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", 63 + "UMask": "0x2" 64 + }, 65 + { 66 + "EventName": "fpu_pipe_assignment.total0", 67 + "EventCode": "0x00", 68 + "BriefDescription": "Total number of fp uOps on pipe 0.", 69 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", 70 + "UMask": "0x1" 15 71 }, 16 72 { 17 73 "EventName": "fp_sched_empty",
+52 -30
tools/perf/pmu-events/arch/x86/amdfam17h/memory.json tools/perf/pmu-events/arch/x86/amdzen1/memory.json
··· 3 3 "EventName": "ls_locks.bus_lock", 4 4 "EventCode": "0x25", 5 5 "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", 6 - "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", 7 6 "UMask": "0x1" 8 7 }, 9 8 { 10 9 "EventName": "ls_dispatch.ld_st_dispatch", 11 10 "EventCode": "0x29", 12 - "BriefDescription": "Load-op-Stores.", 13 - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", 11 + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", 14 12 "UMask": "0x4" 15 13 }, 16 14 { 17 15 "EventName": "ls_dispatch.store_dispatch", 18 16 "EventCode": "0x29", 19 - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 20 - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 17 + "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", 21 18 "UMask": "0x2" 22 19 }, 23 20 { 24 21 "EventName": "ls_dispatch.ld_dispatch", 25 22 "EventCode": "0x29", 26 - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 27 - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 23 + "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", 28 24 "UMask": "0x1" 29 25 }, 30 26 { ··· 34 38 "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." 
35 39 }, 36 40 { 41 + "EventName": "ls_mab_alloc.dc_prefetcher", 42 + "EventCode": "0x41", 43 + "BriefDescription": "LS MAB allocates by type - DC prefetcher.", 44 + "UMask": "0x8" 45 + }, 46 + { 47 + "EventName": "ls_mab_alloc.stores", 48 + "EventCode": "0x41", 49 + "BriefDescription": "LS MAB allocates by type - stores.", 50 + "UMask": "0x2" 51 + }, 52 + { 53 + "EventName": "ls_mab_alloc.loads", 54 + "EventCode": "0x41", 55 + "BriefDescription": "LS MAB allocates by type - loads.", 56 + "UMask": "0x01" 57 + }, 58 + { 37 59 "EventName": "ls_l1_d_tlb_miss.all", 38 60 "EventCode": "0x45", 39 61 "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", 40 - "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", 41 62 "UMask": "0xff" 42 63 }, 43 64 { 44 65 "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 45 66 "EventCode": "0x45", 46 67 "BriefDescription": "L1 DTLB Miss of a page of 1G size.", 47 - "PublicDescription": "L1 DTLB Miss of a page of 1G size.", 48 68 "UMask": "0x80" 49 69 }, 50 70 { 51 71 "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 52 72 "EventCode": "0x45", 53 73 "BriefDescription": "L1 DTLB Miss of a page of 2M size.", 54 - "PublicDescription": "L1 DTLB Miss of a page of 2M size.", 55 74 "UMask": "0x40" 56 75 }, 57 76 { 58 77 "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", 59 78 "EventCode": "0x45", 60 79 "BriefDescription": "L1 DTLB Miss of a page of 32K size.", 61 - "PublicDescription": "L1 DTLB Miss of a page of 32K size.", 62 80 "UMask": "0x20" 63 81 }, 64 82 { 65 83 "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 66 84 "EventCode": "0x45", 67 85 "BriefDescription": "L1 DTLB Miss of a page of 4K size.", 68 - "PublicDescription": "L1 DTLB Miss of a page of 4K size.", 69 86 "UMask": "0x10" 70 87 }, 71 88 { 72 89 "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 73 90 "EventCode": "0x45", 74 91 "BriefDescription": "L1 DTLB Reload of a page of 1G size.", 75 - "PublicDescription": "L1 DTLB Reload of a page of 
1G size.", 76 92 "UMask": "0x8" 77 93 }, 78 94 { 79 95 "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 80 96 "EventCode": "0x45", 81 97 "BriefDescription": "L1 DTLB Reload of a page of 2M size.", 82 - "PublicDescription": "L1 DTLB Reload of a page of 2M size.", 83 98 "UMask": "0x4" 84 99 }, 85 100 { 86 101 "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", 87 102 "EventCode": "0x45", 88 103 "BriefDescription": "L1 DTLB Reload of a page of 32K size.", 89 - "PublicDescription": "L1 DTLB Reload of a page of 32K size.", 90 104 "UMask": "0x2" 91 105 }, 92 106 { 93 107 "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 94 108 "EventCode": "0x45", 95 109 "BriefDescription": "L1 DTLB Reload of a page of 4K size.", 96 - "PublicDescription": "L1 DTLB Reload of a page of 4K size.", 97 110 "UMask": "0x1" 98 111 }, 99 112 { 100 - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", 113 + "EventName": "ls_tablewalker.iside", 101 114 "EventCode": "0x46", 102 - "BriefDescription": "Tablewalker allocation.", 103 - "PublicDescription": "Tablewalker allocation.", 115 + "BriefDescription": "Total Page Table Walks on I-side.", 104 116 "UMask": "0xc" 105 117 }, 106 118 { 107 - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", 119 + "EventName": "ls_tablewalker.ic_type1", 108 120 "EventCode": "0x46", 109 - "BriefDescription": "Tablewalker allocation.", 110 - "PublicDescription": "Tablewalker allocation.", 121 + "BriefDescription": "Total Page Table Walks IC Type 1.", 122 + "UMask": "0x8" 123 + }, 124 + { 125 + "EventName": "ls_tablewalker.ic_type0", 126 + "EventCode": "0x46", 127 + "BriefDescription": "Total Page Table Walks IC Type 0.", 128 + "UMask": "0x4" 129 + }, 130 + { 131 + "EventName": "ls_tablewalker.dside", 132 + "EventCode": "0x46", 133 + "BriefDescription": "Total Page Table Walks on D-side.", 111 134 "UMask": "0x3" 135 + }, 136 + { 137 + "EventName": "ls_tablewalker.dc_type1", 138 + "EventCode": "0x46", 139 + "BriefDescription": "Total Page 
Table Walks DC Type 1.", 140 + "UMask": "0x2" 141 + }, 142 + { 143 + "EventName": "ls_tablewalker.dc_type0", 144 + "EventCode": "0x46", 145 + "BriefDescription": "Total Page Table Walks DC Type 0.", 146 + "UMask": "0x1" 112 147 }, 113 148 { 114 149 "EventName": "ls_misal_accesses", ··· 150 123 "EventName": "ls_pref_instr_disp.prefetch_nta", 151 124 "EventCode": "0x4b", 152 125 "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", 153 - "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", 154 126 "UMask": "0x4" 155 127 }, 156 128 { 157 129 "EventName": "ls_pref_instr_disp.store_prefetch_w", 158 130 "EventCode": "0x4b", 159 131 "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", 160 - "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", 161 132 "UMask": "0x2" 162 133 }, 163 134 { 164 135 "EventName": "ls_pref_instr_disp.load_prefetch_w", 165 136 "EventCode": "0x4b", 166 - "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", 167 - "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", 137 + "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", 168 138 "UMask": "0x1" 169 139 }, 170 140 { 171 141 "EventName": "ls_inef_sw_pref.mab_mch_cnt", 172 142 "EventCode": "0x52", 173 - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", 174 - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", 143 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. 
Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", 175 144 "UMask": "0x2" 176 145 }, 177 146 { 178 147 "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 179 148 "EventCode": "0x52", 180 - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", 181 - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", 149 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", 182 150 "UMask": "0x1" 183 151 }, 184 152 {
-65
tools/perf/pmu-events/arch/x86/amdfam17h/other.json
··· 1 - [ 2 - { 3 - "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 4 - "EventCode": "0x28a", 5 - "BriefDescription": "OC to IC mode switch.", 6 - "PublicDescription": "OC Mode Switch. OC to IC mode switch.", 7 - "UMask": "0x2" 8 - }, 9 - { 10 - "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 11 - "EventCode": "0x28a", 12 - "BriefDescription": "IC to OC mode switch.", 13 - "PublicDescription": "OC Mode Switch. IC to OC mode switch.", 14 - "UMask": "0x1" 15 - }, 16 - { 17 - "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", 18 - "EventCode": "0xaf", 19 - "BriefDescription": "RETIRE Tokens unavailable.", 20 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", 21 - "UMask": "0x40" 22 - }, 23 - { 24 - "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", 25 - "EventCode": "0xaf", 26 - "BriefDescription": "AGSQ Tokens unavailable.", 27 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", 28 - "UMask": "0x20" 29 - }, 30 - { 31 - "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", 32 - "EventCode": "0xaf", 33 - "BriefDescription": "ALU tokens total unavailable.", 34 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
ALU tokens total unavailable.", 35 - "UMask": "0x10" 36 - }, 37 - { 38 - "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", 39 - "EventCode": "0xaf", 40 - "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", 41 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", 42 - "UMask": "0x8" 43 - }, 44 - { 45 - "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", 46 - "EventCode": "0xaf", 47 - "BriefDescription": "ALSQ 3 Tokens unavailable.", 48 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", 49 - "UMask": "0x4" 50 - }, 51 - { 52 - "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", 53 - "EventCode": "0xaf", 54 - "BriefDescription": "ALSQ 2 Tokens unavailable.", 55 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", 56 - "UMask": "0x2" 57 - }, 58 - { 59 - "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", 60 - "EventCode": "0xaf", 61 - "BriefDescription": "ALSQ 1 Tokens unavailable.", 62 - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", 63 - "UMask": "0x1" 64 - } 65 - ]
+23
tools/perf/pmu-events/arch/x86/amdzen1/branch.json
··· 1 + [ 2 + { 3 + "EventName": "bp_l1_btb_correct", 4 + "EventCode": "0x8a", 5 + "BriefDescription": "L1 BTB Correction." 6 + }, 7 + { 8 + "EventName": "bp_l2_btb_correct", 9 + "EventCode": "0x8b", 10 + "BriefDescription": "L2 BTB Correction." 11 + }, 12 + { 13 + "EventName": "bp_dyn_ind_pred", 14 + "EventCode": "0x8e", 15 + "BriefDescription": "Dynamic Indirect Predictions.", 16 + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." 17 + }, 18 + { 19 + "EventName": "bp_de_redirect", 20 + "EventCode": "0x91", 21 + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." 22 + } 23 + ]
+294
tools/perf/pmu-events/arch/x86/amdzen1/cache.json
··· 1 + [ 2 + { 3 + "EventName": "ic_fw32", 4 + "EventCode": "0x80", 5 + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." 6 + }, 7 + { 8 + "EventName": "ic_fw32_miss", 9 + "EventCode": "0x81", 10 + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." 11 + }, 12 + { 13 + "EventName": "ic_cache_fill_l2", 14 + "EventCode": "0x82", 15 + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." 16 + }, 17 + { 18 + "EventName": "ic_cache_fill_sys", 19 + "EventCode": "0x83", 20 + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." 21 + }, 22 + { 23 + "EventName": "bp_l1_tlb_miss_l2_hit", 24 + "EventCode": "0x84", 25 + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." 26 + }, 27 + { 28 + "EventName": "bp_l1_tlb_miss_l2_miss", 29 + "EventCode": "0x85", 30 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." 31 + }, 32 + { 33 + "EventName": "bp_snp_re_sync", 34 + "EventCode": "0x86", 35 + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." 36 + }, 37 + { 38 + "EventName": "ic_fetch_stall.ic_stall_any", 39 + "EventCode": "0x87", 40 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 41 + "UMask": "0x4" 42 + }, 43 + { 44 + "EventName": "ic_fetch_stall.ic_stall_dq_empty", 45 + "EventCode": "0x87", 46 + "BriefDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 47 + "UMask": "0x2" 48 + }, 49 + { 50 + "EventName": "ic_fetch_stall.ic_stall_back_pressure", 51 + "EventCode": "0x87", 52 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 53 + "UMask": "0x1" 54 + }, 55 + { 56 + "EventName": "ic_cache_inval.l2_invalidating_probe", 57 + "EventCode": "0x8c", 58 + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 59 + "UMask": "0x2" 60 + }, 61 + { 62 + "EventName": "ic_cache_inval.fill_invalidated", 63 + "EventCode": "0x8c", 64 + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 65 + "UMask": "0x1" 66 + }, 67 + { 68 + "EventName": "bp_tlb_rel", 69 + "EventCode": "0x99", 70 + "BriefDescription": "The number of ITLB reload requests." 71 + }, 72 + { 73 + "EventName": "l2_request_g1.rd_blk_l", 74 + "EventCode": "0x60", 75 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", 76 + "UMask": "0x80" 77 + }, 78 + { 79 + "EventName": "l2_request_g1.rd_blk_x", 80 + "EventCode": "0x60", 81 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", 82 + "UMask": "0x40" 83 + }, 84 + { 85 + "EventName": "l2_request_g1.ls_rd_blk_c_s", 86 + "EventCode": "0x60", 87 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). 
Data cache shared reads.", 88 + "UMask": "0x20" 89 + }, 90 + { 91 + "EventName": "l2_request_g1.cacheable_ic_read", 92 + "EventCode": "0x60", 93 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", 94 + "UMask": "0x10" 95 + }, 96 + { 97 + "EventName": "l2_request_g1.change_to_x", 98 + "EventCode": "0x60", 99 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", 100 + "UMask": "0x8" 101 + }, 102 + { 103 + "EventName": "l2_request_g1.prefetch_l2_cmd", 104 + "EventCode": "0x60", 105 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", 106 + "UMask": "0x4" 107 + }, 108 + { 109 + "EventName": "l2_request_g1.l2_hw_pf", 110 + "EventCode": "0x60", 111 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", 112 + "UMask": "0x2" 113 + }, 114 + { 115 + "EventName": "l2_request_g1.group2", 116 + "EventCode": "0x60", 117 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", 118 + "UMask": "0x1" 119 + }, 120 + { 121 + "EventName": "l2_request_g2.group1", 122 + "EventCode": "0x61", 123 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", 124 + "UMask": "0x80" 125 + }, 126 + { 127 + "EventName": "l2_request_g2.ls_rd_sized", 128 + "EventCode": "0x61", 129 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", 130 + "UMask": "0x40" 131 + }, 132 + { 133 + "EventName": "l2_request_g2.ls_rd_sized_nc", 134 + "EventCode": "0x61", 135 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). 
Data cache read sized non-cacheable.", 136 + "UMask": "0x20" 137 + }, 138 + { 139 + "EventName": "l2_request_g2.ic_rd_sized", 140 + "EventCode": "0x61", 141 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", 142 + "UMask": "0x10" 143 + }, 144 + { 145 + "EventName": "l2_request_g2.ic_rd_sized_nc", 146 + "EventCode": "0x61", 147 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", 148 + "UMask": "0x8" 149 + }, 150 + { 151 + "EventName": "l2_request_g2.smc_inval", 152 + "EventCode": "0x61", 153 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", 154 + "UMask": "0x4" 155 + }, 156 + { 157 + "EventName": "l2_request_g2.bus_locks_originator", 158 + "EventCode": "0x61", 159 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", 160 + "UMask": "0x2" 161 + }, 162 + { 163 + "EventName": "l2_request_g2.bus_locks_responses", 164 + "EventCode": "0x61", 165 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", 166 + "UMask": "0x1" 167 + }, 168 + { 169 + "EventName": "l2_latency.l2_cycles_waiting_on_fills", 170 + "EventCode": "0x62", 171 + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 172 + "UMask": "0x1" 173 + }, 174 + { 175 + "EventName": "l2_wcb_req.wcb_write", 176 + "EventCode": "0x63", 177 + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", 178 + "UMask": "0x40" 179 + }, 180 + { 181 + "EventName": "l2_wcb_req.wcb_close", 182 + "EventCode": "0x63", 183 + "BriefDescription": "LS to L2 WCB close requests. 
LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", 184 + "UMask": "0x20" 185 + }, 186 + { 187 + "EventName": "l2_wcb_req.zero_byte_store", 188 + "EventCode": "0x63", 189 + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 190 + "UMask": "0x4" 191 + }, 192 + { 193 + "EventName": "l2_wcb_req.cl_zero", 194 + "EventCode": "0x63", 195 + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 196 + "UMask": "0x1" 197 + }, 198 + { 199 + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", 200 + "EventCode": "0x64", 201 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", 202 + "UMask": "0x80" 203 + }, 204 + { 205 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", 206 + "EventCode": "0x64", 207 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.", 208 + "UMask": "0x40" 209 + }, 210 + { 211 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", 212 + "EventCode": "0x64", 213 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", 214 + "UMask": "0x20" 215 + }, 216 + { 217 + "EventName": "l2_cache_req_stat.ls_rd_blk_x", 218 + "EventCode": "0x64", 219 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", 220 + "UMask": "0x10" 221 + }, 222 + { 223 + "EventName": "l2_cache_req_stat.ls_rd_blk_c", 224 + "EventCode": "0x64", 225 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). 
Data cache request miss in L2 (all types).", 226 + "UMask": "0x8" 227 + }, 228 + { 229 + "EventName": "l2_cache_req_stat.ic_fill_hit_x", 230 + "EventCode": "0x64", 231 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", 232 + "UMask": "0x4" 233 + }, 234 + { 235 + "EventName": "l2_cache_req_stat.ic_fill_hit_s", 236 + "EventCode": "0x64", 237 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", 238 + "UMask": "0x2" 239 + }, 240 + { 241 + "EventName": "l2_cache_req_stat.ic_fill_miss", 242 + "EventCode": "0x64", 243 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", 244 + "UMask": "0x1" 245 + }, 246 + { 247 + "EventName": "l2_fill_pending.l2_fill_busy", 248 + "EventCode": "0x6d", 249 + "BriefDescription": "Cycles with fill pending from L2. 
Total cycles spent with one or more fill requests in flight from L2.", 250 + "UMask": "0x1" 251 + }, 252 + { 253 + "EventName": "l3_request_g1.caching_l3_cache_accesses", 254 + "EventCode": "0x01", 255 + "BriefDescription": "Caching: L3 cache accesses", 256 + "UMask": "0x80", 257 + "Unit": "L3PMC" 258 + }, 259 + { 260 + "EventName": "l3_lookup_state.all_l3_req_typs", 261 + "EventCode": "0x04", 262 + "BriefDescription": "All L3 Request Types", 263 + "UMask": "0xff", 264 + "Unit": "L3PMC" 265 + }, 266 + { 267 + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", 268 + "EventCode": "0x06", 269 + "BriefDescription": "Other L3 Miss Request Types", 270 + "UMask": "0xfe", 271 + "Unit": "L3PMC" 272 + }, 273 + { 274 + "EventName": "l3_comb_clstr_state.request_miss", 275 + "EventCode": "0x06", 276 + "BriefDescription": "L3 cache misses", 277 + "UMask": "0x01", 278 + "Unit": "L3PMC" 279 + }, 280 + { 281 + "EventName": "xi_sys_fill_latency", 282 + "EventCode": "0x90", 283 + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", 284 + "UMask": "0x00", 285 + "Unit": "L3PMC" 286 + }, 287 + { 288 + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", 289 + "EventCode": "0x9a", 290 + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", 291 + "UMask": "0x3f", 292 + "Unit": "L3PMC" 293 + } 294 + ]
+56
tools/perf/pmu-events/arch/x86/amdzen1/other.json
··· 1 + [ 2 + { 3 + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 4 + "EventCode": "0x28a", 5 + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", 6 + "UMask": "0x2" 7 + }, 8 + { 9 + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 10 + "EventCode": "0x28a", 11 + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", 12 + "UMask": "0x1" 13 + }, 14 + { 15 + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", 16 + "EventCode": "0xaf", 17 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", 18 + "UMask": "0x40" 19 + }, 20 + { 21 + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", 22 + "EventCode": "0xaf", 23 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", 24 + "UMask": "0x20" 25 + }, 26 + { 27 + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", 28 + "EventCode": "0xaf", 29 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", 30 + "UMask": "0x10" 31 + }, 32 + { 33 + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", 34 + "EventCode": "0xaf", 35 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", 36 + "UMask": "0x8" 37 + }, 38 + { 39 + "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", 40 + "EventCode": "0xaf", 41 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", 42 + "UMask": "0x4" 43 + }, 44 + { 45 + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", 46 + "EventCode": "0xaf", 47 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
ALSQ 2 Tokens unavailable.", 48 + "UMask": "0x2" 49 + }, 50 + { 51 + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", 52 + "EventCode": "0xaf", 53 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", 54 + "UMask": "0x1" 55 + } 56 + ]
+52
tools/perf/pmu-events/arch/x86/amdzen2/branch.json
··· 1 + [ 2 + { 3 + "EventName": "bp_l1_btb_correct", 4 + "EventCode": "0x8a", 5 + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." 6 + }, 7 + { 8 + "EventName": "bp_l2_btb_correct", 9 + "EventCode": "0x8b", 10 + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." 11 + }, 12 + { 13 + "EventName": "bp_dyn_ind_pred", 14 + "EventCode": "0x8e", 15 + "BriefDescription": "Dynamic Indirect Predictions.", 16 + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." 17 + }, 18 + { 19 + "EventName": "bp_de_redirect", 20 + "EventCode": "0x91", 21 + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." 22 + }, 23 + { 24 + "EventName": "bp_l1_tlb_fetch_hit", 25 + "EventCode": "0x94", 26 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", 27 + "UMask": "0xFF" 28 + }, 29 + { 30 + "EventName": "bp_l1_tlb_fetch_hit.if1g", 31 + "EventCode": "0x94", 32 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", 33 + "UMask": "0x4" 34 + }, 35 + { 36 + "EventName": "bp_l1_tlb_fetch_hit.if2m", 37 + "EventCode": "0x94", 38 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", 39 + "UMask": "0x2" 40 + }, 41 + { 42 + "EventName": "bp_l1_tlb_fetch_hit.if4k", 43 + "EventCode": "0x94", 44 + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", 45 + "UMask": "0x1" 46 + }, 47 + { 48 + "EventName": "bp_tlb_rel", 49 + "EventCode": "0x99", 50 + "BriefDescription": "The number of ITLB reload requests." 51 + } 52 + ]
+338
tools/perf/pmu-events/arch/x86/amdzen2/cache.json
··· 1 + [ 2 + { 3 + "EventName": "l2_request_g1.rd_blk_l", 4 + "EventCode": "0x60", 5 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", 6 + "UMask": "0x80" 7 + }, 8 + { 9 + "EventName": "l2_request_g1.rd_blk_x", 10 + "EventCode": "0x60", 11 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", 12 + "UMask": "0x40" 13 + }, 14 + { 15 + "EventName": "l2_request_g1.ls_rd_blk_c_s", 16 + "EventCode": "0x60", 17 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", 18 + "UMask": "0x20" 19 + }, 20 + { 21 + "EventName": "l2_request_g1.cacheable_ic_read", 22 + "EventCode": "0x60", 23 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", 24 + "UMask": "0x10" 25 + }, 26 + { 27 + "EventName": "l2_request_g1.change_to_x", 28 + "EventCode": "0x60", 29 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", 30 + "UMask": "0x8" 31 + }, 32 + { 33 + "EventName": "l2_request_g1.prefetch_l2_cmd", 34 + "EventCode": "0x60", 35 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", 36 + "UMask": "0x4" 37 + }, 38 + { 39 + "EventName": "l2_request_g1.l2_hw_pf", 40 + "EventCode": "0x60", 41 + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. 
Types of PF and L2 hit/miss broken out in a separate perfmon event.", 42 + "UMask": "0x2" 43 + }, 44 + { 45 + "EventName": "l2_request_g1.group2", 46 + "EventCode": "0x60", 47 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", 48 + "UMask": "0x1" 49 + }, 50 + { 51 + "EventName": "l2_request_g2.group1", 52 + "EventCode": "0x61", 53 + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", 54 + "UMask": "0x80" 55 + }, 56 + { 57 + "EventName": "l2_request_g2.ls_rd_sized", 58 + "EventCode": "0x61", 59 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", 60 + "UMask": "0x40" 61 + }, 62 + { 63 + "EventName": "l2_request_g2.ls_rd_sized_nc", 64 + "EventCode": "0x61", 65 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", 66 + "UMask": "0x20" 67 + }, 68 + { 69 + "EventName": "l2_request_g2.ic_rd_sized", 70 + "EventCode": "0x61", 71 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", 72 + "UMask": "0x10" 73 + }, 74 + { 75 + "EventName": "l2_request_g2.ic_rd_sized_nc", 76 + "EventCode": "0x61", 77 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", 78 + "UMask": "0x8" 79 + }, 80 + { 81 + "EventName": "l2_request_g2.smc_inval", 82 + "EventCode": "0x61", 83 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", 84 + "UMask": "0x4" 85 + }, 86 + { 87 + "EventName": "l2_request_g2.bus_locks_originator", 88 + "EventCode": "0x61", 89 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", 90 + "UMask": "0x2" 91 + }, 92 + { 93 + "EventName": "l2_request_g2.bus_locks_responses", 94 + "EventCode": "0x61", 95 + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). 
Bus lock response.", 96 + "UMask": "0x1" 97 + }, 98 + { 99 + "EventName": "l2_latency.l2_cycles_waiting_on_fills", 100 + "EventCode": "0x62", 101 + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", 102 + "UMask": "0x1" 103 + }, 104 + { 105 + "EventName": "l2_wcb_req.wcb_write", 106 + "EventCode": "0x63", 107 + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", 108 + "UMask": "0x40" 109 + }, 110 + { 111 + "EventName": "l2_wcb_req.wcb_close", 112 + "EventCode": "0x63", 113 + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", 114 + "UMask": "0x20" 115 + }, 116 + { 117 + "EventName": "l2_wcb_req.zero_byte_store", 118 + "EventCode": "0x63", 119 + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", 120 + "UMask": "0x4" 121 + }, 122 + { 123 + "EventName": "l2_wcb_req.cl_zero", 124 + "EventCode": "0x63", 125 + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", 126 + "UMask": "0x1" 127 + }, 128 + { 129 + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", 130 + "EventCode": "0x64", 131 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", 132 + "UMask": "0x80" 133 + }, 134 + { 135 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", 136 + "EventCode": "0x64", 137 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). 
Data cache read hit in L2.", 138 + "UMask": "0x40" 139 + }, 140 + { 141 + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", 142 + "EventCode": "0x64", 143 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", 144 + "UMask": "0x20" 145 + }, 146 + { 147 + "EventName": "l2_cache_req_stat.ls_rd_blk_x", 148 + "EventCode": "0x64", 149 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", 150 + "UMask": "0x10" 151 + }, 152 + { 153 + "EventName": "l2_cache_req_stat.ls_rd_blk_c", 154 + "EventCode": "0x64", 155 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", 156 + "UMask": "0x8" 157 + }, 158 + { 159 + "EventName": "l2_cache_req_stat.ic_fill_hit_x", 160 + "EventCode": "0x64", 161 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", 162 + "UMask": "0x4" 163 + }, 164 + { 165 + "EventName": "l2_cache_req_stat.ic_fill_hit_s", 166 + "EventCode": "0x64", 167 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", 168 + "UMask": "0x2" 169 + }, 170 + { 171 + "EventName": "l2_cache_req_stat.ic_fill_miss", 172 + "EventCode": "0x64", 173 + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", 174 + "UMask": "0x1" 175 + }, 176 + { 177 + "EventName": "l2_fill_pending.l2_fill_busy", 178 + "EventCode": "0x6d", 179 + "BriefDescription": "Cycles with fill pending from L2. 
Total cycles spent with one or more fill requests in flight from L2.", 180 + "UMask": "0x1" 181 + }, 182 + { 183 + "EventName": "l2_pf_hit_l2", 184 + "EventCode": "0x70", 185 + "BriefDescription": "L2 prefetch hit in L2.", 186 + "UMask": "0xff" 187 + }, 188 + { 189 + "EventName": "l2_pf_miss_l2_hit_l3", 190 + "EventCode": "0x71", 191 + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", 192 + "UMask": "0xff" 193 + }, 194 + { 195 + "EventName": "l2_pf_miss_l2_l3", 196 + "EventCode": "0x72", 197 + "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", 198 + "UMask": "0xff" 199 + }, 200 + { 201 + "EventName": "ic_fw32", 202 + "EventCode": "0x80", 203 + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." 204 + }, 205 + { 206 + "EventName": "ic_fw32_miss", 207 + "EventCode": "0x81", 208 + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." 209 + }, 210 + { 211 + "EventName": "ic_cache_fill_l2", 212 + "EventCode": "0x82", 213 + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." 214 + }, 215 + { 216 + "EventName": "ic_cache_fill_sys", 217 + "EventCode": "0x83", 218 + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." 219 + }, 220 + { 221 + "EventName": "bp_l1_tlb_miss_l2_hit", 222 + "EventCode": "0x84", 223 + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." 
224 + }, 225 + { 226 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", 227 + "EventCode": "0x85", 228 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", 229 + "UMask": "0xff" 230 + }, 231 + { 232 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", 233 + "EventCode": "0x85", 234 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", 235 + "UMask": "0x4" 236 + }, 237 + { 238 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", 239 + "EventCode": "0x85", 240 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", 241 + "UMask": "0x2" 242 + }, 243 + { 244 + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", 245 + "EventCode": "0x85", 246 + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", 247 + "UMask": "0x1" 248 + }, 249 + { 250 + "EventName": "bp_snp_re_sync", 251 + "EventCode": "0x86", 252 + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." 253 + }, 254 + { 255 + "EventName": "ic_fetch_stall.ic_stall_any", 256 + "EventCode": "0x87", 257 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", 258 + "UMask": "0x4" 259 + }, 260 + { 261 + "EventName": "ic_fetch_stall.ic_stall_dq_empty", 262 + "EventCode": "0x87", 263 + "BriefDescription": "Instruction Pipe Stall. 
IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", 264 + "UMask": "0x2" 265 + }, 266 + { 267 + "EventName": "ic_fetch_stall.ic_stall_back_pressure", 268 + "EventCode": "0x87", 269 + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", 270 + "UMask": "0x1" 271 + }, 272 + { 273 + "EventName": "ic_cache_inval.l2_invalidating_probe", 274 + "EventCode": "0x8c", 275 + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 276 + "UMask": "0x2" 277 + }, 278 + { 279 + "EventName": "ic_cache_inval.fill_invalidated", 280 + "EventCode": "0x8c", 281 + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", 282 + "UMask": "0x1" 283 + }, 284 + { 285 + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", 286 + "EventCode": "0x28a", 287 + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", 288 + "UMask": "0x2" 289 + }, 290 + { 291 + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", 292 + "EventCode": "0x28a", 293 + "BriefDescription": "OC Mode Switch. 
IC to OC mode switch.", 294 + "UMask": "0x1" 295 + }, 296 + { 297 + "EventName": "l3_request_g1.caching_l3_cache_accesses", 298 + "EventCode": "0x01", 299 + "BriefDescription": "Caching: L3 cache accesses", 300 + "UMask": "0x80", 301 + "Unit": "L3PMC" 302 + }, 303 + { 304 + "EventName": "l3_lookup_state.all_l3_req_typs", 305 + "EventCode": "0x04", 306 + "BriefDescription": "All L3 Request Types", 307 + "UMask": "0xff", 308 + "Unit": "L3PMC" 309 + }, 310 + { 311 + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", 312 + "EventCode": "0x06", 313 + "BriefDescription": "Other L3 Miss Request Types", 314 + "UMask": "0xfe", 315 + "Unit": "L3PMC" 316 + }, 317 + { 318 + "EventName": "l3_comb_clstr_state.request_miss", 319 + "EventCode": "0x06", 320 + "BriefDescription": "L3 cache misses", 321 + "UMask": "0x01", 322 + "Unit": "L3PMC" 323 + }, 324 + { 325 + "EventName": "xi_sys_fill_latency", 326 + "EventCode": "0x90", 327 + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", 328 + "UMask": "0x00", 329 + "Unit": "L3PMC" 330 + }, 331 + { 332 + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", 333 + "EventCode": "0x9A", 334 + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", 335 + "UMask": "0x3f", 336 + "Unit": "L3PMC" 337 + } 338 + ]
+130
tools/perf/pmu-events/arch/x86/amdzen2/core.json
··· 1 + [ 2 + { 3 + "EventName": "ex_ret_instr", 4 + "EventCode": "0xc0", 5 + "BriefDescription": "Retired Instructions." 6 + }, 7 + { 8 + "EventName": "ex_ret_cops", 9 + "EventCode": "0xc1", 10 + "BriefDescription": "Retired Uops.", 11 + "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8." 12 + }, 13 + { 14 + "EventName": "ex_ret_brn", 15 + "EventCode": "0xc2", 16 + "BriefDescription": "Retired Branch Instructions.", 17 + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 18 + }, 19 + { 20 + "EventName": "ex_ret_brn_misp", 21 + "EventCode": "0xc3", 22 + "BriefDescription": "Retired Branch Instructions Mispredicted.", 23 + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." 24 + }, 25 + { 26 + "EventName": "ex_ret_brn_tkn", 27 + "EventCode": "0xc4", 28 + "BriefDescription": "Retired Taken Branch Instructions.", 29 + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 30 + }, 31 + { 32 + "EventName": "ex_ret_brn_tkn_misp", 33 + "EventCode": "0xc5", 34 + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", 35 + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." 
36 + }, 37 + { 38 + "EventName": "ex_ret_brn_far", 39 + "EventCode": "0xc6", 40 + "BriefDescription": "Retired Far Control Transfers.", 41 + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." 42 + }, 43 + { 44 + "EventName": "ex_ret_brn_resync", 45 + "EventCode": "0xc7", 46 + "BriefDescription": "Retired Branch Resyncs.", 47 + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." 48 + }, 49 + { 50 + "EventName": "ex_ret_near_ret", 51 + "EventCode": "0xc8", 52 + "BriefDescription": "Retired Near Returns.", 53 + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." 54 + }, 55 + { 56 + "EventName": "ex_ret_near_ret_mispred", 57 + "EventCode": "0xc9", 58 + "BriefDescription": "Retired Near Returns Mispredicted.", 59 + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." 60 + }, 61 + { 62 + "EventName": "ex_ret_brn_ind_misp", 63 + "EventCode": "0xca", 64 + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted." 65 + }, 66 + { 67 + "EventName": "ex_ret_mmx_fp_instr.sse_instr", 68 + "EventCode": "0xcb", 69 + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 70 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. 
Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", 71 + "UMask": "0x4" 72 + }, 73 + { 74 + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", 75 + "EventCode": "0xcb", 76 + "BriefDescription": "MMX instructions.", 77 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", 78 + "UMask": "0x2" 79 + }, 80 + { 81 + "EventName": "ex_ret_mmx_fp_instr.x87_instr", 82 + "EventCode": "0xcb", 83 + "BriefDescription": "x87 instructions.", 84 + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", 85 + "UMask": "0x1" 86 + }, 87 + { 88 + "EventName": "ex_ret_cond", 89 + "EventCode": "0xd1", 90 + "BriefDescription": "Retired Conditional Branch Instructions." 91 + }, 92 + { 93 + "EventName": "ex_ret_cond_misp", 94 + "EventCode": "0xd2", 95 + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." 96 + }, 97 + { 98 + "EventName": "ex_div_busy", 99 + "EventCode": "0xd3", 100 + "BriefDescription": "Div Cycles Busy count." 101 + }, 102 + { 103 + "EventName": "ex_div_count", 104 + "EventCode": "0xd4", 105 + "BriefDescription": "Div Op Count." 106 + }, 107 + { 108 + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", 109 + "EventCode": "0x1cf", 110 + "BriefDescription": "Tagged IBS Ops. 
Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", 111 + "UMask": "0x4" 112 + }, 113 + { 114 + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", 115 + "EventCode": "0x1cf", 116 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", 117 + "UMask": "0x2" 118 + }, 119 + { 120 + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", 121 + "EventCode": "0x1cf", 122 + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", 123 + "UMask": "0x1" 124 + }, 125 + { 126 + "EventName": "ex_ret_fus_brnch_inst", 127 + "EventCode": "0x1d0", 128 + "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8." 129 + } 130 + ]
+140
tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
··· 1 + [ 2 + { 3 + "EventName": "fpu_pipe_assignment.total", 4 + "EventCode": "0x00", 5 + "BriefDescription": "Total number of fp uOps.", 6 + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", 7 + "UMask": "0xf" 8 + }, 9 + { 10 + "EventName": "fpu_pipe_assignment.total3", 11 + "EventCode": "0x00", 12 + "BriefDescription": "Total number uOps assigned to pipe 3.", 13 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", 14 + "UMask": "0x8" 15 + }, 16 + { 17 + "EventName": "fpu_pipe_assignment.total2", 18 + "EventCode": "0x00", 19 + "BriefDescription": "Total number uOps assigned to pipe 2.", 20 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. 
Total number uOps assigned to pipe 2.", 21 + "UMask": "0x4" 22 + }, 23 + { 24 + "EventName": "fpu_pipe_assignment.total1", 25 + "EventCode": "0x00", 26 + "BriefDescription": "Total number uOps assigned to pipe 1.", 27 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", 28 + "UMask": "0x2" 29 + }, 30 + { 31 + "EventName": "fpu_pipe_assignment.total0", 32 + "EventCode": "0x00", 33 + "BriefDescription": "Total number of fp uOps on pipe 0.", 34 + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", 35 + "UMask": "0x1" 36 + }, 37 + { 38 + "EventName": "fp_ret_sse_avx_ops.all", 39 + "EventCode": "0x03", 40 + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 41 + "UMask": "0xff" 42 + }, 43 + { 44 + "EventName": "fp_ret_sse_avx_ops.mac_flops", 45 + "EventCode": "0x03", 46 + "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. 
The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 47 + "PublicDescription": "", 48 + "UMask": "0x8" 49 + }, 50 + { 51 + "EventName": "fp_ret_sse_avx_ops.div_flops", 52 + "EventCode": "0x03", 53 + "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 54 + "UMask": "0x4" 55 + }, 56 + { 57 + "EventName": "fp_ret_sse_avx_ops.mult_flops", 58 + "EventCode": "0x03", 59 + "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 60 + "UMask": "0x2" 61 + }, 62 + { 63 + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", 64 + "EventCode": "0x03", 65 + "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", 66 + "UMask": "0x1" 67 + }, 68 + { 69 + "EventName": "fp_num_mov_elim_scal_op.optimized", 70 + "EventCode": "0x04", 71 + "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 72 + "UMask": "0x8" 73 + }, 74 + { 75 + "EventName": "fp_num_mov_elim_scal_op.opt_potential", 76 + "EventCode": "0x04", 77 + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 78 + "UMask": "0x4" 79 + }, 80 + { 81 + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", 82 + "EventCode": "0x04", 83 + "BriefDescription": "Number of SSE Move Ops eliminated. 
This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 84 + "UMask": "0x2" 85 + }, 86 + { 87 + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", 88 + "EventCode": "0x04", 89 + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", 90 + "UMask": "0x1" 91 + }, 92 + { 93 + "EventName": "fp_retired_ser_ops.sse_bot_ret", 94 + "EventCode": "0x05", 95 + "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", 96 + "UMask": "0x8" 97 + }, 98 + { 99 + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", 100 + "EventCode": "0x05", 101 + "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", 102 + "UMask": "0x4" 103 + }, 104 + { 105 + "EventName": "fp_retired_ser_ops.x87_bot_ret", 106 + "EventCode": "0x05", 107 + "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", 108 + "UMask": "0x2" 109 + }, 110 + { 111 + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", 112 + "EventCode": "0x05", 113 + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", 114 + "UMask": "0x1" 115 + }, 116 + { 117 + "EventName": "fp_disp_faults.ymm_spill_fault", 118 + "EventCode": "0x0e", 119 + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", 120 + "UMask": "0x8" 121 + }, 122 + { 123 + "EventName": "fp_disp_faults.ymm_fill_fault", 124 + "EventCode": "0x0e", 125 + "BriefDescription": "Floating Point Dispatch Faults. 
YMM fill fault.", 126 + "UMask": "0x4" 127 + }, 128 + { 129 + "EventName": "fp_disp_faults.xmm_fill_fault", 130 + "EventCode": "0x0e", 131 + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", 132 + "UMask": "0x2" 133 + }, 134 + { 135 + "EventName": "fp_disp_faults.x87_fill_fault", 136 + "EventCode": "0x0e", 137 + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", 138 + "UMask": "0x1" 139 + } 140 + ]
+341
tools/perf/pmu-events/arch/x86/amdzen2/memory.json
··· 1 + [ 2 + { 3 + "EventName": "ls_bad_status2.stli_other", 4 + "EventCode": "0x24", 5 + "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", 6 + "PublicDescription": "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", 7 + "UMask": "0x2" 8 + }, 9 + { 10 + "EventName": "ls_locks.spec_lock_hi_spec", 11 + "EventCode": "0x25", 12 + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", 13 + "UMask": "0x8" 14 + }, 15 + { 16 + "EventName": "ls_locks.spec_lock_lo_spec", 17 + "EventCode": "0x25", 18 + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", 19 + "UMask": "0x4" 20 + }, 21 + { 22 + "EventName": "ls_locks.non_spec_lock", 23 + "EventCode": "0x25", 24 + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", 25 + "UMask": "0x2" 26 + }, 27 + { 28 + "EventName": "ls_locks.bus_lock", 29 + "EventCode": "0x25", 30 + "BriefDescription": "Retired lock instructions. Bus lock when a locked operation crosses a cache boundary or is done on an uncacheable memory type. 
Comparable to legacy bus lock.", 31 + "UMask": "0x1" 32 + }, 33 + { 34 + "EventName": "ls_ret_cl_flush", 35 + "EventCode": "0x26", 36 + "BriefDescription": "Number of retired CLFLUSH instructions." 37 + }, 38 + { 39 + "EventName": "ls_ret_cpuid", 40 + "EventCode": "0x27", 41 + "BriefDescription": "Number of retired CPUID instructions." 42 + }, 43 + { 44 + "EventName": "ls_dispatch.ld_st_dispatch", 45 + "EventCode": "0x29", 46 + "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", 47 + "UMask": "0x4" 48 + }, 49 + { 50 + "EventName": "ls_dispatch.store_dispatch", 51 + "EventCode": "0x29", 52 + "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 53 + "UMask": "0x2" 54 + }, 55 + { 56 + "EventName": "ls_dispatch.ld_dispatch", 57 + "EventCode": "0x29", 58 + "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", 59 + "UMask": "0x1" 60 + }, 61 + { 62 + "EventName": "ls_smi_rx", 63 + "EventCode": "0x2B", 64 + "BriefDescription": "Number of SMIs received." 65 + }, 66 + { 67 + "EventName": "ls_int_taken", 68 + "EventCode": "0x2C", 69 + "BriefDescription": "Number of interrupts taken." 70 + }, 71 + { 72 + "EventName": "ls_rdtsc", 73 + "EventCode": "0x2D", 74 + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." 75 + }, 76 + { 77 + "EventName": "ls_stlf", 78 + "EventCode": "0x35", 79 + "BriefDescription": "Number of STLF hits." 80 + }, 81 + { 82 + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", 83 + "EventCode": "0x37", 84 + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full." 
85 + }, 86 + { 87 + "EventName": "ls_dc_accesses", 88 + "EventCode": "0x40", 89 + "BriefDescription": "Number of accesses to the dcache for load/store references.", 90 + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." 91 + }, 92 + { 93 + "EventName": "ls_mab_alloc.dc_prefetcher", 94 + "EventCode": "0x41", 95 + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", 96 + "UMask": "0x8" 97 + }, 98 + { 99 + "EventName": "ls_mab_alloc.stores", 100 + "EventCode": "0x41", 101 + "BriefDescription": "LS MAB Allocates by Type. Stores.", 102 + "UMask": "0x2" 103 + }, 104 + { 105 + "EventName": "ls_mab_alloc.loads", 106 + "EventCode": "0x41", 107 + "BriefDescription": "LS MAB Allocates by Type. Loads.", 108 + "UMask": "0x1" 109 + }, 110 + { 111 + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", 112 + "EventCode": "0x43", 113 + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.", 114 + "UMask": "0x40" 115 + }, 116 + { 117 + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache", 118 + "EventCode": "0x43", 119 + "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.", 120 + "UMask": "0x10" 121 + }, 122 + { 123 + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", 124 + "EventCode": "0x43", 125 + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", 126 + "UMask": "0x8" 127 + }, 128 + { 129 + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", 130 + "EventCode": "0x43", 131 + "BriefDescription": "Demand Data Cache Fills by Data Source. 
Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", 132 + "UMask": "0x2" 133 + }, 134 + { 135 + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", 136 + "EventCode": "0x43", 137 + "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", 138 + "UMask": "0x1" 139 + }, 140 + { 141 + "EventName": "ls_l1_d_tlb_miss.all", 142 + "EventCode": "0x45", 143 + "BriefDescription": "All L1 DTLB Misses or Reloads.", 144 + "UMask": "0xff" 145 + }, 146 + { 147 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 148 + "EventCode": "0x45", 149 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that miss in the L2 TLB.", 150 + "UMask": "0x80" 151 + }, 152 + { 153 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 154 + "EventCode": "0x45", 155 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that miss in the L2 TLB.", 156 + "UMask": "0x40" 157 + }, 158 + { 159 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 160 + "EventCode": "0x45", 161 + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.", 162 + "UMask": "0x20" 163 + }, 164 + { 165 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 166 + "EventCode": "0x45", 167 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that miss the L2 TLB.", 168 + "UMask": "0x10" 169 + }, 170 + { 171 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 172 + "EventCode": "0x45", 173 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", 174 + "UMask": "0x8" 175 + }, 176 + { 177 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 178 + "EventCode": "0x45", 179 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", 180 + "UMask": "0x4" 181 + }, 182 + { 183 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 184 + "EventCode": "0x45", 185 + "BriefDescription": "L1 DTLB Miss. 
DTLB reload hit a coalesced page.", 186 + "UMask": "0x2" 187 + }, 188 + { 189 + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 190 + "EventCode": "0x45", 191 + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", 192 + "UMask": "0x1" 193 + }, 194 + { 195 + "EventName": "ls_tablewalker.iside", 196 + "EventCode": "0x46", 197 + "BriefDescription": "Total Page Table Walks on I-side.", 198 + "UMask": "0xc" 199 + }, 200 + { 201 + "EventName": "ls_tablewalker.ic_type1", 202 + "EventCode": "0x46", 203 + "BriefDescription": "Total Page Table Walks IC Type 1.", 204 + "UMask": "0x8" 205 + }, 206 + { 207 + "EventName": "ls_tablewalker.ic_type0", 208 + "EventCode": "0x46", 209 + "BriefDescription": "Total Page Table Walks IC Type 0.", 210 + "UMask": "0x4" 211 + }, 212 + { 213 + "EventName": "ls_tablewalker.dside", 214 + "EventCode": "0x46", 215 + "BriefDescription": "Total Page Table Walks on D-side.", 216 + "UMask": "0x3" 217 + }, 218 + { 219 + "EventName": "ls_tablewalker.dc_type1", 220 + "EventCode": "0x46", 221 + "BriefDescription": "Total Page Table Walks DC Type 1.", 222 + "UMask": "0x2" 223 + }, 224 + { 225 + "EventName": "ls_tablewalker.dc_type0", 226 + "EventCode": "0x46", 227 + "BriefDescription": "Total Page Table Walks DC Type 0.", 228 + "UMask": "0x1" 229 + }, 230 + { 231 + "EventName": "ls_misal_accesses", 232 + "EventCode": "0x47", 233 + "BriefDescription": "Misaligned loads." 234 + }, 235 + { 236 + "EventName": "ls_pref_instr_disp", 237 + "EventCode": "0x4b", 238 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", 239 + "UMask": "0xff" 240 + }, 241 + { 242 + "EventName": "ls_pref_instr_disp.prefetch_nta", 243 + "EventCode": "0x4b", 244 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. 
See docAPM3 PREFETCHlevel.", 245 + "UMask": "0x4" 246 + }, 247 + { 248 + "EventName": "ls_pref_instr_disp.prefetch_w", 249 + "EventCode": "0x4b", 250 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", 251 + "UMask": "0x2" 252 + }, 253 + { 254 + "EventName": "ls_pref_instr_disp.prefetch", 255 + "EventCode": "0x4b", 256 + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", 257 + "UMask": "0x1" 258 + }, 259 + { 260 + "EventName": "ls_inef_sw_pref.mab_mch_cnt", 261 + "EventCode": "0x52", 262 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", 263 + "UMask": "0x2" 264 + }, 265 + { 266 + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 267 + "EventCode": "0x52", 268 + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", 269 + "UMask": "0x1" 270 + }, 271 + { 272 + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", 273 + "EventCode": "0x59", 274 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", 275 + "UMask": "0x40" 276 + }, 277 + { 278 + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache", 279 + "EventCode": "0x59", 280 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", 281 + "UMask": "0x10" 282 + }, 283 + { 284 + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", 285 + "EventCode": "0x59", 286 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. 
From DRAM (home node local).", 287 + "UMask": "0x8" 288 + }, 289 + { 290 + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", 291 + "EventCode": "0x59", 292 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).", 293 + "UMask": "0x2" 294 + }, 295 + { 296 + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", 297 + "EventCode": "0x59", 298 + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", 299 + "UMask": "0x1" 300 + }, 301 + { 302 + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", 303 + "EventCode": "0x5A", 304 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", 305 + "UMask": "0x40" 306 + }, 307 + { 308 + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", 309 + "EventCode": "0x5A", 310 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", 311 + "UMask": "0x10" 312 + }, 313 + { 314 + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", 315 + "EventCode": "0x5A", 316 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", 317 + "UMask": "0x8" 318 + }, 319 + { 320 + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", 321 + "EventCode": "0x5A", 322 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", 323 + "UMask": "0x2" 324 + }, 325 + { 326 + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", 327 + "EventCode": "0x5A", 328 + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", 329 + "UMask": "0x1" 330 + }, 331 + { 332 + "EventName": "ls_not_halted_cyc", 333 + "EventCode": "0x76", 334 + "BriefDescription": "Cycles not in Halt." 335 + }, 336 + { 337 + "EventName": "ls_tlb_flush", 338 + "EventCode": "0x78", 339 + "BriefDescription": "All TLB Flushes" 340 + } 341 + ]
+115
tools/perf/pmu-events/arch/x86/amdzen2/other.json
··· 1 + [ 2 + { 3 + "EventName": "de_dis_uop_queue_empty_di0", 4 + "EventCode": "0xa9", 5 + "BriefDescription": "Cycles where the Micro-Op Queue is empty." 6 + }, 7 + { 8 + "EventName": "de_dis_uops_from_decoder", 9 + "EventCode": "0xaa", 10 + "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.", 11 + "UMask": "0xff" 12 + }, 13 + { 14 + "EventName": "de_dis_uops_from_decoder.opcache_dispatched", 15 + "EventCode": "0xaa", 16 + "BriefDescription": "Count of dispatched Ops from OpCache.", 17 + "UMask": "0x2" 18 + }, 19 + { 20 + "EventName": "de_dis_uops_from_decoder.decoder_dispatched", 21 + "EventCode": "0xaa", 22 + "BriefDescription": "Count of dispatched Ops from Decoder.", 23 + "UMask": "0x1" 24 + }, 25 + { 26 + "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", 27 + "EventCode": "0xae", 28 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.", 29 + "UMask": "0x80" 30 + }, 31 + { 32 + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 33 + "EventCode": "0xae", 34 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", 35 + "UMask": "0x40" 36 + }, 37 + { 38 + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 39 + "EventCode": "0xae", 40 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.", 41 + "UMask": "0x20" 42 + }, 43 + { 44 + "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall", 45 + "EventCode": "0xae", 46 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
Taken branch buffer resource stall.", 47 + "UMask": "0x10" 48 + }, 49 + { 50 + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 51 + "EventCode": "0xae", 52 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", 53 + "UMask": "0x8" 54 + }, 55 + { 56 + "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", 57 + "EventCode": "0xae", 58 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", 59 + "UMask": "0x4" 60 + }, 61 + { 62 + "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", 63 + "EventCode": "0xae", 64 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", 65 + "UMask": "0x2" 66 + }, 67 + { 68 + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", 69 + "EventCode": "0xae", 70 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.", 71 + "UMask": "0x1" 72 + }, 73 + { 74 + "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", 75 + "EventCode": "0xaf", 76 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.", 77 + "UMask": "0x40" 78 + }, 79 + { 80 + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", 81 + "EventCode": "0xaf", 82 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. 
RETIRE Tokens unavailable.", 83 + "UMask": "0x20" 84 + }, 85 + { 86 + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", 87 + "EventCode": "0xaf", 88 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", 89 + "UMask": "0x10" 90 + }, 91 + { 92 + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", 93 + "EventCode": "0xaf", 94 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", 95 + "UMask": "0x8" 96 + }, 97 + { 98 + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", 99 + "EventCode": "0xaf", 100 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", 101 + "UMask": "0x4" 102 + }, 103 + { 104 + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", 105 + "EventCode": "0xaf", 106 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", 107 + "UMask": "0x2" 108 + }, 109 + { 110 + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", 111 + "EventCode": "0xaf", 112 + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", 113 + "UMask": "0x1" 114 + } 115 + ]
+1 -1
tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
··· 297 297 }, 298 298 { 299 299 "BriefDescription": "Fraction of cycles spent in Kernel mode", 300 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 300 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 301 301 "MetricGroup": "Summary", 302 302 "MetricName": "Kernel_Utilization" 303 303 },
+1 -1
tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
··· 115 115 }, 116 116 { 117 117 "BriefDescription": "Fraction of cycles spent in Kernel mode", 118 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 118 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 119 119 "MetricGroup": "Summary", 120 120 "MetricName": "Kernel_Utilization" 121 121 },
+1 -1
tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
··· 297 297 }, 298 298 { 299 299 "BriefDescription": "Fraction of cycles spent in Kernel mode", 300 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 300 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 301 301 "MetricGroup": "Summary", 302 302 "MetricName": "Kernel_Utilization" 303 303 },
+1 -1
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
··· 316 316 }, 317 317 { 318 318 "BriefDescription": "Fraction of cycles spent in Kernel mode", 319 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 319 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 320 320 "MetricGroup": "Summary", 321 321 "MetricName": "Kernel_Utilization" 322 322 },
+1 -1
tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
··· 267 267 }, 268 268 { 269 269 "BriefDescription": "Fraction of cycles spent in Kernel mode", 270 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 270 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 271 271 "MetricGroup": "Summary", 272 272 "MetricName": "Kernel_Utilization" 273 273 },
+1 -1
tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
··· 267 267 }, 268 268 { 269 269 "BriefDescription": "Fraction of cycles spent in Kernel mode", 270 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 270 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 271 271 "MetricGroup": "Summary", 272 272 "MetricName": "Kernel_Utilization" 273 273 },
+1 -1
tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
··· 285 285 }, 286 286 { 287 287 "BriefDescription": "Fraction of cycles spent in Kernel mode", 288 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 288 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 289 289 "MetricGroup": "Summary", 290 290 "MetricName": "Kernel_Utilization" 291 291 },
+1 -1
tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
··· 285 285 }, 286 286 { 287 287 "BriefDescription": "Fraction of cycles spent in Kernel mode", 288 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 288 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 289 289 "MetricGroup": "Summary", 290 290 "MetricName": "Kernel_Utilization" 291 291 },
+1 -1
tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
··· 171 171 }, 172 172 { 173 173 "BriefDescription": "Fraction of cycles spent in Kernel mode", 174 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 174 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 175 175 "MetricGroup": "Summary", 176 176 "MetricName": "Kernel_Utilization" 177 177 },
+2 -1
tools/perf/pmu-events/arch/x86/mapfile.csv
··· 36 36 GenuineIntel-6-7D,v1,icelake,core 37 37 GenuineIntel-6-7E,v1,icelake,core 38 38 GenuineIntel-6-86,v1,tremontx,core 39 - AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core 39 + AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core 40 + AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
+1 -1
tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
··· 171 171 }, 172 172 { 173 173 "BriefDescription": "Fraction of cycles spent in Kernel mode", 174 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 174 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 175 175 "MetricGroup": "Summary", 176 176 "MetricName": "Kernel_Utilization" 177 177 },
+1 -1
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
··· 304 304 }, 305 305 { 306 306 "BriefDescription": "Fraction of cycles spent in Kernel mode", 307 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 307 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 308 308 "MetricGroup": "Summary", 309 309 "MetricName": "Kernel_Utilization" 310 310 },
+1 -1
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
··· 316 316 }, 317 317 { 318 318 "BriefDescription": "Fraction of cycles spent in Kernel mode", 319 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", 319 + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", 320 320 "MetricGroup": "Summary", 321 321 "MetricName": "Kernel_Utilization" 322 322 },
+30
tools/perf/pmu-events/jevents.c
··· 771 771 fprintf(outfp, "};\n"); 772 772 } 773 773 774 + static void print_mapping_test_table(FILE *outfp) 775 + { 776 + /* 777 + * Print the terminating, NULL entry. 778 + */ 779 + fprintf(outfp, "{\n"); 780 + fprintf(outfp, "\t.cpuid = \"testcpu\",\n"); 781 + fprintf(outfp, "\t.version = \"v1\",\n"); 782 + fprintf(outfp, "\t.type = \"core\",\n"); 783 + fprintf(outfp, "\t.table = pme_test_cpu,\n"); 784 + fprintf(outfp, "},\n"); 785 + } 786 + 774 787 static int process_mapfile(FILE *outfp, char *fpath) 775 788 { 776 789 int n = 16384; ··· 861 848 } 862 849 863 850 out: 851 + print_mapping_test_table(outfp); 864 852 print_mapping_table_suffix(outfp); 865 853 fclose(mapfp); 866 854 free(line); ··· 1175 1161 } else if (rc < 0) { 1176 1162 /* Make build fail */ 1177 1163 fclose(eventsfp); 1164 + free_arch_std_events(); 1165 + ret = 1; 1166 + goto out_free_mapfile; 1167 + } else if (rc) { 1168 + goto empty_map; 1169 + } 1170 + 1171 + sprintf(ldirname, "%s/test", start_dirname); 1172 + 1173 + rc = nftw(ldirname, process_one_file, maxfds, 0); 1174 + if (rc && verbose) { 1175 + pr_info("%s: Error walking file tree %s rc=%d for test\n", 1176 + prog, ldirname, rc); 1177 + goto empty_map; 1178 + } else if (rc < 0) { 1179 + /* Make build fail */ 1178 1180 free_arch_std_events(); 1179 1181 ret = 1; 1180 1182 goto out_free_mapfile;
+1
tools/perf/tests/Build
··· 14 14 perf-y += evsel-tp-sched.o 15 15 perf-y += fdarray.o 16 16 perf-y += pmu.o 17 + perf-y += pmu-events.o 17 18 perf-y += hists_common.o 18 19 perf-y += hists_link.o 19 20 perf-y += hists_filter.o
+4
tools/perf/tests/builtin-test.c
··· 73 73 .func = test__pmu, 74 74 }, 75 75 { 76 + .desc = "PMU events", 77 + .func = test__pmu_events, 78 + }, 79 + { 76 80 .desc = "DSO data read", 77 81 .func = test__dso_data, 78 82 },
+7 -3
tools/perf/tests/make
··· 28 28 29 29 PARALLEL_OPT= 30 30 ifeq ($(SET_PARALLEL),1) 31 - cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) 32 - ifeq ($(cores),0) 33 - cores := 1 31 + ifeq ($(JOBS),) 32 + cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) 33 + ifeq ($(cores),0) 34 + cores := 1 35 + endif 36 + else 37 + cores=$(JOBS) 34 38 endif 35 39 PARALLEL_OPT="-j$(cores)" 36 40 endif
+379
tools/perf/tests/pmu-events.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "parse-events.h" 3 + #include "pmu.h" 4 + #include "tests.h" 5 + #include <errno.h> 6 + #include <stdio.h> 7 + #include <linux/kernel.h> 8 + #include <linux/zalloc.h> 9 + #include "debug.h" 10 + #include "../pmu-events/pmu-events.h" 11 + 12 + struct perf_pmu_test_event { 13 + struct pmu_event event; 14 + 15 + /* extra events for aliases */ 16 + const char *alias_str; 17 + 18 + /* 19 + * Note: For when PublicDescription does not exist in the JSON, we 20 + * will have no long_desc in pmu_event.long_desc, but long_desc may 21 + * be set in the alias. 22 + */ 23 + const char *alias_long_desc; 24 + }; 25 + 26 + static struct perf_pmu_test_event test_cpu_events[] = { 27 + { 28 + .event = { 29 + .name = "bp_l1_btb_correct", 30 + .event = "event=0x8a", 31 + .desc = "L1 BTB Correction", 32 + .topic = "branch", 33 + }, 34 + .alias_str = "event=0x8a", 35 + .alias_long_desc = "L1 BTB Correction", 36 + }, 37 + { 38 + .event = { 39 + .name = "bp_l2_btb_correct", 40 + .event = "event=0x8b", 41 + .desc = "L2 BTB Correction", 42 + .topic = "branch", 43 + }, 44 + .alias_str = "event=0x8b", 45 + .alias_long_desc = "L2 BTB Correction", 46 + }, 47 + { 48 + .event = { 49 + .name = "segment_reg_loads.any", 50 + .event = "umask=0x80,period=200000,event=0x6", 51 + .desc = "Number of segment register loads", 52 + .topic = "other", 53 + }, 54 + .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", 55 + .alias_long_desc = "Number of segment register loads", 56 + }, 57 + { 58 + .event = { 59 + .name = "dispatch_blocked.any", 60 + .event = "umask=0x20,period=200000,event=0x9", 61 + .desc = "Memory cluster signals to block micro-op dispatch for any reason", 62 + .topic = "other", 63 + }, 64 + .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", 65 + .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", 66 + }, 67 + { 68 + .event = { 69 + .name = "eist_trans", 70 + .event = "umask=0x0,period=200000,event=0x3a", 
71 + .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", 72 + .topic = "other", 73 + }, 74 + .alias_str = "umask=0,(null)=0x30d40,event=0x3a", 75 + .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", 76 + }, 77 + { /* sentinel */ 78 + .event = { 79 + .name = NULL, 80 + }, 81 + }, 82 + }; 83 + 84 + static struct perf_pmu_test_event test_uncore_events[] = { 85 + { 86 + .event = { 87 + .name = "uncore_hisi_ddrc.flux_wcmd", 88 + .event = "event=0x2", 89 + .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", 90 + .topic = "uncore", 91 + .long_desc = "DDRC write commands", 92 + .pmu = "hisi_sccl,ddrc", 93 + }, 94 + .alias_str = "event=0x2", 95 + .alias_long_desc = "DDRC write commands", 96 + }, 97 + { 98 + .event = { 99 + .name = "unc_cbo_xsnp_response.miss_eviction", 100 + .event = "umask=0x81,event=0x22", 101 + .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core", 102 + .topic = "uncore", 103 + .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", 104 + .pmu = "uncore_cbox", 105 + }, 106 + .alias_str = "umask=0x81,event=0x22", 107 + .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", 108 + }, 109 + { /* sentinel */ 110 + .event = { 111 + .name = NULL, 112 + }, 113 + } 114 + }; 115 + 116 + const int total_test_events_size = ARRAY_SIZE(test_uncore_events); 117 + 118 + static bool is_same(const char *reference, const char *test) 119 + { 120 + if (!reference && !test) 121 + return true; 122 + 123 + if (reference && !test) 124 + return false; 125 + 126 + if (!reference && test) 127 + return false; 128 + 129 + return !strcmp(reference, test); 130 + } 131 + 132 + static struct pmu_events_map *__test_pmu_get_events_map(void) 133 + { 134 + struct pmu_events_map *map; 135 + 136 + for (map = &pmu_events_map[0]; map->cpuid; map++) { 137 + if (!strcmp(map->cpuid, 
"testcpu")) 138 + return map; 139 + } 140 + 141 + pr_err("could not find test events map\n"); 142 + 143 + return NULL; 144 + } 145 + 146 + /* Verify generated events from pmu-events.c is as expected */ 147 + static int __test_pmu_event_table(void) 148 + { 149 + struct pmu_events_map *map = __test_pmu_get_events_map(); 150 + struct pmu_event *table; 151 + int map_events = 0, expected_events; 152 + 153 + /* ignore 2x sentinels */ 154 + expected_events = ARRAY_SIZE(test_cpu_events) + 155 + ARRAY_SIZE(test_uncore_events) - 2; 156 + 157 + if (!map) 158 + return -1; 159 + 160 + for (table = map->table; table->name; table++) { 161 + struct perf_pmu_test_event *test; 162 + struct pmu_event *te; 163 + bool found = false; 164 + 165 + if (table->pmu) 166 + test = &test_uncore_events[0]; 167 + else 168 + test = &test_cpu_events[0]; 169 + 170 + te = &test->event; 171 + 172 + for (; te->name; test++, te = &test->event) { 173 + if (strcmp(table->name, te->name)) 174 + continue; 175 + found = true; 176 + map_events++; 177 + 178 + if (!is_same(table->desc, te->desc)) { 179 + pr_debug2("testing event table %s: mismatched desc, %s vs %s\n", 180 + table->name, table->desc, te->desc); 181 + return -1; 182 + } 183 + 184 + if (!is_same(table->topic, te->topic)) { 185 + pr_debug2("testing event table %s: mismatched topic, %s vs %s\n", 186 + table->name, table->topic, 187 + te->topic); 188 + return -1; 189 + } 190 + 191 + if (!is_same(table->long_desc, te->long_desc)) { 192 + pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n", 193 + table->name, table->long_desc, 194 + te->long_desc); 195 + return -1; 196 + } 197 + 198 + if (!is_same(table->unit, te->unit)) { 199 + pr_debug2("testing event table %s: mismatched unit, %s vs %s\n", 200 + table->name, table->unit, 201 + te->unit); 202 + return -1; 203 + } 204 + 205 + if (!is_same(table->perpkg, te->perpkg)) { 206 + pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n", 207 + table->name, table->perpkg, 208 + 
te->perpkg); 209 + return -1; 210 + } 211 + 212 + if (!is_same(table->metric_expr, te->metric_expr)) { 213 + pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n", 214 + table->name, table->metric_expr, 215 + te->metric_expr); 216 + return -1; 217 + } 218 + 219 + if (!is_same(table->metric_name, te->metric_name)) { 220 + pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n", 221 + table->name, table->metric_name, 222 + te->metric_name); 223 + return -1; 224 + } 225 + 226 + if (!is_same(table->deprecated, te->deprecated)) { 227 + pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n", 228 + table->name, table->deprecated, 229 + te->deprecated); 230 + return -1; 231 + } 232 + 233 + pr_debug("testing event table %s: pass\n", table->name); 234 + } 235 + 236 + if (!found) { 237 + pr_err("testing event table: could not find event %s\n", 238 + table->name); 239 + return -1; 240 + } 241 + } 242 + 243 + if (map_events != expected_events) { 244 + pr_err("testing event table: found %d, but expected %d\n", 245 + map_events, expected_events); 246 + return -1; 247 + } 248 + 249 + return 0; 250 + } 251 + 252 + static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases) 253 + { 254 + struct perf_pmu_alias *alias; 255 + 256 + list_for_each_entry(alias, aliases, list) 257 + if (!strcmp(test_event, alias->name)) 258 + return alias; 259 + 260 + return NULL; 261 + } 262 + 263 + /* Verify aliases are as expected */ 264 + static int __test__pmu_event_aliases(char *pmu_name, int *count) 265 + { 266 + struct perf_pmu_test_event *test; 267 + struct pmu_event *te; 268 + struct perf_pmu *pmu; 269 + LIST_HEAD(aliases); 270 + int res = 0; 271 + bool use_uncore_table; 272 + struct pmu_events_map *map = __test_pmu_get_events_map(); 273 + 274 + if (!map) 275 + return -1; 276 + 277 + if (is_pmu_core(pmu_name)) { 278 + test = &test_cpu_events[0]; 279 + use_uncore_table = false; 280 + } else { 281 + test = 
&test_uncore_events[0]; 282 + use_uncore_table = true; 283 + } 284 + 285 + pmu = zalloc(sizeof(*pmu)); 286 + if (!pmu) 287 + return -1; 288 + 289 + pmu->name = pmu_name; 290 + 291 + pmu_add_cpu_aliases_map(&aliases, pmu, map); 292 + 293 + for (te = &test->event; te->name; test++, te = &test->event) { 294 + struct perf_pmu_alias *alias = find_alias(te->name, &aliases); 295 + 296 + if (!alias) { 297 + bool uncore_match = pmu_uncore_alias_match(pmu_name, 298 + te->pmu); 299 + 300 + if (use_uncore_table && !uncore_match) { 301 + pr_debug3("testing aliases PMU %s: skip matching alias %s\n", 302 + pmu_name, te->name); 303 + continue; 304 + } 305 + 306 + pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n", 307 + pmu_name, te->name); 308 + res = -1; 309 + break; 310 + } 311 + 312 + if (!is_same(alias->desc, te->desc)) { 313 + pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n", 314 + pmu_name, alias->desc, te->desc); 315 + res = -1; 316 + break; 317 + } 318 + 319 + if (!is_same(alias->long_desc, test->alias_long_desc)) { 320 + pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n", 321 + pmu_name, alias->long_desc, 322 + test->alias_long_desc); 323 + res = -1; 324 + break; 325 + } 326 + 327 + if (!is_same(alias->str, test->alias_str)) { 328 + pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n", 329 + pmu_name, alias->str, test->alias_str); 330 + res = -1; 331 + break; 332 + } 333 + 334 + if (!is_same(alias->topic, te->topic)) { 335 + pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n", 336 + pmu_name, alias->topic, te->topic); 337 + res = -1; 338 + break; 339 + } 340 + 341 + (*count)++; 342 + pr_debug2("testing aliases PMU %s: matched event %s\n", 343 + pmu_name, alias->name); 344 + } 345 + 346 + free(pmu); 347 + return res; 348 + } 349 + 350 + int test__pmu_events(struct test *test __maybe_unused, 351 + int subtest __maybe_unused) 352 + { 353 + struct perf_pmu *pmu = NULL; 354 + 355 + if 
(__test_pmu_event_table()) 356 + return -1; 357 + 358 + while ((pmu = perf_pmu__scan(pmu)) != NULL) { 359 + int count = 0; 360 + 361 + if (list_empty(&pmu->format)) { 362 + pr_debug2("skipping testing PMU %s\n", pmu->name); 363 + continue; 364 + } 365 + 366 + if (__test__pmu_event_aliases(pmu->name, &count)) { 367 + pr_debug("testing PMU %s aliases: failed\n", pmu->name); 368 + return -1; 369 + } 370 + 371 + if (count == 0) 372 + pr_debug3("testing PMU %s aliases: no events to match\n", 373 + pmu->name); 374 + else 375 + pr_debug("testing PMU %s aliases: pass\n", pmu->name); 376 + } 377 + 378 + return 0; 379 + }
+5 -1
tools/perf/tests/sample-parsing.c
··· 151 151 if (type & PERF_SAMPLE_PHYS_ADDR) 152 152 COMP(phys_addr); 153 153 154 + if (type & PERF_SAMPLE_CGROUP) 155 + COMP(cgroup); 156 + 154 157 if (type & PERF_SAMPLE_AUX) { 155 158 COMP(aux_sample.size); 156 159 if (memcmp(s1->aux_sample.data, s2->aux_sample.data, ··· 233 230 .regs = regs, 234 231 }, 235 232 .phys_addr = 113, 233 + .cgroup = 114, 236 234 .aux_sample = { 237 235 .size = sizeof(aux_data), 238 236 .data = (void *)aux_data, ··· 340 336 * were added. Please actually update the test rather than just change 341 337 * the condition below. 342 338 */ 343 - if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { 339 + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { 344 340 pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); 345 341 return -1; 346 342 }
+1
tools/perf/tests/tests.h
··· 49 49 int test__perf_evsel__tp_sched_test(struct test *test, int subtest); 50 50 int test__syscall_openat_tp_fields(struct test *test, int subtest); 51 51 int test__pmu(struct test *test, int subtest); 52 + int test__pmu_events(struct test *test, int subtest); 52 53 int test__attr(struct test *test, int subtest); 53 54 int test__dso_data(struct test *test, int subtest); 54 55 int test__dso_data_cache(struct test *test, int subtest);
+105 -23
tools/perf/ui/browsers/hists.c
··· 677 677 return browser->title ? browser->title(browser, bf, size) : 0; 678 678 } 679 679 680 - static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, int key) 680 + static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, size_t size, int key) 681 681 { 682 682 switch (key) { 683 683 case K_TIMER: { ··· 703 703 ui_browser__warn_lost_events(&browser->b); 704 704 } 705 705 706 - hist_browser__title(browser, title, sizeof(title)); 706 + hist_browser__title(browser, title, size); 707 707 ui_browser__show_title(&browser->b, title); 708 708 break; 709 709 } ··· 764 764 if (ui_browser__show(&browser->b, title, "%s", help) < 0) 765 765 return -1; 766 766 767 - if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) 767 + if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key)) 768 768 goto out; 769 769 770 770 while (1) { 771 771 key = ui_browser__run(&browser->b, delay_secs); 772 772 773 - if (hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) 773 + if (hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key)) 774 774 break; 775 775 } 776 776 out: ··· 2465 2465 return 0; 2466 2466 } 2467 2467 2468 + static struct symbol *symbol__new_unresolved(u64 addr, struct map *map) 2469 + { 2470 + struct annotated_source *src; 2471 + struct symbol *sym; 2472 + char name[64]; 2473 + 2474 + snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr); 2475 + 2476 + sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name); 2477 + if (sym) { 2478 + src = symbol__hists(sym, 1); 2479 + if (!src) { 2480 + symbol__delete(sym); 2481 + return NULL; 2482 + } 2483 + 2484 + dso__insert_symbol(map->dso, sym); 2485 + } 2486 + 2487 + return sym; 2488 + } 2489 + 2468 2490 static int 2469 2491 add_annotate_opt(struct hist_browser *browser __maybe_unused, 2470 2492 struct popup_action *act, char 
**optstr, 2471 - struct map_symbol *ms) 2493 + struct map_symbol *ms, 2494 + u64 addr) 2472 2495 { 2473 - if (ms->sym == NULL || ms->map->dso->annotate_warned || 2474 - symbol__annotation(ms->sym)->src == NULL) 2496 + if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned) 2497 + return 0; 2498 + 2499 + if (!ms->sym) 2500 + ms->sym = symbol__new_unresolved(addr, ms->map); 2501 + 2502 + if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL) 2475 2503 return 0; 2476 2504 2477 2505 if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0) ··· 2992 2964 "s Switch to another data file in PWD\n" 2993 2965 "t Zoom into current Thread\n" 2994 2966 "V Verbose (DSO names in callchains, etc)\n" 2995 - "/ Filter symbol by name"; 2967 + "/ Filter symbol by name\n" 2968 + "0-9 Sort by event n in group"; 2996 2969 static const char top_help[] = HIST_BROWSER_HELP_COMMON 2997 2970 "P Print histograms to perf.hist.N\n" 2998 2971 "t Zoom into current Thread\n" ··· 3054 3025 * go to the next or previous 3055 3026 */ 3056 3027 goto out_free_stack; 3028 + case '0' ... 
'9': 3029 + if (!symbol_conf.event_group || 3030 + evsel->core.nr_members < 2) { 3031 + snprintf(buf, sizeof(buf), 3032 + "Sort by index only available with group events!"); 3033 + helpline = buf; 3034 + continue; 3035 + } 3036 + 3037 + if (key - '0' == symbol_conf.group_sort_idx) 3038 + continue; 3039 + 3040 + symbol_conf.group_sort_idx = key - '0'; 3041 + 3042 + if (symbol_conf.group_sort_idx >= evsel->core.nr_members) { 3043 + snprintf(buf, sizeof(buf), 3044 + "Max event group index to sort is %d (index from 0 to %d)", 3045 + evsel->core.nr_members - 1, 3046 + evsel->core.nr_members - 1); 3047 + helpline = buf; 3048 + continue; 3049 + } 3050 + 3051 + key = K_RELOAD; 3052 + goto out_free_stack; 3057 3053 case 'a': 3058 3054 if (!hists__has(hists, sym)) { 3059 3055 ui_browser__warning(&browser->b, delay_secs * 2, ··· 3087 3033 continue; 3088 3034 } 3089 3035 3090 - if (browser->selection == NULL || 3091 - browser->selection->sym == NULL || 3092 - browser->selection->map->dso->annotate_warned) 3093 - continue; 3094 - 3095 - if (symbol__annotation(browser->selection->sym)->src == NULL) { 3096 - ui_browser__warning(&browser->b, delay_secs * 2, 3097 - "No samples for the \"%s\" symbol.\n\n" 3098 - "Probably appeared just in a callchain", 3099 - browser->selection->sym->name); 3036 + if (!browser->selection || 3037 + !browser->selection->map || 3038 + !browser->selection->map->dso || 3039 + browser->selection->map->dso->annotate_warned) { 3100 3040 continue; 3101 3041 } 3102 3042 3103 - actions->ms.map = browser->selection->map; 3104 - actions->ms.sym = browser->selection->sym; 3043 + if (!browser->selection->sym) { 3044 + if (!browser->he_selection) 3045 + continue; 3046 + 3047 + if (sort__mode == SORT_MODE__BRANCH) { 3048 + bi = browser->he_selection->branch_info; 3049 + if (!bi || !bi->to.ms.map) 3050 + continue; 3051 + 3052 + actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map); 3053 + actions->ms.map = bi->to.ms.map; 3054 + } else { 3055 + 
actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip, 3056 + browser->selection->map); 3057 + actions->ms.map = browser->selection->map; 3058 + } 3059 + 3060 + if (!actions->ms.sym) 3061 + continue; 3062 + } else { 3063 + if (symbol__annotation(browser->selection->sym)->src == NULL) { 3064 + ui_browser__warning(&browser->b, delay_secs * 2, 3065 + "No samples for the \"%s\" symbol.\n\n" 3066 + "Probably appeared just in a callchain", 3067 + browser->selection->sym->name); 3068 + continue; 3069 + } 3070 + 3071 + actions->ms.map = browser->selection->map; 3072 + actions->ms.sym = browser->selection->sym; 3073 + } 3074 + 3105 3075 do_annotate(browser, actions); 3106 3076 continue; 3107 3077 case 'P': ··· 3297 3219 nr_options += add_annotate_opt(browser, 3298 3220 &actions[nr_options], 3299 3221 &options[nr_options], 3300 - &bi->from.ms); 3222 + &bi->from.ms, 3223 + bi->from.al_addr); 3301 3224 if (bi->to.ms.sym != bi->from.ms.sym) 3302 3225 nr_options += add_annotate_opt(browser, 3303 3226 &actions[nr_options], 3304 3227 &options[nr_options], 3305 - &bi->to.ms); 3228 + &bi->to.ms, 3229 + bi->to.al_addr); 3306 3230 } else { 3307 3231 nr_options += add_annotate_opt(browser, 3308 3232 &actions[nr_options], 3309 3233 &options[nr_options], 3310 - browser->selection); 3234 + browser->selection, 3235 + browser->he_selection->ip); 3311 3236 } 3312 3237 skip_annotation: 3313 3238 nr_options += add_thread_opt(browser, &actions[nr_options], ··· 3521 3440 pos = perf_evsel__prev(pos); 3522 3441 goto browse_hists; 3523 3442 case K_SWITCH_INPUT_DATA: 3443 + case K_RELOAD: 3524 3444 case 'q': 3525 3445 case CTRL('c'): 3526 3446 goto out;
+78 -15
tools/perf/ui/hist.c
··· 151 151 return 0; 152 152 } 153 153 154 + static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, 155 + hpp_field_fn get_field, int nr_members, 156 + u64 **fields_a, u64 **fields_b) 157 + { 158 + u64 *fa = calloc(nr_members, sizeof(*fa)), 159 + *fb = calloc(nr_members, sizeof(*fb)); 160 + struct hist_entry *pair; 161 + 162 + if (!fa || !fb) 163 + goto out_free; 164 + 165 + list_for_each_entry(pair, &a->pairs.head, pairs.node) { 166 + struct evsel *evsel = hists_to_evsel(pair->hists); 167 + fa[perf_evsel__group_idx(evsel)] = get_field(pair); 168 + } 169 + 170 + list_for_each_entry(pair, &b->pairs.head, pairs.node) { 171 + struct evsel *evsel = hists_to_evsel(pair->hists); 172 + fb[perf_evsel__group_idx(evsel)] = get_field(pair); 173 + } 174 + 175 + *fields_a = fa; 176 + *fields_b = fb; 177 + return 0; 178 + out_free: 179 + free(fa); 180 + free(fb); 181 + *fields_a = *fields_b = NULL; 182 + return -1; 183 + } 184 + 185 + static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, 186 + hpp_field_fn get_field, int idx) 187 + { 188 + struct evsel *evsel = hists_to_evsel(a->hists); 189 + u64 *fields_a, *fields_b; 190 + int cmp, nr_members, ret, i; 191 + 192 + cmp = field_cmp(get_field(a), get_field(b)); 193 + if (!perf_evsel__is_group_event(evsel)) 194 + return cmp; 195 + 196 + nr_members = evsel->core.nr_members; 197 + if (idx < 1 || idx >= nr_members) 198 + return cmp; 199 + 200 + ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); 201 + if (ret) { 202 + ret = cmp; 203 + goto out; 204 + } 205 + 206 + ret = field_cmp(fields_a[idx], fields_b[idx]); 207 + if (ret) 208 + goto out; 209 + 210 + for (i = 1; i < nr_members; i++) { 211 + if (i != idx) { 212 + ret = field_cmp(fields_a[i], fields_b[i]); 213 + if (ret) 214 + goto out; 215 + } 216 + } 217 + 218 + out: 219 + free(fields_a); 220 + free(fields_b); 221 + 222 + return ret; 223 + } 224 + 154 225 static int __hpp__sort(struct hist_entry *a, struct 
hist_entry *b, 155 226 hpp_field_fn get_field) 156 227 { 157 228 s64 ret; 158 229 int i, nr_members; 159 230 struct evsel *evsel; 160 - struct hist_entry *pair; 161 231 u64 *fields_a, *fields_b; 232 + 233 + if (symbol_conf.group_sort_idx && symbol_conf.event_group) { 234 + return __hpp__group_sort_idx(a, b, get_field, 235 + symbol_conf.group_sort_idx); 236 + } 162 237 163 238 ret = field_cmp(get_field(a), get_field(b)); 164 239 if (ret || !symbol_conf.event_group) ··· 244 169 return ret; 245 170 246 171 nr_members = evsel->core.nr_members; 247 - fields_a = calloc(nr_members, sizeof(*fields_a)); 248 - fields_b = calloc(nr_members, sizeof(*fields_b)); 249 - 250 - if (!fields_a || !fields_b) 172 + i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); 173 + if (i) 251 174 goto out; 252 - 253 - list_for_each_entry(pair, &a->pairs.head, pairs.node) { 254 - evsel = hists_to_evsel(pair->hists); 255 - fields_a[perf_evsel__group_idx(evsel)] = get_field(pair); 256 - } 257 - 258 - list_for_each_entry(pair, &b->pairs.head, pairs.node) { 259 - evsel = hists_to_evsel(pair->hists); 260 - fields_b[perf_evsel__group_idx(evsel)] = get_field(pair); 261 - } 262 175 263 176 for (i = 1; i < nr_members; i++) { 264 177 ret = field_cmp(fields_a[i], fields_b[i]);
+1
tools/perf/ui/keysyms.h
··· 25 25 #define K_ERROR -2 26 26 #define K_RESIZE -3 27 27 #define K_SWITCH_INPUT_DATA -4 28 + #define K_RELOAD -5 28 29 29 30 #endif /* _PERF_KEYSYMS_H_ */
+1
tools/perf/util/annotate.h
··· 74 74 #define ANNOTATION__CYCLES_WIDTH 6 75 75 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 76 76 #define ANNOTATION__AVG_IPC_WIDTH 36 77 + #define ANNOTATION_DUMMY_LEN 256 77 78 78 79 struct annotation_options { 79 80 bool hide_src_code,
+80
tools/perf/util/cgroup.c
··· 191 191 } 192 192 return 0; 193 193 } 194 + 195 + static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id, 196 + bool create, const char *path) 197 + { 198 + struct rb_node **p = &root->rb_node; 199 + struct rb_node *parent = NULL; 200 + struct cgroup *cgrp; 201 + 202 + while (*p != NULL) { 203 + parent = *p; 204 + cgrp = rb_entry(parent, struct cgroup, node); 205 + 206 + if (cgrp->id == id) 207 + return cgrp; 208 + 209 + if (cgrp->id < id) 210 + p = &(*p)->rb_left; 211 + else 212 + p = &(*p)->rb_right; 213 + } 214 + 215 + if (!create) 216 + return NULL; 217 + 218 + cgrp = malloc(sizeof(*cgrp)); 219 + if (cgrp == NULL) 220 + return NULL; 221 + 222 + cgrp->name = strdup(path); 223 + if (cgrp->name == NULL) { 224 + free(cgrp); 225 + return NULL; 226 + } 227 + 228 + cgrp->fd = -1; 229 + cgrp->id = id; 230 + refcount_set(&cgrp->refcnt, 1); 231 + 232 + rb_link_node(&cgrp->node, parent, p); 233 + rb_insert_color(&cgrp->node, root); 234 + 235 + return cgrp; 236 + } 237 + 238 + struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, 239 + const char *path) 240 + { 241 + struct cgroup *cgrp; 242 + 243 + down_write(&env->cgroups.lock); 244 + cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path); 245 + up_write(&env->cgroups.lock); 246 + return cgrp; 247 + } 248 + 249 + struct cgroup *cgroup__find(struct perf_env *env, uint64_t id) 250 + { 251 + struct cgroup *cgrp; 252 + 253 + down_read(&env->cgroups.lock); 254 + cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL); 255 + up_read(&env->cgroups.lock); 256 + return cgrp; 257 + } 258 + 259 + void perf_env__purge_cgroups(struct perf_env *env) 260 + { 261 + struct rb_node *node; 262 + struct cgroup *cgrp; 263 + 264 + down_write(&env->cgroups.lock); 265 + while (!RB_EMPTY_ROOT(&env->cgroups.tree)) { 266 + node = rb_first(&env->cgroups.tree); 267 + cgrp = rb_entry(node, struct cgroup, node); 268 + 269 + rb_erase(node, &env->cgroups.tree); 270 + cgroup__put(cgrp); 271 + } 272 + 
up_write(&env->cgroups.lock); 273 + }
+13 -4
tools/perf/util/cgroup.h
··· 3 3 #define __CGROUP_H__ 4 4 5 5 #include <linux/refcount.h> 6 + #include <linux/rbtree.h> 7 + #include "util/env.h" 6 8 7 9 struct option; 8 10 9 11 struct cgroup { 10 - char *name; 11 - int fd; 12 - refcount_t refcnt; 12 + struct rb_node node; 13 + u64 id; 14 + char *name; 15 + int fd; 16 + refcount_t refcnt; 13 17 }; 14 - 15 18 16 19 extern int nr_cgroups; /* number of explicit cgroups defined */ 17 20 ··· 28 25 void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); 29 26 30 27 int parse_cgroups(const struct option *opt, const char *str, int unset); 28 + 29 + struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, 30 + const char *path); 31 + struct cgroup *cgroup__find(struct perf_env *env, uint64_t id); 32 + 33 + void perf_env__purge_cgroups(struct perf_env *env); 31 34 32 35 #endif /* __CGROUP_H__ */
+5 -5
tools/perf/util/cpumap.c
··· 317 317 318 318 /* get the highest possible cpu number for a sparse allocation */ 319 319 ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); 320 - if (ret == PATH_MAX) { 320 + if (ret >= PATH_MAX) { 321 321 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 322 322 goto out; 323 323 } ··· 328 328 329 329 /* get the highest present cpu number for a sparse allocation */ 330 330 ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); 331 - if (ret == PATH_MAX) { 331 + if (ret >= PATH_MAX) { 332 332 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 333 333 goto out; 334 334 } ··· 356 356 357 357 /* get the highest possible cpu number for a sparse allocation */ 358 358 ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); 359 - if (ret == PATH_MAX) { 359 + if (ret >= PATH_MAX) { 360 360 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 361 361 goto out; 362 362 } ··· 441 441 return 0; 442 442 443 443 n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); 444 - if (n == PATH_MAX) { 444 + if (n >= PATH_MAX) { 445 445 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 446 446 return -1; 447 447 } ··· 456 456 continue; 457 457 458 458 n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); 459 - if (n == PATH_MAX) { 459 + if (n >= PATH_MAX) { 460 460 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 461 461 continue; 462 462 }
+21 -1
tools/perf/util/dsos.c
··· 26 26 return 0; 27 27 } 28 28 29 + static bool dso_id__empty(struct dso_id *id) 30 + { 31 + if (!id) 32 + return true; 33 + 34 + return !id->maj && !id->min && !id->ino && !id->ino_generation; 35 + } 36 + 37 + static void dso__inject_id(struct dso *dso, struct dso_id *id) 38 + { 39 + dso->id.maj = id->maj; 40 + dso->id.min = id->min; 41 + dso->id.ino = id->ino; 42 + dso->id.ino_generation = id->ino_generation; 43 + } 44 + 29 45 static int dso_id__cmp(struct dso_id *a, struct dso_id *b) 30 46 { 31 47 /* 32 48 * The second is always dso->id, so zeroes if not set, assume passing 33 49 * NULL for a means a zeroed id 34 50 */ 35 - if (a == NULL) 51 + if (dso_id__empty(a) || dso_id__empty(b)) 36 52 return 0; 37 53 38 54 return __dso_id__cmp(a, b); ··· 265 249 static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) 266 250 { 267 251 struct dso *dso = __dsos__find_id(dsos, name, id, false); 252 + 253 + if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id)) 254 + dso__inject_id(dso, id); 255 + 268 256 return dso ? dso : __dsos__addnew_id(dsos, name, id); 269 257 } 270 258
+2
tools/perf/util/env.c
··· 6 6 #include <linux/ctype.h> 7 7 #include <linux/zalloc.h> 8 8 #include "bpf-event.h" 9 + #include "cgroup.h" 9 10 #include <errno.h> 10 11 #include <sys/utsname.h> 11 12 #include <bpf/libbpf.h> ··· 169 168 int i; 170 169 171 170 perf_env__purge_bpf(env); 171 + perf_env__purge_cgroups(env); 172 172 zfree(&env->hostname); 173 173 zfree(&env->os_release); 174 174 zfree(&env->version);
+6
tools/perf/util/env.h
··· 88 88 u32 btfs_cnt; 89 89 } bpf_progs; 90 90 91 + /* same reason as above (for perf-top) */ 92 + struct { 93 + struct rw_semaphore lock; 94 + struct rb_root tree; 95 + } cgroups; 96 + 91 97 /* For fast cpu to numa node lookup via perf_env__numa_node */ 92 98 int *numa_map; 93 99 int nr_numa_map;
+35 -4
tools/perf/util/event.c
··· 54 54 [PERF_RECORD_NAMESPACES] = "NAMESPACES", 55 55 [PERF_RECORD_KSYMBOL] = "KSYMBOL", 56 56 [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", 57 + [PERF_RECORD_CGROUP] = "CGROUP", 57 58 [PERF_RECORD_HEADER_ATTR] = "ATTR", 58 59 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 59 60 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", ··· 181 180 return ret; 182 181 } 183 182 183 + size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) 184 + { 185 + return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n", 186 + event->cgroup.id, event->cgroup.path); 187 + } 188 + 184 189 int perf_event__process_comm(struct perf_tool *tool __maybe_unused, 185 190 union perf_event *event, 186 191 struct perf_sample *sample, ··· 201 194 struct machine *machine) 202 195 { 203 196 return machine__process_namespaces_event(machine, event, sample); 197 + } 198 + 199 + int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, 200 + union perf_event *event, 201 + struct perf_sample *sample, 202 + struct machine *machine) 203 + { 204 + return machine__process_cgroup_event(machine, event, sample); 204 205 } 205 206 206 207 int perf_event__process_lost(struct perf_tool *tool __maybe_unused, ··· 432 417 case PERF_RECORD_NAMESPACES: 433 418 ret += perf_event__fprintf_namespaces(event, fp); 434 419 break; 420 + case PERF_RECORD_CGROUP: 421 + ret += perf_event__fprintf_cgroup(event, fp); 422 + break; 435 423 case PERF_RECORD_MMAP2: 436 424 ret += perf_event__fprintf_mmap2(event, fp); 437 425 break; ··· 617 599 al->sym = map__find_symbol(al->map, al->addr); 618 600 } 619 601 620 - if (symbol_conf.sym_list && 621 - (!al->sym || !strlist__has_entry(symbol_conf.sym_list, 622 - al->sym->name))) { 623 - al->filtered |= (1 << HIST_FILTER__SYMBOL); 602 + if (symbol_conf.sym_list) { 603 + int ret = 0; 604 + char al_addr_str[32]; 605 + size_t sz = sizeof(al_addr_str); 606 + 607 + if (al->sym) { 608 + ret = strlist__has_entry(symbol_conf.sym_list, 609 + al->sym->name); 610 + } 611 + if (!(ret && 
al->sym)) { 612 + snprintf(al_addr_str, sz, "0x%"PRIx64, 613 + al->map->unmap_ip(al->map, al->sym->start)); 614 + ret = strlist__has_entry(symbol_conf.sym_list, 615 + al_addr_str); 616 + } 617 + if (!ret) 618 + al->filtered |= (1 << HIST_FILTER__SYMBOL); 624 619 } 625 620 626 621 return 0;
+6
tools/perf/util/event.h
··· 135 135 u32 raw_size; 136 136 u64 data_src; 137 137 u64 phys_addr; 138 + u64 cgroup; 138 139 u32 flags; 139 140 u16 insn_len; 140 141 u8 cpumode; ··· 323 322 union perf_event *event, 324 323 struct perf_sample *sample, 325 324 struct machine *machine); 325 + int perf_event__process_cgroup(struct perf_tool *tool, 326 + union perf_event *event, 327 + struct perf_sample *sample, 328 + struct machine *machine); 326 329 int perf_event__process_mmap(struct perf_tool *tool, 327 330 union perf_event *event, 328 331 struct perf_sample *sample, ··· 382 377 size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); 383 378 size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); 384 379 size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); 380 + size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); 385 381 size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); 386 382 size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); 387 383 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+17 -1
tools/perf/util/evsel.c
··· 1104 1104 if (opts->record_namespaces) 1105 1105 attr->namespaces = track; 1106 1106 1107 + if (opts->record_cgroup) { 1108 + attr->cgroup = track && !perf_missing_features.cgroup; 1109 + perf_evsel__set_sample_bit(evsel, CGROUP); 1110 + } 1111 + 1107 1112 if (opts->record_switch_events) 1108 1113 attr->context_switch = track; 1109 1114 ··· 1292 1287 perf_thread_map__put(evsel->core.threads); 1293 1288 zfree(&evsel->group_name); 1294 1289 zfree(&evsel->name); 1290 + zfree(&evsel->pmu_name); 1295 1291 perf_evsel__object.fini(evsel); 1296 1292 } 1297 1293 ··· 1794 1788 * Must probe features in the order they were added to the 1795 1789 * perf_event_attr interface. 1796 1790 */ 1797 - if (!perf_missing_features.branch_hw_idx && 1791 + if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { 1792 + perf_missing_features.cgroup = true; 1793 + pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); 1794 + goto out_close; 1795 + } else if (!perf_missing_features.branch_hw_idx && 1798 1796 (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { 1799 1797 perf_missing_features.branch_hw_idx = true; 1800 1798 pr_debug2("switching off branch HW index support\n"); ··· 2273 2263 data->phys_addr = 0; 2274 2264 if (type & PERF_SAMPLE_PHYS_ADDR) { 2275 2265 data->phys_addr = *array; 2266 + array++; 2267 + } 2268 + 2269 + data->cgroup = 0; 2270 + if (type & PERF_SAMPLE_CGROUP) { 2271 + data->cgroup = *array; 2276 2272 array++; 2277 2273 } 2278 2274
+1
tools/perf/util/evsel.h
··· 120 120 bool bpf; 121 121 bool aux_output; 122 122 bool branch_hw_idx; 123 + bool cgroup; 123 124 }; 124 125 125 126 extern struct perf_missing_features perf_missing_features;
+13
tools/perf/util/hist.c
··· 10 10 #include "mem-events.h" 11 11 #include "session.h" 12 12 #include "namespaces.h" 13 + #include "cgroup.h" 13 14 #include "sort.h" 14 15 #include "units.h" 15 16 #include "evlist.h" ··· 195 194 hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); 196 195 } 197 196 197 + hists__new_col_len(hists, HISTC_CGROUP, 6); 198 198 hists__new_col_len(hists, HISTC_CGROUP_ID, 20); 199 199 hists__new_col_len(hists, HISTC_CPU, 3); 200 200 hists__new_col_len(hists, HISTC_SOCKET, 6); ··· 224 222 225 223 if (h->trace_output) 226 224 hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); 225 + 226 + if (h->cgroup) { 227 + const char *cgrp_name = "unknown"; 228 + struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env, 229 + h->cgroup); 230 + if (cgrp != NULL) 231 + cgrp_name = cgrp->name; 232 + 233 + hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name)); 234 + } 227 235 } 228 236 229 237 void hists__output_recalc_col_len(struct hists *hists, int max_rows) ··· 703 691 .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, 704 692 .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, 705 693 }, 694 + .cgroup = sample->cgroup, 706 695 .ms = { 707 696 .maps = al->maps, 708 697 .map = al->map,
+2
tools/perf/util/hist.h
··· 38 38 HISTC_THREAD, 39 39 HISTC_COMM, 40 40 HISTC_CGROUP_ID, 41 + HISTC_CGROUP, 41 42 HISTC_PARENT, 42 43 HISTC_CPU, 43 44 HISTC_SOCKET, ··· 537 536 #define K_LEFT -1000 538 537 #define K_RIGHT -2000 539 538 #define K_SWITCH_INPUT_DATA -3000 539 + #define K_RELOAD -4000 540 540 #endif 541 541 542 542 unsigned int hists__sort_list_width(struct hists *hists);
+19
tools/perf/util/machine.c
··· 33 33 #include "asm/bug.h" 34 34 #include "bpf-event.h" 35 35 #include <internal/lib.h> // page_size 36 + #include "cgroup.h" 36 37 37 38 #include <linux/ctype.h> 38 39 #include <symbol/kallsyms.h> ··· 653 652 thread__put(thread); 654 653 655 654 return err; 655 + } 656 + 657 + int machine__process_cgroup_event(struct machine *machine, 658 + union perf_event *event, 659 + struct perf_sample *sample __maybe_unused) 660 + { 661 + struct cgroup *cgrp; 662 + 663 + if (dump_trace) 664 + perf_event__fprintf_cgroup(event, stdout); 665 + 666 + cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path); 667 + if (cgrp == NULL) 668 + return -ENOMEM; 669 + 670 + return 0; 656 671 } 657 672 658 673 int machine__process_lost_event(struct machine *machine __maybe_unused, ··· 1895 1878 ret = machine__process_mmap_event(machine, event, sample); break; 1896 1879 case PERF_RECORD_NAMESPACES: 1897 1880 ret = machine__process_namespaces_event(machine, event, sample); break; 1881 + case PERF_RECORD_CGROUP: 1882 + ret = machine__process_cgroup_event(machine, event, sample); break; 1898 1883 case PERF_RECORD_MMAP2: 1899 1884 ret = machine__process_mmap2_event(machine, event, sample); break; 1900 1885 case PERF_RECORD_FORK:
+3
tools/perf/util/machine.h
··· 128 128 int machine__process_namespaces_event(struct machine *machine, 129 129 union perf_event *event, 130 130 struct perf_sample *sample); 131 + int machine__process_cgroup_event(struct machine *machine, 132 + union perf_event *event, 133 + struct perf_sample *sample); 131 134 int machine__process_mmap_event(struct machine *machine, union perf_event *event, 132 135 struct perf_sample *sample); 133 136 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+29 -18
tools/perf/util/metricgroup.c
··· 95 95 static struct evsel *find_evsel_group(struct evlist *perf_evlist, 96 96 const char **ids, 97 97 int idnum, 98 - struct evsel **metric_events) 98 + struct evsel **metric_events, 99 + bool *evlist_used) 99 100 { 100 101 struct evsel *ev; 101 - int i = 0; 102 + int i = 0, j = 0; 102 103 bool leader_found; 103 104 104 105 evlist__for_each_entry (perf_evlist, ev) { 106 + if (evlist_used[j++]) 107 + continue; 105 108 if (!strcmp(ev->name, ids[i])) { 106 109 if (!metric_events[i]) 107 110 metric_events[i] = ev; ··· 112 109 if (i == idnum) 113 110 break; 114 111 } else { 115 - if (i + 1 == idnum) { 116 - /* Discard the whole match and start again */ 117 - i = 0; 118 - memset(metric_events, 0, 119 - sizeof(struct evsel *) * idnum); 120 - continue; 121 - } 112 + /* Discard the whole match and start again */ 113 + i = 0; 114 + memset(metric_events, 0, 115 + sizeof(struct evsel *) * idnum); 122 116 123 - if (!strcmp(ev->name, ids[i])) 124 - metric_events[i] = ev; 125 - else { 126 - /* Discard the whole match and start again */ 127 - i = 0; 128 - memset(metric_events, 0, 129 - sizeof(struct evsel *) * idnum); 130 - continue; 117 + if (!strcmp(ev->name, ids[i])) { 118 + if (!metric_events[i]) 119 + metric_events[i] = ev; 120 + i++; 121 + if (i == idnum) 122 + break; 131 123 } 132 124 } 133 125 } ··· 144 146 !strcmp(ev->name, metric_events[i]->name)) { 145 147 ev->metric_leader = metric_events[i]; 146 148 } 149 + j++; 147 150 } 151 + ev = metric_events[i]; 152 + evlist_used[ev->idx] = true; 148 153 } 149 154 150 155 return metric_events[0]; ··· 163 162 int ret = 0; 164 163 struct egroup *eg; 165 164 struct evsel *evsel; 165 + bool *evlist_used; 166 + 167 + evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); 168 + if (!evlist_used) { 169 + ret = -ENOMEM; 170 + return ret; 171 + } 166 172 167 173 list_for_each_entry (eg, groups, nd) { 168 174 struct evsel **metric_events; ··· 180 172 break; 181 173 } 182 174 evsel = find_evsel_group(perf_evlist, eg->ids, 
eg->idnum, 183 - metric_events); 175 + metric_events, evlist_used); 184 176 if (!evsel) { 185 177 pr_debug("Cannot resolve %s: %s\n", 186 178 eg->metric_name, eg->metric_expr); ··· 204 196 expr->metric_events = metric_events; 205 197 list_add(&expr->nd, &me->head); 206 198 } 199 + 200 + free(evlist_used); 201 + 207 202 return ret; 208 203 } 209 204
+3 -3
tools/perf/util/parse-events.c
··· 1449 1449 evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, 1450 1450 auto_merge_stats, NULL); 1451 1451 if (evsel) { 1452 - evsel->pmu_name = name; 1452 + evsel->pmu_name = name ? strdup(name) : NULL; 1453 1453 evsel->use_uncore_alias = use_uncore_alias; 1454 1454 return 0; 1455 1455 } else { ··· 1497 1497 evsel->snapshot = info.snapshot; 1498 1498 evsel->metric_expr = info.metric_expr; 1499 1499 evsel->metric_name = info.metric_name; 1500 - evsel->pmu_name = name; 1500 + evsel->pmu_name = name ? strdup(name) : NULL; 1501 1501 evsel->use_uncore_alias = use_uncore_alias; 1502 1502 evsel->percore = config_term_percore(&evsel->config_terms); 1503 1503 } ··· 1547 1547 if (!parse_events_add_pmu(parse_state, list, 1548 1548 pmu->name, head, 1549 1549 true, true)) { 1550 - pr_debug("%s -> %s/%s/\n", config, 1550 + pr_debug("%s -> %s/%s/\n", str, 1551 1551 pmu->name, alias->str); 1552 1552 ok++; 1553 1553 }
+7 -5
tools/perf/util/parse-events.l
··· 342 342 * Because the prefix cycles is mixed up with cpu-cycles. 343 343 * loads and stores are mixed up with cache event 344 344 */ 345 - cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 346 - cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 347 - mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 348 - mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 349 - topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 345 + cycles-ct | 346 + cycles-t | 347 + mem-loads | 348 + mem-stores | 349 + topdown-[a-z-]+ | 350 + tx-capacity-[a-z-]+ | 351 + el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } 350 352 351 353 L1-dcache|l1-d|l1d|L1-data | 352 354 L1-icache|l1-i|l1i|L1-instruction |
+2
tools/perf/util/perf_event_attr_fprintf.c
··· 35 35 bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), 36 36 bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), 37 37 bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), 38 + bit_name(CGROUP), 38 39 { .name = NULL, } 39 40 }; 40 41 #undef bit_name ··· 133 132 PRINT_ATTRf(ksymbol, p_unsigned); 134 133 PRINT_ATTRf(bpf_event, p_unsigned); 135 134 PRINT_ATTRf(aux_output, p_unsigned); 135 + PRINT_ATTRf(cgroup, p_unsigned); 136 136 137 137 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); 138 138 PRINT_ATTRf(bp_type, p_unsigned);
+24 -15
tools/perf/util/pmu.c
··· 21 21 #include "pmu.h" 22 22 #include "parse-events.h" 23 23 #include "header.h" 24 - #include "pmu-events/pmu-events.h" 25 24 #include "string2.h" 26 25 #include "strbuf.h" 27 26 #include "fncache.h" ··· 698 699 return map; 699 700 } 700 701 701 - static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) 702 + bool pmu_uncore_alias_match(const char *pmu_name, const char *name) 702 703 { 703 704 char *tmp = NULL, *tok, *str; 704 705 bool res; ··· 743 744 * to the current running CPU. Then, add all PMU events from that table 744 745 * as aliases. 745 746 */ 746 - static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) 747 + void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, 748 + struct pmu_events_map *map) 747 749 { 748 750 int i; 749 - struct pmu_events_map *map; 750 751 const char *name = pmu->name; 751 - 752 - map = perf_pmu__find_map(pmu); 753 - if (!map) 754 - return; 755 - 756 752 /* 757 753 * Found a matching PMU events table. 
Create aliases 758 754 */ ··· 780 786 (char *)pe->metric_name, 781 787 (char *)pe->deprecated); 782 788 } 789 + } 790 + 791 + static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) 792 + { 793 + struct pmu_events_map *map; 794 + 795 + map = perf_pmu__find_map(pmu); 796 + if (!map) 797 + return; 798 + 799 + pmu_add_cpu_aliases_map(head, pmu, map); 783 800 } 784 801 785 802 struct perf_event_attr * __weak ··· 984 979 struct parse_events_term *t; 985 980 986 981 list_for_each_entry(t, head_terms, list) { 987 - if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { 988 - if (!strcmp(t->config, term->config)) { 989 - t->used = true; 990 - *value = t->val.num; 991 - return 0; 992 - } 982 + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM && 983 + t->config && !strcmp(t->config, term->config)) { 984 + t->used = true; 985 + *value = t->val.num; 986 + return 0; 993 987 } 994 988 } 995 989 ··· 1397 1393 column += n; 1398 1394 s = skip_spaces(s); 1399 1395 } 1396 + } 1397 + 1398 + bool is_pmu_core(const char *name) 1399 + { 1400 + return !strcmp(name, "cpu") || is_arm_pmu_core(name); 1400 1401 } 1401 1402 1402 1403 void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+5
tools/perf/util/pmu.h
··· 7 7 #include <linux/perf_event.h> 8 8 #include <stdbool.h> 9 9 #include "parse-events.h" 10 + #include "pmu-events/pmu-events.h" 10 11 11 12 struct perf_evsel_config_term; 12 13 ··· 88 87 89 88 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); 90 89 90 + bool is_pmu_core(const char *name); 91 91 void print_pmu_events(const char *event_glob, bool name_only, bool quiet, 92 92 bool long_desc, bool details_flag, 93 93 bool deprecated); ··· 99 97 int perf_pmu__test(void); 100 98 101 99 struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); 100 + void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, 101 + struct pmu_events_map *map); 102 102 103 103 struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); 104 + bool pmu_uncore_alias_match(const char *pmu_name, const char *name); 104 105 105 106 int perf_pmu__convert_scale(const char *scale, char **end, double *sval); 106 107
+1
tools/perf/util/python-ext-sources
··· 34 34 util/symbol_fprintf.c 35 35 util/units.c 36 36 util/affinity.c 37 + util/rwsem.c
+1
tools/perf/util/record.h
··· 34 34 bool auxtrace_snapshot_on_exit; 35 35 bool auxtrace_sample_mode; 36 36 bool record_namespaces; 37 + bool record_cgroup; 37 38 bool record_switch_events; 38 39 bool all_kernel; 39 40 bool all_user;
+3
tools/perf/util/scripting-engines/trace-event-python.c
··· 694 694 695 695 bf[0] = 0; 696 696 697 + if (!regs || !regs->regs) 698 + return 0; 699 + 697 700 for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { 698 701 u64 val = regs->regs[i++]; 699 702
+4
tools/perf/util/session.c
··· 471 471 tool->comm = process_event_stub; 472 472 if (tool->namespaces == NULL) 473 473 tool->namespaces = process_event_stub; 474 + if (tool->cgroup == NULL) 475 + tool->cgroup = process_event_stub; 474 476 if (tool->fork == NULL) 475 477 tool->fork = process_event_stub; 476 478 if (tool->exit == NULL) ··· 1438 1436 return tool->comm(tool, event, sample, machine); 1439 1437 case PERF_RECORD_NAMESPACES: 1440 1438 return tool->namespaces(tool, event, sample, machine); 1439 + case PERF_RECORD_CGROUP: 1440 + return tool->cgroup(tool, event, sample, machine); 1441 1441 case PERF_RECORD_FORK: 1442 1442 return tool->fork(tool, event, sample, machine); 1443 1443 case PERF_RECORD_EXIT:
+1 -1
tools/perf/util/setup.py
··· 3 3 from re import sub 4 4 5 5 cc = getenv("CC") 6 - cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() 6 + cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() 7 7 8 8 def clang_has_option(option): 9 9 return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+41 -2
tools/perf/util/sort.c
··· 12 12 #include "cacheline.h" 13 13 #include "comm.h" 14 14 #include "map.h" 15 + #include "maps.h" 15 16 #include "symbol.h" 16 17 #include "map_symbol.h" 17 18 #include "branch.h" ··· 26 25 #include "mem-events.h" 27 26 #include "annotate.h" 28 27 #include "time-utils.h" 28 + #include "cgroup.h" 29 + #include "machine.h" 29 30 #include <linux/kernel.h> 30 31 #include <linux/string.h> 31 32 ··· 637 634 .se_width_idx = HISTC_CGROUP_ID, 638 635 }; 639 636 637 + /* --sort cgroup */ 638 + 639 + static int64_t 640 + sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right) 641 + { 642 + return right->cgroup - left->cgroup; 643 + } 644 + 645 + static int hist_entry__cgroup_snprintf(struct hist_entry *he, 646 + char *bf, size_t size, 647 + unsigned int width __maybe_unused) 648 + { 649 + const char *cgrp_name = "N/A"; 650 + 651 + if (he->cgroup) { 652 + struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env, 653 + he->cgroup); 654 + if (cgrp != NULL) 655 + cgrp_name = cgrp->name; 656 + else 657 + cgrp_name = "unknown"; 658 + } 659 + 660 + return repsep_snprintf(bf, size, "%s", cgrp_name); 661 + } 662 + 663 + struct sort_entry sort_cgroup = { 664 + .se_header = "Cgroup", 665 + .se_cmp = sort__cgroup_cmp, 666 + .se_snprintf = hist_entry__cgroup_snprintf, 667 + .se_width_idx = HISTC_CGROUP, 668 + }; 669 + 640 670 /* --sort socket */ 641 671 642 672 static int64_t ··· 905 869 if (he->branch_info) { 906 870 struct addr_map_symbol *from = &he->branch_info->from; 907 871 908 - return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width); 872 + return _hist_entry__sym_snprintf(&from->ms, from->al_addr, 873 + he->level, bf, size, width); 909 874 } 910 875 911 876 return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); ··· 918 881 if (he->branch_info) { 919 882 struct addr_map_symbol *to = &he->branch_info->to; 920 883 921 - return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width); 884 + return 
_hist_entry__sym_snprintf(&to->ms, to->al_addr, 885 + he->level, bf, size, width); 922 886 } 923 887 924 888 return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); ··· 1696 1658 DIM(SORT_TRACE, "trace", sort_trace), 1697 1659 DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), 1698 1660 DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), 1661 + DIM(SORT_CGROUP, "cgroup", sort_cgroup), 1699 1662 DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), 1700 1663 DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), 1701 1664 DIM(SORT_TIME, "time", sort_time),
+2
tools/perf/util/sort.h
··· 101 101 struct thread *thread; 102 102 struct comm *comm; 103 103 struct namespace_id cgroup_id; 104 + u64 cgroup; 104 105 u64 ip; 105 106 u64 transaction; 106 107 s32 socket; ··· 225 224 SORT_TRACE, 226 225 SORT_SYM_SIZE, 227 226 SORT_DSO_SIZE, 227 + SORT_CGROUP, 228 228 SORT_CGROUP_ID, 229 229 SORT_SYM_IPC_NULL, 230 230 SORT_TIME,
+3 -3
tools/perf/util/stat-display.c
··· 115 115 fprintf(config->output, "S%d-D%d-C%*d%s", 116 116 cpu_map__id_to_socket(id), 117 117 cpu_map__id_to_die(id), 118 - config->csv_output ? 0 : -5, 118 + config->csv_output ? 0 : -3, 119 119 cpu_map__id_to_cpu(id), config->csv_sep); 120 120 } else { 121 - fprintf(config->output, "CPU%*d%s ", 122 - config->csv_output ? 0 : -5, 121 + fprintf(config->output, "CPU%*d%s", 122 + config->csv_output ? 0 : -7, 123 123 evsel__cpus(evsel)->map[id], 124 124 config->csv_sep); 125 125 }
+8 -2
tools/perf/util/symbol-elf.c
··· 704 704 close(ss->fd); 705 705 } 706 706 707 - bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) 707 + bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) 708 708 { 709 - return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL; 709 + /* 710 + * Usually vmlinux is an ELF file with type ET_EXEC for most 711 + * architectures; except Arm64 kernel is linked with option 712 + * '-share', so need to check type ET_DYN. 713 + */ 714 + return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL || 715 + ehdr.e_type == ET_DYN; 710 716 } 711 717 712 718 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+1
tools/perf/util/symbol_conf.h
··· 73 73 const char *symfs; 74 74 int res_sample; 75 75 int pad_output_len_dso; 76 + int group_sort_idx; 76 77 }; 77 78 78 79 extern struct symbol_conf symbol_conf;
+130
tools/perf/util/synthetic-events.c
··· 16 16 #include "util/synthetic-events.h" 17 17 #include "util/target.h" 18 18 #include "util/time-utils.h" 19 + #include "util/cgroup.h" 19 20 #include <linux/bitops.h> 20 21 #include <linux/kernel.h> 21 22 #include <linux/string.h> ··· 414 413 fclose(fp); 415 414 return rc; 416 415 } 416 + 417 + #ifdef HAVE_FILE_HANDLE 418 + static int perf_event__synthesize_cgroup(struct perf_tool *tool, 419 + union perf_event *event, 420 + char *path, size_t mount_len, 421 + perf_event__handler_t process, 422 + struct machine *machine) 423 + { 424 + size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path); 425 + size_t path_len = strlen(path) - mount_len + 1; 426 + struct { 427 + struct file_handle fh; 428 + uint64_t cgroup_id; 429 + } handle; 430 + int mount_id; 431 + 432 + while (path_len % sizeof(u64)) 433 + path[mount_len + path_len++] = '\0'; 434 + 435 + memset(&event->cgroup, 0, event_size); 436 + 437 + event->cgroup.header.type = PERF_RECORD_CGROUP; 438 + event->cgroup.header.size = event_size + path_len + machine->id_hdr_size; 439 + 440 + handle.fh.handle_bytes = sizeof(handle.cgroup_id); 441 + if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) { 442 + pr_debug("stat failed: %s\n", path); 443 + return -1; 444 + } 445 + 446 + event->cgroup.id = handle.cgroup_id; 447 + strncpy(event->cgroup.path, path + mount_len, path_len); 448 + memset(event->cgroup.path + path_len, 0, machine->id_hdr_size); 449 + 450 + if (perf_tool__process_synth_event(tool, event, machine, process) < 0) { 451 + pr_debug("process synth event failed\n"); 452 + return -1; 453 + } 454 + 455 + return 0; 456 + } 457 + 458 + static int perf_event__walk_cgroup_tree(struct perf_tool *tool, 459 + union perf_event *event, 460 + char *path, size_t mount_len, 461 + perf_event__handler_t process, 462 + struct machine *machine) 463 + { 464 + size_t pos = strlen(path); 465 + DIR *d; 466 + struct dirent *dent; 467 + int ret = 0; 468 + 469 + if (perf_event__synthesize_cgroup(tool, 
event, path, mount_len, 470 + process, machine) < 0) 471 + return -1; 472 + 473 + d = opendir(path); 474 + if (d == NULL) { 475 + pr_debug("failed to open directory: %s\n", path); 476 + return -1; 477 + } 478 + 479 + while ((dent = readdir(d)) != NULL) { 480 + if (dent->d_type != DT_DIR) 481 + continue; 482 + if (!strcmp(dent->d_name, ".") || 483 + !strcmp(dent->d_name, "..")) 484 + continue; 485 + 486 + /* any sane path should be less than PATH_MAX */ 487 + if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX) 488 + continue; 489 + 490 + if (path[pos - 1] != '/') 491 + strcat(path, "/"); 492 + strcat(path, dent->d_name); 493 + 494 + ret = perf_event__walk_cgroup_tree(tool, event, path, 495 + mount_len, process, machine); 496 + if (ret < 0) 497 + break; 498 + 499 + path[pos] = '\0'; 500 + } 501 + 502 + closedir(d); 503 + return ret; 504 + } 505 + 506 + int perf_event__synthesize_cgroups(struct perf_tool *tool, 507 + perf_event__handler_t process, 508 + struct machine *machine) 509 + { 510 + union perf_event event; 511 + char cgrp_root[PATH_MAX]; 512 + size_t mount_len; /* length of mount point in the path */ 513 + 514 + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { 515 + pr_debug("cannot find cgroup mount point\n"); 516 + return -1; 517 + } 518 + 519 + mount_len = strlen(cgrp_root); 520 + /* make sure the path starts with a slash (after mount point) */ 521 + strcat(cgrp_root, "/"); 522 + 523 + if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len, 524 + process, machine) < 0) 525 + return -1; 526 + 527 + return 0; 528 + } 529 + #else 530 + int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, 531 + perf_event__handler_t process __maybe_unused, 532 + struct machine *machine __maybe_unused) 533 + { 534 + return -1; 535 + } 536 + #endif 417 537 418 538 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, 419 539 struct machine *machine) ··· 1352 1230 if (type & 
PERF_SAMPLE_PHYS_ADDR) 1353 1231 result += sizeof(u64); 1354 1232 1233 + if (type & PERF_SAMPLE_CGROUP) 1234 + result += sizeof(u64); 1235 + 1355 1236 if (type & PERF_SAMPLE_AUX) { 1356 1237 result += sizeof(u64); 1357 1238 result += sample->aux_sample.size; ··· 1526 1401 1527 1402 if (type & PERF_SAMPLE_PHYS_ADDR) { 1528 1403 *array = sample->phys_addr; 1404 + array++; 1405 + } 1406 + 1407 + if (type & PERF_SAMPLE_CGROUP) { 1408 + *array = sample->cgroup; 1529 1409 array++; 1530 1410 } 1531 1411
+1
tools/perf/util/synthetic-events.h
··· 45 45 int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); 46 46 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); 47 47 int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); 48 + int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); 48 49 int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); 49 50 int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); 50 51 int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+2
tools/perf/util/tool.h
··· 46 46 mmap2, 47 47 comm, 48 48 namespaces, 49 + cgroup, 49 50 fork, 50 51 exit, 51 52 lost, ··· 79 78 bool ordered_events; 80 79 bool ordering_requires_timestamps; 81 80 bool namespace_events; 81 + bool cgroup_events; 82 82 bool no_warn; 83 83 enum show_feature_header show_feat_hdr; 84 84 };