Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf mem/c2c amd: Add ldlat support

'perf mem/c2c' uses IBS Op PMU on AMD platforms.

IBS Op PMU on Zen5 uarch has added support for Load Latency filtering.

Implement 'perf mem/c2c' --ldlat using IBS Op Load Latency filtering
capability.

Some subtle differences between AMD and other architectures:

o --ldlat is disabled by default on AMD

o Supported values are 128 to 2048 (both inclusive).

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20250429035938.1301-4-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ravi Bangoria and committed by
Arnaldo Carvalho de Melo
fa1332a8 fc481adc

+83 -10
+9 -2
tools/perf/Documentation/perf-c2c.txt
··· 54 54 55 55 -l:: 56 56 --ldlat:: 57 - Configure mem-loads latency. Supported on Intel and Arm64 processors 58 - only. Ignored on other archs. 57 + Configure mem-loads latency. Supported on Intel, Arm64 and some AMD 58 + processors. Ignored on other archs. 59 + 60 + On supported AMD processors: 61 + - /sys/bus/event_source/devices/ibs_op/caps/ldlat file contains '1'. 62 + - Supported latency values are 128 to 2048 (both inclusive). 63 + - Latency value which is a multiple of 128 incurs a little less profiling 64 + overhead compared to other values. 65 + - Load latency filtering is disabled by default. 59 66 60 67 -k:: 61 68 --all-kernel::
+11 -2
tools/perf/Documentation/perf-mem.txt
··· 28 28 Due to the statistical nature of SPE sampling, not every memory operation will 29 29 be sampled. 30 30 31 + On AMD this uses IBS Op PMU to sample load-store operations. 32 + 31 33 COMMON OPTIONS 32 34 -------------- 33 35 -f:: ··· 69 67 Configure all used events to run in user space. 70 68 71 69 --ldlat <n>:: 72 - Specify desired latency for loads event. Supported on Intel and Arm64 73 - processors only. Ignored on other archs. 70 + Specify desired latency for loads event. Supported on Intel, Arm64 and 71 + some AMD processors. Ignored on other archs. 72 + 73 + On supported AMD processors: 74 + - /sys/bus/event_source/devices/ibs_op/caps/ldlat file contains '1'. 75 + - Supported latency values are 128 to 2048 (both inclusive). 76 + - Latency value which is a multiple of 128 incurs a little less profiling 77 + overhead compared to other values. 78 + - Load latency filtering is disabled by default. 74 79 75 80 REPORT OPTIONS 76 81 --------------
+6
tools/perf/arch/x86/util/mem-events.c
··· 26 26 E(NULL, NULL, NULL, false, 0), 27 27 E("mem-ldst", "%s//", NULL, false, 0), 28 28 }; 29 + 30 + struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = { 31 + E(NULL, NULL, NULL, false, 0), 32 + E(NULL, NULL, NULL, false, 0), 33 + E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0), 34 + };
+1
tools/perf/arch/x86/util/mem-events.h
··· 6 6 extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX]; 7 7 8 8 extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX]; 9 + extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX]; 9 10 10 11 #endif /* _X86_MEM_EVENTS_H */
+17 -3
tools/perf/arch/x86/util/pmu.c
··· 18 18 #include "mem-events.h" 19 19 #include "util/env.h" 20 20 21 - void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) 21 + void perf_pmu__arch_init(struct perf_pmu *pmu) 22 22 { 23 + struct perf_pmu_caps *ldlat_cap; 24 + 23 25 #ifdef HAVE_AUXTRACE_SUPPORT 24 26 if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) { 25 27 pmu->auxtrace = true; ··· 35 33 #endif 36 34 37 35 if (x86__is_amd_cpu()) { 38 - if (!strcmp(pmu->name, "ibs_op")) 39 - pmu->mem_events = perf_mem_events_amd; 36 + if (strcmp(pmu->name, "ibs_op")) 37 + return; 38 + 39 + pmu->mem_events = perf_mem_events_amd; 40 + 41 + if (!perf_pmu__caps_parse(pmu)) 42 + return; 43 + 44 + ldlat_cap = perf_pmu__get_cap(pmu, "ldlat"); 45 + if (!ldlat_cap || strcmp(ldlat_cap->value, "1")) 46 + return; 47 + 48 + perf_mem_events__loads_ldlat = 0; 49 + pmu->mem_events = perf_mem_events_amd_ldlat; 40 50 } else if (pmu->is_core) { 41 51 if (perf_pmu__have_event(pmu, "mem-loads-aux")) 42 52 pmu->mem_events = perf_mem_events_intel_aux;
+26 -3
tools/perf/tests/shell/test_data_symbol.sh
··· 54 54 55 55 echo "Recording workload..." 56 56 57 - # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support 58 - # user/kernel filtering and per-process monitoring, spin program on 59 - # specific CPU and test in per-CPU mode. 60 57 is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo) 61 58 if (($is_amd >= 1)); then 59 + mem_events="$(perf mem record -v -e list 2>&1)" 60 + if ! [[ "$mem_events" =~ ^mem\-ldst.*ibs_op/(.*)/.*available ]]; then 61 + echo "ERROR: mem-ldst event is not matching" 62 + exit 1 63 + fi 64 + 65 + # --ldlat on AMD: 66 + # o Zen4 and earlier uarch does not support ldlat 67 + # o Even on supported platforms, it's disabled (--ldlat=0) by default. 68 + ldlat=${BASH_REMATCH[1]} 69 + if [[ -n $ldlat ]]; then 70 + if ! [[ "$ldlat" =~ ldlat=0 ]]; then 71 + echo "ERROR: ldlat not initialized to 0?" 72 + exit 1 73 + fi 74 + 75 + mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)" 76 + if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then 77 + echo "ERROR: --ldlat not honored?" 78 + exit 1 79 + fi 80 + fi 81 + 82 + # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't 83 + # support user/kernel filtering and per-process monitoring on older 84 + # kernels, spin program on specific CPU and test in per-CPU mode. 62 85 perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" 63 86 else 64 87 perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}"
+11
tools/perf/util/pmu.c
··· 2259 2259 } 2260 2260 } 2261 2261 2262 + struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name) 2263 + { 2264 + struct perf_pmu_caps *caps; 2265 + 2266 + list_for_each_entry(caps, &pmu->caps, list) { 2267 + if (!strcmp(caps->name, name)) 2268 + return caps; 2269 + } 2270 + return NULL; 2271 + } 2272 + 2262 2273 /* 2263 2274 * Reading/parsing the given pmu capabilities, which should be located at: 2264 2275 * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+2
tools/perf/util/pmu.h
··· 277 277 278 278 int perf_pmu__convert_scale(const char *scale, char **end, double *sval); 279 279 280 + struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name); 281 + 280 282 int perf_pmu__caps_parse(struct perf_pmu *pmu); 281 283 282 284 void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,