Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf script ibs: Support new IBS bits in raw trace dump

Interpret an additional set of IBS register bits when doing a
perf report/script raw dump.

IBS Op PMU ex:

$ sudo ./perf record -c 130 -a -e ibs_op/l3missonly=1/ --raw-samples
$ sudo ./perf report -D
...
ibs_op_ctl: 0000004500070008 MaxCnt 128 L3MissOnly 1 En 1
Val 1 CntCtl 0=cycles CurCnt 69
ibs_op_data: 0000000000710002 CompToRetCtr 2 TagToRetCtr 113
BrnRet 0 RipInvalid 0 BrnFuse 0 Microcode 0
ibs_op_data2: 0000000000000002 CacheHitSt 0=M-state RmtNode 0
DataSrc 2=A peer cache in a near CCX
ibs_op_data3: 000000681d1700a1 LdOp 1 StOp 0 DcL1TlbMiss 0
DcL2TlbMiss 0 DcL1TlbHit2M 0 DcL1TlbHit1G 1 DcL2TlbHit2M 0
DcMiss 1 DcMisAcc 0 DcWcMemAcc 0 DcUcMemAcc 0 DcLockedOp 0
DcMissNoMabAlloc 1 DcLinAddrValid 1 DcPhyAddrValid 1
DcL2TlbHit1G 0 L2Miss 1 SwPf 0 OpMemWidth 8 bytes
OpDcMissOpenMemReqs 7 DcMissLat 104 TlbRefillLat 0

IBS Fetch PMU ex:
$ sudo ./perf record -c 130 -a -e ibs_fetch/l3missonly=1/ --raw-samples
$ sudo ./perf report -D
...
ibs_fetch_ctl: 3c1f00c700080008 MaxCnt 128 Cnt 128 Lat 199
En 1 Val 1 Comp 1 IcMiss 1 PhyAddrValid 1 L1TlbPgSz 4KB
L1TlbMiss 0 L2TlbMiss 0 RandEn 0 L2Miss 1 L3MissOnly 1
FetchOcMiss 1 FetchL3Miss 1

With the DataSrc extensions, the source of data can be decoded among:
- Local L3 or other L1/L2 in CCX.
- A peer cache in a near CCX.
- Data returned from DRAM.
- A peer cache in a far CCX.
- DRAM address map with "long latency" bit set.
- Data returned from MMIO/Config/PCI/APIC.
- Extension Memory (S-Link, GenZ, etc. - identified by the CS target
and/or address map at DF's choice).
- Peer Agent Memory.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <rrichter@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: like.xu.linux@gmail.com
Cc: x86@kernel.org
Link: https://lore.kernel.org/r/20220604044519.594-9-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ravi Bangoria and committed by
Arnaldo Carvalho de Melo
0429796e c1f4f92b

+58 -6
+58 -6
tools/perf/util/amd-sample-raw.c
··· 18 18 #include "pmu-events/pmu-events.h" 19 19 20 20 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; 21 + static bool zen4_ibs_extensions; 21 22 22 23 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) 23 24 { ··· 40 39 }; 41 40 const char *ic_miss_str = NULL; 42 41 const char *l1tlb_pgsz_str = NULL; 42 + char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = ""; 43 43 44 44 if (cpu_family == 0x19 && cpu_model < 0x10) { 45 45 /* ··· 55 53 ic_miss_str = ic_miss_strs[reg.ic_miss]; 56 54 } 57 55 56 + if (zen4_ibs_extensions) { 57 + snprintf(l3_miss_str, sizeof(l3_miss_str), 58 + " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d", 59 + reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); 60 + } 61 + 58 62 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " 59 - "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", 63 + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n", 60 64 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, 61 65 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", 62 66 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, 63 - reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); 67 + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "", 68 + l3_miss_str); 64 69 } 65 70 66 71 static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) ··· 77 68 78 69 static void pr_ibs_op_ctl(union ibs_op_ctl reg) 79 70 { 80 - printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", 81 - reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, 82 - reg.cnt_ctl, reg.cnt_ctl ? 
"uOps" : "cycles", reg.opcurcnt); 71 + char l3_miss_only[sizeof(" L3MissOnly _")] = ""; 72 + 73 + if (zen4_ibs_extensions) 74 + snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); 75 + 76 + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", 77 + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, 78 + reg.op_en, reg.op_val, reg.cnt_ctl, 79 + reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); 83 80 } 84 81 85 82 static void pr_ibs_op_data(union ibs_op_data reg) ··· 99 84 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); 100 85 } 101 86 102 - static void pr_ibs_op_data2(union ibs_op_data2 reg) 87 + static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) 88 + { 89 + static const char * const data_src_str[] = { 90 + "", 91 + " DataSrc 1=Local L3 or other L1/L2 in CCX", 92 + " DataSrc 2=A peer cache in a near CCX", 93 + " DataSrc 3=Data returned from DRAM", 94 + " DataSrc 4=(reserved)", 95 + " DataSrc 5=A peer cache in a far CCX", 96 + " DataSrc 6=DRAM address map with \"long latency\" bit set", 97 + " DataSrc 7=Data returned from MMIO/Config/PCI/APIC", 98 + " DataSrc 8=Extension Memory (S-Link, GenZ, etc)", 99 + " DataSrc 9=(reserved)", 100 + " DataSrc 10=(reserved)", 101 + " DataSrc 11=(reserved)", 102 + " DataSrc 12=Peer Agent Memory", 103 + /* 13 to 31 are reserved. Avoid printing them. */ 104 + }; 105 + int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; 106 + 107 + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, 108 + (data_src == 1 || data_src == 2 || data_src == 5) ? 109 + (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", 110 + reg.rmt_node, 111 + data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : ""); 112 + } 113 + 114 + static void pr_ibs_op_data2_default(union ibs_op_data2 reg) 103 115 { 104 116 static const char * const data_src_str[] = { 105 117 "", ··· 143 101 reg.data_src_lo == 2 ? 
(reg.cache_hit_st ? "CacheHitSt 1=O-State " 144 102 : "CacheHitSt 0=M-state ") : "", 145 103 reg.rmt_node, data_src_str[reg.data_src_lo]); 104 + } 105 + 106 + static void pr_ibs_op_data2(union ibs_op_data2 reg) 107 + { 108 + if (zen4_ibs_extensions) 109 + return pr_ibs_op_data2_extended(reg); 110 + pr_ibs_op_data2_default(reg); 146 111 } 147 112 148 113 static void pr_ibs_op_data3(union ibs_op_data3 reg) ··· 327 278 } 328 279 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; 329 280 } 281 + 282 + if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) 283 + zen4_ibs_extensions = 1; 330 284 331 285 if (ibs_fetch_type || ibs_op_type) { 332 286 if (!cpu_family)