Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/perf: power10 Performance Monitoring support

Base enablement patch to register performance monitoring hardware
support for power10. The patch introduces the raw event encoding format,
defines the supported list of events, config fields for the event
attributes and their corresponding bit values which are exported via
sysfs.

The patch also enhances the support functions in isa207_common.c to include
power10 pmu hardware.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1594996707-3727-9-git-send-email-atrajeev@linux.vnet.ibm.com

authored by

Athira Rajeev and committed by
Michael Ellerman
a64e697c 9908c826

+566 -11
+1 -1
arch/powerpc/perf/Makefile
··· 9 9 obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ 10 10 power5+-pmu.o power6-pmu.o power7-pmu.o \ 11 11 isa207-common.o power8-pmu.o power9-pmu.o \ 12 - generic-compat-pmu.o 12 + generic-compat-pmu.o power10-pmu.o 13 13 obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o 14 14 15 15 obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
+2
arch/powerpc/perf/core-book3s.c
··· 2333 2333 return 0; 2334 2334 else if (!init_power9_pmu()) 2335 2335 return 0; 2336 + else if (!init_power10_pmu()) 2337 + return 0; 2336 2338 else if (!init_ppc970_pmu()) 2337 2339 return 0; 2338 2340 else
+1
arch/powerpc/perf/internal.h
··· 9 9 extern int init_power7_pmu(void); 10 10 extern int init_power8_pmu(void); 11 11 extern int init_power9_pmu(void); 12 + extern int init_power10_pmu(void); 12 13 extern int init_generic_compat_pmu(void);
+50 -9
arch/powerpc/perf/isa207-common.c
··· 55 55 { 56 56 u64 valid_mask = EVENT_VALID_MASK; 57 57 58 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 58 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 59 + valid_mask = p10_EVENT_VALID_MASK; 60 + else if (cpu_has_feature(CPU_FTR_ARCH_300)) 59 61 valid_mask = p9_EVENT_VALID_MASK; 60 62 61 63 return !(event & ~valid_mask); ··· 71 69 return false; 72 70 } 73 71 72 + static unsigned long sdar_mod_val(u64 event) 73 + { 74 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 75 + return p10_SDAR_MODE(event); 76 + 77 + return p9_SDAR_MODE(event); 78 + } 79 + 74 80 static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) 75 81 { 76 82 /* ··· 89 79 * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling 90 80 * mode and will be un-changed when setting MMCRA[63] (Marked events). 91 81 * 92 - * Incase of Power9: 82 + * Incase of Power9/power10: 93 83 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), 94 84 * or if group already have any marked events. 95 85 * For rest ··· 100 90 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 101 91 if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) 102 92 *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; 103 - else if (p9_SDAR_MODE(event)) 104 - *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; 93 + else if (sdar_mod_val(event)) 94 + *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT; 105 95 else 106 96 *mmcra |= MMCRA_SDAR_MODE_DCACHE; 107 97 } else ··· 144 134 /* 145 135 * Check the mantissa upper two bits are not zero, unless the 146 136 * exponent is also zero. See the THRESH_CMP_MANTISSA doc. 137 + * Power10: thresh_cmp is replaced by l2_l3 event select. 
147 138 */ 139 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 140 + return false; 141 + 148 142 cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; 149 143 exp = cmp >> 7; 150 144 ··· 265 251 266 252 pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; 267 253 unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; 268 - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; 254 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 255 + cache = (event >> EVENT_CACHE_SEL_SHIFT) & 256 + p10_EVENT_CACHE_SEL_MASK; 257 + else 258 + cache = (event >> EVENT_CACHE_SEL_SHIFT) & 259 + EVENT_CACHE_SEL_MASK; 269 260 ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; 270 261 271 262 if (pmc) { ··· 302 283 } 303 284 304 285 if (unit >= 6 && unit <= 9) { 305 - if (cpu_has_feature(CPU_FTR_ARCH_300)) { 286 + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { 287 + mask |= CNST_L2L3_GROUP_MASK; 288 + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); 289 + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { 306 290 mask |= CNST_CACHE_GROUP_MASK; 307 291 value |= CNST_CACHE_GROUP_VAL(event & 0xff); 308 292 ··· 389 367 struct perf_event *pevents[]) 390 368 { 391 369 unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; 370 + unsigned long mmcr3; 392 371 unsigned int pmc, pmc_inuse; 393 372 int i; 394 373 ··· 402 379 pmc_inuse |= 1 << pmc; 403 380 } 404 381 405 - mmcra = mmcr1 = mmcr2 = 0; 382 + mmcra = mmcr1 = mmcr2 = mmcr3 = 0; 406 383 407 384 /* Second pass: assign PMCs, set all MMCR1 fields */ 408 385 for (i = 0; i < n_ev; ++i) { ··· 461 438 mmcra |= val << MMCRA_THR_CTL_SHIFT; 462 439 val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; 463 440 mmcra |= val << MMCRA_THR_SEL_SHIFT; 464 - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; 465 - mmcra |= thresh_cmp_val(val); 441 + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { 442 + val = (event[i] >> EVENT_THR_CMP_SHIFT) & 443 + EVENT_THR_CMP_MASK; 444 + mmcra |= thresh_cmp_val(val); 445 + } 446 + } 
447 + 448 + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { 449 + val = (event[i] >> p10_L2L3_EVENT_SHIFT) & 450 + p10_EVENT_L2L3_SEL_MASK; 451 + mmcr2 |= val << p10_L2L3_SEL_SHIFT; 466 452 } 467 453 468 454 if (event[i] & EVENT_WANTS_BHRB) { ··· 490 458 mmcr2 |= MMCR2_FCH(pmc); 491 459 else 492 460 mmcr2 |= MMCR2_FCS(pmc); 461 + } 462 + 463 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 464 + if (pmc <= 4) { 465 + val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & 466 + p10_EVENT_MMCR3_MASK; 467 + mmcr3 |= val << MMCR3_SHIFT(pmc); 468 + } 493 469 } 494 470 495 471 hwc[i] = pmc - 1; ··· 520 480 mmcr->mmcr1 = mmcr1; 521 481 mmcr->mmcra = mmcra; 522 482 mmcr->mmcr2 = mmcr2; 483 + mmcr->mmcr3 = mmcr3; 523 484 524 485 return 0; 525 486 }
+32 -1
arch/powerpc/perf/isa207-common.h
··· 87 87 EVENT_LINUX_MASK | \ 88 88 EVENT_PSEL_MASK)) 89 89 90 + /* Contants to support power10 raw encoding format */ 91 + #define p10_SDAR_MODE_SHIFT 22 92 + #define p10_SDAR_MODE_MASK 0x3ull 93 + #define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \ 94 + p10_SDAR_MODE_MASK) 95 + #define p10_EVENT_L2L3_SEL_MASK 0x1f 96 + #define p10_L2L3_SEL_SHIFT 3 97 + #define p10_L2L3_EVENT_SHIFT 40 98 + #define p10_EVENT_THRESH_MASK 0xffffull 99 + #define p10_EVENT_CACHE_SEL_MASK 0x3ull 100 + #define p10_EVENT_MMCR3_MASK 0x7fffull 101 + #define p10_EVENT_MMCR3_SHIFT 45 102 + 103 + #define p10_EVENT_VALID_MASK \ 104 + ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ 105 + (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ 106 + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ 107 + (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ 108 + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ 109 + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ 110 + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ 111 + (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ 112 + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ 113 + EVENT_LINUX_MASK | \ 114 + EVENT_PSEL_MASK)) 90 115 /* 91 116 * Layout of constraint bits: 92 117 * ··· 159 134 #define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff) 160 135 #define CNST_CACHE_PMC4_VAL (1ull << 54) 161 136 #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL 137 + 138 + #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) 139 + #define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) 162 140 163 141 /* 164 142 * For NC we are counting up to 4 events. 
This requires three bits, and we need ··· 219 191 #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ 220 192 MMCRA_THR_CTR_EXP_MASK) 221 193 222 - /* MMCR1 Threshold Compare bit constant for power9 */ 194 + /* MMCRA Threshold Compare bit constant for power9 */ 223 195 #define p9_MMCRA_THR_CMP_SHIFT 45 224 196 225 197 /* Bits in MMCR2 for PowerISA v2.07 */ ··· 229 201 230 202 #define MAX_ALT 2 231 203 #define MAX_PMU_COUNTERS 6 204 + 205 + /* Bits in MMCR3 for PowerISA v3.10 */ 206 + #define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1))) 232 207 233 208 #define ISA207_SIER_TYPE_SHIFT 15 234 209 #define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT)
+70
arch/powerpc/perf/power10-events-list.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Performance counter support for POWER10 processors. 4 + * 5 + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. 6 + * Copyright 2020 Athira Rajeev, IBM Corporation. 7 + */ 8 + 9 + /* 10 + * Power10 event codes. 11 + */ 12 + EVENT(PM_RUN_CYC, 0x600f4); 13 + EVENT(PM_DISP_STALL_CYC, 0x100f8); 14 + EVENT(PM_EXEC_STALL, 0x30008); 15 + EVENT(PM_RUN_INST_CMPL, 0x500fa); 16 + EVENT(PM_BR_CMPL, 0x4d05e); 17 + EVENT(PM_BR_MPRED_CMPL, 0x400f6); 18 + 19 + /* All L1 D cache load references counted at finish, gated by reject */ 20 + EVENT(PM_LD_REF_L1, 0x100fc); 21 + /* Load Missed L1 */ 22 + EVENT(PM_LD_MISS_L1, 0x3e054); 23 + /* Store Missed L1 */ 24 + EVENT(PM_ST_MISS_L1, 0x300f0); 25 + /* L1 cache data prefetches */ 26 + EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c); 27 + /* Demand iCache Miss */ 28 + EVENT(PM_L1_ICACHE_MISS, 0x200fc); 29 + /* Instruction fetches from L1 */ 30 + EVENT(PM_INST_FROM_L1, 0x04080); 31 + /* Instruction Demand sectors wriittent into IL1 */ 32 + EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040); 33 + /* Instruction prefetch written into IL1 */ 34 + EVENT(PM_IC_PREF_REQ, 0x040a0); 35 + /* The data cache was reloaded from local core's L3 due to a demand load */ 36 + EVENT(PM_DATA_FROM_L3, 0x01340000001c040); 37 + /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ 38 + EVENT(PM_DATA_FROM_L3MISS, 0x300fe); 39 + /* Data PTEG reload */ 40 + EVENT(PM_DTLB_MISS, 0x300fc); 41 + /* ITLB Reloaded */ 42 + EVENT(PM_ITLB_MISS, 0x400fc); 43 + 44 + EVENT(PM_RUN_CYC_ALT, 0x0001e); 45 + EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); 46 + 47 + /* 48 + * Memory Access Events 49 + * 50 + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) 51 + * To enable capturing of memory profiling, these MMCRA bits 52 + * needs to be programmed and corresponding raw event format 53 + * encoding. 
54 + * 55 + * MMCRA bits encoding needed are 56 + * SM (Sampling Mode) 57 + * EM (Eligibility for Random Sampling) 58 + * TECE (Threshold Event Counter Event) 59 + * TS (Threshold Start Event) 60 + * TE (Threshold End Event) 61 + * 62 + * Corresponding Raw Encoding bits: 63 + * sample [EM,SM] 64 + * thresh_sel (TECE) 65 + * thresh start (TS) 66 + * thresh end (TE) 67 + */ 68 + 69 + EVENT(MEM_LOADS, 0x34340401e0); 70 + EVENT(MEM_STORES, 0x343c0401e0);
+410
arch/powerpc/perf/power10-pmu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Performance counter support for POWER10 processors. 4 + * 5 + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. 6 + * Copyright 2020 Athira Rajeev, IBM Corporation. 7 + */ 8 + 9 + #define pr_fmt(fmt) "power10-pmu: " fmt 10 + 11 + #include "isa207-common.h" 12 + #include "internal.h" 13 + 14 + /* 15 + * Raw event encoding for Power10: 16 + * 17 + * 60 56 52 48 44 40 36 32 18 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 19 + * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ] 20 + * | | | | | | 21 + * | | *- IFM (Linux) | | thresh start/stop -* 22 + * | *- BHRB (Linux) | src_sel 23 + * *- EBB (Linux) *invert_bit 24 + * 25 + * 28 24 20 16 12 8 4 0 26 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 27 + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] 28 + * | | | | | | 29 + * | | | | | *- mark 30 + * | | | *- L1/L2/L3 cache_sel | 31 + * | | sdar_mode | 32 + * | *- sampling mode for marked events *- combine 33 + * | 34 + * *- thresh_sel 35 + * 36 + * Below uses IBM bit numbering. 
37 + * 38 + * MMCR1[x:y] = unit (PMCxUNIT) 39 + * MMCR1[24] = pmc1combine[0] 40 + * MMCR1[25] = pmc1combine[1] 41 + * MMCR1[26] = pmc2combine[0] 42 + * MMCR1[27] = pmc2combine[1] 43 + * MMCR1[28] = pmc3combine[0] 44 + * MMCR1[29] = pmc3combine[1] 45 + * MMCR1[30] = pmc4combine[0] 46 + * MMCR1[31] = pmc4combine[1] 47 + * 48 + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 49 + * MMCR1[20:27] = thresh_ctl 50 + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 51 + * MMCR1[20:27] = thresh_ctl 52 + * else 53 + * MMCRA[48:55] = thresh_ctl (THRESH START/END) 54 + * 55 + * if thresh_sel: 56 + * MMCRA[45:47] = thresh_sel 57 + * 58 + * if l2l3_sel: 59 + * MMCR2[56:60] = l2l3_sel[0:4] 60 + * 61 + * MMCR1[16] = cache_sel[0] 62 + * MMCR1[17] = cache_sel[1] 63 + * 64 + * if mark: 65 + * MMCRA[63] = 1 (SAMPLE_ENABLE) 66 + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) 67 + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) 68 + * 69 + * if EBB and BHRB: 70 + * MMCRA[32:33] = IFM 71 + * 72 + * MMCRA[SDAR_MODE] = sdar_mode[0:1] 73 + */ 74 + 75 + /* 76 + * Some power10 event codes. 
77 + */ 78 + #define EVENT(_name, _code) enum{_name = _code} 79 + 80 + #include "power10-events-list.h" 81 + 82 + #undef EVENT 83 + 84 + /* MMCRA IFM bits - POWER10 */ 85 + #define POWER10_MMCRA_IFM1 0x0000000040000000UL 86 + #define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL 87 + 88 + /* Table of alternatives, sorted by column 0 */ 89 + static const unsigned int power10_event_alternatives[][MAX_ALT] = { 90 + { PM_RUN_CYC_ALT, PM_RUN_CYC }, 91 + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, 92 + }; 93 + 94 + static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) 95 + { 96 + int num_alt = 0; 97 + 98 + num_alt = isa207_get_alternatives(event, alt, 99 + ARRAY_SIZE(power10_event_alternatives), flags, 100 + power10_event_alternatives); 101 + 102 + return num_alt; 103 + } 104 + 105 + GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); 106 + GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); 107 + GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); 108 + GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); 109 + GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); 110 + GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); 111 + GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); 112 + GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); 113 + 114 + CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); 115 + CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); 116 + CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS); 117 + CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); 118 + CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); 119 + CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); 120 + CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); 121 + CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); 122 + CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); 123 + CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); 124 + CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); 125 + CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); 126 + 
CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); 127 + 128 + static struct attribute *power10_events_attr[] = { 129 + GENERIC_EVENT_PTR(PM_RUN_CYC), 130 + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), 131 + GENERIC_EVENT_PTR(PM_BR_CMPL), 132 + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), 133 + GENERIC_EVENT_PTR(PM_LD_REF_L1), 134 + GENERIC_EVENT_PTR(PM_LD_MISS_L1), 135 + GENERIC_EVENT_PTR(MEM_LOADS), 136 + GENERIC_EVENT_PTR(MEM_STORES), 137 + CACHE_EVENT_PTR(PM_LD_MISS_L1), 138 + CACHE_EVENT_PTR(PM_LD_REF_L1), 139 + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), 140 + CACHE_EVENT_PTR(PM_ST_MISS_L1), 141 + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), 142 + CACHE_EVENT_PTR(PM_INST_FROM_L1), 143 + CACHE_EVENT_PTR(PM_IC_PREF_REQ), 144 + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), 145 + CACHE_EVENT_PTR(PM_DATA_FROM_L3), 146 + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), 147 + CACHE_EVENT_PTR(PM_BR_CMPL), 148 + CACHE_EVENT_PTR(PM_DTLB_MISS), 149 + CACHE_EVENT_PTR(PM_ITLB_MISS), 150 + NULL 151 + }; 152 + 153 + static struct attribute_group power10_pmu_events_group = { 154 + .name = "events", 155 + .attrs = power10_events_attr, 156 + }; 157 + 158 + PMU_FORMAT_ATTR(event, "config:0-59"); 159 + PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); 160 + PMU_FORMAT_ATTR(mark, "config:8"); 161 + PMU_FORMAT_ATTR(combine, "config:10-11"); 162 + PMU_FORMAT_ATTR(unit, "config:12-15"); 163 + PMU_FORMAT_ATTR(pmc, "config:16-19"); 164 + PMU_FORMAT_ATTR(cache_sel, "config:20-21"); 165 + PMU_FORMAT_ATTR(sdar_mode, "config:22-23"); 166 + PMU_FORMAT_ATTR(sample_mode, "config:24-28"); 167 + PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); 168 + PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); 169 + PMU_FORMAT_ATTR(thresh_start, "config:36-39"); 170 + PMU_FORMAT_ATTR(l2l3_sel, "config:40-44"); 171 + PMU_FORMAT_ATTR(src_sel, "config:45-46"); 172 + PMU_FORMAT_ATTR(invert_bit, "config:47"); 173 + PMU_FORMAT_ATTR(src_mask, "config:48-53"); 174 + PMU_FORMAT_ATTR(src_match, "config:54-59"); 175 + 176 + static struct attribute *power10_pmu_format_attr[] = 
{ 177 + &format_attr_event.attr, 178 + &format_attr_pmcxsel.attr, 179 + &format_attr_mark.attr, 180 + &format_attr_combine.attr, 181 + &format_attr_unit.attr, 182 + &format_attr_pmc.attr, 183 + &format_attr_cache_sel.attr, 184 + &format_attr_sdar_mode.attr, 185 + &format_attr_sample_mode.attr, 186 + &format_attr_thresh_sel.attr, 187 + &format_attr_thresh_stop.attr, 188 + &format_attr_thresh_start.attr, 189 + &format_attr_l2l3_sel.attr, 190 + &format_attr_src_sel.attr, 191 + &format_attr_invert_bit.attr, 192 + &format_attr_src_mask.attr, 193 + &format_attr_src_match.attr, 194 + NULL, 195 + }; 196 + 197 + static struct attribute_group power10_pmu_format_group = { 198 + .name = "format", 199 + .attrs = power10_pmu_format_attr, 200 + }; 201 + 202 + static const struct attribute_group *power10_pmu_attr_groups[] = { 203 + &power10_pmu_format_group, 204 + &power10_pmu_events_group, 205 + NULL, 206 + }; 207 + 208 + static int power10_generic_events[] = { 209 + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, 210 + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, 211 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, 212 + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, 213 + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, 214 + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, 215 + }; 216 + 217 + static u64 power10_bhrb_filter_map(u64 branch_sample_type) 218 + { 219 + u64 pmu_bhrb_filter = 0; 220 + 221 + /* BHRB and regular PMU events share the same privilege state 222 + * filter configuration. BHRB is always recorded along with a 223 + * regular PMU event. As the privilege state filter is handled 224 + * in the basic PMC configuration of the accompanying regular 225 + * PMU event, we ignore any separate BHRB specific request. 
226 + */ 227 + 228 + /* No branch filter requested */ 229 + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) 230 + return pmu_bhrb_filter; 231 + 232 + /* Invalid branch filter options - HW does not support */ 233 + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) 234 + return -1; 235 + 236 + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) 237 + return -1; 238 + 239 + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) 240 + return -1; 241 + 242 + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { 243 + pmu_bhrb_filter |= POWER10_MMCRA_IFM1; 244 + return pmu_bhrb_filter; 245 + } 246 + 247 + /* Every thing else is unsupported */ 248 + return -1; 249 + } 250 + 251 + static void power10_config_bhrb(u64 pmu_bhrb_filter) 252 + { 253 + pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK; 254 + 255 + /* Enable BHRB filter in PMU */ 256 + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); 257 + } 258 + 259 + #define C(x) PERF_COUNT_HW_CACHE_##x 260 + 261 + /* 262 + * Table of generalized cache-related events. 263 + * 0 means not supported, -1 means nonsensical, other values 264 + * are event codes. 
265 + */ 266 + static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 267 + [C(L1D)] = { 268 + [C(OP_READ)] = { 269 + [C(RESULT_ACCESS)] = PM_LD_REF_L1, 270 + [C(RESULT_MISS)] = PM_LD_MISS_L1, 271 + }, 272 + [C(OP_WRITE)] = { 273 + [C(RESULT_ACCESS)] = 0, 274 + [C(RESULT_MISS)] = PM_ST_MISS_L1, 275 + }, 276 + [C(OP_PREFETCH)] = { 277 + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, 278 + [C(RESULT_MISS)] = 0, 279 + }, 280 + }, 281 + [C(L1I)] = { 282 + [C(OP_READ)] = { 283 + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, 284 + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, 285 + }, 286 + [C(OP_WRITE)] = { 287 + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, 288 + [C(RESULT_MISS)] = -1, 289 + }, 290 + [C(OP_PREFETCH)] = { 291 + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, 292 + [C(RESULT_MISS)] = 0, 293 + }, 294 + }, 295 + [C(LL)] = { 296 + [C(OP_READ)] = { 297 + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, 298 + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, 299 + }, 300 + [C(OP_WRITE)] = { 301 + [C(RESULT_ACCESS)] = -1, 302 + [C(RESULT_MISS)] = -1, 303 + }, 304 + [C(OP_PREFETCH)] = { 305 + [C(RESULT_ACCESS)] = -1, 306 + [C(RESULT_MISS)] = 0, 307 + }, 308 + }, 309 + [C(DTLB)] = { 310 + [C(OP_READ)] = { 311 + [C(RESULT_ACCESS)] = 0, 312 + [C(RESULT_MISS)] = PM_DTLB_MISS, 313 + }, 314 + [C(OP_WRITE)] = { 315 + [C(RESULT_ACCESS)] = -1, 316 + [C(RESULT_MISS)] = -1, 317 + }, 318 + [C(OP_PREFETCH)] = { 319 + [C(RESULT_ACCESS)] = -1, 320 + [C(RESULT_MISS)] = -1, 321 + }, 322 + }, 323 + [C(ITLB)] = { 324 + [C(OP_READ)] = { 325 + [C(RESULT_ACCESS)] = 0, 326 + [C(RESULT_MISS)] = PM_ITLB_MISS, 327 + }, 328 + [C(OP_WRITE)] = { 329 + [C(RESULT_ACCESS)] = -1, 330 + [C(RESULT_MISS)] = -1, 331 + }, 332 + [C(OP_PREFETCH)] = { 333 + [C(RESULT_ACCESS)] = -1, 334 + [C(RESULT_MISS)] = -1, 335 + }, 336 + }, 337 + [C(BPU)] = { 338 + [C(OP_READ)] = { 339 + [C(RESULT_ACCESS)] = PM_BR_CMPL, 340 + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, 341 + }, 342 + [C(OP_WRITE)] = { 343 + [C(RESULT_ACCESS)] = -1, 344 + 
[C(RESULT_MISS)] = -1, 345 + }, 346 + [C(OP_PREFETCH)] = { 347 + [C(RESULT_ACCESS)] = -1, 348 + [C(RESULT_MISS)] = -1, 349 + }, 350 + }, 351 + [C(NODE)] = { 352 + [C(OP_READ)] = { 353 + [C(RESULT_ACCESS)] = -1, 354 + [C(RESULT_MISS)] = -1, 355 + }, 356 + [C(OP_WRITE)] = { 357 + [C(RESULT_ACCESS)] = -1, 358 + [C(RESULT_MISS)] = -1, 359 + }, 360 + [C(OP_PREFETCH)] = { 361 + [C(RESULT_ACCESS)] = -1, 362 + [C(RESULT_MISS)] = -1, 363 + }, 364 + }, 365 + }; 366 + 367 + #undef C 368 + 369 + static struct power_pmu power10_pmu = { 370 + .name = "POWER10", 371 + .n_counter = MAX_PMU_COUNTERS, 372 + .add_fields = ISA207_ADD_FIELDS, 373 + .test_adder = ISA207_TEST_ADDER, 374 + .group_constraint_mask = CNST_CACHE_PMC4_MASK, 375 + .group_constraint_val = CNST_CACHE_PMC4_VAL, 376 + .compute_mmcr = isa207_compute_mmcr, 377 + .config_bhrb = power10_config_bhrb, 378 + .bhrb_filter_map = power10_bhrb_filter_map, 379 + .get_constraint = isa207_get_constraint, 380 + .get_alternatives = power10_get_alternatives, 381 + .get_mem_data_src = isa207_get_mem_data_src, 382 + .get_mem_weight = isa207_get_mem_weight, 383 + .disable_pmc = isa207_disable_pmc, 384 + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | 385 + PPMU_ARCH_31, 386 + .n_generic = ARRAY_SIZE(power10_generic_events), 387 + .generic_events = power10_generic_events, 388 + .cache_events = &power10_cache_events, 389 + .attr_groups = power10_pmu_attr_groups, 390 + .bhrb_nr = 32, 391 + }; 392 + 393 + int init_power10_pmu(void) 394 + { 395 + int rc; 396 + 397 + /* Comes from cpu_specs[] */ 398 + if (!cur_cpu_spec->oprofile_cpu_type || 399 + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) 400 + return -ENODEV; 401 + 402 + rc = register_power_pmu(&power10_pmu); 403 + if (rc) 404 + return rc; 405 + 406 + /* Tell userspace that EBB is supported */ 407 + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; 408 + 409 + return 0; 410 + }