Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf: arm_pmuv3: Add support for Armv9.4 PMU instruction counter

Armv9.4/8.9 PMU adds optional support for a fixed instruction counter
similar to the fixed cycle counter. Support for the feature is indicated
in the ID_AA64DFR1_EL1 register PMICNTR field. The counter is not
accessible in AArch32.

Existing userspace using direct counter access won't know how to handle
the fixed instruction counter, so we have to avoid using the counter
when user access is requested.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-7-280a8d7ff465@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Rob Herring (Arm) and committed by
Will Deacon
d8226d8c 2f62701f

+114 -6
+20
arch/arm/include/asm/arm_pmuv3.h
··· 127 127 return (dfr0 >> 24) & 0xf; 128 128 } 129 129 130 + static inline bool pmuv3_has_icntr(void) 131 + { 132 + /* FEAT_PMUv3_ICNTR not accessible for 32-bit */ 133 + return false; 134 + } 135 + 130 136 static inline void write_pmcr(u32 val) 131 137 { 132 138 write_sysreg(val, PMCR); ··· 158 152 return read_sysreg(PMCCNTR); 159 153 } 160 154 155 + static inline void write_pmicntr(u64 val) {} 156 + 157 + static inline u64 read_pmicntr(void) 158 + { 159 + return 0; 160 + } 161 + 161 162 static inline void write_pmcntenset(u32 val) 162 163 { 163 164 write_sysreg(val, PMCNTENSET); ··· 188 175 static inline void write_pmccfiltr(u32 val) 189 176 { 190 177 write_sysreg(val, PMCCFILTR); 178 + } 179 + 180 + static inline void write_pmicfiltr(u64 val) {} 181 + 182 + static inline u64 read_pmicfiltr(void) 183 + { 184 + return 0; 191 185 } 192 186 193 187 static inline void write_pmovsclr(u32 val)
+28
arch/arm64/include/asm/arm_pmuv3.h
··· 54 54 ID_AA64DFR0_EL1_PMUVer_SHIFT); 55 55 } 56 56 57 + static inline bool pmuv3_has_icntr(void) 58 + { 59 + u64 dfr1 = read_sysreg(id_aa64dfr1_el1); 60 + 61 + return !!cpuid_feature_extract_unsigned_field(dfr1, 62 + ID_AA64DFR1_EL1_PMICNTR_SHIFT); 63 + } 64 + 57 65 static inline void write_pmcr(u64 val) 58 66 { 59 67 write_sysreg(val, pmcr_el0); ··· 85 77 static inline u64 read_pmccntr(void) 86 78 { 87 79 return read_sysreg(pmccntr_el0); 80 + } 81 + 82 + static inline void write_pmicntr(u64 val) 83 + { 84 + write_sysreg_s(val, SYS_PMICNTR_EL0); 85 + } 86 + 87 + static inline u64 read_pmicntr(void) 88 + { 89 + return read_sysreg_s(SYS_PMICNTR_EL0); 88 90 } 89 91 90 92 static inline void write_pmcntenset(u64 val) ··· 125 107 static inline u64 read_pmccfiltr(void) 126 108 { 127 109 return read_sysreg(pmccfiltr_el0); 110 + } 111 + 112 + static inline void write_pmicfiltr(u64 val) 113 + { 114 + write_sysreg_s(val, SYS_PMICFILTR_EL0); 115 + } 116 + 117 + static inline u64 read_pmicfiltr(void) 118 + { 119 + return read_sysreg_s(SYS_PMICFILTR_EL0); 128 120 } 129 121 130 122 static inline void write_pmovsclr(u64 val)
+6 -2
arch/arm64/kvm/pmu.c
··· 66 66 67 67 /* 68 68 * Read a value direct from PMEVTYPER<idx> where idx is 0-30 69 - * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31). 69 + * or PMxCFILTR_EL0 where idx is 31-32. 70 70 */ 71 71 static u64 kvm_vcpu_pmu_read_evtype_direct(int idx) 72 72 { 73 73 if (idx == ARMV8_PMU_CYCLE_IDX) 74 74 return read_pmccfiltr(); 75 + else if (idx == ARMV8_PMU_INSTR_IDX) 76 + return read_pmicfiltr(); 75 77 76 78 return read_pmevtypern(idx); 77 79 } 78 80 79 81 /* 80 82 * Write a value direct to PMEVTYPER<idx> where idx is 0-30 81 - * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31). 83 + * or PMxCFILTR_EL0 where idx is 31-32. 82 84 */ 83 85 static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val) 84 86 { 85 87 if (idx == ARMV8_PMU_CYCLE_IDX) 86 88 write_pmccfiltr(val); 89 + else if (idx == ARMV8_PMU_INSTR_IDX) 90 + write_pmicfiltr(val); 87 91 else 88 92 write_pmevtypern(idx, val); 89 93 }
+25
arch/arm64/tools/sysreg
··· 2029 2029 Field 63:0 ADDR 2030 2030 EndSysreg 2031 2031 2032 + Sysreg PMICNTR_EL0 3 3 9 4 0 2033 + Field 63:0 ICNT 2034 + EndSysreg 2035 + 2036 + Sysreg PMICFILTR_EL0 3 3 9 6 0 2037 + Res0 63:59 2038 + Field 58 SYNC 2039 + Field 57:56 VS 2040 + Res0 55:32 2041 + Field 31 P 2042 + Field 30 U 2043 + Field 29 NSK 2044 + Field 28 NSU 2045 + Field 27 NSH 2046 + Field 26 M 2047 + Res0 25 2048 + Field 24 SH 2049 + Field 23 T 2050 + Field 22 RLK 2051 + Field 21 RLU 2052 + Field 20 RLH 2053 + Res0 19:16 2054 + Field 15:0 evtCount 2055 + EndSysreg 2056 + 2032 2057 Sysreg PMSCR_EL1 3 0 9 9 0 2033 2058 Res0 63:8 2034 2059 Field 7:6 PCT
+25
drivers/perf/arm_pmuv3.c
··· 571 571 572 572 if (idx == ARMV8_PMU_CYCLE_IDX) 573 573 value = read_pmccntr(); 574 + else if (idx == ARMV8_PMU_INSTR_IDX) 575 + value = read_pmicntr(); 574 576 else 575 577 value = armv8pmu_read_hw_counter(event); 576 578 ··· 606 604 607 605 if (idx == ARMV8_PMU_CYCLE_IDX) 608 606 write_pmccntr(value); 607 + else if (idx == ARMV8_PMU_INSTR_IDX) 608 + write_pmicntr(value); 609 609 else 610 610 armv8pmu_write_hw_counter(event, value); 611 611 } ··· 645 641 } else { 646 642 if (idx == ARMV8_PMU_CYCLE_IDX) 647 643 write_pmccfiltr(hwc->config_base); 644 + else if (idx == ARMV8_PMU_INSTR_IDX) 645 + write_pmicfiltr(hwc->config_base); 648 646 else 649 647 armv8pmu_write_evtype(idx, hwc->config_base); 650 648 } ··· 775 769 ARMPMU_MAX_HWEVENTS) { 776 770 if (i == ARMV8_PMU_CYCLE_IDX) 777 771 write_pmccntr(0); 772 + else if (i == ARMV8_PMU_INSTR_IDX) 773 + write_pmicntr(0); 778 774 else 779 775 armv8pmu_write_evcntr(i, 0); 780 776 } ··· 942 934 armv8pmu_event_want_user_access(event) && 943 935 !armv8pmu_has_long_event(cpu_pmu) 944 936 return -EAGAIN; 937 + } 938 + 939 + /* 940 + * Always prefer to place an instruction-retired event into the fixed 941 + * instruction counter, but don't expose that counter to userspace access, 942 + * as userspace may not know how to handle it. 
943 + */ 944 + if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) && 945 + !armv8pmu_event_get_threshold(&event->attr) && 946 + test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) && 947 + !armv8pmu_event_want_user_access(event)) { 948 + if (!test_and_set_bit(ARMV8_PMU_INSTR_IDX, cpuc->used_mask)) 949 + return ARMV8_PMU_INSTR_IDX; 945 950 } 946 951 947 952 /* ··· 1213 1192 1214 1193 /* Add the CPU cycles counter */ 1215 1194 set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask); 1195 + 1196 + /* Add the CPU instructions counter */ 1197 + if (pmuv3_has_icntr()) 1198 + set_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask); 1216 1199 1217 1200 pmceid[0] = pmceid_raw[0] = read_pmceid0(); 1218 1201 pmceid[1] = pmceid_raw[1] = read_pmceid1();
+6 -2
include/linux/perf/arm_pmu.h
··· 17 17 #ifdef CONFIG_ARM_PMU 18 18 19 19 /* 20 - * The ARMv7 CPU PMU supports up to 32 event counters. 20 + * The Armv7 and Armv8.8 or less CPU PMU supports up to 32 event counters. 21 + * The Armv8.9/9.4 CPU PMU supports up to 33 event counters. 21 22 */ 23 + #ifdef CONFIG_ARM 22 24 #define ARMPMU_MAX_HWEVENTS 32 23 - 25 + #else 26 + #define ARMPMU_MAX_HWEVENTS 33 27 + #endif 24 28 /* 25 29 * ARM PMU hw_event flags 26 30 */
+4 -2
include/linux/perf/arm_pmuv3.h
··· 8 8 9 9 #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 10 10 #define ARMV8_PMU_CYCLE_IDX 31 11 - 11 + #define ARMV8_PMU_INSTR_IDX 32 /* Not accessible from AArch32 */ 12 12 13 13 /* 14 14 * Common architectural and microarchitectural event numbers. ··· 228 228 */ 229 229 #define ARMV8_PMU_OVSR_P GENMASK(30, 0) 230 230 #define ARMV8_PMU_OVSR_C BIT(31) 231 + #define ARMV8_PMU_OVSR_F BIT_ULL(32) /* arm64 only */ 231 232 /* Mask for writable bits is both P and C fields */ 232 - #define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) 233 + #define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C | \ 234 + ARMV8_PMU_OVSR_F) 233 235 234 236 /* 235 237 * PMXEVTYPER: Event selection reg