Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-next/perf-m1' into for-next/perf

Support for the CPU PMUs on the Apple M1.

* for-next/perf-m1:
drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
drivers/perf: arm_pmu: Handle 47 bit counters
irqchip/apple-aic: Move PMU-specific registers to their own include file
arm64: dts: apple: Add t8103 PMU nodes
arm64: dts: apple: Add t8103 PMU interrupt affinities
irqchip/apple-aic: Wire PMU interrupts
irqchip/apple-aic: Parse FIQ affinities from device-tree
dt-bindings: apple,aic: Add affinity description for per-cpu pseudo-interrupts
dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts
dt-bindings: arm-pmu: Document Apple PMU compatible strings

+791 -22
+2
Documentation/devicetree/bindings/arm/pmu.yaml
··· 20 20 items: 21 21 - enum: 22 22 - apm,potenza-pmu 23 + - apple,firestorm-pmu 24 + - apple,icestorm-pmu 23 25 - arm,armv8-pmuv3 # Only for s/w models 24 26 - arm,arm1136-pmu 25 27 - arm,arm1176-pmu
+31
Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
··· 56 56 - 1: virtual HV timer 57 57 - 2: physical guest timer 58 58 - 3: virtual guest timer 59 + - 4: 'efficient' CPU PMU 60 + - 5: 'performance' CPU PMU 59 61 60 62 The 3rd cell contains the interrupt flags. This is normally 61 63 IRQ_TYPE_LEVEL_HIGH (4). ··· 69 67 70 68 power-domains: 71 69 maxItems: 1 70 + 71 + affinities: 72 + type: object 73 + additionalProperties: false 74 + description: 75 + FIQ affinity can be expressed as a single "affinities" node, 76 + containing a set of sub-nodes, one per FIQ with a non-default 77 + affinity. 78 + patternProperties: 79 + "^.+-affinity$": 80 + type: object 81 + additionalProperties: false 82 + properties: 83 + apple,fiq-index: 84 + description: 85 + The interrupt number specified as a FIQ, and for which 86 + the affinity is not the default. 87 + $ref: /schemas/types.yaml#/definitions/uint32 88 + maximum: 5 89 + 90 + cpus: 91 + $ref: /schemas/types.yaml#/definitions/phandle-array 92 + description: 93 + Should be a list of phandles to CPU nodes (as described in 94 + Documentation/devicetree/bindings/arm/cpus.yaml). 95 + 96 + required: 97 + - apple,fiq-index 98 + - cpus 72 99 73 100 required: 74 101 - compatible
+24
arch/arm64/boot/dts/apple/t8103.dtsi
··· 97 97 <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>; 98 98 }; 99 99 100 + pmu-e { 101 + compatible = "apple,icestorm-pmu"; 102 + interrupt-parent = <&aic>; 103 + interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>; 104 + }; 105 + 106 + pmu-p { 107 + compatible = "apple,firestorm-pmu"; 108 + interrupt-parent = <&aic>; 109 + interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>; 110 + }; 111 + 100 112 clkref: clock-ref { 101 113 compatible = "fixed-clock"; 102 114 #clock-cells = <0>; ··· 225 213 interrupt-controller; 226 214 reg = <0x2 0x3b100000 0x0 0x8000>; 227 215 power-domains = <&ps_aic>; 216 + 217 + affinities { 218 + e-core-pmu-affinity { 219 + apple,fiq-index = <AIC_CPU_PMU_E>; 220 + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; 221 + }; 222 + 223 + p-core-pmu-affinity { 224 + apple,fiq-index = <AIC_CPU_PMU_P>; 225 + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; 226 + }; 227 + }; 228 228 }; 229 229 230 230 pmgr: power-management@23b700000 {
+64
arch/arm64/include/asm/apple_m1_pmu.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #ifndef __ASM_APPLE_M1_PMU_h 4 + #define __ASM_APPLE_M1_PMU_h 5 + 6 + #include <linux/bits.h> 7 + #include <asm/sysreg.h> 8 + 9 + /* Counters */ 10 + #define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0) 11 + #define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0) 12 + #define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0) 13 + #define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0) 14 + #define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0) 15 + #define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0) 16 + #define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0) 17 + #define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0) 18 + #define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0) 19 + #define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0) 20 + 21 + /* Core PMC control register */ 22 + #define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) 23 + #define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0) 24 + #define PMCR0_IMODE GENMASK(10, 8) 25 + #define PMCR0_IMODE_OFF 0 26 + #define PMCR0_IMODE_PMI 1 27 + #define PMCR0_IMODE_AIC 2 28 + #define PMCR0_IMODE_HALT 3 29 + #define PMCR0_IMODE_FIQ 4 30 + #define PMCR0_IACT BIT(11) 31 + #define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12) 32 + #define PMCR0_STOP_CNT_ON_PMI BIT(20) 33 + #define PMCR0_CNT_GLOB_L2C_EVT BIT(21) 34 + #define PMCR0_DEFER_PMI_TO_ERET BIT(22) 35 + #define PMCR0_ALLOW_CNT_EN_EL0 BIT(30) 36 + #define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32) 37 + #define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) 38 + 39 + #define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) 40 + #define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) 41 + #define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) 42 + #define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) 43 + #define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48) 44 + 45 + #define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0) 46 + #define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0) 47 + #define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0) 48 + 49 + #define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 
0) 50 + #define PMESR0_EVT_CNT_2 GENMASK(7, 0) 51 + #define PMESR0_EVT_CNT_3 GENMASK(15, 8) 52 + #define PMESR0_EVT_CNT_4 GENMASK(23, 16) 53 + #define PMESR0_EVT_CNT_5 GENMASK(31, 24) 54 + 55 + #define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0) 56 + #define PMESR1_EVT_CNT_6 GENMASK(7, 0) 57 + #define PMESR1_EVT_CNT_7 GENMASK(15, 8) 58 + #define PMESR1_EVT_CNT_8 GENMASK(23, 16) 59 + #define PMESR1_EVT_CNT_9 GENMASK(31, 24) 60 + 61 + #define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0) 62 + #define PMSR_OVERFLOW GENMASK(9, 0) 63 + 64 + #endif /* __ASM_APPLE_M1_PMU_h */
+72 -22
drivers/irqchip/irq-apple-aic.c
··· 55 55 #include <linux/limits.h> 56 56 #include <linux/of_address.h> 57 57 #include <linux/slab.h> 58 + #include <asm/apple_m1_pmu.h> 58 59 #include <asm/exception.h> 59 60 #include <asm/sysreg.h> 60 61 #include <asm/virt.h> ··· 110 109 * Note: sysreg-based IPIs are not supported yet. 111 110 */ 112 111 113 - /* Core PMC control register */ 114 - #define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) 115 - #define PMCR0_IMODE GENMASK(10, 8) 116 - #define PMCR0_IMODE_OFF 0 117 - #define PMCR0_IMODE_PMI 1 118 - #define PMCR0_IMODE_AIC 2 119 - #define PMCR0_IMODE_HALT 3 120 - #define PMCR0_IMODE_FIQ 4 121 - #define PMCR0_IACT BIT(11) 122 - 123 112 /* IPI request registers */ 124 113 #define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) 125 114 #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) ··· 146 155 #define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4) 147 156 #define UPMSR_IACT BIT(0) 148 157 149 - #define AIC_NR_FIQ 4 158 + #define AIC_NR_FIQ 6 150 159 #define AIC_NR_SWIPI 32 151 160 152 161 /* ··· 168 177 void __iomem *base; 169 178 struct irq_domain *hw_domain; 170 179 struct irq_domain *ipi_domain; 180 + struct { 181 + cpumask_t aff; 182 + } *fiq_aff[AIC_NR_FIQ]; 171 183 int nr_hw; 172 184 }; 173 185 ··· 406 412 aic_irqc->nr_hw + AIC_TMR_EL02_VIRT); 407 413 } 408 414 409 - if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == 410 - (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { 411 - /* 412 - * Not supported yet, let's figure out how to handle this when 413 - * we implement these proprietary performance counters. For now, 414 - * just mask it and move on. 415 - */ 416 - pr_err_ratelimited("PMC FIQ fired. 
Masking.\n"); 417 - sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT, 418 - FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF)); 415 + if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { 416 + int irq; 417 + if (cpumask_test_cpu(smp_processor_id(), 418 + &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) 419 + irq = AIC_CPU_PMU_P; 420 + else 421 + irq = AIC_CPU_PMU_E; 422 + generic_handle_domain_irq(aic_irqc->hw_domain, 423 + aic_irqc->nr_hw + irq); 419 424 } 420 425 421 426 if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ && ··· 454 461 handle_fasteoi_irq, NULL, NULL); 455 462 irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); 456 463 } else { 457 - irq_set_percpu_devid(irq); 464 + int fiq = hw - ic->nr_hw; 465 + 466 + switch (fiq) { 467 + case AIC_CPU_PMU_P: 468 + case AIC_CPU_PMU_E: 469 + irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff); 470 + break; 471 + default: 472 + irq_set_percpu_devid(irq); 473 + break; 474 + } 475 + 458 476 irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data, 459 477 handle_percpu_devid_irq, NULL, NULL); 460 478 } ··· 797 793 .no_hw_deactivation = true, 798 794 }; 799 795 796 + static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff) 797 + { 798 + int i, n; 799 + u32 fiq; 800 + 801 + if (of_property_read_u32(aff, "apple,fiq-index", &fiq) || 802 + WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq]) 803 + return; 804 + 805 + n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32)); 806 + if (WARN_ON(n < 0)) 807 + return; 808 + 809 + ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL); 810 + if (!ic->fiq_aff[fiq]) 811 + return; 812 + 813 + for (i = 0; i < n; i++) { 814 + struct device_node *cpu_node; 815 + u32 cpu_phandle; 816 + int cpu; 817 + 818 + if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle)) 819 + continue; 820 + 821 + cpu_node = of_find_node_by_phandle(cpu_phandle); 822 + if (WARN_ON(!cpu_node)) 823 + continue;
824 + 825 + cpu = of_cpu_node_to_id(cpu_node); 826 + if (WARN_ON(cpu < 0)) 827 + continue; 828 + 829 + cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff); 830 + } 831 + } 832 + 800 833 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) 801 834 { 802 835 int i; 803 836 void __iomem *regs; 804 837 u32 info; 805 838 struct aic_irq_chip *irqc; 839 + struct device_node *affs; 806 840 807 841 regs = of_iomap(node, 0); 808 842 if (WARN_ON(!regs)) ··· 872 830 iounmap(irqc->base); 873 831 kfree(irqc); 874 832 return -ENODEV; 833 + } 834 + 835 + affs = of_get_child_by_name(node, "affinities"); 836 + if (affs) { 837 + struct device_node *chld; 838 + 839 + for_each_child_of_node(affs, chld) 840 + build_fiq_affinity(irqc, chld); 875 841 } 876 842 877 843 set_handle_irq(aic_handle_irq);
+7
drivers/perf/Kconfig
··· 146 146 Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) 147 147 performance monitors on CN10K family silicons. 148 148 149 + config APPLE_M1_CPU_PMU 150 + bool "Apple M1 CPU PMU support" 151 + depends on ARM_PMU && ARCH_APPLE 152 + help 153 + Provides support for the non-architectural CPU PMUs present on 154 + the Apple M1 SoCs and derivatives. 155 + 149 156 source "drivers/perf/hisilicon/Kconfig" 150 157 151 158 config MARVELL_CN10K_DDR_PMU
+1
drivers/perf/Makefile
··· 16 16 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o 17 17 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o 18 18 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o 19 + obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
+584
drivers/perf/apple_m1_cpu_pmu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * CPU PMU driver for the Apple M1 and derivatives 4 + * 5 + * Copyright (C) 2021 Google LLC 6 + * 7 + * Author: Marc Zyngier <maz@kernel.org> 8 + * 9 + * Most of the information used in this driver was provided by the 10 + * Asahi Linux project. The rest was experimentally discovered. 11 + */ 12 + 13 + #include <linux/of.h> 14 + #include <linux/perf/arm_pmu.h> 15 + #include <linux/platform_device.h> 16 + 17 + #include <asm/apple_m1_pmu.h> 18 + #include <asm/irq_regs.h> 19 + #include <asm/perf_event.h> 20 + 21 + #define M1_PMU_NR_COUNTERS 10 22 + 23 + #define M1_PMU_CFG_EVENT GENMASK(7, 0) 24 + 25 + #define ANY_BUT_0_1 GENMASK(9, 2) 26 + #define ONLY_2_TO_7 GENMASK(7, 2) 27 + #define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6)) 28 + #define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7)) 29 + 30 + /* 31 + * Description of the events we actually know about, as well as those with 32 + * a specific counter affinity. Yes, this is a grand total of two known 33 + * counters, and the rest is anybody's guess. 34 + * 35 + * Not all counters can count all events. Counters #0 and #1 are wired to 36 + * count cycles and instructions respectively, and some events have 37 + * bizarre mappings (every other counter, or even *one* counter). These 38 + * restrictions equally apply to both P and E cores. 39 + * 40 + * It is worth noting that the PMUs attached to P and E cores are likely 41 + * to be different because the underlying uarches are different. At the 42 + * moment, we don't really need to distinguish between the two because we 43 + * know next to nothing about the events themselves, and we already have 44 + * per cpu-type PMU abstractions. 45 + * 46 + * If we eventually find out that the events are different across 47 + * implementations, we'll have to introduce per cpu-type tables. 
48 + */ 49 + enum m1_pmu_events { 50 + M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, 51 + M1_PMU_PERFCTR_CPU_CYCLES = 0x02, 52 + M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, 53 + M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, 54 + M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, 55 + M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, 56 + M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, 57 + M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, 58 + M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, 59 + M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, 60 + M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, 61 + M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, 62 + M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, 63 + M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, 64 + M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, 65 + M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, 66 + M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, 67 + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, 68 + M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, 69 + M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, 70 + M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, 71 + M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, 72 + M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, 73 + M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, 74 + M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, 75 + M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, 76 + M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, 77 + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, 78 + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, 79 + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, 80 + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, 81 + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, 82 + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, 83 + 84 + /* 85 + * From this point onwards, these are not actual HW events, 86 + * but attributes that get stored in hw->config_base. 87 + */ 88 + M1_PMU_CFG_COUNT_USER = BIT(8), 89 + M1_PMU_CFG_COUNT_KERNEL = BIT(9), 90 + }; 91 + 92 + /* 93 + * Per-event affinity table. Most events can be installed on counter 94 + * 2-9, but there are a number of exceptions. Note that this table 95 + * has been created experimentally, and I wouldn't be surprised if more 96 + * counters had strange affinities. 97 + */ 98 + static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { 99 + [0 ... 
M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, 100 + [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), 101 + [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), 102 + [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), 103 + [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, 104 + [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, 105 + [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, 106 + [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, 107 + [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, 108 + [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, 109 + [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, 110 + [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, 111 + [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), 112 + [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, 113 + [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, 114 + [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), 115 + [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, 116 + [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, 117 + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), 118 + [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, 119 + [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, 120 + [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, 121 + [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, 122 + [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, 123 + [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, 124 + [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, 125 + [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, 126 + [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, 127 + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, 128 + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, 129 + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, 130 + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, 131 + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, 132 + }; 133 + 134 + static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { 135 + PERF_MAP_ALL_UNSUPPORTED, 136 + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, 137 + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, 138 + /* No idea about the rest yet */ 139 + }; 140 + 141 + /* sysfs definitions */ 142 + static ssize_t m1_pmu_events_sysfs_show(struct device *dev, 143 + struct device_attribute 
*attr, 144 + char *page) 145 + { 146 + struct perf_pmu_events_attr *pmu_attr; 147 + 148 + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); 149 + 150 + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); 151 + } 152 + 153 + #define M1_PMU_EVENT_ATTR(name, config) \ 154 + PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) 155 + 156 + static struct attribute *m1_pmu_event_attrs[] = { 157 + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), 158 + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), 159 + NULL, 160 + }; 161 + 162 + static const struct attribute_group m1_pmu_events_attr_group = { 163 + .name = "events", 164 + .attrs = m1_pmu_event_attrs, 165 + }; 166 + 167 + PMU_FORMAT_ATTR(event, "config:0-7"); 168 + 169 + static struct attribute *m1_pmu_format_attrs[] = { 170 + &format_attr_event.attr, 171 + NULL, 172 + }; 173 + 174 + static const struct attribute_group m1_pmu_format_attr_group = { 175 + .name = "format", 176 + .attrs = m1_pmu_format_attrs, 177 + }; 178 + 179 + /* Low level accessors. No synchronisation. 
*/ 180 + #define PMU_READ_COUNTER(_idx) \ 181 + case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1) 182 + 183 + #define PMU_WRITE_COUNTER(_val, _idx) \ 184 + case _idx: \ 185 + write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \ 186 + return 187 + 188 + static u64 m1_pmu_read_hw_counter(unsigned int index) 189 + { 190 + switch (index) { 191 + PMU_READ_COUNTER(0); 192 + PMU_READ_COUNTER(1); 193 + PMU_READ_COUNTER(2); 194 + PMU_READ_COUNTER(3); 195 + PMU_READ_COUNTER(4); 196 + PMU_READ_COUNTER(5); 197 + PMU_READ_COUNTER(6); 198 + PMU_READ_COUNTER(7); 199 + PMU_READ_COUNTER(8); 200 + PMU_READ_COUNTER(9); 201 + } 202 + 203 + BUG(); 204 + } 205 + 206 + static void m1_pmu_write_hw_counter(u64 val, unsigned int index) 207 + { 208 + switch (index) { 209 + PMU_WRITE_COUNTER(val, 0); 210 + PMU_WRITE_COUNTER(val, 1); 211 + PMU_WRITE_COUNTER(val, 2); 212 + PMU_WRITE_COUNTER(val, 3); 213 + PMU_WRITE_COUNTER(val, 4); 214 + PMU_WRITE_COUNTER(val, 5); 215 + PMU_WRITE_COUNTER(val, 6); 216 + PMU_WRITE_COUNTER(val, 7); 217 + PMU_WRITE_COUNTER(val, 8); 218 + PMU_WRITE_COUNTER(val, 9); 219 + } 220 + 221 + BUG(); 222 + } 223 + 224 + #define get_bit_offset(index, mask) (__ffs(mask) + (index)) 225 + 226 + static void __m1_pmu_enable_counter(unsigned int index, bool en) 227 + { 228 + u64 val, bit; 229 + 230 + switch (index) { 231 + case 0 ... 7: 232 + bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7)); 233 + break; 234 + case 8 ... 
9: 235 + bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9)); 236 + break; 237 + default: 238 + BUG(); 239 + } 240 + 241 + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); 242 + 243 + if (en) 244 + val |= bit; 245 + else 246 + val &= ~bit; 247 + 248 + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); 249 + } 250 + 251 + static void m1_pmu_enable_counter(unsigned int index) 252 + { 253 + __m1_pmu_enable_counter(index, true); 254 + } 255 + 256 + static void m1_pmu_disable_counter(unsigned int index) 257 + { 258 + __m1_pmu_enable_counter(index, false); 259 + } 260 + 261 + static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en) 262 + { 263 + u64 val, bit; 264 + 265 + switch (index) { 266 + case 0 ... 7: 267 + bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7)); 268 + break; 269 + case 8 ... 9: 270 + bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9)); 271 + break; 272 + default: 273 + BUG(); 274 + } 275 + 276 + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); 277 + 278 + if (en) 279 + val |= bit; 280 + else 281 + val &= ~bit; 282 + 283 + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); 284 + } 285 + 286 + static void m1_pmu_enable_counter_interrupt(unsigned int index) 287 + { 288 + __m1_pmu_enable_counter_interrupt(index, true); 289 + } 290 + 291 + static void m1_pmu_disable_counter_interrupt(unsigned int index) 292 + { 293 + __m1_pmu_enable_counter_interrupt(index, false); 294 + } 295 + 296 + static void m1_pmu_configure_counter(unsigned int index, u8 event, 297 + bool user, bool kernel) 298 + { 299 + u64 val, user_bit, kernel_bit; 300 + int shift; 301 + 302 + switch (index) { 303 + case 0 ... 7: 304 + user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7)); 305 + kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7)); 306 + break; 307 + case 8 ... 
9: 308 + user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9)); 309 + kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9)); 310 + break; 311 + default: 312 + BUG(); 313 + } 314 + 315 + val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1); 316 + 317 + if (user) 318 + val |= user_bit; 319 + else 320 + val &= ~user_bit; 321 + 322 + if (kernel) 323 + val |= kernel_bit; 324 + else 325 + val &= ~kernel_bit; 326 + 327 + write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1); 328 + 329 + /* 330 + * Counters 0 and 1 have fixed events. For anything else, 331 + * place the event at the expected location in the relevant 332 + * register (PMESR0 holds the event configuration for counters 333 + * 2-5, resp. PMESR1 for counters 6-9). 334 + */ 335 + switch (index) { 336 + case 0 ... 1: 337 + break; 338 + case 2 ... 5: 339 + shift = (index - 2) * 8; 340 + val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1); 341 + val &= ~((u64)0xff << shift); 342 + val |= (u64)event << shift; 343 + write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1); 344 + break; 345 + case 6 ... 
9: 346 + shift = (index - 6) * 8; 347 + val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1); 348 + val &= ~((u64)0xff << shift); 349 + val |= (u64)event << shift; 350 + write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1); 351 + break; 352 + } 353 + } 354 + 355 + /* arm_pmu backend */ 356 + static void m1_pmu_enable_event(struct perf_event *event) 357 + { 358 + bool user, kernel; 359 + u8 evt; 360 + 361 + evt = event->hw.config_base & M1_PMU_CFG_EVENT; 362 + user = event->hw.config_base & M1_PMU_CFG_COUNT_USER; 363 + kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL; 364 + 365 + m1_pmu_disable_counter_interrupt(event->hw.idx); 366 + m1_pmu_disable_counter(event->hw.idx); 367 + isb(); 368 + 369 + m1_pmu_configure_counter(event->hw.idx, evt, user, kernel); 370 + m1_pmu_enable_counter(event->hw.idx); 371 + m1_pmu_enable_counter_interrupt(event->hw.idx); 372 + isb(); 373 + } 374 + 375 + static void m1_pmu_disable_event(struct perf_event *event) 376 + { 377 + m1_pmu_disable_counter_interrupt(event->hw.idx); 378 + m1_pmu_disable_counter(event->hw.idx); 379 + isb(); 380 + } 381 + 382 + static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) 383 + { 384 + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); 385 + struct pt_regs *regs; 386 + u64 overflow, state; 387 + int idx; 388 + 389 + overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1); 390 + if (!overflow) { 391 + /* Spurious interrupt? 
*/ 392 + state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); 393 + state &= ~PMCR0_IACT; 394 + write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1); 395 + isb(); 396 + return IRQ_NONE; 397 + } 398 + 399 + cpu_pmu->stop(cpu_pmu); 400 + 401 + regs = get_irq_regs(); 402 + 403 + for (idx = 0; idx < cpu_pmu->num_events; idx++) { 404 + struct perf_event *event = cpuc->events[idx]; 405 + struct perf_sample_data data; 406 + 407 + if (!event) 408 + continue; 409 + 410 + armpmu_event_update(event); 411 + perf_sample_data_init(&data, 0, event->hw.last_period); 412 + if (!armpmu_event_set_period(event)) 413 + continue; 414 + 415 + if (perf_event_overflow(event, &data, regs)) 416 + m1_pmu_disable_event(event); 417 + } 418 + 419 + cpu_pmu->start(cpu_pmu); 420 + 421 + return IRQ_HANDLED; 422 + } 423 + 424 + static u64 m1_pmu_read_counter(struct perf_event *event) 425 + { 426 + return m1_pmu_read_hw_counter(event->hw.idx); 427 + } 428 + 429 + static void m1_pmu_write_counter(struct perf_event *event, u64 value) 430 + { 431 + m1_pmu_write_hw_counter(value, event->hw.idx); 432 + isb(); 433 + } 434 + 435 + static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc, 436 + struct perf_event *event) 437 + { 438 + unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT; 439 + unsigned long affinity = m1_pmu_event_affinity[evtype]; 440 + int idx; 441 + 442 + /* 443 + * Place the event on the first free counter that can count 444 + * this event. 445 + * 446 + * We could do a better job if we had a view of all the events 447 + * counting on the PMU at any given time, and by placing the 448 + * most constraining events first. 
449 + */ 450 + for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) { 451 + if (!test_and_set_bit(idx, cpuc->used_mask)) 452 + return idx; 453 + } 454 + 455 + return -EAGAIN; 456 + } 457 + 458 + static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc, 459 + struct perf_event *event) 460 + { 461 + clear_bit(event->hw.idx, cpuc->used_mask); 462 + } 463 + 464 + static void __m1_pmu_set_mode(u8 mode) 465 + { 466 + u64 val; 467 + 468 + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); 469 + val &= ~(PMCR0_IMODE | PMCR0_IACT); 470 + val |= FIELD_PREP(PMCR0_IMODE, mode); 471 + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); 472 + isb(); 473 + } 474 + 475 + static void m1_pmu_start(struct arm_pmu *cpu_pmu) 476 + { 477 + __m1_pmu_set_mode(PMCR0_IMODE_FIQ); 478 + } 479 + 480 + static void m1_pmu_stop(struct arm_pmu *cpu_pmu) 481 + { 482 + __m1_pmu_set_mode(PMCR0_IMODE_OFF); 483 + } 484 + 485 + static int m1_pmu_map_event(struct perf_event *event) 486 + { 487 + /* 488 + * Although the counters are 48bit wide, bit 47 is what 489 + * triggers the overflow interrupt. Advertise the counters 490 + * being 47bit wide to mimick the behaviour of the ARM PMU. 
491 + */ 492 + event->hw.flags |= ARMPMU_EVT_47BIT; 493 + return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); 494 + } 495 + 496 + static void m1_pmu_reset(void *info) 497 + { 498 + int i; 499 + 500 + __m1_pmu_set_mode(PMCR0_IMODE_OFF); 501 + 502 + for (i = 0; i < M1_PMU_NR_COUNTERS; i++) { 503 + m1_pmu_disable_counter(i); 504 + m1_pmu_disable_counter_interrupt(i); 505 + m1_pmu_write_hw_counter(0, i); 506 + } 507 + 508 + isb(); 509 + } 510 + 511 + static int m1_pmu_set_event_filter(struct hw_perf_event *event, 512 + struct perf_event_attr *attr) 513 + { 514 + unsigned long config_base = 0; 515 + 516 + if (!attr->exclude_guest) 517 + return -EINVAL; 518 + if (!attr->exclude_kernel) 519 + config_base |= M1_PMU_CFG_COUNT_KERNEL; 520 + if (!attr->exclude_user) 521 + config_base |= M1_PMU_CFG_COUNT_USER; 522 + 523 + event->config_base = config_base; 524 + 525 + return 0; 526 + } 527 + 528 + static int m1_pmu_init(struct arm_pmu *cpu_pmu) 529 + { 530 + cpu_pmu->handle_irq = m1_pmu_handle_irq; 531 + cpu_pmu->enable = m1_pmu_enable_event; 532 + cpu_pmu->disable = m1_pmu_disable_event; 533 + cpu_pmu->read_counter = m1_pmu_read_counter; 534 + cpu_pmu->write_counter = m1_pmu_write_counter; 535 + cpu_pmu->get_event_idx = m1_pmu_get_event_idx; 536 + cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx; 537 + cpu_pmu->start = m1_pmu_start; 538 + cpu_pmu->stop = m1_pmu_stop; 539 + cpu_pmu->map_event = m1_pmu_map_event; 540 + cpu_pmu->reset = m1_pmu_reset; 541 + cpu_pmu->set_event_filter = m1_pmu_set_event_filter; 542 + 543 + cpu_pmu->num_events = M1_PMU_NR_COUNTERS; 544 + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group; 545 + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group; 546 + return 0; 547 + } 548 + 549 + /* Device driver gunk */ 550 + static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu) 551 + { 552 + cpu_pmu->name = "apple_icestorm_pmu"; 553 + return m1_pmu_init(cpu_pmu); 554 + } 555 + 556 + static int 
m1_pmu_fire_init(struct arm_pmu *cpu_pmu) 557 + { 558 + cpu_pmu->name = "apple_firestorm_pmu"; 559 + return m1_pmu_init(cpu_pmu); 560 + } 561 + 562 + static const struct of_device_id m1_pmu_of_device_ids[] = { 563 + { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, 564 + { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, 565 + { }, 566 + }; 567 + MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids); 568 + 569 + static int m1_pmu_device_probe(struct platform_device *pdev) 570 + { 571 + return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL); 572 + } 573 + 574 + static struct platform_driver m1_pmu_driver = { 575 + .driver = { 576 + .name = "apple-m1-cpu-pmu", 577 + .of_match_table = m1_pmu_of_device_ids, 578 + .suppress_bind_attrs = true, 579 + }, 580 + .probe = m1_pmu_device_probe, 581 + }; 582 + 583 + module_platform_driver(m1_pmu_driver); 584 + MODULE_LICENSE("GPL v2");
+2
drivers/perf/arm_pmu.c
··· 109 109 { 110 110 if (event->hw.flags & ARMPMU_EVT_64BIT) 111 111 return GENMASK_ULL(63, 0); 112 + else if (event->hw.flags & ARMPMU_EVT_47BIT) 113 + return GENMASK_ULL(46, 0); 112 114 else 113 115 return GENMASK_ULL(31, 0); 114 116 }
+2
include/dt-bindings/interrupt-controller/apple-aic.h
··· 11 11 #define AIC_TMR_HV_VIRT 1 12 12 #define AIC_TMR_GUEST_PHYS 2 13 13 #define AIC_TMR_GUEST_VIRT 3 14 + #define AIC_CPU_PMU_E 4 15 + #define AIC_CPU_PMU_P 5 14 16 15 17 #endif
+2
include/linux/perf/arm_pmu.h
··· 26 26 */ 27 27 /* Event uses a 64bit counter */ 28 28 #define ARMPMU_EVT_64BIT 1 29 + /* Event uses a 47bit counter */ 30 + #define ARMPMU_EVT_47BIT 2 29 31 30 32 #define HW_OP_UNSUPPORTED 0xFFFF 31 33 #define C(_x) PERF_COUNT_HW_CACHE_##_x