Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf: Add Arm CMN-600 PMU driver

Initial driver for PMU event counting on the Arm CMN-600 interconnect.
CMN sports an obnoxiously complex distributed PMU system as part of
its debug and trace features, which can do all manner of things like
sampling, cross-triggering and generating CoreSight trace. This driver
covers the PMU functionality, plus the relevant aspects of watchpoints
for simply counting matching flits.

Tested-by: Tsahi Zidenberg <tsahee@amazon.com>
Tested-by: Tuan Phan <tuanphan@os.amperecomputing.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Robin Murphy and committed by
Will Deacon
0ba64770 c8fdbbfa

+1715
+65
Documentation/admin-guide/perf/arm-cmn.rst
··· 1 + ============================= 2 + Arm Coherent Mesh Network PMU 3 + ============================= 4 + 5 + CMN-600 is a configurable mesh interconnect consisting of a rectangular 6 + grid of crosspoints (XPs), with each crosspoint supporting up to two 7 + device ports to which various AMBA CHI agents are attached. 8 + 9 + CMN implements a distributed PMU design as part of its debug and trace 10 + functionality. This consists of a local monitor (DTM) at every XP, which 11 + counts up to 4 event signals from the connected device nodes and/or the 12 + XP itself. Overflow from these local counters is accumulated in up to 8 13 + global counters implemented by the main controller (DTC), which provides 14 + overall PMU control and interrupts for global counter overflow. 15 + 16 + PMU events 17 + ---------- 18 + 19 + The PMU driver registers a single PMU device for the whole interconnect, 20 + see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link 21 + more than one CMN together via external CCIX links - in this situation, 22 + each mesh counts its own events entirely independently, and additional 23 + PMU devices will be named arm_cmn_{1..n}. 24 + 25 + Most events are specified in a format based directly on the TRM 26 + definitions - "type" selects the respective node type, and "eventid" the 27 + event number. Some events require an additional occupancy ID, which is 28 + specified by "occupid". 29 + 30 + * Since RN-D nodes do not have any distinct events from RN-I nodes, they 31 + are treated as the same type (0xa), and the common event templates are 32 + named "rnid_*". 33 + 34 + * The cycle counter is treated as a synthetic event belonging to the DTC 35 + node ("type" == 0x3, "eventid" is ignored). 36 + 37 + * XP events also encode the port and channel in the "eventid" field, to 38 + match the underlying pmu_event0_id encoding for the pmu_event_sel 39 + register. The event templates are named with prefixes to cover all 40 + permutations. 
41 + 42 + By default each event provides an aggregate count over all nodes of the 43 + given type. To target a specific node, "bynodeid" must be set to 1 and 44 + "nodeid" to the appropriate value derived from the CMN configuration 45 + (as defined in the "Node ID Mapping" section of the TRM). 46 + 47 + Watchpoints 48 + ----------- 49 + 50 + The PMU can also count watchpoint events to monitor specific flit 51 + traffic. Watchpoints are treated as a synthetic event type, and like PMU 52 + events can be global or targeted with a particular XP's "nodeid" value. 53 + Since the watchpoint direction is otherwise implicit in the underlying 54 + register selection, separate events are provided for flit uploads and 55 + downloads. 56 + 57 + The flit match value and mask are passed in config1 and config2 ("wp_val" 58 + and "wp_mask" respectively). "wp_dev_sel", "wp_chn_sel", "wp_grp" and 59 + "wp_exclusive" are specified per the TRM definitions for dtm_wp_config0. 60 + Where a watchpoint needs to match fields from both match groups on the 61 + REQ or SNP channel, it can be specified as two events - one for each 62 + group - with the same nonzero "wp_combine" value. The count for such a 63 + pair of combined events will be attributed to the primary match. 64 + Watchpoint events with a "wp_combine" value of 0 are considered independent 65 + and will count individually.
+1
Documentation/admin-guide/perf/index.rst
··· 12 12 qcom_l2_pmu 13 13 qcom_l3_pmu 14 14 arm-ccn 15 + arm-cmn 15 16 xgene-pmu 16 17 arm_dsu_pmu 17 18 thunderx2-pmu
+7
drivers/perf/Kconfig
··· 41 41 PMU (perf) driver supporting the ARM CCN (Cache Coherent Network) 42 42 interconnect. 43 43 44 + config ARM_CMN 45 + tristate "Arm CMN-600 PMU support" 46 + depends on ARM64 || (COMPILE_TEST && 64BIT) 47 + help 48 + Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh 49 + Network interconnect. 50 + 44 51 config ARM_PMU 45 52 depends on ARM || ARM64 46 53 bool "ARM PMU framework"
+1
drivers/perf/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o 3 3 obj-$(CONFIG_ARM_CCN) += arm-ccn.o 4 + obj-$(CONFIG_ARM_CMN) += arm-cmn.o 4 5 obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o 5 6 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 6 7 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+1641
drivers/perf/arm-cmn.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2016-2020 Arm Limited 3 + // CMN-600 Coherent Mesh Network PMU driver 4 + 5 + #include <linux/acpi.h> 6 + #include <linux/bitfield.h> 7 + #include <linux/bitops.h> 8 + #include <linux/interrupt.h> 9 + #include <linux/io.h> 10 + #include <linux/kernel.h> 11 + #include <linux/list.h> 12 + #include <linux/module.h> 13 + #include <linux/of.h> 14 + #include <linux/perf_event.h> 15 + #include <linux/platform_device.h> 16 + #include <linux/slab.h> 17 + #include <linux/sort.h> 18 + 19 + /* Common register stuff */ 20 + #define CMN_NODE_INFO 0x0000 21 + #define CMN_NI_NODE_TYPE GENMASK_ULL(15, 0) 22 + #define CMN_NI_NODE_ID GENMASK_ULL(31, 16) 23 + #define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) 24 + 25 + #define CMN_NODEID_DEVID(reg) ((reg) & 3) 26 + #define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) 27 + #define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) 28 + #define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) 29 + 30 + #define CMN_CHILD_INFO 0x0080 31 + #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) 32 + #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) 33 + 34 + #define CMN_CHILD_NODE_ADDR GENMASK(27,0) 35 + #define CMN_CHILD_NODE_EXTERNAL BIT(31) 36 + 37 + #define CMN_ADDR_NODE_PTR GENMASK(27, 14) 38 + 39 + #define CMN_NODE_PTR_DEVID(ptr) (((ptr) >> 2) & 3) 40 + #define CMN_NODE_PTR_PID(ptr) ((ptr) & 1) 41 + #define CMN_NODE_PTR_X(ptr, bits) ((ptr) >> (6 + (bits))) 42 + #define CMN_NODE_PTR_Y(ptr, bits) (((ptr) >> 6) & ((1U << (bits)) - 1)) 43 + 44 + #define CMN_MAX_XPS (8 * 8) 45 + 46 + /* The CFG node has one other useful purpose */ 47 + #define CMN_CFGM_PERIPH_ID_2 0x0010 48 + #define CMN_CFGM_PID2_REVISION GENMASK(7, 4) 49 + 50 + /* PMU registers occupy the 3rd 4KB page of each node's 16KB space */ 51 + #define CMN_PMU_OFFSET 0x2000 52 + 53 + /* For most nodes, this is all there is */ 54 + #define CMN_PMU_EVENT_SEL 0x000 55 + #define CMN_PMU_EVENTn_ID_SHIFT(n) ((n) * 8) 56 + 57 + /* 
DTMs live in the PMU space of XP registers */ 58 + #define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) 59 + #define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) 60 + #define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) 61 + #define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) 62 + #define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) 63 + #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) 64 + #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) 65 + #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) 66 + #define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10) 67 + 68 + #define CMN_DTM_PMU_CONFIG 0x210 69 + #define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32) 70 + #define CMN__PMEVCNT0_INPUT_SEL_WP 0x00 71 + #define CMN__PMEVCNT0_INPUT_SEL_XP 0x04 72 + #define CMN__PMEVCNT0_INPUT_SEL_DEV 0x10 73 + #define CMN__PMEVCNT0_GLOBAL_NUM GENMASK_ULL(18, 16) 74 + #define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n) ((n) * 4) 75 + #define CMN__PMEVCNT_PAIRED(n) BIT(4 + (n)) 76 + #define CMN__PMEVCNT23_COMBINED BIT(2) 77 + #define CMN__PMEVCNT01_COMBINED BIT(1) 78 + #define CMN_DTM_PMU_CONFIG_PMU_EN BIT(0) 79 + 80 + #define CMN_DTM_PMEVCNT 0x220 81 + 82 + #define CMN_DTM_PMEVCNTSR 0x240 83 + 84 + #define CMN_DTM_NUM_COUNTERS 4 85 + 86 + /* The DTC node is where the magic happens */ 87 + #define CMN_DT_DTC_CTL 0x0a00 88 + #define CMN_DT_DTC_CTL_DT_EN BIT(0) 89 + 90 + /* DTC counters are paired in 64-bit registers on a 16-byte stride. 
Yuck */ 91 + #define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4) 92 + #define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n)) 93 + #define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40) 94 + 95 + #define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n)) 96 + #define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90) 97 + 98 + #define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100) 99 + #define CMN_DT_PMCR_PMU_EN BIT(0) 100 + #define CMN_DT_PMCR_CNTR_RST BIT(5) 101 + #define CMN_DT_PMCR_OVFL_INTR_EN BIT(6) 102 + 103 + #define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118) 104 + #define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120) 105 + 106 + #define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128) 107 + #define CMN_DT_PMSSR_SS_STATUS(n) BIT(n) 108 + 109 + #define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130) 110 + #define CMN_DT_PMSRR_SS_REQ BIT(0) 111 + 112 + #define CMN_DT_NUM_COUNTERS 8 113 + #define CMN_MAX_DTCS 4 114 + 115 + /* 116 + * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles, 117 + * so throwing away one bit to make overflow handling easy is no big deal. 
118 + */ 119 + #define CMN_COUNTER_INIT 0x80000000 120 + /* Similarly for the 40-bit cycle counter */ 121 + #define CMN_CC_INIT 0x8000000000ULL 122 + 123 + 124 + /* Event attributes */ 125 + #define CMN_CONFIG_TYPE GENMASK(15, 0) 126 + #define CMN_CONFIG_EVENTID GENMASK(23, 16) 127 + #define CMN_CONFIG_OCCUPID GENMASK(27, 24) 128 + #define CMN_CONFIG_BYNODEID BIT(31) 129 + #define CMN_CONFIG_NODEID GENMASK(47, 32) 130 + 131 + #define CMN_EVENT_TYPE(event) FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config) 132 + #define CMN_EVENT_EVENTID(event) FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config) 133 + #define CMN_EVENT_OCCUPID(event) FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config) 134 + #define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) 135 + #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) 136 + 137 + #define CMN_CONFIG_WP_COMBINE GENMASK(27, 24) 138 + #define CMN_CONFIG_WP_DEV_SEL BIT(48) 139 + #define CMN_CONFIG_WP_CHN_SEL GENMASK(50, 49) 140 + #define CMN_CONFIG_WP_GRP BIT(52) 141 + #define CMN_CONFIG_WP_EXCLUSIVE BIT(53) 142 + #define CMN_CONFIG1_WP_VAL GENMASK(63, 0) 143 + #define CMN_CONFIG2_WP_MASK GENMASK(63, 0) 144 + 145 + #define CMN_EVENT_WP_COMBINE(event) FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config) 146 + #define CMN_EVENT_WP_DEV_SEL(event) FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config) 147 + #define CMN_EVENT_WP_CHN_SEL(event) FIELD_GET(CMN_CONFIG_WP_CHN_SEL, (event)->attr.config) 148 + #define CMN_EVENT_WP_GRP(event) FIELD_GET(CMN_CONFIG_WP_GRP, (event)->attr.config) 149 + #define CMN_EVENT_WP_EXCLUSIVE(event) FIELD_GET(CMN_CONFIG_WP_EXCLUSIVE, (event)->attr.config) 150 + #define CMN_EVENT_WP_VAL(event) FIELD_GET(CMN_CONFIG1_WP_VAL, (event)->attr.config1) 151 + #define CMN_EVENT_WP_MASK(event) FIELD_GET(CMN_CONFIG2_WP_MASK, (event)->attr.config2) 152 + 153 + /* Made-up event IDs for watchpoint direction */ 154 + #define CMN_WP_UP 0 155 + #define CMN_WP_DOWN 2 156 
+ 157 + 158 + /* r0px probably don't exist in silicon, thankfully */ 159 + enum cmn_revision { 160 + CMN600_R1P0, 161 + CMN600_R1P1, 162 + CMN600_R1P2, 163 + CMN600_R1P3, 164 + CMN600_R2P0, 165 + CMN600_R3P0, 166 + }; 167 + 168 + enum cmn_node_type { 169 + CMN_TYPE_INVALID, 170 + CMN_TYPE_DVM, 171 + CMN_TYPE_CFG, 172 + CMN_TYPE_DTC, 173 + CMN_TYPE_HNI, 174 + CMN_TYPE_HNF, 175 + CMN_TYPE_XP, 176 + CMN_TYPE_SBSX, 177 + CMN_TYPE_RNI = 0xa, 178 + CMN_TYPE_RND = 0xd, 179 + CMN_TYPE_RNSAM = 0xf, 180 + CMN_TYPE_CXRA = 0x100, 181 + CMN_TYPE_CXHA = 0x101, 182 + CMN_TYPE_CXLA = 0x102, 183 + /* Not a real node type */ 184 + CMN_TYPE_WP = 0x7770 185 + }; 186 + 187 + struct arm_cmn_node { 188 + void __iomem *pmu_base; 189 + u16 id, logid; 190 + enum cmn_node_type type; 191 + 192 + union { 193 + /* Device node */ 194 + struct { 195 + int to_xp; 196 + /* DN/HN-F/CXHA */ 197 + unsigned int occupid_val; 198 + unsigned int occupid_count; 199 + }; 200 + /* XP */ 201 + struct { 202 + int dtc; 203 + u32 pmu_config_low; 204 + union { 205 + u8 input_sel[4]; 206 + __le32 pmu_config_high; 207 + }; 208 + s8 wp_event[4]; 209 + }; 210 + }; 211 + 212 + union { 213 + u8 event[4]; 214 + __le32 event_sel; 215 + }; 216 + }; 217 + 218 + struct arm_cmn_dtc { 219 + void __iomem *base; 220 + unsigned int irq; 221 + int irq_friend; 222 + bool cc_active; 223 + 224 + struct perf_event *counters[CMN_DT_NUM_COUNTERS]; 225 + struct perf_event *cycles; 226 + }; 227 + 228 + #define CMN_STATE_DISABLED BIT(0) 229 + #define CMN_STATE_TXN BIT(1) 230 + 231 + struct arm_cmn { 232 + struct device *dev; 233 + void __iomem *base; 234 + 235 + enum cmn_revision rev; 236 + u8 mesh_x; 237 + u8 mesh_y; 238 + u16 num_xps; 239 + u16 num_dns; 240 + struct arm_cmn_node *xps; 241 + struct arm_cmn_node *dns; 242 + 243 + struct arm_cmn_dtc *dtc; 244 + unsigned int num_dtcs; 245 + 246 + int cpu; 247 + struct hlist_node cpuhp_node; 248 + 249 + unsigned int state; 250 + struct pmu pmu; 251 + }; 252 + 253 + #define to_cmn(p) 
container_of(p, struct arm_cmn, pmu) 254 + 255 + static int arm_cmn_hp_state; 256 + 257 + struct arm_cmn_hw_event { 258 + struct arm_cmn_node *dn; 259 + u64 dtm_idx[2]; 260 + unsigned int dtc_idx; 261 + u8 dtcs_used; 262 + u8 num_dns; 263 + }; 264 + 265 + #define for_each_hw_dn(hw, dn, i) \ 266 + for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) 267 + 268 + static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) 269 + { 270 + BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); 271 + return (struct arm_cmn_hw_event *)&event->hw; 272 + } 273 + 274 + static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val) 275 + { 276 + x[pos / 32] |= (u64)val << ((pos % 32) * 2); 277 + } 278 + 279 + static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) 280 + { 281 + return (x[pos / 32] >> ((pos % 32) * 2)) & 3; 282 + } 283 + 284 + struct arm_cmn_event_attr { 285 + struct device_attribute attr; 286 + enum cmn_node_type type; 287 + u8 eventid; 288 + u8 occupid; 289 + }; 290 + 291 + struct arm_cmn_format_attr { 292 + struct device_attribute attr; 293 + u64 field; 294 + int config; 295 + }; 296 + 297 + static int arm_cmn_xyidbits(const struct arm_cmn *cmn) 298 + { 299 + return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2; 300 + } 301 + 302 + static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, 303 + struct arm_cmn_node *dn) 304 + { 305 + int bits = arm_cmn_xyidbits(cmn); 306 + int x = CMN_NODEID_X(dn->id, bits); 307 + int y = CMN_NODEID_Y(dn->id, bits); 308 + int xp_idx = cmn->mesh_x * y + x; 309 + 310 + dn->to_xp = (cmn->xps + xp_idx) - dn; 311 + } 312 + 313 + static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) 314 + { 315 + return dn->type == CMN_TYPE_XP ? 
dn : dn + dn->to_xp; 316 + } 317 + 318 + static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, 319 + enum cmn_node_type type) 320 + { 321 + int i; 322 + 323 + for (i = 0; i < cmn->num_dns; i++) 324 + if (cmn->dns[i].type == type) 325 + return &cmn->dns[i]; 326 + return NULL; 327 + } 328 + 329 + #define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \ 330 + (&((struct arm_cmn_event_attr[]) {{ \ 331 + .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \ 332 + .type = _type, \ 333 + .eventid = _eventid, \ 334 + .occupid = _occupid, \ 335 + }})[0].attr.attr) 336 + 337 + static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id) 338 + { 339 + return (type == CMN_TYPE_DVM && id == 0x05) || 340 + (type == CMN_TYPE_HNF && id == 0x0f); 341 + } 342 + 343 + static ssize_t arm_cmn_event_show(struct device *dev, 344 + struct device_attribute *attr, char *buf) 345 + { 346 + struct arm_cmn_event_attr *eattr; 347 + 348 + eattr = container_of(attr, typeof(*eattr), attr); 349 + 350 + if (eattr->type == CMN_TYPE_DTC) 351 + return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type); 352 + 353 + if (eattr->type == CMN_TYPE_WP) 354 + return snprintf(buf, PAGE_SIZE, 355 + "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", 356 + eattr->type, eattr->eventid); 357 + 358 + if (arm_cmn_is_occup_event(eattr->type, eattr->eventid)) 359 + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n", 360 + eattr->type, eattr->eventid, eattr->occupid); 361 + 362 + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n", 363 + eattr->type, eattr->eventid); 364 + } 365 + 366 + static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, 367 + struct attribute *attr, 368 + int unused) 369 + { 370 + struct device *dev = kobj_to_dev(kobj); 371 + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); 372 + struct arm_cmn_event_attr *eattr; 373 + enum cmn_node_type type; 374 + 375 + eattr = container_of(attr, 
typeof(*eattr), attr.attr); 376 + type = eattr->type; 377 + 378 + /* Watchpoints aren't nodes */ 379 + if (type == CMN_TYPE_WP) 380 + type = CMN_TYPE_XP; 381 + 382 + /* Revision-specific differences */ 383 + if (cmn->rev < CMN600_R1P2) { 384 + if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b) 385 + return 0; 386 + } 387 + 388 + if (!arm_cmn_node(cmn, type)) 389 + return 0; 390 + 391 + return attr->mode; 392 + } 393 + 394 + #define _CMN_EVENT_DVM(_name, _event, _occup) \ 395 + CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup) 396 + #define CMN_EVENT_DTC(_name) \ 397 + CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0) 398 + #define _CMN_EVENT_HNF(_name, _event, _occup) \ 399 + CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup) 400 + #define CMN_EVENT_HNI(_name, _event) \ 401 + CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0) 402 + #define __CMN_EVENT_XP(_name, _event) \ 403 + CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0) 404 + #define CMN_EVENT_SBSX(_name, _event) \ 405 + CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0) 406 + #define CMN_EVENT_RNID(_name, _event) \ 407 + CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0) 408 + 409 + #define CMN_EVENT_DVM(_name, _event) \ 410 + _CMN_EVENT_DVM(_name, _event, 0) 411 + #define CMN_EVENT_HNF(_name, _event) \ 412 + _CMN_EVENT_HNF(_name, _event, 0) 413 + #define _CMN_EVENT_XP(_name, _event) \ 414 + __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ 415 + __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ 416 + __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ 417 + __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ 418 + __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ 419 + __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)) 420 + 421 + /* Good thing there are only 3 fundamental XP events... 
*/ 422 + #define CMN_EVENT_XP(_name, _event) \ 423 + _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ 424 + _CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)), \ 425 + _CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)), \ 426 + _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)) 427 + 428 + 429 + static struct attribute *arm_cmn_event_attrs[] = { 430 + CMN_EVENT_DTC(cycles), 431 + 432 + /* 433 + * DVM node events conflict with HN-I events in the equivalent PMU 434 + * slot, but our lazy short-cut of using the DTM counter index for 435 + * the PMU index as well happens to avoid that by construction. 436 + */ 437 + CMN_EVENT_DVM(rxreq_dvmop, 0x01), 438 + CMN_EVENT_DVM(rxreq_dvmsync, 0x02), 439 + CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03), 440 + CMN_EVENT_DVM(rxreq_retried, 0x04), 441 + _CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0), 442 + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1), 443 + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2), 444 + 445 + CMN_EVENT_HNF(cache_miss, 0x01), 446 + CMN_EVENT_HNF(slc_sf_cache_access, 0x02), 447 + CMN_EVENT_HNF(cache_fill, 0x03), 448 + CMN_EVENT_HNF(pocq_retry, 0x04), 449 + CMN_EVENT_HNF(pocq_reqs_recvd, 0x05), 450 + CMN_EVENT_HNF(sf_hit, 0x06), 451 + CMN_EVENT_HNF(sf_evictions, 0x07), 452 + CMN_EVENT_HNF(dir_snoops_sent, 0x08), 453 + CMN_EVENT_HNF(brd_snoops_sent, 0x09), 454 + CMN_EVENT_HNF(slc_eviction, 0x0a), 455 + CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b), 456 + CMN_EVENT_HNF(mc_retries, 0x0c), 457 + CMN_EVENT_HNF(mc_reqs, 0x0d), 458 + CMN_EVENT_HNF(qos_hh_retry, 0x0e), 459 + _CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0), 460 + _CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1), 461 + _CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2), 462 + _CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3), 463 + _CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4), 464 + CMN_EVENT_HNF(pocq_addrhaz, 0x10), 465 + CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11), 466 + CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12), 467 + CMN_EVENT_HNF(cmp_adq_full, 
0x13), 468 + CMN_EVENT_HNF(txdat_stall, 0x14), 469 + CMN_EVENT_HNF(txrsp_stall, 0x15), 470 + CMN_EVENT_HNF(seq_full, 0x16), 471 + CMN_EVENT_HNF(seq_hit, 0x17), 472 + CMN_EVENT_HNF(snp_sent, 0x18), 473 + CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19), 474 + CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a), 475 + CMN_EVENT_HNF(snp_sent_untrk, 0x1b), 476 + CMN_EVENT_HNF(intv_dirty, 0x1c), 477 + CMN_EVENT_HNF(stash_snp_sent, 0x1d), 478 + CMN_EVENT_HNF(stash_data_pull, 0x1e), 479 + CMN_EVENT_HNF(snp_fwded, 0x1f), 480 + 481 + CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), 482 + CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), 483 + CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22), 484 + CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23), 485 + CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24), 486 + CMN_EVENT_HNI(rrt_rd_alloc, 0x25), 487 + CMN_EVENT_HNI(rrt_wr_alloc, 0x26), 488 + CMN_EVENT_HNI(rdt_rd_alloc, 0x27), 489 + CMN_EVENT_HNI(rdt_wr_alloc, 0x28), 490 + CMN_EVENT_HNI(wdb_alloc, 0x29), 491 + CMN_EVENT_HNI(txrsp_retryack, 0x2a), 492 + CMN_EVENT_HNI(arvalid_no_arready, 0x2b), 493 + CMN_EVENT_HNI(arready_no_arvalid, 0x2c), 494 + CMN_EVENT_HNI(awvalid_no_awready, 0x2d), 495 + CMN_EVENT_HNI(awready_no_awvalid, 0x2e), 496 + CMN_EVENT_HNI(wvalid_no_wready, 0x2f), 497 + CMN_EVENT_HNI(txdat_stall, 0x30), 498 + CMN_EVENT_HNI(nonpcie_serialization, 0x31), 499 + CMN_EVENT_HNI(pcie_serialization, 0x32), 500 + 501 + CMN_EVENT_XP(txflit_valid, 0x01), 502 + CMN_EVENT_XP(txflit_stall, 0x02), 503 + CMN_EVENT_XP(partial_dat_flit, 0x03), 504 + /* We treat watchpoints as a special made-up class of XP events */ 505 + CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0), 506 + CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0), 507 + 508 + CMN_EVENT_SBSX(rd_req, 0x01), 509 + CMN_EVENT_SBSX(wr_req, 0x02), 510 + CMN_EVENT_SBSX(cmo_req, 0x03), 511 + CMN_EVENT_SBSX(txrsp_retryack, 0x04), 512 + CMN_EVENT_SBSX(txdat_flitv, 0x05), 513 + CMN_EVENT_SBSX(txrsp_flitv, 0x06), 514 + CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11), 515 + 
CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12), 516 + CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13), 517 + CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14), 518 + CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15), 519 + CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16), 520 + CMN_EVENT_SBSX(arvalid_no_arready, 0x21), 521 + CMN_EVENT_SBSX(awvalid_no_awready, 0x22), 522 + CMN_EVENT_SBSX(wvalid_no_wready, 0x23), 523 + CMN_EVENT_SBSX(txdat_stall, 0x24), 524 + CMN_EVENT_SBSX(txrsp_stall, 0x25), 525 + 526 + CMN_EVENT_RNID(s0_rdata_beats, 0x01), 527 + CMN_EVENT_RNID(s1_rdata_beats, 0x02), 528 + CMN_EVENT_RNID(s2_rdata_beats, 0x03), 529 + CMN_EVENT_RNID(rxdat_flits, 0x04), 530 + CMN_EVENT_RNID(txdat_flits, 0x05), 531 + CMN_EVENT_RNID(txreq_flits_total, 0x06), 532 + CMN_EVENT_RNID(txreq_flits_retried, 0x07), 533 + CMN_EVENT_RNID(rrt_occ_ovfl, 0x08), 534 + CMN_EVENT_RNID(wrt_occ_ovfl, 0x09), 535 + CMN_EVENT_RNID(txreq_flits_replayed, 0x0a), 536 + CMN_EVENT_RNID(wrcancel_sent, 0x0b), 537 + CMN_EVENT_RNID(s0_wdata_beats, 0x0c), 538 + CMN_EVENT_RNID(s1_wdata_beats, 0x0d), 539 + CMN_EVENT_RNID(s2_wdata_beats, 0x0e), 540 + CMN_EVENT_RNID(rrt_alloc, 0x0f), 541 + CMN_EVENT_RNID(wrt_alloc, 0x10), 542 + CMN_EVENT_RNID(rdb_unord, 0x11), 543 + CMN_EVENT_RNID(rdb_replay, 0x12), 544 + CMN_EVENT_RNID(rdb_hybrid, 0x13), 545 + CMN_EVENT_RNID(rdb_ord, 0x14), 546 + 547 + NULL 548 + }; 549 + 550 + static const struct attribute_group arm_cmn_event_attrs_group = { 551 + .name = "events", 552 + .attrs = arm_cmn_event_attrs, 553 + .is_visible = arm_cmn_event_attr_is_visible, 554 + }; 555 + 556 + static ssize_t arm_cmn_format_show(struct device *dev, 557 + struct device_attribute *attr, char *buf) 558 + { 559 + struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr); 560 + int lo = __ffs(fmt->field), hi = __fls(fmt->field); 561 + 562 + if (lo == hi) 563 + return snprintf(buf, PAGE_SIZE, "config:%d\n", lo); 564 + 565 + if (!fmt->config) 566 + return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, 
hi); 567 + 568 + return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi); 569 + } 570 + 571 + #define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \ 572 + (&((struct arm_cmn_format_attr[]) {{ \ 573 + .attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL), \ 574 + .config = _cfg, \ 575 + .field = _fld, \ 576 + }})[0].attr.attr) 577 + #define CMN_FORMAT_ATTR(_name, _fld) _CMN_FORMAT_ATTR(_name, 0, _fld) 578 + 579 + static struct attribute *arm_cmn_format_attrs[] = { 580 + CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE), 581 + CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID), 582 + CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID), 583 + CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID), 584 + CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID), 585 + 586 + CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL), 587 + CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL), 588 + CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP), 589 + CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE), 590 + CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE), 591 + 592 + _CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL), 593 + _CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK), 594 + 595 + NULL 596 + }; 597 + 598 + static const struct attribute_group arm_cmn_format_attrs_group = { 599 + .name = "format", 600 + .attrs = arm_cmn_format_attrs, 601 + }; 602 + 603 + static ssize_t arm_cmn_cpumask_show(struct device *dev, 604 + struct device_attribute *attr, char *buf) 605 + { 606 + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); 607 + 608 + return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu)); 609 + } 610 + 611 + static struct device_attribute arm_cmn_cpumask_attr = 612 + __ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL); 613 + 614 + static struct attribute *arm_cmn_cpumask_attrs[] = { 615 + &arm_cmn_cpumask_attr.attr, 616 + NULL, 617 + }; 618 + 619 + static struct attribute_group arm_cmn_cpumask_attr_group = { 620 + .attrs = arm_cmn_cpumask_attrs, 621 + }; 622 + 623 + static const struct attribute_group 
*arm_cmn_attr_groups[] = { 624 + &arm_cmn_event_attrs_group, 625 + &arm_cmn_format_attrs_group, 626 + &arm_cmn_cpumask_attr_group, 627 + NULL 628 + }; 629 + 630 + static int arm_cmn_wp_idx(struct perf_event *event) 631 + { 632 + return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event); 633 + } 634 + 635 + static u32 arm_cmn_wp_config(struct perf_event *event) 636 + { 637 + u32 config; 638 + u32 dev = CMN_EVENT_WP_DEV_SEL(event); 639 + u32 chn = CMN_EVENT_WP_CHN_SEL(event); 640 + u32 grp = CMN_EVENT_WP_GRP(event); 641 + u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); 642 + u32 combine = CMN_EVENT_WP_COMBINE(event); 643 + 644 + config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | 645 + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | 646 + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | 647 + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc); 648 + if (combine && !grp) 649 + config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; 650 + 651 + return config; 652 + } 653 + 654 + static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state) 655 + { 656 + if (!cmn->state) 657 + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR); 658 + cmn->state |= state; 659 + } 660 + 661 + static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state) 662 + { 663 + cmn->state &= ~state; 664 + if (!cmn->state) 665 + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, 666 + cmn->dtc[0].base + CMN_DT_PMCR); 667 + } 668 + 669 + static void arm_cmn_pmu_enable(struct pmu *pmu) 670 + { 671 + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_DISABLED); 672 + } 673 + 674 + static void arm_cmn_pmu_disable(struct pmu *pmu) 675 + { 676 + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_DISABLED); 677 + } 678 + 679 + static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, 680 + bool snapshot) 681 + { 682 + struct arm_cmn_node *dn; 683 + unsigned int i, offset; 684 + u64 count = 0; 685 + 686 + offset = snapshot ? 
CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; 687 + for_each_hw_dn(hw, dn, i) { 688 + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); 689 + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); 690 + u64 reg = readq_relaxed(xp->pmu_base + offset); 691 + u16 dtm_count = reg >> (dtm_idx * 16); 692 + 693 + count += dtm_count; 694 + } 695 + return count; 696 + } 697 + 698 + static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc) 699 + { 700 + u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR); 701 + 702 + writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR); 703 + return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1); 704 + } 705 + 706 + static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx) 707 + { 708 + u32 val, pmevcnt = CMN_DT_PMEVCNT(idx); 709 + 710 + val = readl_relaxed(dtc->base + pmevcnt); 711 + writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt); 712 + return val - CMN_COUNTER_INIT; 713 + } 714 + 715 + static void arm_cmn_init_counter(struct perf_event *event) 716 + { 717 + struct arm_cmn *cmn = to_cmn(event->pmu); 718 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 719 + unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx); 720 + u64 count; 721 + 722 + for (i = 0; hw->dtcs_used & (1U << i); i++) { 723 + writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt); 724 + cmn->dtc[i].counters[hw->dtc_idx] = event; 725 + } 726 + 727 + count = arm_cmn_read_dtm(cmn, hw, false); 728 + local64_set(&event->hw.prev_count, count); 729 + } 730 + 731 + static void arm_cmn_event_read(struct perf_event *event) 732 + { 733 + struct arm_cmn *cmn = to_cmn(event->pmu); 734 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 735 + u64 delta, new, prev; 736 + unsigned long flags; 737 + unsigned int i; 738 + 739 + if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) { 740 + i = __ffs(hw->dtcs_used); 741 + delta = arm_cmn_read_cc(cmn->dtc + i); 742 + local64_add(delta, &event->count); 743 + return; 744 + } 745 + new = arm_cmn_read_dtm(cmn, hw, false); 746 + prev = 
local64_xchg(&event->hw.prev_count, new); 747 + 748 + delta = new - prev; 749 + 750 + local_irq_save(flags); 751 + for (i = 0; hw->dtcs_used & (1U << i); i++) { 752 + new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx); 753 + delta += new << 16; 754 + } 755 + local_irq_restore(flags); 756 + local64_add(delta, &event->count); 757 + } 758 + 759 + static void arm_cmn_event_start(struct perf_event *event, int flags) 760 + { 761 + struct arm_cmn *cmn = to_cmn(event->pmu); 762 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 763 + struct arm_cmn_node *dn; 764 + enum cmn_node_type type = CMN_EVENT_TYPE(event); 765 + int i; 766 + 767 + if (type == CMN_TYPE_DTC) { 768 + i = __ffs(hw->dtcs_used); 769 + writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); 770 + cmn->dtc[i].cc_active = true; 771 + } else if (type == CMN_TYPE_WP) { 772 + int wp_idx = arm_cmn_wp_idx(event); 773 + u64 val = CMN_EVENT_WP_VAL(event); 774 + u64 mask = CMN_EVENT_WP_MASK(event); 775 + 776 + for_each_hw_dn(hw, dn, i) { 777 + writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); 778 + writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); 779 + } 780 + } else for_each_hw_dn(hw, dn, i) { 781 + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); 782 + 783 + dn->event[dtm_idx] = CMN_EVENT_EVENTID(event); 784 + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); 785 + } 786 + } 787 + 788 + static void arm_cmn_event_stop(struct perf_event *event, int flags) 789 + { 790 + struct arm_cmn *cmn = to_cmn(event->pmu); 791 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 792 + struct arm_cmn_node *dn; 793 + enum cmn_node_type type = CMN_EVENT_TYPE(event); 794 + int i; 795 + 796 + if (type == CMN_TYPE_DTC) { 797 + i = __ffs(hw->dtcs_used); 798 + cmn->dtc[i].cc_active = false; 799 + } else if (type == CMN_TYPE_WP) { 800 + int wp_idx = arm_cmn_wp_idx(event); 801 + 802 + for_each_hw_dn(hw, dn, i) { 803 + writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); 804 
+ writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); 805 + } 806 + } else for_each_hw_dn(hw, dn, i) { 807 + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); 808 + 809 + dn->event[dtm_idx] = 0; 810 + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); 811 + } 812 + 813 + arm_cmn_event_read(event); 814 + } 815 + 816 + struct arm_cmn_val { 817 + u8 dtm_count[CMN_MAX_XPS]; 818 + u8 occupid[CMN_MAX_XPS]; 819 + u8 wp[CMN_MAX_XPS][4]; 820 + int dtc_count; 821 + bool cycles; 822 + }; 823 + 824 + static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event) 825 + { 826 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 827 + struct arm_cmn_node *dn; 828 + enum cmn_node_type type; 829 + int i; 830 + u8 occupid; 831 + 832 + if (is_software_event(event)) 833 + return; 834 + 835 + type = CMN_EVENT_TYPE(event); 836 + if (type == CMN_TYPE_DTC) { 837 + val->cycles = true; 838 + return; 839 + } 840 + 841 + val->dtc_count++; 842 + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) 843 + occupid = CMN_EVENT_OCCUPID(event) + 1; 844 + else 845 + occupid = 0; 846 + 847 + for_each_hw_dn(hw, dn, i) { 848 + int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid; 849 + 850 + val->dtm_count[xp]++; 851 + val->occupid[xp] = occupid; 852 + 853 + if (type != CMN_TYPE_WP) 854 + continue; 855 + 856 + wp_idx = arm_cmn_wp_idx(event); 857 + val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; 858 + } 859 + } 860 + 861 + static int arm_cmn_validate_group(struct perf_event *event) 862 + { 863 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 864 + struct arm_cmn_node *dn; 865 + struct perf_event *sibling, *leader = event->group_leader; 866 + enum cmn_node_type type; 867 + struct arm_cmn_val val; 868 + int i; 869 + u8 occupid; 870 + 871 + if (leader == event) 872 + return 0; 873 + 874 + if (event->pmu != leader->pmu && !is_software_event(leader)) 875 + return -EINVAL; 876 + 877 + memset(&val, 0, sizeof(val)); 878 + 879 + 
arm_cmn_val_add_event(&val, leader); 880 + for_each_sibling_event(sibling, leader) 881 + arm_cmn_val_add_event(&val, sibling); 882 + 883 + type = CMN_EVENT_TYPE(event); 884 + if (type == CMN_TYPE_DTC) 885 + return val.cycles ? -EINVAL : 0; 886 + 887 + if (val.dtc_count == CMN_DT_NUM_COUNTERS) 888 + return -EINVAL; 889 + 890 + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) 891 + occupid = CMN_EVENT_OCCUPID(event) + 1; 892 + else 893 + occupid = 0; 894 + 895 + for_each_hw_dn(hw, dn, i) { 896 + int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid; 897 + 898 + if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS) 899 + return -EINVAL; 900 + 901 + if (occupid && val.occupid[xp] && occupid != val.occupid[xp]) 902 + return -EINVAL; 903 + 904 + if (type != CMN_TYPE_WP) 905 + continue; 906 + 907 + wp_idx = arm_cmn_wp_idx(event); 908 + if (val.wp[xp][wp_idx]) 909 + return -EINVAL; 910 + 911 + wp_cmb = val.wp[xp][wp_idx ^ 1]; 912 + if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) 913 + return -EINVAL; 914 + } 915 + 916 + return 0; 917 + } 918 + 919 + static int arm_cmn_event_init(struct perf_event *event) 920 + { 921 + struct arm_cmn *cmn = to_cmn(event->pmu); 922 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 923 + enum cmn_node_type type; 924 + unsigned int i; 925 + bool bynodeid; 926 + u16 nodeid, eventid; 927 + 928 + if (event->attr.type != event->pmu->type) 929 + return -ENOENT; 930 + 931 + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) 932 + return -EINVAL; 933 + 934 + event->cpu = cmn->cpu; 935 + if (event->cpu < 0) 936 + return -EINVAL; 937 + 938 + type = CMN_EVENT_TYPE(event); 939 + /* DTC events (i.e. 
cycles) already have everything they need */ 940 + if (type == CMN_TYPE_DTC) 941 + return 0; 942 + 943 + /* For watchpoints we need the actual XP node here */ 944 + if (type == CMN_TYPE_WP) { 945 + type = CMN_TYPE_XP; 946 + /* ...and we need a "real" direction */ 947 + eventid = CMN_EVENT_EVENTID(event); 948 + if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN) 949 + return -EINVAL; 950 + } 951 + 952 + bynodeid = CMN_EVENT_BYNODEID(event); 953 + nodeid = CMN_EVENT_NODEID(event); 954 + 955 + hw->dn = arm_cmn_node(cmn, type); 956 + for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) { 957 + if (!bynodeid) { 958 + hw->num_dns++; 959 + } else if (cmn->dns[i].id != nodeid) { 960 + hw->dn++; 961 + } else { 962 + hw->num_dns = 1; 963 + break; 964 + } 965 + } 966 + 967 + if (!hw->num_dns) { 968 + int bits = arm_cmn_xyidbits(cmn); 969 + 970 + dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", 971 + nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits), 972 + CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type); 973 + return -EINVAL; 974 + } 975 + /* 976 + * By assuming events count in all DTC domains, we cunningly avoid 977 + * needing to know anything about how XPs are assigned to domains. 
978 + */ 979 + hw->dtcs_used = (1U << cmn->num_dtcs) - 1; 980 + 981 + return arm_cmn_validate_group(event); 982 + } 983 + 984 + static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, 985 + int i) 986 + { 987 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 988 + enum cmn_node_type type = CMN_EVENT_TYPE(event); 989 + 990 + while (i--) { 991 + struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i); 992 + unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); 993 + 994 + if (type == CMN_TYPE_WP) 995 + hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1; 996 + 997 + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) 998 + hw->dn[i].occupid_count--; 999 + 1000 + xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); 1001 + writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG); 1002 + } 1003 + memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); 1004 + 1005 + for (i = 0; hw->dtcs_used & (1U << i); i++) 1006 + cmn->dtc[i].counters[hw->dtc_idx] = NULL; 1007 + } 1008 + 1009 + static int arm_cmn_event_add(struct perf_event *event, int flags) 1010 + { 1011 + struct arm_cmn *cmn = to_cmn(event->pmu); 1012 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 1013 + struct arm_cmn_dtc *dtc = &cmn->dtc[0]; 1014 + struct arm_cmn_node *dn; 1015 + enum cmn_node_type type = CMN_EVENT_TYPE(event); 1016 + unsigned int i, dtc_idx, input_sel; 1017 + 1018 + if (type == CMN_TYPE_DTC) { 1019 + i = 0; 1020 + while (cmn->dtc[i].cycles) 1021 + if (++i == cmn->num_dtcs) 1022 + return -ENOSPC; 1023 + 1024 + cmn->dtc[i].cycles = event; 1025 + hw->dtc_idx = CMN_DT_NUM_COUNTERS; 1026 + hw->dtcs_used = 1U << i; 1027 + 1028 + if (flags & PERF_EF_START) 1029 + arm_cmn_event_start(event, 0); 1030 + return 0; 1031 + } 1032 + 1033 + /* Grab a free global counter first... 
*/ 1034 + dtc_idx = 0; 1035 + while (dtc->counters[dtc_idx]) 1036 + if (++dtc_idx == CMN_DT_NUM_COUNTERS) 1037 + return -ENOSPC; 1038 + 1039 + hw->dtc_idx = dtc_idx; 1040 + 1041 + /* ...then the local counters to feed it. */ 1042 + for_each_hw_dn(hw, dn, i) { 1043 + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); 1044 + unsigned int dtm_idx, shift; 1045 + u64 reg; 1046 + 1047 + dtm_idx = 0; 1048 + while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)) 1049 + if (++dtm_idx == CMN_DTM_NUM_COUNTERS) 1050 + goto free_dtms; 1051 + 1052 + if (type == CMN_TYPE_XP) { 1053 + input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; 1054 + } else if (type == CMN_TYPE_WP) { 1055 + int tmp, wp_idx = arm_cmn_wp_idx(event); 1056 + u32 cfg = arm_cmn_wp_config(event); 1057 + 1058 + if (dn->wp_event[wp_idx] >= 0) 1059 + goto free_dtms; 1060 + 1061 + tmp = dn->wp_event[wp_idx ^ 1]; 1062 + if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != 1063 + CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) 1064 + goto free_dtms; 1065 + 1066 + input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; 1067 + dn->wp_event[wp_idx] = dtc_idx; 1068 + writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx)); 1069 + } else { 1070 + unsigned int port = CMN_NODEID_PID(dn->id); 1071 + unsigned int dev = CMN_NODEID_DEVID(dn->id); 1072 + 1073 + input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx + 1074 + (port << 4) + (dev << 2); 1075 + 1076 + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { 1077 + int occupid = CMN_EVENT_OCCUPID(event); 1078 + 1079 + if (dn->occupid_count == 0) { 1080 + dn->occupid_val = occupid; 1081 + writel_relaxed(occupid, 1082 + dn->pmu_base + CMN_PMU_EVENT_SEL + 4); 1083 + } else if (dn->occupid_val != occupid) { 1084 + goto free_dtms; 1085 + } 1086 + dn->occupid_count++; 1087 + } 1088 + } 1089 + 1090 + arm_cmn_set_index(hw->dtm_idx, i, dtm_idx); 1091 + 1092 + xp->input_sel[dtm_idx] = input_sel; 1093 + shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); 1094 + xp->pmu_config_low &= 
~(CMN__PMEVCNT0_GLOBAL_NUM << shift); 1095 + xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; 1096 + xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); 1097 + reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low; 1098 + writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG); 1099 + } 1100 + 1101 + /* Go go go! */ 1102 + arm_cmn_init_counter(event); 1103 + 1104 + if (flags & PERF_EF_START) 1105 + arm_cmn_event_start(event, 0); 1106 + 1107 + return 0; 1108 + 1109 + free_dtms: 1110 + arm_cmn_event_clear(cmn, event, i); 1111 + return -ENOSPC; 1112 + } 1113 + 1114 + static void arm_cmn_event_del(struct perf_event *event, int flags) 1115 + { 1116 + struct arm_cmn *cmn = to_cmn(event->pmu); 1117 + struct arm_cmn_hw_event *hw = to_cmn_hw(event); 1118 + enum cmn_node_type type = CMN_EVENT_TYPE(event); 1119 + 1120 + arm_cmn_event_stop(event, PERF_EF_UPDATE); 1121 + 1122 + if (type == CMN_TYPE_DTC) 1123 + cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL; 1124 + else 1125 + arm_cmn_event_clear(cmn, event, hw->num_dns); 1126 + } 1127 + 1128 + /* 1129 + * We stop the PMU for both add and read, to avoid skew across DTM counters. 
1130 + * In theory we could use snapshots to read without stopping, but then it 1131 + * becomes a lot trickier to deal with overlow and racing against interrupts, 1132 + * plus it seems they don't work properly on some hardware anyway :( 1133 + */ 1134 + static void arm_cmn_start_txn(struct pmu *pmu, unsigned int flags) 1135 + { 1136 + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_TXN); 1137 + } 1138 + 1139 + static void arm_cmn_end_txn(struct pmu *pmu) 1140 + { 1141 + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_TXN); 1142 + } 1143 + 1144 + static int arm_cmn_commit_txn(struct pmu *pmu) 1145 + { 1146 + arm_cmn_end_txn(pmu); 1147 + return 0; 1148 + } 1149 + 1150 + static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) 1151 + { 1152 + struct arm_cmn *cmn; 1153 + unsigned int target; 1154 + 1155 + cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); 1156 + if (cpu != cmn->cpu) 1157 + return 0; 1158 + 1159 + target = cpumask_any_but(cpu_online_mask, cpu); 1160 + if (target >= nr_cpu_ids) 1161 + return 0; 1162 + 1163 + perf_pmu_migrate_context(&cmn->pmu, cpu, target); 1164 + cmn->cpu = target; 1165 + return 0; 1166 + } 1167 + 1168 + static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) 1169 + { 1170 + struct arm_cmn_dtc *dtc = dev_id; 1171 + irqreturn_t ret = IRQ_NONE; 1172 + 1173 + for (;;) { 1174 + u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR); 1175 + u64 delta; 1176 + int i; 1177 + 1178 + for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) { 1179 + if (status & (1U << i)) { 1180 + ret = IRQ_HANDLED; 1181 + if (WARN_ON(!dtc->counters[i])) 1182 + continue; 1183 + delta = (u64)arm_cmn_read_counter(dtc, i) << 16; 1184 + local64_add(delta, &dtc->counters[i]->count); 1185 + } 1186 + } 1187 + 1188 + if (status & (1U << CMN_DT_NUM_COUNTERS)) { 1189 + ret = IRQ_HANDLED; 1190 + if (dtc->cc_active && !WARN_ON(!dtc->cycles)) { 1191 + delta = arm_cmn_read_cc(dtc); 1192 + local64_add(delta, &dtc->cycles->count); 1193 + } 1194 + } 1195 + 1196 + 
writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR); 1197 + 1198 + if (!dtc->irq_friend) 1199 + return ret; 1200 + dtc += dtc->irq_friend; 1201 + } 1202 + } 1203 + 1204 + /* We can reasonably accommodate DTCs of the same CMN sharing IRQs */ 1205 + static int arm_cmn_init_irqs(struct arm_cmn *cmn) 1206 + { 1207 + int i, j, irq, err; 1208 + 1209 + for (i = 0; i < cmn->num_dtcs; i++) { 1210 + irq = cmn->dtc[i].irq; 1211 + for (j = i; j--; ) { 1212 + if (cmn->dtc[j].irq == irq) { 1213 + cmn->dtc[j].irq_friend = j - i; 1214 + goto next; 1215 + } 1216 + } 1217 + err = devm_request_irq(cmn->dev, irq, arm_cmn_handle_irq, 1218 + IRQF_NOBALANCING | IRQF_NO_THREAD, 1219 + dev_name(cmn->dev), &cmn->dtc[i]); 1220 + if (err) 1221 + return err; 1222 + 1223 + err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu)); 1224 + if (err) 1225 + return err; 1226 + next: 1227 + ; /* isn't C great? */ 1228 + } 1229 + return 0; 1230 + } 1231 + 1232 + static void arm_cmn_init_dtm(struct arm_cmn_node *xp) 1233 + { 1234 + int i; 1235 + 1236 + for (i = 0; i < 4; i++) { 1237 + xp->wp_event[i] = -1; 1238 + writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i)); 1239 + writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i)); 1240 + } 1241 + xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; 1242 + xp->dtc = -1; 1243 + } 1244 + 1245 + static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx) 1246 + { 1247 + struct arm_cmn_dtc *dtc = cmn->dtc + idx; 1248 + struct arm_cmn_node *xp; 1249 + 1250 + dtc->base = dn->pmu_base - CMN_PMU_OFFSET; 1251 + dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx); 1252 + if (dtc->irq < 0) 1253 + return dtc->irq; 1254 + 1255 + writel_relaxed(0, dtc->base + CMN_DT_PMCR); 1256 + writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); 1257 + writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); 1258 + 1259 + /* We do at least know that a DTC's XP must be in that DTC's domain */ 1260 + xp = arm_cmn_node_to_xp(dn); 1261 + xp->dtc 
= idx; 1262 + 1263 + return 0; 1264 + } 1265 + 1266 + static int arm_cmn_node_cmp(const void *a, const void *b) 1267 + { 1268 + const struct arm_cmn_node *dna = a, *dnb = b; 1269 + int cmp; 1270 + 1271 + cmp = dna->type - dnb->type; 1272 + if (!cmp) 1273 + cmp = dna->logid - dnb->logid; 1274 + return cmp; 1275 + } 1276 + 1277 + static int arm_cmn_init_dtcs(struct arm_cmn *cmn) 1278 + { 1279 + struct arm_cmn_node *dn; 1280 + int dtc_idx = 0; 1281 + 1282 + cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); 1283 + if (!cmn->dtc) 1284 + return -ENOMEM; 1285 + 1286 + sort(cmn->dns, cmn->num_dns, sizeof(cmn->dns[0]), arm_cmn_node_cmp, NULL); 1287 + 1288 + cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); 1289 + 1290 + for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) { 1291 + if (dn->type != CMN_TYPE_XP) 1292 + arm_cmn_init_node_to_xp(cmn, dn); 1293 + else if (cmn->num_dtcs == 1) 1294 + dn->dtc = 0; 1295 + 1296 + if (dn->type == CMN_TYPE_DTC) 1297 + arm_cmn_init_dtc(cmn, dn, dtc_idx++); 1298 + 1299 + /* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */ 1300 + if (dn->type == CMN_TYPE_RND) 1301 + dn->type = CMN_TYPE_RNI; 1302 + } 1303 + 1304 + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); 1305 + 1306 + return 0; 1307 + } 1308 + 1309 + static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node) 1310 + { 1311 + int level; 1312 + u64 reg = readq_relaxed(cmn->base + offset + CMN_NODE_INFO); 1313 + 1314 + node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg); 1315 + node->id = FIELD_GET(CMN_NI_NODE_ID, reg); 1316 + node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg); 1317 + 1318 + node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET; 1319 + 1320 + if (node->type == CMN_TYPE_CFG) 1321 + level = 0; 1322 + else if (node->type == CMN_TYPE_XP) 1323 + level = 1; 1324 + else 1325 + level = 2; 1326 + 1327 + dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n", 1328 
+ (level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ', 1329 + node->type, node->logid, offset); 1330 + } 1331 + 1332 + static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) 1333 + { 1334 + void __iomem *cfg_region; 1335 + struct arm_cmn_node cfg, *dn; 1336 + u16 child_count, child_poff; 1337 + u32 xp_offset[CMN_MAX_XPS]; 1338 + u64 reg; 1339 + int i, j; 1340 + 1341 + cfg_region = cmn->base + rgn_offset; 1342 + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); 1343 + cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); 1344 + dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev); 1345 + 1346 + arm_cmn_init_node_info(cmn, rgn_offset, &cfg); 1347 + if (cfg.type != CMN_TYPE_CFG) 1348 + return -ENODEV; 1349 + 1350 + reg = readq_relaxed(cfg_region + CMN_CHILD_INFO); 1351 + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); 1352 + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); 1353 + 1354 + cmn->num_xps = child_count; 1355 + cmn->num_dns = cmn->num_xps; 1356 + 1357 + /* Pass 1: visit the XPs, enumerate their children */ 1358 + for (i = 0; i < cmn->num_xps; i++) { 1359 + reg = readq_relaxed(cfg_region + child_poff + i * 8); 1360 + xp_offset[i] = reg & CMN_CHILD_NODE_ADDR; 1361 + 1362 + reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO); 1363 + cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg); 1364 + } 1365 + 1366 + /* Cheeky +1 to help terminate pointer-based iteration */ 1367 + cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1, 1368 + sizeof(*cmn->dns), GFP_KERNEL); 1369 + if (!cmn->dns) 1370 + return -ENOMEM; 1371 + 1372 + /* Pass 2: now we can actually populate the nodes */ 1373 + dn = cmn->dns; 1374 + for (i = 0; i < cmn->num_xps; i++) { 1375 + void __iomem *xp_region = cmn->base + xp_offset[i]; 1376 + struct arm_cmn_node *xp = dn++; 1377 + 1378 + arm_cmn_init_node_info(cmn, xp_offset[i], xp); 1379 + arm_cmn_init_dtm(xp); 1380 + /* 1381 + * Thanks to the order in which XP logical IDs seem to be 1382 + * assigned, we can 
handily infer the mesh X dimension by 1383 + * looking out for the XP at (0,1) without needing to know 1384 + * the exact node ID format, which we can later derive. 1385 + */ 1386 + if (xp->id == (1 << 3)) 1387 + cmn->mesh_x = xp->logid; 1388 + 1389 + reg = readq_relaxed(xp_region + CMN_CHILD_INFO); 1390 + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); 1391 + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); 1392 + 1393 + for (j = 0; j < child_count; j++) { 1394 + reg = readq_relaxed(xp_region + child_poff + j * 8); 1395 + /* 1396 + * Don't even try to touch anything external, since in general 1397 + * we haven't a clue how to power up arbitrary CHI requesters. 1398 + * As of CMN-600r1 these could only be RN-SAMs or CXLAs, 1399 + * neither of which have any PMU events anyway. 1400 + * (Actually, CXLAs do seem to have grown some events in r1p2, 1401 + * but they don't go to regular XP DTMs, and they depend on 1402 + * secure configuration which we can't easily deal with) 1403 + */ 1404 + if (reg & CMN_CHILD_NODE_EXTERNAL) { 1405 + dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); 1406 + continue; 1407 + } 1408 + 1409 + arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); 1410 + 1411 + switch (dn->type) { 1412 + case CMN_TYPE_DTC: 1413 + cmn->num_dtcs++; 1414 + dn++; 1415 + break; 1416 + /* These guys have PMU events */ 1417 + case CMN_TYPE_DVM: 1418 + case CMN_TYPE_HNI: 1419 + case CMN_TYPE_HNF: 1420 + case CMN_TYPE_SBSX: 1421 + case CMN_TYPE_RNI: 1422 + case CMN_TYPE_RND: 1423 + case CMN_TYPE_CXRA: 1424 + case CMN_TYPE_CXHA: 1425 + dn++; 1426 + break; 1427 + /* Nothing to see here */ 1428 + case CMN_TYPE_RNSAM: 1429 + case CMN_TYPE_CXLA: 1430 + break; 1431 + /* Something has gone horribly wrong */ 1432 + default: 1433 + dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type); 1434 + return -ENODEV; 1435 + } 1436 + } 1437 + } 1438 + 1439 + /* Correct for any nodes we skipped */ 1440 + cmn->num_dns = dn - cmn->dns; 1441 + 1442 + /* 1443 
+ * If mesh_x wasn't set during discovery then we never saw 1444 + * an XP at (0,1), thus we must have an Nx1 configuration. 1445 + */ 1446 + if (!cmn->mesh_x) 1447 + cmn->mesh_x = cmn->num_xps; 1448 + cmn->mesh_y = cmn->num_xps / cmn->mesh_x; 1449 + 1450 + dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n", 1451 + cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn)); 1452 + 1453 + return 0; 1454 + } 1455 + 1456 + static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) 1457 + { 1458 + struct resource *cfg, *root; 1459 + 1460 + cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1461 + if (!cfg) 1462 + return -EINVAL; 1463 + 1464 + root = platform_get_resource(pdev, IORESOURCE_MEM, 1); 1465 + if (!root) 1466 + return -EINVAL; 1467 + 1468 + if (!resource_contains(cfg, root)) 1469 + swap(cfg, root); 1470 + /* 1471 + * Note that devm_ioremap_resource() is dumb and won't let the platform 1472 + * device claim cfg when the ACPI companion device has already claimed 1473 + * root within it. But since they *are* already both claimed in the 1474 + * appropriate name, we don't really need to do it again here anyway. 
1475 + */ 1476 + cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg)); 1477 + if (!cmn->base) 1478 + return -ENOMEM; 1479 + 1480 + return root->start - cfg->start; 1481 + } 1482 + 1483 + static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn) 1484 + { 1485 + struct device_node *np = pdev->dev.of_node; 1486 + u32 rootnode; 1487 + int ret; 1488 + 1489 + cmn->base = devm_platform_ioremap_resource(pdev, 0); 1490 + if (IS_ERR(cmn->base)) 1491 + return PTR_ERR(cmn->base); 1492 + 1493 + ret = of_property_read_u32(np, "arm,root-node", &rootnode); 1494 + if (ret) 1495 + return ret; 1496 + 1497 + return rootnode; 1498 + } 1499 + 1500 + static int arm_cmn_probe(struct platform_device *pdev) 1501 + { 1502 + struct arm_cmn *cmn; 1503 + const char *name; 1504 + static atomic_t id; 1505 + int err, rootnode, this_id; 1506 + 1507 + cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); 1508 + if (!cmn) 1509 + return -ENOMEM; 1510 + 1511 + cmn->dev = &pdev->dev; 1512 + platform_set_drvdata(pdev, cmn); 1513 + 1514 + if (has_acpi_companion(cmn->dev)) 1515 + rootnode = arm_cmn_acpi_probe(pdev, cmn); 1516 + else 1517 + rootnode = arm_cmn_of_probe(pdev, cmn); 1518 + if (rootnode < 0) 1519 + return rootnode; 1520 + 1521 + err = arm_cmn_discover(cmn, rootnode); 1522 + if (err) 1523 + return err; 1524 + 1525 + err = arm_cmn_init_dtcs(cmn); 1526 + if (err) 1527 + return err; 1528 + 1529 + err = arm_cmn_init_irqs(cmn); 1530 + if (err) 1531 + return err; 1532 + 1533 + cmn->cpu = raw_smp_processor_id(); 1534 + cmn->pmu = (struct pmu) { 1535 + .module = THIS_MODULE, 1536 + .attr_groups = arm_cmn_attr_groups, 1537 + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 1538 + .task_ctx_nr = perf_invalid_context, 1539 + .pmu_enable = arm_cmn_pmu_enable, 1540 + .pmu_disable = arm_cmn_pmu_disable, 1541 + .event_init = arm_cmn_event_init, 1542 + .add = arm_cmn_event_add, 1543 + .del = arm_cmn_event_del, 1544 + .start = arm_cmn_event_start, 1545 + .stop = arm_cmn_event_stop, 
1546 + .read = arm_cmn_event_read, 1547 + .start_txn = arm_cmn_start_txn, 1548 + .commit_txn = arm_cmn_commit_txn, 1549 + .cancel_txn = arm_cmn_end_txn, 1550 + }; 1551 + 1552 + this_id = atomic_fetch_inc(&id); 1553 + if (this_id == 0) { 1554 + name = "arm_cmn"; 1555 + } else { 1556 + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); 1557 + if (!name) 1558 + return -ENOMEM; 1559 + } 1560 + 1561 + err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node); 1562 + if (err) 1563 + return err; 1564 + 1565 + err = perf_pmu_register(&cmn->pmu, name, -1); 1566 + if (err) 1567 + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); 1568 + return err; 1569 + } 1570 + 1571 + static int arm_cmn_remove(struct platform_device *pdev) 1572 + { 1573 + struct arm_cmn *cmn = platform_get_drvdata(pdev); 1574 + int i; 1575 + 1576 + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL); 1577 + 1578 + perf_pmu_unregister(&cmn->pmu); 1579 + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); 1580 + 1581 + for (i = 0; i < cmn->num_dtcs; i++) 1582 + irq_set_affinity_hint(cmn->dtc[i].irq, NULL); 1583 + 1584 + return 0; 1585 + } 1586 + 1587 + #ifdef CONFIG_OF 1588 + static const struct of_device_id arm_cmn_of_match[] = { 1589 + { .compatible = "arm,cmn-600", }, 1590 + {} 1591 + }; 1592 + MODULE_DEVICE_TABLE(of, arm_cmn_of_match); 1593 + #endif 1594 + 1595 + #ifdef CONFIG_ACPI 1596 + static const struct acpi_device_id arm_cmn_acpi_match[] = { 1597 + { "ARMHC600", }, 1598 + {} 1599 + }; 1600 + MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); 1601 + #endif 1602 + 1603 + static struct platform_driver arm_cmn_driver = { 1604 + .driver = { 1605 + .name = "arm-cmn", 1606 + .of_match_table = of_match_ptr(arm_cmn_of_match), 1607 + .acpi_match_table = ACPI_PTR(arm_cmn_acpi_match), 1608 + }, 1609 + .probe = arm_cmn_probe, 1610 + .remove = arm_cmn_remove, 1611 + }; 1612 + 1613 + static int __init arm_cmn_init(void) 1614 + { 1615 + int ret; 1616 + 1617 + ret 
= cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, 1618 + "perf/arm/cmn:online", NULL, 1619 + arm_cmn_pmu_offline_cpu); 1620 + if (ret < 0) 1621 + return ret; 1622 + 1623 + arm_cmn_hp_state = ret; 1624 + ret = platform_driver_register(&arm_cmn_driver); 1625 + if (ret) 1626 + cpuhp_remove_multi_state(arm_cmn_hp_state); 1627 + return ret; 1628 + } 1629 + 1630 + static void __exit arm_cmn_exit(void) 1631 + { 1632 + platform_driver_unregister(&arm_cmn_driver); 1633 + cpuhp_remove_multi_state(arm_cmn_hp_state); 1634 + } 1635 + 1636 + module_init(arm_cmn_init); 1637 + module_exit(arm_cmn_exit); 1638 + 1639 + MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>"); 1640 + MODULE_DESCRIPTION("Arm CMN-600 PMU driver"); 1641 + MODULE_LICENSE("GPL v2");