Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'will/for-next/perf' into for-next/core

* will/for-next/perf:
arm64: pmuv3: use arm_pmu ACPI framework
arm64: pmuv3: handle !PMUv3 when probing
drivers/perf: arm_pmu: add ACPI framework
arm64: add function to get a cpu's MADT GICC table
drivers/perf: arm_pmu: split out platform device probe logic
drivers/perf: arm_pmu: move irq request/free into probe
drivers/perf: arm_pmu: split cpu-local irq request/free
drivers/perf: arm_pmu: rename irq request/free functions
drivers/perf: arm_pmu: handle no platform_device
drivers/perf: arm_pmu: simplify cpu_pmu_request_irqs()
drivers/perf: arm_pmu: factor out pmu registration
drivers/perf: arm_pmu: fold init into alloc
drivers/perf: arm_pmu: define armpmu_init_fn
drivers/perf: arm_pmu: remove pointless PMU disabling
perf: qcom: Add L3 cache PMU driver
drivers/perf: arm_pmu: split irq request from enable
drivers/perf: arm_pmu: manage interrupts per-cpu
drivers/perf: arm_pmu: rework per-cpu allocation
MAINTAINERS: Add file patterns for perf device tree bindings

+1667 -417
+25
Documentation/perf/qcom_l3_pmu.txt
··· 1 + Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU) 2 + =========================================================================== 3 + 4 + This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies 5 + Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared 6 + by all cores within a socket. Each slice is exposed as a separate uncore perf 7 + PMU with device name l3cache_<socket>_<instance>. User space is responsible 8 + for aggregating across slices. 9 + 10 + The driver provides a description of its available events and configuration 11 + options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs 12 + the driver also exposes a "cpumask" sysfs attribute which contains a mask 13 + consisting of one CPU per socket which will be used to handle all the PMU 14 + events on that socket. 15 + 16 + The hardware implements 32bit event counters and has a flat 8bit event space 17 + exposed via the "event" format attribute. In addition to the 32bit physical 18 + counters the driver supports virtual 64bit hardware counters by using hardware 19 + counter chaining. This feature is exposed via the "lc" (long counter) format 20 + flag. E.g.: 21 + 22 + perf stat -e l3cache_0_0/read-miss,lc/ 23 + 24 + Given that these are uncore PMUs the driver does not support sampling, therefore 25 + "perf record" will not work. Per-task perf sessions are not supported.
+1
MAINTAINERS
··· 976 976 F: drivers/perf/* 977 977 F: include/linux/perf/arm_pmu.h 978 978 F: Documentation/devicetree/bindings/arm/pmu.txt 979 + F: Documentation/devicetree/bindings/perf/ 979 980 980 981 ARM PORT 981 982 M: Russell King <linux@armlinux.org.uk>
+2
arch/arm64/include/asm/acpi.h
··· 85 85 return true; 86 86 } 87 87 88 + struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu); 89 + 88 90 static inline void arch_fix_phys_package_id(int num, u32 slot) { } 89 91 void __init acpi_init_cpus(void); 90 92
+80 -33
arch/arm64/kernel/perf_event.c
··· 957 957 ARMV8_PMU_EVTYPE_EVENT); 958 958 } 959 959 960 + struct armv8pmu_probe_info { 961 + struct arm_pmu *pmu; 962 + bool present; 963 + }; 964 + 960 965 static void __armv8pmu_probe_pmu(void *info) 961 966 { 962 - struct arm_pmu *cpu_pmu = info; 967 + struct armv8pmu_probe_info *probe = info; 968 + struct arm_pmu *cpu_pmu = probe->pmu; 969 + u64 dfr0, pmuver; 963 970 u32 pmceid[2]; 971 + 972 + dfr0 = read_sysreg(id_aa64dfr0_el1); 973 + pmuver = cpuid_feature_extract_unsigned_field(dfr0, 974 + ID_AA64DFR0_PMUVER_SHIFT); 975 + if (pmuver != 1) 976 + return; 977 + 978 + probe->present = true; 964 979 965 980 /* Read the nb of CNTx counters supported from PMNC */ 966 981 cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) ··· 994 979 995 980 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) 996 981 { 997 - return smp_call_function_any(&cpu_pmu->supported_cpus, 982 + struct armv8pmu_probe_info probe = { 983 + .pmu = cpu_pmu, 984 + .present = false, 985 + }; 986 + int ret; 987 + 988 + ret = smp_call_function_any(&cpu_pmu->supported_cpus, 998 989 __armv8pmu_probe_pmu, 999 - cpu_pmu, 1); 990 + &probe, 1); 991 + if (ret) 992 + return ret; 993 + 994 + return probe.present ? 
0 : -ENODEV; 1000 995 } 1001 996 1002 - static void armv8_pmu_init(struct arm_pmu *cpu_pmu) 997 + static int armv8_pmu_init(struct arm_pmu *cpu_pmu) 1003 998 { 999 + int ret = armv8pmu_probe_pmu(cpu_pmu); 1000 + if (ret) 1001 + return ret; 1002 + 1004 1003 cpu_pmu->handle_irq = armv8pmu_handle_irq, 1005 1004 cpu_pmu->enable = armv8pmu_enable_event, 1006 1005 cpu_pmu->disable = armv8pmu_disable_event, ··· 1026 997 cpu_pmu->reset = armv8pmu_reset, 1027 998 cpu_pmu->max_period = (1LLU << 32) - 1, 1028 999 cpu_pmu->set_event_filter = armv8pmu_set_event_filter; 1000 + 1001 + return 0; 1029 1002 } 1030 1003 1031 1004 static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) 1032 1005 { 1033 - armv8_pmu_init(cpu_pmu); 1006 + int ret = armv8_pmu_init(cpu_pmu); 1007 + if (ret) 1008 + return ret; 1009 + 1034 1010 cpu_pmu->name = "armv8_pmuv3"; 1035 1011 cpu_pmu->map_event = armv8_pmuv3_map_event; 1036 1012 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1037 1013 &armv8_pmuv3_events_attr_group; 1038 1014 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = 1039 1015 &armv8_pmuv3_format_attr_group; 1040 - return armv8pmu_probe_pmu(cpu_pmu); 1016 + 1017 + return 0; 1041 1018 } 1042 1019 1043 1020 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) 1044 1021 { 1045 - armv8_pmu_init(cpu_pmu); 1022 + int ret = armv8_pmu_init(cpu_pmu); 1023 + if (ret) 1024 + return ret; 1025 + 1046 1026 cpu_pmu->name = "armv8_cortex_a53"; 1047 1027 cpu_pmu->map_event = armv8_a53_map_event; 1048 1028 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1049 1029 &armv8_pmuv3_events_attr_group; 1050 1030 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = 1051 1031 &armv8_pmuv3_format_attr_group; 1052 - return armv8pmu_probe_pmu(cpu_pmu); 1032 + 1033 + return 0; 1053 1034 } 1054 1035 1055 1036 static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) 1056 1037 { 1057 - armv8_pmu_init(cpu_pmu); 1038 + int ret = armv8_pmu_init(cpu_pmu); 1039 + if (ret) 1040 + return ret; 1041 + 1058 1042 cpu_pmu->name = 
"armv8_cortex_a57"; 1059 1043 cpu_pmu->map_event = armv8_a57_map_event; 1060 1044 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1061 1045 &armv8_pmuv3_events_attr_group; 1062 1046 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = 1063 1047 &armv8_pmuv3_format_attr_group; 1064 - return armv8pmu_probe_pmu(cpu_pmu); 1048 + 1049 + return 0; 1065 1050 } 1066 1051 1067 1052 static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) 1068 1053 { 1069 - armv8_pmu_init(cpu_pmu); 1054 + int ret = armv8_pmu_init(cpu_pmu); 1055 + if (ret) 1056 + return ret; 1057 + 1070 1058 cpu_pmu->name = "armv8_cortex_a72"; 1071 1059 cpu_pmu->map_event = armv8_a57_map_event; 1072 1060 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1073 1061 &armv8_pmuv3_events_attr_group; 1074 1062 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = 1075 1063 &armv8_pmuv3_format_attr_group; 1076 - return armv8pmu_probe_pmu(cpu_pmu); 1064 + 1065 + return 0; 1077 1066 } 1078 1067 1079 1068 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) 1080 1069 { 1081 - armv8_pmu_init(cpu_pmu); 1070 + int ret = armv8_pmu_init(cpu_pmu); 1071 + if (ret) 1072 + return ret; 1073 + 1082 1074 cpu_pmu->name = "armv8_cavium_thunder"; 1083 1075 cpu_pmu->map_event = armv8_thunder_map_event; 1084 1076 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1085 1077 &armv8_pmuv3_events_attr_group; 1086 1078 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = 1087 1079 &armv8_pmuv3_format_attr_group; 1088 - return armv8pmu_probe_pmu(cpu_pmu); 1080 + 1081 + return 0; 1089 1082 } 1090 1083 1091 1084 static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) 1092 1085 { 1093 - armv8_pmu_init(cpu_pmu); 1086 + int ret = armv8_pmu_init(cpu_pmu); 1087 + if (ret) 1088 + return ret; 1089 + 1094 1090 cpu_pmu->name = "armv8_brcm_vulcan"; 1095 1091 cpu_pmu->map_event = armv8_vulcan_map_event; 1096 1092 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = 1097 1093 &armv8_pmuv3_events_attr_group; 1098 1094 cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] 
= 1099 1095 &armv8_pmuv3_format_attr_group; 1100 - return armv8pmu_probe_pmu(cpu_pmu); 1096 + 1097 + return 0; 1101 1098 } 1102 1099 1103 1100 static const struct of_device_id armv8_pmu_of_device_ids[] = { ··· 1136 1081 {}, 1137 1082 }; 1138 1083 1139 - /* 1140 - * Non DT systems have their micro/arch events probed at run-time. 1141 - * A fairly complete list of generic events are provided and ones that 1142 - * aren't supported by the current PMU are disabled. 1143 - */ 1144 - static const struct pmu_probe_info armv8_pmu_probe_table[] = { 1145 - PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ 1146 - { /* sentinel value */ } 1147 - }; 1148 - 1149 1084 static int armv8_pmu_device_probe(struct platform_device *pdev) 1150 1085 { 1151 - if (acpi_disabled) 1152 - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, 1153 - NULL); 1154 - 1155 - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, 1156 - armv8_pmu_probe_table); 1086 + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); 1157 1087 } 1158 1088 1159 1089 static struct platform_driver armv8_pmu_driver = { ··· 1149 1109 .probe = armv8_pmu_device_probe, 1150 1110 }; 1151 1111 1152 - builtin_platform_driver(armv8_pmu_driver); 1112 + static int __init armv8_pmu_driver_init(void) 1113 + { 1114 + if (acpi_disabled) 1115 + return platform_driver_register(&armv8_pmu_driver); 1116 + else 1117 + return arm_pmu_acpi_probe(armv8_pmuv3_init); 1118 + } 1119 + device_initcall(armv8_pmu_driver_init)
+10
arch/arm64/kernel/smp.c
··· 521 521 static unsigned int cpu_count = 1; 522 522 523 523 #ifdef CONFIG_ACPI 524 + static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; 525 + 526 + struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu) 527 + { 528 + return &cpu_madt_gicc[cpu]; 529 + } 530 + 524 531 /* 525 532 * acpi_map_gic_cpu_interface - parse processor MADT entry 526 533 * ··· 562 555 return; 563 556 } 564 557 bootcpu_valid = true; 558 + cpu_madt_gicc[0] = *processor; 565 559 early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); 566 560 return; 567 561 } ··· 572 564 573 565 /* map the logical cpu id to cpu MPIDR */ 574 566 cpu_logical_map(cpu_count) = hwid; 567 + 568 + cpu_madt_gicc[cpu_count] = *processor; 575 569 576 570 /* 577 571 * Set-up the ACPI parking protocol cpu entries
+14
drivers/perf/Kconfig
··· 12 12 Say y if you want to use CPU performance monitors on ARM-based 13 13 systems. 14 14 15 + config ARM_PMU_ACPI 16 + depends on ARM_PMU && ACPI 17 + def_bool y 18 + 15 19 config QCOM_L2_PMU 16 20 bool "Qualcomm Technologies L2-cache PMU" 17 21 depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI ··· 24 20 in Qualcomm Technologies processors. 25 21 Adds the L2 cache PMU into the perf events subsystem for 26 22 monitoring L2 cache events. 23 + 24 + config QCOM_L3_PMU 25 + bool "Qualcomm Technologies L3-cache PMU" 26 + depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI 27 + select QCOM_IRQ_COMBINER 28 + help 29 + Provides support for the L3 cache performance monitor unit (PMU) 30 + in Qualcomm Technologies processors. 31 + Adds the L3 cache PMU into the perf events subsystem for 32 + monitoring L3 cache events. 27 33 28 34 config XGENE_PMU 29 35 depends on PERF_EVENTS && ARCH_XGENE
+3 -1
drivers/perf/Makefile
··· 1 - obj-$(CONFIG_ARM_PMU) += arm_pmu.o 1 + obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 2 + obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o 2 3 obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o 4 + obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o 3 5 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+167 -377
drivers/perf/arm_pmu.c
··· 16 16 #include <linux/cpu_pm.h> 17 17 #include <linux/export.h> 18 18 #include <linux/kernel.h> 19 - #include <linux/of_device.h> 20 19 #include <linux/perf/arm_pmu.h> 21 20 #include <linux/platform_device.h> 22 21 #include <linux/slab.h> ··· 24 25 #include <linux/irq.h> 25 26 #include <linux/irqdesc.h> 26 27 27 - #include <asm/cputype.h> 28 28 #include <asm/irq_regs.h> 29 29 30 30 static int ··· 233 235 struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); 234 236 struct hw_perf_event *hwc = &event->hw; 235 237 int idx; 236 - int err = 0; 237 238 238 239 /* An event following a process won't be stopped earlier */ 239 240 if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) 240 241 return -ENOENT; 241 242 242 - perf_pmu_disable(event->pmu); 243 - 244 243 /* If we don't have a space for the counter then finish early. */ 245 244 idx = armpmu->get_event_idx(hw_events, event); 246 - if (idx < 0) { 247 - err = idx; 248 - goto out; 249 - } 245 + if (idx < 0) 246 + return idx; 250 247 251 248 /* 252 249 * If there is an event in the counter we are going to use then make ··· 258 265 /* Propagate our changes to the userspace mapping. */ 259 266 perf_event_update_userpage(event); 260 267 261 - out: 262 - perf_pmu_enable(event->pmu); 263 - return err; 268 + return 0; 264 269 } 265 270 266 271 static int ··· 314 323 return 0; 315 324 } 316 325 326 + static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu) 327 + { 328 + struct platform_device *pdev = armpmu->plat_device; 329 + 330 + return pdev ? dev_get_platdata(&pdev->dev) : NULL; 331 + } 332 + 317 333 static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) 318 334 { 319 335 struct arm_pmu *armpmu; 320 - struct platform_device *plat_device; 321 336 struct arm_pmu_platdata *plat; 322 337 int ret; 323 338 u64 start_clock, finish_clock; ··· 335 338 * dereference. 
336 339 */ 337 340 armpmu = *(void **)dev; 338 - plat_device = armpmu->plat_device; 339 - plat = dev_get_platdata(&plat_device->dev); 341 + 342 + plat = armpmu_get_platdata(armpmu); 340 343 341 344 start_clock = sched_clock(); 342 345 if (plat && plat->handle_irq) ··· 347 350 348 351 perf_sample_event_took(finish_clock - start_clock); 349 352 return ret; 350 - } 351 - 352 - static void 353 - armpmu_release_hardware(struct arm_pmu *armpmu) 354 - { 355 - armpmu->free_irq(armpmu); 356 - } 357 - 358 - static int 359 - armpmu_reserve_hardware(struct arm_pmu *armpmu) 360 - { 361 - int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); 362 - if (err) { 363 - armpmu_release_hardware(armpmu); 364 - return err; 365 - } 366 - 367 - return 0; 368 - } 369 - 370 - static void 371 - hw_perf_event_destroy(struct perf_event *event) 372 - { 373 - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 374 - atomic_t *active_events = &armpmu->active_events; 375 - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; 376 - 377 - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { 378 - armpmu_release_hardware(armpmu); 379 - mutex_unlock(pmu_reserve_mutex); 380 - } 381 353 } 382 354 383 355 static int ··· 421 455 static int armpmu_event_init(struct perf_event *event) 422 456 { 423 457 struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 424 - int err = 0; 425 - atomic_t *active_events = &armpmu->active_events; 426 458 427 459 /* 428 460 * Reject CPU-affine events for CPUs that are of a different class to ··· 440 476 if (armpmu->map_event(event) == -ENOENT) 441 477 return -ENOENT; 442 478 443 - event->destroy = hw_perf_event_destroy; 444 - 445 - if (!atomic_inc_not_zero(active_events)) { 446 - mutex_lock(&armpmu->reserve_mutex); 447 - if (atomic_read(active_events) == 0) 448 - err = armpmu_reserve_hardware(armpmu); 449 - 450 - if (!err) 451 - atomic_inc(active_events); 452 - mutex_unlock(&armpmu->reserve_mutex); 453 - } 454 - 455 - if (err) 456 - return err; 457 - 458 - err 
= __hw_perf_event_init(event); 459 - if (err) 460 - hw_perf_event_destroy(event); 461 - 462 - return err; 479 + return __hw_perf_event_init(event); 463 480 } 464 481 465 482 static void armpmu_enable(struct pmu *pmu) ··· 498 553 .attrs = armpmu_common_attrs, 499 554 }; 500 555 501 - static void armpmu_init(struct arm_pmu *armpmu) 502 - { 503 - atomic_set(&armpmu->active_events, 0); 504 - mutex_init(&armpmu->reserve_mutex); 505 - 506 - armpmu->pmu = (struct pmu) { 507 - .pmu_enable = armpmu_enable, 508 - .pmu_disable = armpmu_disable, 509 - .event_init = armpmu_event_init, 510 - .add = armpmu_add, 511 - .del = armpmu_del, 512 - .start = armpmu_start, 513 - .stop = armpmu_stop, 514 - .read = armpmu_read, 515 - .filter_match = armpmu_filter_match, 516 - .attr_groups = armpmu->attr_groups, 517 - }; 518 - armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = 519 - &armpmu_common_attr_group; 520 - } 521 - 522 556 /* Set at runtime when we know what CPU type we are. */ 523 557 static struct arm_pmu *__oprofile_cpu_pmu; 524 558 ··· 525 601 } 526 602 EXPORT_SYMBOL_GPL(perf_num_counters); 527 603 528 - static void cpu_pmu_enable_percpu_irq(void *data) 604 + void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) 529 605 { 530 - int irq = *(int *)data; 606 + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; 607 + int irq = per_cpu(hw_events->irq, cpu); 531 608 532 - enable_percpu_irq(irq, IRQ_TYPE_NONE); 533 - } 609 + if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) 610 + return; 534 611 535 - static void cpu_pmu_disable_percpu_irq(void *data) 536 - { 537 - int irq = *(int *)data; 538 - 539 - disable_percpu_irq(irq); 540 - } 541 - 542 - static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) 543 - { 544 - int i, irq, irqs; 545 - struct platform_device *pmu_device = cpu_pmu->plat_device; 546 - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; 547 - 548 - irqs = min(pmu_device->num_resources, num_possible_cpus()); 549 - 550 - irq = 
platform_get_irq(pmu_device, 0); 551 - if (irq > 0 && irq_is_percpu(irq)) { 552 - on_each_cpu_mask(&cpu_pmu->supported_cpus, 553 - cpu_pmu_disable_percpu_irq, &irq, 1); 612 + if (irq_is_percpu(irq)) { 554 613 free_percpu_irq(irq, &hw_events->percpu_pmu); 555 - } else { 556 - for (i = 0; i < irqs; ++i) { 557 - int cpu = i; 558 - 559 - if (cpu_pmu->irq_affinity) 560 - cpu = cpu_pmu->irq_affinity[i]; 561 - 562 - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) 563 - continue; 564 - irq = platform_get_irq(pmu_device, i); 565 - if (irq > 0) 566 - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); 567 - } 614 + cpumask_clear(&armpmu->active_irqs); 615 + return; 568 616 } 617 + 618 + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); 569 619 } 570 620 571 - static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) 621 + void armpmu_free_irqs(struct arm_pmu *armpmu) 572 622 { 573 - int i, err, irq, irqs; 574 - struct platform_device *pmu_device = cpu_pmu->plat_device; 575 - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; 623 + int cpu; 576 624 577 - if (!pmu_device) 578 - return -ENODEV; 625 + for_each_cpu(cpu, &armpmu->supported_cpus) 626 + armpmu_free_irq(armpmu, cpu); 627 + } 579 628 580 - irqs = min(pmu_device->num_resources, num_possible_cpus()); 581 - if (irqs < 1) { 582 - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); 629 + int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) 630 + { 631 + int err = 0; 632 + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; 633 + const irq_handler_t handler = armpmu_dispatch_irq; 634 + int irq = per_cpu(hw_events->irq, cpu); 635 + if (!irq) 583 636 return 0; 584 - } 585 637 586 - irq = platform_get_irq(pmu_device, 0); 587 - if (irq > 0 && irq_is_percpu(irq)) { 638 + if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) { 588 639 err = request_percpu_irq(irq, handler, "arm-pmu", 589 640 &hw_events->percpu_pmu); 590 
- if (err) { 591 - pr_err("unable to request IRQ%d for ARM PMU counters\n", 592 - irq); 593 - return err; 594 - } 641 + } else if (irq_is_percpu(irq)) { 642 + int other_cpu = cpumask_first(&armpmu->active_irqs); 643 + int other_irq = per_cpu(hw_events->irq, other_cpu); 595 644 596 - on_each_cpu_mask(&cpu_pmu->supported_cpus, 597 - cpu_pmu_enable_percpu_irq, &irq, 1); 645 + if (irq != other_irq) { 646 + pr_warn("mismatched PPIs detected.\n"); 647 + err = -EINVAL; 648 + } 598 649 } else { 599 - for (i = 0; i < irqs; ++i) { 600 - int cpu = i; 601 - 602 - err = 0; 603 - irq = platform_get_irq(pmu_device, i); 604 - if (irq < 0) 605 - continue; 606 - 607 - if (cpu_pmu->irq_affinity) 608 - cpu = cpu_pmu->irq_affinity[i]; 609 - 610 - /* 611 - * If we have a single PMU interrupt that we can't shift, 612 - * assume that we're running on a uniprocessor machine and 613 - * continue. Otherwise, continue without this interrupt. 614 - */ 615 - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { 616 - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", 617 - irq, cpu); 618 - continue; 619 - } 620 - 621 - err = request_irq(irq, handler, 622 - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", 623 - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); 624 - if (err) { 625 - pr_err("unable to request IRQ%d for ARM PMU counters\n", 626 - irq); 627 - return err; 628 - } 629 - 630 - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); 631 - } 650 + err = request_irq(irq, handler, 651 + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", 652 + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); 632 653 } 633 654 655 + if (err) { 656 + pr_err("unable to request IRQ%d for ARM PMU counters\n", 657 + irq); 658 + return err; 659 + } 660 + 661 + cpumask_set_cpu(cpu, &armpmu->active_irqs); 662 + 634 663 return 0; 664 + } 665 + 666 + int armpmu_request_irqs(struct arm_pmu *armpmu) 667 + { 668 + int cpu, err; 669 + 670 + for_each_cpu(cpu, &armpmu->supported_cpus) { 671 + err = armpmu_request_irq(armpmu, cpu); 672 + if 
(err) 673 + break; 674 + } 675 + 676 + return err; 677 + } 678 + 679 + static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) 680 + { 681 + struct pmu_hw_events __percpu *hw_events = pmu->hw_events; 682 + return per_cpu(hw_events->irq, cpu); 635 683 } 636 684 637 685 /* ··· 615 719 static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) 616 720 { 617 721 struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node); 722 + int irq; 618 723 619 724 if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) 620 725 return 0; 621 726 if (pmu->reset) 622 727 pmu->reset(pmu); 728 + 729 + irq = armpmu_get_cpu_irq(pmu, cpu); 730 + if (irq) { 731 + if (irq_is_percpu(irq)) { 732 + enable_percpu_irq(irq, IRQ_TYPE_NONE); 733 + return 0; 734 + } 735 + 736 + if (irq_force_affinity(irq, cpumask_of(cpu)) && 737 + num_possible_cpus() > 1) { 738 + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", 739 + irq, cpu); 740 + } 741 + } 742 + 743 + return 0; 744 + } 745 + 746 + static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) 747 + { 748 + struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node); 749 + int irq; 750 + 751 + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) 752 + return 0; 753 + 754 + irq = armpmu_get_cpu_irq(pmu, cpu); 755 + if (irq && irq_is_percpu(irq)) 756 + disable_percpu_irq(irq); 757 + 623 758 return 0; 624 759 } 625 760 ··· 755 828 static int cpu_pmu_init(struct arm_pmu *cpu_pmu) 756 829 { 757 830 int err; 758 - int cpu; 759 - struct pmu_hw_events __percpu *cpu_hw_events; 760 831 761 - cpu_hw_events = alloc_percpu(struct pmu_hw_events); 762 - if (!cpu_hw_events) 763 - return -ENOMEM; 764 - 765 - err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING, 766 - &cpu_pmu->node); 832 + err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_STARTING, 833 + &cpu_pmu->node); 767 834 if (err) 768 - goto out_free; 835 + goto out; 769 836 770 837 err = cpu_pm_pmu_register(cpu_pmu); 771 838 if (err) 772 839 goto 
out_unregister; 773 - 774 - for_each_possible_cpu(cpu) { 775 - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); 776 - raw_spin_lock_init(&events->pmu_lock); 777 - events->percpu_pmu = cpu_pmu; 778 - } 779 - 780 - cpu_pmu->hw_events = cpu_hw_events; 781 - cpu_pmu->request_irq = cpu_pmu_request_irq; 782 - cpu_pmu->free_irq = cpu_pmu_free_irq; 783 - 784 - /* Ensure the PMU has sane values out of reset. */ 785 - if (cpu_pmu->reset) 786 - on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, 787 - cpu_pmu, 1); 788 - 789 - /* If no interrupts available, set the corresponding capability flag */ 790 - if (!platform_get_irq(cpu_pmu->plat_device, 0)) 791 - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 792 - 793 - /* 794 - * This is a CPU PMU potentially in a heterogeneous configuration (e.g. 795 - * big.LITTLE). This is not an uncore PMU, and we have taken ctx 796 - * sharing into account (e.g. with our pmu::filter_match callback and 797 - * pmu::event_init group validation). 798 - */ 799 - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS; 800 840 801 841 return 0; 802 842 803 843 out_unregister: 804 844 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING, 805 845 &cpu_pmu->node); 806 - out_free: 807 - free_percpu(cpu_hw_events); 846 + out: 808 847 return err; 809 848 } 810 849 ··· 779 886 cpu_pm_pmu_unregister(cpu_pmu); 780 887 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING, 781 888 &cpu_pmu->node); 782 - free_percpu(cpu_pmu->hw_events); 783 889 } 784 890 785 - /* 786 - * CPU PMU identification and probing. 
787 - */ 788 - static int probe_current_pmu(struct arm_pmu *pmu, 789 - const struct pmu_probe_info *info) 891 + struct arm_pmu *armpmu_alloc(void) 790 892 { 791 - int cpu = get_cpu(); 792 - unsigned int cpuid = read_cpuid_id(); 793 - int ret = -ENODEV; 794 - 795 - pr_info("probing PMU on CPU %d\n", cpu); 796 - 797 - for (; info->init != NULL; info++) { 798 - if ((cpuid & info->mask) != info->cpuid) 799 - continue; 800 - ret = info->init(pmu); 801 - break; 802 - } 803 - 804 - put_cpu(); 805 - return ret; 806 - } 807 - 808 - static int of_pmu_irq_cfg(struct arm_pmu *pmu) 809 - { 810 - int *irqs, i = 0; 811 - bool using_spi = false; 812 - struct platform_device *pdev = pmu->plat_device; 813 - 814 - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); 815 - if (!irqs) 816 - return -ENOMEM; 817 - 818 - do { 819 - struct device_node *dn; 820 - int cpu, irq; 821 - 822 - /* See if we have an affinity entry */ 823 - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); 824 - if (!dn) 825 - break; 826 - 827 - /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ 828 - irq = platform_get_irq(pdev, i); 829 - if (irq > 0) { 830 - bool spi = !irq_is_percpu(irq); 831 - 832 - if (i > 0 && spi != using_spi) { 833 - pr_err("PPI/SPI IRQ type mismatch for %s!\n", 834 - dn->name); 835 - of_node_put(dn); 836 - kfree(irqs); 837 - return -EINVAL; 838 - } 839 - 840 - using_spi = spi; 841 - } 842 - 843 - /* Now look up the logical CPU number */ 844 - for_each_possible_cpu(cpu) { 845 - struct device_node *cpu_dn; 846 - 847 - cpu_dn = of_cpu_device_node_get(cpu); 848 - of_node_put(cpu_dn); 849 - 850 - if (dn == cpu_dn) 851 - break; 852 - } 853 - 854 - if (cpu >= nr_cpu_ids) { 855 - pr_warn("Failed to find logical CPU for %s\n", 856 - dn->name); 857 - of_node_put(dn); 858 - cpumask_setall(&pmu->supported_cpus); 859 - break; 860 - } 861 - of_node_put(dn); 862 - 863 - /* For SPIs, we need to track the affinity per IRQ */ 864 - if (using_spi) { 865 - if (i >= 
pdev->num_resources) 866 - break; 867 - 868 - irqs[i] = cpu; 869 - } 870 - 871 - /* Keep track of the CPUs containing this PMU type */ 872 - cpumask_set_cpu(cpu, &pmu->supported_cpus); 873 - i++; 874 - } while (1); 875 - 876 - /* If we didn't manage to parse anything, try the interrupt affinity */ 877 - if (cpumask_weight(&pmu->supported_cpus) == 0) { 878 - int irq = platform_get_irq(pdev, 0); 879 - 880 - if (irq > 0 && irq_is_percpu(irq)) { 881 - /* If using PPIs, check the affinity of the partition */ 882 - int ret; 883 - 884 - ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); 885 - if (ret) { 886 - kfree(irqs); 887 - return ret; 888 - } 889 - } else { 890 - /* Otherwise default to all CPUs */ 891 - cpumask_setall(&pmu->supported_cpus); 892 - } 893 - } 894 - 895 - /* If we matched up the IRQ affinities, use them to route the SPIs */ 896 - if (using_spi && i == pdev->num_resources) 897 - pmu->irq_affinity = irqs; 898 - else 899 - kfree(irqs); 900 - 901 - return 0; 902 - } 903 - 904 - int arm_pmu_device_probe(struct platform_device *pdev, 905 - const struct of_device_id *of_table, 906 - const struct pmu_probe_info *probe_table) 907 - { 908 - const struct of_device_id *of_id; 909 - const int (*init_fn)(struct arm_pmu *); 910 - struct device_node *node = pdev->dev.of_node; 911 893 struct arm_pmu *pmu; 912 - int ret = -ENODEV; 894 + int cpu; 913 895 914 - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); 896 + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); 915 897 if (!pmu) { 916 898 pr_info("failed to allocate PMU device!\n"); 917 - return -ENOMEM; 899 + goto out; 918 900 } 919 901 920 - armpmu_init(pmu); 921 - 922 - pmu->plat_device = pdev; 923 - 924 - if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { 925 - init_fn = of_id->data; 926 - 927 - pmu->secure_access = of_property_read_bool(pdev->dev.of_node, 928 - "secure-reg-access"); 929 - 930 - /* arm64 systems boot only as non-secure */ 931 - if (IS_ENABLED(CONFIG_ARM64) && 
pmu->secure_access) { 932 - pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); 933 - pmu->secure_access = false; 934 - } 935 - 936 - ret = of_pmu_irq_cfg(pmu); 937 - if (!ret) 938 - ret = init_fn(pmu); 939 - } else if (probe_table) { 940 - cpumask_setall(&pmu->supported_cpus); 941 - ret = probe_current_pmu(pmu, probe_table); 902 + pmu->hw_events = alloc_percpu(struct pmu_hw_events); 903 + if (!pmu->hw_events) { 904 + pr_info("failed to allocate per-cpu PMU data.\n"); 905 + goto out_free_pmu; 942 906 } 943 907 944 - if (ret) { 945 - pr_info("%s: failed to probe PMU!\n", of_node_full_name(node)); 946 - goto out_free; 908 + pmu->pmu = (struct pmu) { 909 + .pmu_enable = armpmu_enable, 910 + .pmu_disable = armpmu_disable, 911 + .event_init = armpmu_event_init, 912 + .add = armpmu_add, 913 + .del = armpmu_del, 914 + .start = armpmu_start, 915 + .stop = armpmu_stop, 916 + .read = armpmu_read, 917 + .filter_match = armpmu_filter_match, 918 + .attr_groups = pmu->attr_groups, 919 + /* 920 + * This is a CPU PMU potentially in a heterogeneous 921 + * configuration (e.g. big.LITTLE). This is not an uncore PMU, 922 + * and we have taken ctx sharing into account (e.g. with our 923 + * pmu::filter_match callback and pmu::event_init group 924 + * validation). 
925 + */ 926 + .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS, 927 + }; 928 + 929 + pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = 930 + &armpmu_common_attr_group; 931 + 932 + for_each_possible_cpu(cpu) { 933 + struct pmu_hw_events *events; 934 + 935 + events = per_cpu_ptr(pmu->hw_events, cpu); 936 + raw_spin_lock_init(&events->pmu_lock); 937 + events->percpu_pmu = pmu; 947 938 } 948 939 940 + return pmu; 941 + 942 + out_free_pmu: 943 + kfree(pmu); 944 + out: 945 + return NULL; 946 + } 947 + 948 + void armpmu_free(struct arm_pmu *pmu) 949 + { 950 + free_percpu(pmu->hw_events); 951 + kfree(pmu); 952 + } 953 + 954 + int armpmu_register(struct arm_pmu *pmu) 955 + { 956 + int ret; 949 957 950 958 ret = cpu_pmu_init(pmu); 951 959 if (ret) 952 - goto out_free; 960 + return ret; 953 961 954 962 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); 955 963 if (ret) ··· 860 1066 __oprofile_cpu_pmu = pmu; 861 1067 862 1068 pr_info("enabled with %s PMU driver, %d counters available\n", 863 - pmu->name, pmu->num_events); 1069 + pmu->name, pmu->num_events); 864 1070 865 1071 return 0; 866 1072 867 1073 out_destroy: 868 1074 cpu_pmu_destroy(pmu); 869 - out_free: 870 - pr_info("%s: failed to register PMU devices!\n", 871 - of_node_full_name(node)); 872 - kfree(pmu->irq_affinity); 873 - kfree(pmu); 874 1075 return ret; 875 1076 } 876 1077 ··· 875 1086 876 1087 ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING, 877 1088 "perf/arm/pmu:starting", 878 - arm_perf_starting_cpu, NULL); 1089 + arm_perf_starting_cpu, 1090 + arm_perf_teardown_cpu); 879 1091 if (ret) 880 1092 pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n", 881 1093 ret);
+256
drivers/perf/arm_pmu_acpi.c
··· 1 + /* 2 + * ACPI probing code for ARM performance counters. 3 + * 4 + * Copyright (C) 2017 ARM Ltd. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + 11 + #include <linux/acpi.h> 12 + #include <linux/cpumask.h> 13 + #include <linux/init.h> 14 + #include <linux/percpu.h> 15 + #include <linux/perf/arm_pmu.h> 16 + 17 + #include <asm/cputype.h> 18 + 19 + static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus); 20 + static DEFINE_PER_CPU(int, pmu_irqs); 21 + 22 + static int arm_pmu_acpi_register_irq(int cpu) 23 + { 24 + struct acpi_madt_generic_interrupt *gicc; 25 + int gsi, trigger; 26 + 27 + gicc = acpi_cpu_get_madt_gicc(cpu); 28 + if (WARN_ON(!gicc)) 29 + return -EINVAL; 30 + 31 + gsi = gicc->performance_interrupt; 32 + if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE) 33 + trigger = ACPI_EDGE_SENSITIVE; 34 + else 35 + trigger = ACPI_LEVEL_SENSITIVE; 36 + 37 + /* 38 + * Helpfully, the MADT GICC doesn't have a polarity flag for the 39 + * "performance interrupt". Luckily, on compliant GICs the polarity is 40 + * a fixed value in HW (for both SPIs and PPIs) that we cannot change 41 + * from SW. 42 + * 43 + * Here we pass in ACPI_ACTIVE_HIGH to keep the core code happy. This 44 + * may not match the real polarity, but that should not matter. 45 + * 46 + * Other interrupt controllers are not supported with ACPI. 
47 + */ 48 + return acpi_register_gsi(NULL, gsi, trigger, ACPI_ACTIVE_HIGH); 49 + } 50 + 51 + static void arm_pmu_acpi_unregister_irq(int cpu) 52 + { 53 + struct acpi_madt_generic_interrupt *gicc; 54 + int gsi; 55 + 56 + gicc = acpi_cpu_get_madt_gicc(cpu); 57 + if (!gicc) 58 + return; 59 + 60 + gsi = gicc->performance_interrupt; 61 + acpi_unregister_gsi(gsi); 62 + } 63 + 64 + static int arm_pmu_acpi_parse_irqs(void) 65 + { 66 + int irq, cpu, irq_cpu, err; 67 + 68 + for_each_possible_cpu(cpu) { 69 + irq = arm_pmu_acpi_register_irq(cpu); 70 + if (irq < 0) { 71 + err = irq; 72 + pr_warn("Unable to parse ACPI PMU IRQ for CPU%d: %d\n", 73 + cpu, err); 74 + goto out_err; 75 + } else if (irq == 0) { 76 + pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu); 77 + } 78 + 79 + per_cpu(pmu_irqs, cpu) = irq; 80 + } 81 + 82 + return 0; 83 + 84 + out_err: 85 + for_each_possible_cpu(cpu) { 86 + irq = per_cpu(pmu_irqs, cpu); 87 + if (!irq) 88 + continue; 89 + 90 + arm_pmu_acpi_unregister_irq(cpu); 91 + 92 + /* 93 + * Blat all copies of the IRQ so that we only unregister the 94 + * corresponding GSI once (e.g. when we have PPIs). 
95 + */ 96 + for_each_possible_cpu(irq_cpu) { 97 + if (per_cpu(pmu_irqs, irq_cpu) == irq) 98 + per_cpu(pmu_irqs, irq_cpu) = 0; 99 + } 100 + } 101 + 102 + return err; 103 + } 104 + 105 + static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void) 106 + { 107 + unsigned long cpuid = read_cpuid_id(); 108 + struct arm_pmu *pmu; 109 + int cpu; 110 + 111 + for_each_possible_cpu(cpu) { 112 + pmu = per_cpu(probed_pmus, cpu); 113 + if (!pmu || pmu->acpi_cpuid != cpuid) 114 + continue; 115 + 116 + return pmu; 117 + } 118 + 119 + pmu = armpmu_alloc(); 120 + if (!pmu) { 121 + pr_warn("Unable to allocate PMU for CPU%d\n", 122 + smp_processor_id()); 123 + return NULL; 124 + } 125 + 126 + pmu->acpi_cpuid = cpuid; 127 + 128 + return pmu; 129 + } 130 + 131 + /* 132 + * This must run before the common arm_pmu hotplug logic, so that we can 133 + * associate a CPU and its interrupt before the common code tries to manage the 134 + * affinity and so on. 135 + * 136 + * Note that hotplug events are serialized, so we cannot race with another CPU 137 + * coming up. The perf core won't open events while a hotplug event is in 138 + * progress. 139 + */ 140 + static int arm_pmu_acpi_cpu_starting(unsigned int cpu) 141 + { 142 + struct arm_pmu *pmu; 143 + struct pmu_hw_events __percpu *hw_events; 144 + int irq; 145 + 146 + /* If we've already probed this CPU, we have nothing to do */ 147 + if (per_cpu(probed_pmus, cpu)) 148 + return 0; 149 + 150 + irq = per_cpu(pmu_irqs, cpu); 151 + 152 + pmu = arm_pmu_acpi_find_alloc_pmu(); 153 + if (!pmu) 154 + return -ENOMEM; 155 + 156 + cpumask_set_cpu(cpu, &pmu->supported_cpus); 157 + 158 + per_cpu(probed_pmus, cpu) = pmu; 159 + 160 + /* 161 + * Log and request the IRQ so the core arm_pmu code can manage it. In 162 + * some situations (e.g. mismatched PPIs), we may fail to request the 163 + * IRQ. However, it may be too late for us to do anything about it. 164 + * The common ARM PMU code will log a warning in this case. 
165 + */ 166 + hw_events = pmu->hw_events; 167 + per_cpu(hw_events->irq, cpu) = irq; 168 + armpmu_request_irq(pmu, cpu); 169 + 170 + /* 171 + * Ideally, we'd probe the PMU here when we find the first matching 172 + * CPU. We can't do that for several reasons; see the comment in 173 + * arm_pmu_acpi_init(). 174 + * 175 + * So for the time being, we're done. 176 + */ 177 + return 0; 178 + } 179 + 180 + int arm_pmu_acpi_probe(armpmu_init_fn init_fn) 181 + { 182 + int pmu_idx = 0; 183 + int cpu, ret; 184 + 185 + if (acpi_disabled) 186 + return 0; 187 + 188 + /* 189 + * Initialise and register the set of PMUs which we know about right 190 + * now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we 191 + * could handle late hotplug, but this may lead to deadlock since we 192 + * might try to register a hotplug notifier instance from within a 193 + * hotplug notifier. 194 + * 195 + * There's also the problem of having access to the right init_fn, 196 + * without tying this too deeply into the "real" PMU driver. 197 + * 198 + * For the moment, as with the platform/DT case, we need at least one 199 + * of a PMU's CPUs to be online at probe time. 
200 + */ 201 + for_each_possible_cpu(cpu) { 202 + struct arm_pmu *pmu = per_cpu(probed_pmus, cpu); 203 + char *base_name; 204 + 205 + if (!pmu || pmu->name) 206 + continue; 207 + 208 + ret = init_fn(pmu); 209 + if (ret == -ENODEV) { 210 + /* PMU not handled by this driver, or not present */ 211 + continue; 212 + } else if (ret) { 213 + pr_warn("Unable to initialise PMU for CPU%d\n", cpu); 214 + return ret; 215 + } 216 + 217 + base_name = pmu->name; 218 + pmu->name = kasprintf(GFP_KERNEL, "%s_%d", base_name, pmu_idx++); 219 + if (!pmu->name) { 220 + pr_warn("Unable to allocate PMU name for CPU%d\n", cpu); 221 + return -ENOMEM; 222 + } 223 + 224 + ret = armpmu_register(pmu); 225 + if (ret) { 226 + pr_warn("Failed to register PMU for CPU%d\n", cpu); 227 + return ret; 228 + } 229 + } 230 + 231 + return 0; 232 + } 233 + 234 + static int arm_pmu_acpi_init(void) 235 + { 236 + int ret; 237 + 238 + if (acpi_disabled) 239 + return 0; 240 + 241 + /* 242 + * We can't request IRQs yet, since we don't know the cookie value 243 + * until we know which CPUs share the same logical PMU. We'll handle 244 + * that in arm_pmu_acpi_cpu_starting(). 245 + */ 246 + ret = arm_pmu_acpi_parse_irqs(); 247 + if (ret) 248 + return ret; 249 + 250 + ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING, 251 + "perf/arm/pmu_acpi:starting", 252 + arm_pmu_acpi_cpu_starting, NULL); 253 + 254 + return ret; 255 + } 256 + subsys_initcall(arm_pmu_acpi_init)
+235
drivers/perf/arm_pmu_platform.c
··· 1 + /* 2 + * platform_device probing code for ARM performance counters. 3 + * 4 + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles 5 + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> 6 + */ 7 + #define pr_fmt(fmt) "hw perfevents: " fmt 8 + 9 + #include <linux/bug.h> 10 + #include <linux/cpumask.h> 11 + #include <linux/device.h> 12 + #include <linux/errno.h> 13 + #include <linux/irq.h> 14 + #include <linux/irqdesc.h> 15 + #include <linux/kconfig.h> 16 + #include <linux/of.h> 17 + #include <linux/of_device.h> 18 + #include <linux/percpu.h> 19 + #include <linux/perf/arm_pmu.h> 20 + #include <linux/platform_device.h> 21 + #include <linux/printk.h> 22 + #include <linux/smp.h> 23 + 24 + static int probe_current_pmu(struct arm_pmu *pmu, 25 + const struct pmu_probe_info *info) 26 + { 27 + int cpu = get_cpu(); 28 + unsigned int cpuid = read_cpuid_id(); 29 + int ret = -ENODEV; 30 + 31 + pr_info("probing PMU on CPU %d\n", cpu); 32 + 33 + for (; info->init != NULL; info++) { 34 + if ((cpuid & info->mask) != info->cpuid) 35 + continue; 36 + ret = info->init(pmu); 37 + break; 38 + } 39 + 40 + put_cpu(); 41 + return ret; 42 + } 43 + 44 + static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq) 45 + { 46 + int cpu, ret; 47 + struct pmu_hw_events __percpu *hw_events = pmu->hw_events; 48 + 49 + ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); 50 + if (ret) 51 + return ret; 52 + 53 + for_each_cpu(cpu, &pmu->supported_cpus) 54 + per_cpu(hw_events->irq, cpu) = irq; 55 + 56 + return 0; 57 + } 58 + 59 + static bool pmu_has_irq_affinity(struct device_node *node) 60 + { 61 + return !!of_find_property(node, "interrupt-affinity", NULL); 62 + } 63 + 64 + static int pmu_parse_irq_affinity(struct device_node *node, int i) 65 + { 66 + struct device_node *dn; 67 + int cpu; 68 + 69 + /* 70 + * If we don't have an interrupt-affinity property, we guess irq 71 + * affinity matches our logical CPU order, as we used to assume. 
72 + * This is fragile, so we'll warn in pmu_parse_irqs(). 73 + */ 74 + if (!pmu_has_irq_affinity(node)) 75 + return i; 76 + 77 + dn = of_parse_phandle(node, "interrupt-affinity", i); 78 + if (!dn) { 79 + pr_warn("failed to parse interrupt-affinity[%d] for %s\n", 80 + i, node->name); 81 + return -EINVAL; 82 + } 83 + 84 + /* Now look up the logical CPU number */ 85 + for_each_possible_cpu(cpu) { 86 + struct device_node *cpu_dn; 87 + 88 + cpu_dn = of_cpu_device_node_get(cpu); 89 + of_node_put(cpu_dn); 90 + 91 + if (dn == cpu_dn) 92 + break; 93 + } 94 + 95 + if (cpu >= nr_cpu_ids) { 96 + pr_warn("failed to find logical CPU for %s\n", dn->name); 97 + } 98 + 99 + of_node_put(dn); 100 + 101 + return cpu; 102 + } 103 + 104 + static int pmu_parse_irqs(struct arm_pmu *pmu) 105 + { 106 + int i = 0, num_irqs; 107 + struct platform_device *pdev = pmu->plat_device; 108 + struct pmu_hw_events __percpu *hw_events = pmu->hw_events; 109 + 110 + num_irqs = platform_irq_count(pdev); 111 + if (num_irqs < 0) { 112 + pr_err("unable to count PMU IRQs\n"); 113 + return num_irqs; 114 + } 115 + 116 + /* 117 + * In this case we have no idea which CPUs are covered by the PMU. 118 + * To match our prior behaviour, we assume all CPUs in this case. 119 + */ 120 + if (num_irqs == 0) { 121 + pr_warn("no irqs for PMU, sampling events not supported\n"); 122 + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 123 + cpumask_setall(&pmu->supported_cpus); 124 + return 0; 125 + } 126 + 127 + if (num_irqs == 1) { 128 + int irq = platform_get_irq(pdev, 0); 129 + if (irq && irq_is_percpu(irq)) 130 + return pmu_parse_percpu_irq(pmu, irq); 131 + } 132 + 133 + if (!pmu_has_irq_affinity(pdev->dev.of_node)) { 134 + pr_warn("no interrupt-affinity property for %s, guessing.\n", 135 + of_node_full_name(pdev->dev.of_node)); 136 + } 137 + 138 + /* 139 + * Some platforms have all PMU IRQs OR'd into a single IRQ, with a 140 + * special platdata function that attempts to demux them. 
141 + */ 142 + if (dev_get_platdata(&pdev->dev)) 143 + cpumask_setall(&pmu->supported_cpus); 144 + 145 + for (i = 0; i < num_irqs; i++) { 146 + int cpu, irq; 147 + 148 + irq = platform_get_irq(pdev, i); 149 + if (WARN_ON(irq <= 0)) 150 + continue; 151 + 152 + if (irq_is_percpu(irq)) { 153 + pr_warn("multiple PPIs or mismatched SPI/PPI detected\n"); 154 + return -EINVAL; 155 + } 156 + 157 + cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i); 158 + if (cpu < 0) 159 + return cpu; 160 + if (cpu >= nr_cpu_ids) 161 + continue; 162 + 163 + if (per_cpu(hw_events->irq, cpu)) { 164 + pr_warn("multiple PMU IRQs for the same CPU detected\n"); 165 + return -EINVAL; 166 + } 167 + 168 + per_cpu(hw_events->irq, cpu) = irq; 169 + cpumask_set_cpu(cpu, &pmu->supported_cpus); 170 + } 171 + 172 + return 0; 173 + } 174 + 175 + int arm_pmu_device_probe(struct platform_device *pdev, 176 + const struct of_device_id *of_table, 177 + const struct pmu_probe_info *probe_table) 178 + { 179 + const struct of_device_id *of_id; 180 + armpmu_init_fn init_fn; 181 + struct device_node *node = pdev->dev.of_node; 182 + struct arm_pmu *pmu; 183 + int ret = -ENODEV; 184 + 185 + pmu = armpmu_alloc(); 186 + if (!pmu) 187 + return -ENOMEM; 188 + 189 + pmu->plat_device = pdev; 190 + 191 + ret = pmu_parse_irqs(pmu); 192 + if (ret) 193 + goto out_free; 194 + 195 + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { 196 + init_fn = of_id->data; 197 + 198 + pmu->secure_access = of_property_read_bool(pdev->dev.of_node, 199 + "secure-reg-access"); 200 + 201 + /* arm64 systems boot only as non-secure */ 202 + if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) { 203 + pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); 204 + pmu->secure_access = false; 205 + } 206 + 207 + ret = init_fn(pmu); 208 + } else if (probe_table) { 209 + cpumask_setall(&pmu->supported_cpus); 210 + ret = probe_current_pmu(pmu, probe_table); 211 + } 212 + 213 + if (ret) { 214 + pr_info("%s: failed to probe PMU!\n", 
of_node_full_name(node)); 215 + goto out_free; 216 + } 217 + 218 + ret = armpmu_request_irqs(pmu); 219 + if (ret) 220 + goto out_free_irqs; 221 + 222 + ret = armpmu_register(pmu); 223 + if (ret) 224 + goto out_free; 225 + 226 + return 0; 227 + 228 + out_free_irqs: 229 + armpmu_free_irqs(pmu); 230 + out_free: 231 + pr_info("%s: failed to register PMU devices!\n", 232 + of_node_full_name(node)); 233 + armpmu_free(pmu); 234 + return ret; 235 + }
+849
drivers/perf/qcom_l3_pmu.c
··· 1 + /* 2 + * Driver for the L3 cache PMUs in Qualcomm Technologies chips. 3 + * 4 + * The driver supports a distributed cache architecture where the overall 5 + * cache for a socket is comprised of multiple slices each with its own PMU. 6 + * Access to each individual PMU is provided even though all CPUs share all 7 + * the slices. User space needs to aggregate to individual counts to provide 8 + * a global picture. 9 + * 10 + * See Documentation/perf/qcom_l3_pmu.txt for more details. 11 + * 12 + * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. 13 + * 14 + * This program is free software; you can redistribute it and/or modify 15 + * it under the terms of the GNU General Public License version 2 and 16 + * only version 2 as published by the Free Software Foundation. 17 + * 18 + * This program is distributed in the hope that it will be useful, 19 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 + * GNU General Public License for more details. 22 + */ 23 + 24 + #include <linux/acpi.h> 25 + #include <linux/bitops.h> 26 + #include <linux/interrupt.h> 27 + #include <linux/io.h> 28 + #include <linux/list.h> 29 + #include <linux/module.h> 30 + #include <linux/perf_event.h> 31 + #include <linux/platform_device.h> 32 + 33 + /* 34 + * General constants 35 + */ 36 + 37 + /* Number of counters on each PMU */ 38 + #define L3_NUM_COUNTERS 8 39 + /* Mask for the event type field within perf_event_attr.config and EVTYPE reg */ 40 + #define L3_EVTYPE_MASK 0xFF 41 + /* 42 + * Bit position of the 'long counter' flag within perf_event_attr.config. 43 + * Reserve some space between the event type and this flag to allow expansion 44 + * in the event type field. 
45 + */ 46 + #define L3_EVENT_LC_BIT 32 47 + 48 + /* 49 + * Register offsets 50 + */ 51 + 52 + /* Perfmon registers */ 53 + #define L3_HML3_PM_CR 0x000 54 + #define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8) 55 + #define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8) 56 + #define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8) 57 + #define L3_HML3_PM_FILTRA 0x300 58 + #define L3_HML3_PM_FILTRB 0x308 59 + #define L3_HML3_PM_FILTRC 0x310 60 + #define L3_HML3_PM_FILTRAM 0x304 61 + #define L3_HML3_PM_FILTRBM 0x30C 62 + #define L3_HML3_PM_FILTRCM 0x314 63 + 64 + /* Basic counter registers */ 65 + #define L3_M_BC_CR 0x500 66 + #define L3_M_BC_SATROLL_CR 0x504 67 + #define L3_M_BC_CNTENSET 0x508 68 + #define L3_M_BC_CNTENCLR 0x50C 69 + #define L3_M_BC_INTENSET 0x510 70 + #define L3_M_BC_INTENCLR 0x514 71 + #define L3_M_BC_GANG 0x718 72 + #define L3_M_BC_OVSR 0x740 73 + #define L3_M_BC_IRQCTL 0x96C 74 + 75 + /* 76 + * Bit field definitions 77 + */ 78 + 79 + /* L3_HML3_PM_CR */ 80 + #define PM_CR_RESET (0) 81 + 82 + /* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */ 83 + #define PMCNT_RESET (0) 84 + 85 + /* L3_HML3_PM_EVTYPEx */ 86 + #define EVSEL(__val) ((__val) & L3_EVTYPE_MASK) 87 + 88 + /* Reset value for all the filter registers */ 89 + #define PM_FLTR_RESET (0) 90 + 91 + /* L3_M_BC_CR */ 92 + #define BC_RESET (1UL << 1) 93 + #define BC_ENABLE (1UL << 0) 94 + 95 + /* L3_M_BC_SATROLL_CR */ 96 + #define BC_SATROLL_CR_RESET (0) 97 + 98 + /* L3_M_BC_CNTENSET */ 99 + #define PMCNTENSET(__cntr) (1UL << ((__cntr) & 0x7)) 100 + 101 + /* L3_M_BC_CNTENCLR */ 102 + #define PMCNTENCLR(__cntr) (1UL << ((__cntr) & 0x7)) 103 + #define BC_CNTENCLR_RESET (0xFF) 104 + 105 + /* L3_M_BC_INTENSET */ 106 + #define PMINTENSET(__cntr) (1UL << ((__cntr) & 0x7)) 107 + 108 + /* L3_M_BC_INTENCLR */ 109 + #define PMINTENCLR(__cntr) (1UL << ((__cntr) & 0x7)) 110 + #define BC_INTENCLR_RESET (0xFF) 111 + 112 + /* L3_M_BC_GANG */ 113 + #define GANG_EN(__cntr) (1UL << 
((__cntr) & 0x7)) 114 + #define BC_GANG_RESET (0) 115 + 116 + /* L3_M_BC_OVSR */ 117 + #define PMOVSRCLR(__cntr) (1UL << ((__cntr) & 0x7)) 118 + #define PMOVSRCLR_RESET (0xFF) 119 + 120 + /* L3_M_BC_IRQCTL */ 121 + #define PMIRQONMSBEN(__cntr) (1UL << ((__cntr) & 0x7)) 122 + #define BC_IRQCTL_RESET (0x0) 123 + 124 + /* 125 + * Events 126 + */ 127 + 128 + #define L3_EVENT_CYCLES 0x01 129 + #define L3_EVENT_READ_HIT 0x20 130 + #define L3_EVENT_READ_MISS 0x21 131 + #define L3_EVENT_READ_HIT_D 0x22 132 + #define L3_EVENT_READ_MISS_D 0x23 133 + #define L3_EVENT_WRITE_HIT 0x24 134 + #define L3_EVENT_WRITE_MISS 0x25 135 + 136 + /* 137 + * Decoding of settings from perf_event_attr 138 + * 139 + * The config format for perf events is: 140 + * - config: bits 0-7: event type 141 + * bit 32: HW counter size requested, 0: 32 bits, 1: 64 bits 142 + */ 143 + 144 + static inline u32 get_event_type(struct perf_event *event) 145 + { 146 + return (event->attr.config) & L3_EVTYPE_MASK; 147 + } 148 + 149 + static inline bool event_uses_long_counter(struct perf_event *event) 150 + { 151 + return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT)); 152 + } 153 + 154 + static inline int event_num_counters(struct perf_event *event) 155 + { 156 + return event_uses_long_counter(event) ? 2 : 1; 157 + } 158 + 159 + /* 160 + * Main PMU, inherits from the core perf PMU type 161 + */ 162 + struct l3cache_pmu { 163 + struct pmu pmu; 164 + struct hlist_node node; 165 + void __iomem *regs; 166 + struct perf_event *events[L3_NUM_COUNTERS]; 167 + unsigned long used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)]; 168 + cpumask_t cpumask; 169 + }; 170 + 171 + #define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu)) 172 + 173 + /* 174 + * Type used to group hardware counter operations 175 + * 176 + * Used to implement two types of hardware counters, standard (32bits) and 177 + * long (64bits). The hardware supports counter chaining which we use to 178 + * implement long counters. 
This support is exposed via the 'lc' flag field 179 + * in perf_event_attr.config. 180 + */ 181 + struct l3cache_event_ops { 182 + /* Called to start event monitoring */ 183 + void (*start)(struct perf_event *event); 184 + /* Called to stop event monitoring */ 185 + void (*stop)(struct perf_event *event, int flags); 186 + /* Called to update the perf_event */ 187 + void (*update)(struct perf_event *event); 188 + }; 189 + 190 + /* 191 + * Implementation of long counter operations 192 + * 193 + * 64bit counters are implemented by chaining two of the 32bit physical 194 + * counters. The PMU only supports chaining of adjacent even/odd pairs 195 + * and for simplicity the driver always configures the odd counter to 196 + * count the overflows of the lower-numbered even counter. Note that since 197 + * the resulting hardware counter is 64bits no IRQs are required to maintain 198 + * the software counter which is also 64bits. 199 + */ 200 + 201 + static void qcom_l3_cache__64bit_counter_start(struct perf_event *event) 202 + { 203 + struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu); 204 + int idx = event->hw.idx; 205 + u32 evsel = get_event_type(event); 206 + u32 gang; 207 + 208 + /* Set the odd counter to count the overflows of the even counter */ 209 + gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG); 210 + gang |= GANG_EN(idx + 1); 211 + writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG); 212 + 213 + /* Initialize the hardware counters and reset prev_count*/ 214 + local64_set(&event->hw.prev_count, 0); 215 + writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)); 216 + writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx)); 217 + 218 + /* 219 + * Set the event types, the upper half must use zero and the lower 220 + * half the actual event type 221 + */ 222 + writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1)); 223 + writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx)); 224 + 225 + /* Finally, enable the counters */ 226 + 
writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
	writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

/*
 * Stop a long (64bit) counter: disable both physical counters of the
 * even/odd pair, then clear the chaining (GANG) bit of the odd counter.
 */
static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
					      int flags)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
	int idx = event->hw.idx;
	u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);

	/* Disable the counters */
	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
	writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);

	/* Disable chaining */
	writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
}

/*
 * Fold the current value of a long (64bit) counter into event->count.
 *
 * The inner loop re-reads the high word after reading the low word to
 * detect a low-word rollover between the two 32bit MMIO reads; the outer
 * cmpxchg loop handles racing updaters of prev_count.
 */
static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
	int idx = event->hw.idx;
	u32 hi, lo;
	u64 prev, new;

	do {
		prev = local64_read(&event->hw.prev_count);
		do {
			hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
			lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
		} while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
		new = ((u64)hi << 32) | lo;
	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

	local64_add(new - prev, &event->count);
}

/* Operations for events using long (chained, 64bit) counters */
static const struct l3cache_event_ops event_ops_long = {
	.start = qcom_l3_cache__64bit_counter_start,
	.stop = qcom_l3_cache__64bit_counter_stop,
	.update = qcom_l3_cache__64bit_counter_update,
};

/*
 * Implementation of standard counter operations
 *
 * 32bit counters use a single physical counter and a hardware feature that
 * asserts the overflow IRQ on the toggling of the most significant bit in
 * the counter. This feature allows the counters to be left free-running
 * without needing the usual reprogramming required to properly handle races
 * during concurrent calls to update.
 */

static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
	int idx = event->hw.idx;
	u32 evsel = get_event_type(event);
	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

	/* Set the counter to assert the overflow IRQ on MSB toggling */
	writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);

	/* Initialize the hardware counter and reset prev_count */
	local64_set(&event->hw.prev_count, 0);
	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));

	/* Set the event type */
	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));

	/* Enable interrupt generation by this counter */
	writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);

	/* Finally, enable the counter */
	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

/*
 * Stop a standard (32bit) counter: disable it, mask its interrupt, and
 * restore the default (no IRQ on MSB toggle) behaviour.
 */
static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
					      int flags)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
	int idx = event->hw.idx;
	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

	/* Disable the counter */
	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);

	/* Disable interrupt generation by this counter */
	writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);

	/* Set the counter to not assert the overflow IRQ on MSB toggling */
	writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
}

/*
 * Fold the current value of a standard (32bit) counter into event->count.
 * The cmpxchg loop handles racing updaters of prev_count; 32bit wraparound
 * is absorbed by the unsigned subtraction (new - prev).
 */
static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
	int idx = event->hw.idx;
	u32 prev, new;

	do {
		prev = local64_read(&event->hw.prev_count);
		new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

	local64_add(new - prev, &event->count);
}

/* Operations for events using standard (single 32bit) counters */
static const struct l3cache_event_ops event_ops_std = {
	.start = qcom_l3_cache__32bit_counter_start,
	.stop = qcom_l3_cache__32bit_counter_stop,
	.update = qcom_l3_cache__32bit_counter_update,
};

/* Retrieve the appropriate operations for the given event */
static
const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
{
	if (event_uses_long_counter(event))
		return &event_ops_long;
	else
		return &event_ops_std;
}

/*
 * Top level PMU functions.
 */

/*
 * Reset the PMU slice to a known state: all counters stopped, cleared and
 * deconfigured, all interrupts and chaining disabled, filters reset.
 */
static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
{
	int i;

	writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);

	/*
	 * Use writel for the first programming command to ensure the basic
	 * counter unit is stopped before proceeding
	 */
	writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);

	writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
	writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
	writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
	writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
	writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
	writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);

	for (i = 0; i < L3_NUM_COUNTERS; ++i) {
		writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
		writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
	}

	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);

	/*
	 * Use writel here to ensure all programming commands are done
	 * before proceeding
	 */
	writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

/*
 * Overflow interrupt handler: acknowledge the overflow bits and fold the
 * overflowed counters into their events via the per-event update op.
 */
static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
{
	struct l3cache_pmu *l3pmu = data;
	/* Read the overflow status register */
	long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
	int idx;

	if (status == 0)
		return IRQ_NONE;

	/* Clear the bits we read on the overflow status register */
	writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);

	for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
		struct perf_event *event;
		const struct l3cache_event_ops *ops;

		event = l3pmu->events[idx];
		if (!event)
			continue;

		/*
		 * Since the IRQ is not enabled for events using long counters
		 * we should never see one of those here, however, be consistent
		 * and use the ops indirections like in the other operations.
		 */

		ops = l3cache_event_get_ops(event);
		ops->update(event);
	}

	return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

	/* Ensure the other programming commands are observed before enabling */
	wmb();

	writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
{
	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

	writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);

	/* Ensure the basic counter unit is stopped before proceeding */
	wmb();
}

/*
 * We must NOT create groups containing events from multiple hardware PMUs,
 * although mixing different software and hardware PMUs is allowed.
458 + */ 459 + static bool qcom_l3_cache__validate_event_group(struct perf_event *event) 460 + { 461 + struct perf_event *leader = event->group_leader; 462 + struct perf_event *sibling; 463 + int counters = 0; 464 + 465 + if (leader->pmu != event->pmu && !is_software_event(leader)) 466 + return false; 467 + 468 + counters = event_num_counters(event); 469 + counters += event_num_counters(leader); 470 + 471 + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 472 + if (is_software_event(sibling)) 473 + continue; 474 + if (sibling->pmu != event->pmu) 475 + return false; 476 + counters += event_num_counters(sibling); 477 + } 478 + 479 + /* 480 + * If the group requires more counters than the HW has, it 481 + * cannot ever be scheduled. 482 + */ 483 + return counters <= L3_NUM_COUNTERS; 484 + } 485 + 486 + static int qcom_l3_cache__event_init(struct perf_event *event) 487 + { 488 + struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu); 489 + struct hw_perf_event *hwc = &event->hw; 490 + 491 + /* 492 + * Is the event for this PMU? 493 + */ 494 + if (event->attr.type != event->pmu->type) 495 + return -ENOENT; 496 + 497 + /* 498 + * There are no per-counter mode filters in the PMU. 499 + */ 500 + if (event->attr.exclude_user || event->attr.exclude_kernel || 501 + event->attr.exclude_hv || event->attr.exclude_idle) 502 + return -EINVAL; 503 + 504 + /* 505 + * Sampling not supported since these events are not core-attributable. 506 + */ 507 + if (hwc->sample_period) 508 + return -EINVAL; 509 + 510 + /* 511 + * Task mode not available, we run the counters as socket counters, 512 + * not attributable to any CPU and therefore cannot attribute per-task. 513 + */ 514 + if (event->cpu < 0) 515 + return -EINVAL; 516 + 517 + /* Validate the group */ 518 + if (!qcom_l3_cache__validate_event_group(event)) 519 + return -EINVAL; 520 + 521 + hwc->idx = -1; 522 + 523 + /* 524 + * Many perf core operations (eg. events rotation) operate on a 525 + * single CPU context. 
This is obvious for CPU PMUs, where one 526 + * expects the same sets of events being observed on all CPUs, 527 + * but can lead to issues for off-core PMUs, like this one, where 528 + * each event could be theoretically assigned to a different CPU. 529 + * To mitigate this, we enforce CPU assignment to one designated 530 + * processor (the one described in the "cpumask" attribute exported 531 + * by the PMU device). perf user space tools honor this and avoid 532 + * opening more than one copy of the events. 533 + */ 534 + event->cpu = cpumask_first(&l3pmu->cpumask); 535 + 536 + return 0; 537 + } 538 + 539 + static void qcom_l3_cache__event_start(struct perf_event *event, int flags) 540 + { 541 + struct hw_perf_event *hwc = &event->hw; 542 + const struct l3cache_event_ops *ops = l3cache_event_get_ops(event); 543 + 544 + hwc->state = 0; 545 + ops->start(event); 546 + } 547 + 548 + static void qcom_l3_cache__event_stop(struct perf_event *event, int flags) 549 + { 550 + struct hw_perf_event *hwc = &event->hw; 551 + const struct l3cache_event_ops *ops = l3cache_event_get_ops(event); 552 + 553 + if (hwc->state & PERF_HES_STOPPED) 554 + return; 555 + 556 + ops->stop(event, flags); 557 + if (flags & PERF_EF_UPDATE) 558 + ops->update(event); 559 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 560 + } 561 + 562 + static int qcom_l3_cache__event_add(struct perf_event *event, int flags) 563 + { 564 + struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu); 565 + struct hw_perf_event *hwc = &event->hw; 566 + int order = event_uses_long_counter(event) ? 1 : 0; 567 + int idx; 568 + 569 + /* 570 + * Try to allocate a counter. 571 + */ 572 + idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order); 573 + if (idx < 0) 574 + /* The counters are all in use. 
*/ 575 + return -EAGAIN; 576 + 577 + hwc->idx = idx; 578 + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 579 + l3pmu->events[idx] = event; 580 + 581 + if (flags & PERF_EF_START) 582 + qcom_l3_cache__event_start(event, 0); 583 + 584 + /* Propagate changes to the userspace mapping. */ 585 + perf_event_update_userpage(event); 586 + 587 + return 0; 588 + } 589 + 590 + static void qcom_l3_cache__event_del(struct perf_event *event, int flags) 591 + { 592 + struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu); 593 + struct hw_perf_event *hwc = &event->hw; 594 + int order = event_uses_long_counter(event) ? 1 : 0; 595 + 596 + /* Stop and clean up */ 597 + qcom_l3_cache__event_stop(event, flags | PERF_EF_UPDATE); 598 + l3pmu->events[hwc->idx] = NULL; 599 + bitmap_release_region(l3pmu->used_mask, hwc->idx, order); 600 + 601 + /* Propagate changes to the userspace mapping. */ 602 + perf_event_update_userpage(event); 603 + } 604 + 605 + static void qcom_l3_cache__event_read(struct perf_event *event) 606 + { 607 + const struct l3cache_event_ops *ops = l3cache_event_get_ops(event); 608 + 609 + ops->update(event); 610 + } 611 + 612 + /* 613 + * Add sysfs attributes 614 + * 615 + * We export: 616 + * - formats, used by perf user space and other tools to configure events 617 + * - events, used by perf user space and other tools to create events 618 + * symbolically, e.g.: 619 + * perf stat -a -e l3cache_0_0/event=read-miss/ ls 620 + * perf stat -a -e l3cache_0_0/event=0x21/ ls 621 + * - cpumask, used by perf user space and other tools to know on which CPUs 622 + * to open the events 623 + */ 624 + 625 + /* formats */ 626 + 627 + static ssize_t l3cache_pmu_format_show(struct device *dev, 628 + struct device_attribute *attr, char *buf) 629 + { 630 + struct dev_ext_attribute *eattr; 631 + 632 + eattr = container_of(attr, struct dev_ext_attribute, attr); 633 + return sprintf(buf, "%s\n", (char *) eattr->var); 634 + } 635 + 636 + #define L3CACHE_PMU_FORMAT_ATTR(_name, _config) \ 
637 + (&((struct dev_ext_attribute[]) { \ 638 + { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \ 639 + .var = (void *) _config, } \ 640 + })[0].attr.attr) 641 + 642 + static struct attribute *qcom_l3_cache_pmu_formats[] = { 643 + L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"), 644 + L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)), 645 + NULL, 646 + }; 647 + 648 + static struct attribute_group qcom_l3_cache_pmu_format_group = { 649 + .name = "format", 650 + .attrs = qcom_l3_cache_pmu_formats, 651 + }; 652 + 653 + /* events */ 654 + 655 + static ssize_t l3cache_pmu_event_show(struct device *dev, 656 + struct device_attribute *attr, char *page) 657 + { 658 + struct perf_pmu_events_attr *pmu_attr; 659 + 660 + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); 661 + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); 662 + } 663 + 664 + #define L3CACHE_EVENT_ATTR(_name, _id) \ 665 + (&((struct perf_pmu_events_attr[]) { \ 666 + { .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \ 667 + .id = _id, } \ 668 + })[0].attr.attr) 669 + 670 + static struct attribute *qcom_l3_cache_pmu_events[] = { 671 + L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES), 672 + L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT), 673 + L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS), 674 + L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D), 675 + L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D), 676 + L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT), 677 + L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS), 678 + NULL 679 + }; 680 + 681 + static struct attribute_group qcom_l3_cache_pmu_events_group = { 682 + .name = "events", 683 + .attrs = qcom_l3_cache_pmu_events, 684 + }; 685 + 686 + /* cpumask */ 687 + 688 + static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev, 689 + struct device_attribute *attr, char *buf) 690 + { 691 + struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev)); 692 + 693 + return 
cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask); 694 + } 695 + 696 + static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL); 697 + 698 + static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = { 699 + &dev_attr_cpumask.attr, 700 + NULL, 701 + }; 702 + 703 + static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = { 704 + .attrs = qcom_l3_cache_pmu_cpumask_attrs, 705 + }; 706 + 707 + /* 708 + * Per PMU device attribute groups 709 + */ 710 + static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = { 711 + &qcom_l3_cache_pmu_format_group, 712 + &qcom_l3_cache_pmu_events_group, 713 + &qcom_l3_cache_pmu_cpumask_attr_group, 714 + NULL, 715 + }; 716 + 717 + /* 718 + * Probing functions and data. 719 + */ 720 + 721 + static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) 722 + { 723 + struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node); 724 + 725 + /* If there is not a CPU/PMU association pick this CPU */ 726 + if (cpumask_empty(&l3pmu->cpumask)) 727 + cpumask_set_cpu(cpu, &l3pmu->cpumask); 728 + 729 + return 0; 730 + } 731 + 732 + static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) 733 + { 734 + struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node); 735 + unsigned int target; 736 + 737 + if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask)) 738 + return 0; 739 + target = cpumask_any_but(cpu_online_mask, cpu); 740 + if (target >= nr_cpu_ids) 741 + return 0; 742 + perf_pmu_migrate_context(&l3pmu->pmu, cpu, target); 743 + cpumask_set_cpu(target, &l3pmu->cpumask); 744 + return 0; 745 + } 746 + 747 + static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) 748 + { 749 + struct l3cache_pmu *l3pmu; 750 + struct acpi_device *acpi_dev; 751 + struct resource *memrc; 752 + int ret; 753 + char *name; 754 + 755 + /* Initialize the PMU data structures */ 756 + 757 + acpi_dev = ACPI_COMPANION(&pdev->dev); 758 + if 
(!acpi_dev) 759 + return -ENODEV; 760 + 761 + l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL); 762 + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s", 763 + acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id); 764 + if (!l3pmu || !name) 765 + return -ENOMEM; 766 + 767 + l3pmu->pmu = (struct pmu) { 768 + .task_ctx_nr = perf_invalid_context, 769 + 770 + .pmu_enable = qcom_l3_cache__pmu_enable, 771 + .pmu_disable = qcom_l3_cache__pmu_disable, 772 + .event_init = qcom_l3_cache__event_init, 773 + .add = qcom_l3_cache__event_add, 774 + .del = qcom_l3_cache__event_del, 775 + .start = qcom_l3_cache__event_start, 776 + .stop = qcom_l3_cache__event_stop, 777 + .read = qcom_l3_cache__event_read, 778 + 779 + .attr_groups = qcom_l3_cache_pmu_attr_grps, 780 + }; 781 + 782 + memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); 783 + l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc); 784 + if (IS_ERR(l3pmu->regs)) { 785 + dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start); 786 + return PTR_ERR(l3pmu->regs); 787 + } 788 + 789 + qcom_l3_cache__init(l3pmu); 790 + 791 + ret = platform_get_irq(pdev, 0); 792 + if (ret <= 0) 793 + return ret; 794 + 795 + ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0, 796 + name, l3pmu); 797 + if (ret) { 798 + dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n", 799 + &memrc->start); 800 + return ret; 801 + } 802 + 803 + /* Add this instance to the list used by the offline callback */ 804 + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node); 805 + if (ret) { 806 + dev_err(&pdev->dev, "Error %d registering hotplug", ret); 807 + return ret; 808 + } 809 + 810 + ret = perf_pmu_register(&l3pmu->pmu, name, -1); 811 + if (ret < 0) { 812 + dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret); 813 + return ret; 814 + } 815 + 816 + dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type); 817 + 818 + return 0; 819 + } 820 + 821 
+ static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = { 822 + { "QCOM8081", }, 823 + { } 824 + }; 825 + MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match); 826 + 827 + static struct platform_driver qcom_l3_cache_pmu_driver = { 828 + .driver = { 829 + .name = "qcom-l3cache-pmu", 830 + .acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match), 831 + }, 832 + .probe = qcom_l3_cache_pmu_probe, 833 + }; 834 + 835 + static int __init register_qcom_l3_cache_pmu_driver(void) 836 + { 837 + int ret; 838 + 839 + /* Install a hook to update the reader CPU in case it goes offline */ 840 + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, 841 + "perf/qcom/l3cache:online", 842 + qcom_l3_cache_pmu_online_cpu, 843 + qcom_l3_cache_pmu_offline_cpu); 844 + if (ret) 845 + return ret; 846 + 847 + return platform_driver_register(&qcom_l3_cache_pmu_driver); 848 + } 849 + device_initcall(register_qcom_l3_cache_pmu_driver);
+2
include/linux/cpuhotplug.h
··· 94 94 CPUHP_AP_ARM_VFP_STARTING, 95 95 CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, 96 96 CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, 97 + CPUHP_AP_PERF_ARM_ACPI_STARTING, 97 98 CPUHP_AP_PERF_ARM_STARTING, 98 99 CPUHP_AP_ARM_L2X0_STARTING, 99 100 CPUHP_AP_ARM_ARCH_TIMER_STARTING, ··· 138 137 CPUHP_AP_PERF_ARM_CCN_ONLINE, 139 138 CPUHP_AP_PERF_ARM_L2X0_ONLINE, 140 139 CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, 140 + CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, 141 141 CPUHP_AP_WORKQUEUE_ONLINE, 142 142 CPUHP_AP_RCUTREE_ONLINE, 143 143 CPUHP_AP_ONLINE_DYN,
+23 -6
include/linux/perf/arm_pmu.h
··· 75 75 * already have to allocate this struct per cpu. 76 76 */ 77 77 struct arm_pmu *percpu_pmu; 78 + 79 + int irq; 78 80 }; 79 81 80 82 enum armpmu_attr_groups { ··· 90 88 struct pmu pmu; 91 89 cpumask_t active_irqs; 92 90 cpumask_t supported_cpus; 93 - int *irq_affinity; 94 91 char *name; 95 92 irqreturn_t (*handle_irq)(int irq_num, void *dev); 96 93 void (*enable)(struct perf_event *event); ··· 105 104 void (*start)(struct arm_pmu *); 106 105 void (*stop)(struct arm_pmu *); 107 106 void (*reset)(void *); 108 - int (*request_irq)(struct arm_pmu *, irq_handler_t handler); 109 - void (*free_irq)(struct arm_pmu *); 110 107 int (*map_event)(struct perf_event *event); 111 108 int num_events; 112 - atomic_t active_events; 113 - struct mutex reserve_mutex; 114 109 u64 max_period; 115 110 bool secure_access; /* 32-bit ARM only */ 116 111 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 ··· 117 120 struct notifier_block cpu_pm_nb; 118 121 /* the attr_groups array must be NULL-terminated */ 119 122 const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; 123 + 124 + /* Only to be used by ACPI probing code */ 125 + unsigned long acpi_cpuid; 120 126 }; 121 127 122 128 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) ··· 135 135 [PERF_COUNT_HW_CACHE_RESULT_MAX], 136 136 u32 raw_event_mask); 137 137 138 + typedef int (*armpmu_init_fn)(struct arm_pmu *); 139 + 138 140 struct pmu_probe_info { 139 141 unsigned int cpuid; 140 142 unsigned int mask; 141 - int (*init)(struct arm_pmu *); 143 + armpmu_init_fn init; 142 144 }; 143 145 144 146 #define PMU_PROBE(_cpuid, _mask, _fn) \ ··· 161 159 int arm_pmu_device_probe(struct platform_device *pdev, 162 160 const struct of_device_id *of_table, 163 161 const struct pmu_probe_info *probe_table); 162 + 163 + #ifdef CONFIG_ACPI 164 + int arm_pmu_acpi_probe(armpmu_init_fn init_fn); 165 + #else 166 + static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } 167 + #endif 168 + 169 + /* Internal functions 
only for core arm_pmu code */ 170 + struct arm_pmu *armpmu_alloc(void); 171 + void armpmu_free(struct arm_pmu *pmu); 172 + int armpmu_register(struct arm_pmu *pmu); 173 + int armpmu_request_irqs(struct arm_pmu *armpmu); 174 + void armpmu_free_irqs(struct arm_pmu *armpmu); 175 + int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); 176 + void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); 164 177 165 178 #define ARMV8_PMU_PDEV_NAME "armv8-pmu" 166 179