Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: perf: Add support for ARMv8.5-PMU 64-bit counters

At present ARMv8 event counters are limited to 32-bits, though by
using the CHAIN event it's possible to combine adjacent counters to
achieve 64-bits. The perf config1:0 bit can be set to use such a
configuration.

With the introduction of ARMv8.5-PMU support, all event counters can
now be used as 64-bit counters.

Let's enable 64-bit event counters where support exists. Unless the
user sets config1:0 we will adjust the counter value such that it
overflows upon 32-bit overflow. This follows the same behaviour as
the cycle counter which has always been (and remains) 64-bits.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
[Mark: fix ID field names, compare with 8.5 value]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>

Authored by Andrew Murray; committed by Will Deacon.
Commit: 8673e02e (c854188e)

+78 -17
+2 -1
arch/arm64/include/asm/perf_event.h
@@ -176,9 +176,10 @@
 #define ARMV8_PMU_PMCR_X	(1 << 4) /* Export to ETM */
 #define ARMV8_PMU_PMCR_DP	(1 << 5) /* Disable CCNT if non-invasive debug*/
 #define ARMV8_PMU_PMCR_LC	(1 << 6) /* Overflow on 64 bit cycle counter */
+#define ARMV8_PMU_PMCR_LP	(1 << 7) /* Long event counter enable */
 #define ARMV8_PMU_PMCR_N_SHIFT	11	/* Number of counters supported */
 #define ARMV8_PMU_PMCR_N_MASK	0x1f
-#define ARMV8_PMU_PMCR_MASK	0x7f	/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK	0xff	/* Mask for writable bits */
 
 /*
  * PMOVSR: counters overflow flag status reg
+4
arch/arm64/include/asm/sysreg.h
@@ -702,7 +702,11 @@
 #define ID_AA64DFR0_TRACEVER_SHIFT	4
 #define ID_AA64DFR0_DEBUGVER_SHIFT	0
 
+#define ID_AA64DFR0_PMUVER_8_0		0x1
 #define ID_AA64DFR0_PMUVER_8_1		0x4
+#define ID_AA64DFR0_PMUVER_8_4		0x5
+#define ID_AA64DFR0_PMUVER_8_5		0x6
+#define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
 
 #define ID_DFR0_PERFMON_SHIFT		24
+71 -16
arch/arm64/kernel/perf_event.c
@@ (before armv8pmu_event_is_chained) @@
 #define	ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
 	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+	return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+}
+
 /*
  * We must chain two programmable counters for 64 bit events,
  * except when we have allocated the 64bit cycle counter (for CPU

@@ (armv8pmu_event_is_chained) @@
 static inline bool armv8pmu_event_is_chained(struct perf_event *event)
 {
 	int idx = event->hw.idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 
 	return !WARN_ON(idx < 0) &&
 	       armv8pmu_event_is_64bit(event) &&
+	       !armv8pmu_has_long_event(cpu_pmu) &&
 	       (idx != ARMV8_IDX_CYCLE_COUNTER);
 }

@@ (armv8pmu_read_evcntr: widen to u64) @@
 	isb();
 }
 
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
 {
 	armv8pmu_select_counter(idx);
 	return read_sysreg(pmxevcntr_el0);

@@ (after armv8pmu_read_hw_counter: new bias helpers) @@
 	if (armv8pmu_event_is_chained(event))
 		val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
 	return val;
+}
+
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1) then we want to
+ * interrupt upon 32-bit overflow - we achieve this by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (armv8pmu_event_is_64bit(event))
+		return false;
+
+	if (armv8pmu_has_long_event(cpu_pmu) ||
+	    idx == ARMV8_IDX_CYCLE_COUNTER)
+		return true;
+
+	return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+	if (armv8pmu_event_needs_bias(event))
+		value |= GENMASK(63, 32);
+
+	return value;
+}
+
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
+{
+	if (armv8pmu_event_needs_bias(event))
+		value &= ~GENMASK(63, 32);
+
+	return value;
 }
 
 static u64 armv8pmu_read_counter(struct perf_event *event)

@@ (armv8pmu_read_counter / armv8pmu_write_evcntr) @@
 	else
 		value = armv8pmu_read_hw_counter(event);
 
-	return value;
+	return armv8pmu_unbias_long_counter(event, value);
 }
 
-static inline void armv8pmu_write_evcntr(int idx, u32 value)
+static inline void armv8pmu_write_evcntr(int idx, u64 value)
 {
 	armv8pmu_select_counter(idx);
 	write_sysreg(value, pmxevcntr_el0);

@@ (armv8pmu_write_counter) @@
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
+	value = armv8pmu_bias_long_counter(event, value);
+
 	if (!armv8pmu_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
-	else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
-		/*
-		 * The cycles counter is really a 64-bit counter.
-		 * When treating it as a 32-bit counter, we only count
-		 * the lower 32 bits, and set the upper 32-bits so that
-		 * we get an interrupt upon 32-bit overflow.
-		 */
-		if (!armv8pmu_event_is_64bit(event))
-			value |= 0xffffffff00000000ULL;
+	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
 		write_sysreg(value, pmccntr_el0);
-	} else
+	else
 		armv8pmu_write_hw_counter(event, value);
 }

@@ (armv8pmu_get_event_idx) @@
 	/*
 	 * Otherwise use events counters
 	 */
-	if (armv8pmu_event_is_64bit(event))
+	if (armv8pmu_event_is_64bit(event) &&
+	    !armv8pmu_has_long_event(cpu_pmu))
 		return	armv8pmu_get_chain_idx(cpuc, cpu_pmu);
 	else
 		return armv8pmu_get_single_idx(cpuc, cpu_pmu);

@@ (armv8pmu_reset: entry) @@
 static void armv8pmu_reset(void *info)
 {
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+	u32 pmcr;
+
 	/* The counter and interrupt enable registers are unknown at reset. */
 	armv8pmu_disable_counter(U32_MAX);
 	armv8pmu_disable_intens(U32_MAX);

@@ (armv8pmu_reset: PMCR programming) @@
 	 * Initialize & Reset PMNC. Request overflow interrupt for
 	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
 	 */
-	armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
-			    ARMV8_PMU_PMCR_LC);
+	pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+	/* Enable long event counter support where available */
+	if (armv8pmu_has_long_event(cpu_pmu))
+		pmcr |= ARMV8_PMU_PMCR_LP;
+
+	armv8pmu_pmcr_write(pmcr);
 }

@@ (__armv8pmu_probe_pmu: record PMU version) @@
 	if (pmuver == 0xf || pmuver == 0)
 		return;
 
+	cpu_pmu->pmuver = pmuver;
 	probe->present = true;
 
 	/* Read the nb of CNTx counters supported from PMNC */
+1
include/linux/perf/arm_pmu.h
@@ (struct arm_pmu) @@
 	struct pmu	pmu;
 	cpumask_t	supported_cpus;
 	char		*name;
+	int		pmuver;
 	irqreturn_t	(*handle_irq)(struct arm_pmu *pmu);
 	void		(*enable)(struct perf_event *event);
 	void		(*disable)(struct perf_event *event);