Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: perf: Enable PMU counter userspace access for perf event

Arm PMUs can support direct userspace access of counters which allows for
low overhead (i.e. no syscall) self-monitoring of tasks. The same feature
exists on x86 called 'rdpmc'. Unlike x86, userspace access will only be
enabled for thread bound events. This could be extended if needed, but
simplifies the implementation and reduces the chances for any
information leaks (which the x86 implementation suffers from).

PMU EL0 access will be enabled when an event with userspace access is
part of the thread's context. This includes when the event is not
scheduled on the PMU. There's some additional overhead clearing
dirty counters when access is enabled in order to prevent leaking
disabled counter data from other tasks.

Unlike x86, enabling of userspace access must be requested with a new
attr bit: config1:1. If the user requests userspace access with 64-bit
counters, then the event open will fail if the h/w doesn't support
64-bit counters. Chaining is not supported with userspace access. The
modes for config1 are as follows:

config1 = 0 : user access disabled and always 32-bit
config1 = 1 : user access disabled and always 64-bit (using chaining if needed)
config1 = 2 : user access enabled and always 32-bit
config1 = 3 : user access enabled and always 64-bit

Based on work by Raphael Gault <raphael.gault@arm.com>, but has been
completely re-written.

Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-perf-users@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211208201124.310740-5-robh@kernel.org
[will: Made armv8pmu_proc_user_access_handler() static]
Signed-off-by: Will Deacon <will@kernel.org>

Authored by Rob Herring, committed by Will Deacon
83a7a4d6 e2012600

+112 -7
arch/arm64/kernel/perf_event.c
··· 285 285 286 286 PMU_FORMAT_ATTR(event, "config:0-15"); 287 287 PMU_FORMAT_ATTR(long, "config1:0"); 288 + PMU_FORMAT_ATTR(rdpmc, "config1:1"); 288 289 289 290 static int sysctl_perf_user_access __read_mostly; 290 291 ··· 294 293 return event->attr.config1 & 0x1; 295 294 } 296 295 296 + static inline bool armv8pmu_event_want_user_access(struct perf_event *event) 297 + { 298 + return event->attr.config1 & 0x2; 299 + } 300 + 297 301 static struct attribute *armv8_pmuv3_format_attrs[] = { 298 302 &format_attr_event.attr, 299 303 &format_attr_long.attr, 304 + &format_attr_rdpmc.attr, 300 305 NULL, 301 306 }; 302 307 ··· 371 364 */ 372 365 #define ARMV8_IDX_CYCLE_COUNTER 0 373 366 #define ARMV8_IDX_COUNTER0 1 374 - 367 + #define ARMV8_IDX_CYCLE_COUNTER_USER 32 375 368 376 369 /* 377 370 * We unconditionally enable ARMv8.5-PMU long event counter support ··· 383 376 return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5); 384 377 } 385 378 379 + static inline bool armv8pmu_event_has_user_read(struct perf_event *event) 380 + { 381 + return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; 382 + } 383 + 386 384 /* 387 385 * We must chain two programmable counters for 64 bit events, 388 386 * except when we have allocated the 64bit cycle counter (for CPU 389 - * cycles event). This must be called only when the event has 390 - * a counter allocated. 387 + * cycles event) or when user space counter access is enabled. 
391 388 */ 392 389 static inline bool armv8pmu_event_is_chained(struct perf_event *event) 393 390 { 394 391 int idx = event->hw.idx; 395 392 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); 396 393 397 - return !WARN_ON(idx < 0) && 394 + return !armv8pmu_event_has_user_read(event) && 398 395 armv8pmu_event_is_64bit(event) && 399 396 !armv8pmu_has_long_event(cpu_pmu) && 400 397 (idx != ARMV8_IDX_CYCLE_COUNTER); ··· 731 720 return value; 732 721 } 733 722 723 + static void armv8pmu_disable_user_access(void) 724 + { 725 + write_sysreg(0, pmuserenr_el0); 726 + } 727 + 728 + static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) 729 + { 730 + int i; 731 + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); 732 + 733 + /* Clear any unused counters to avoid leaking their contents */ 734 + for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { 735 + if (i == ARMV8_IDX_CYCLE_COUNTER) 736 + write_sysreg(0, pmccntr_el0); 737 + else 738 + armv8pmu_write_evcntr(i, 0); 739 + } 740 + 741 + write_sysreg(0, pmuserenr_el0); 742 + write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); 743 + } 744 + 734 745 static void armv8pmu_enable_event(struct perf_event *event) 735 746 { 736 747 /* ··· 796 763 797 764 static void armv8pmu_start(struct arm_pmu *cpu_pmu) 798 765 { 766 + struct perf_event_context *task_ctx = 767 + this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx; 768 + 769 + if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user) 770 + armv8pmu_enable_user_access(cpu_pmu); 771 + else 772 + armv8pmu_disable_user_access(); 773 + 799 774 /* Enable all counters */ 800 775 armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); 801 776 } ··· 921 880 if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { 922 881 if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) 923 882 return ARMV8_IDX_CYCLE_COUNTER; 883 + else if (armv8pmu_event_is_64bit(event) && 884 + armv8pmu_event_want_user_access(event) && 885 + 
!armv8pmu_has_long_event(cpu_pmu)) 886 + return -EAGAIN; 924 887 } 925 888 926 889 /* 927 890 * Otherwise use events counters 928 891 */ 929 - if (armv8pmu_event_is_64bit(event) && 930 - !armv8pmu_has_long_event(cpu_pmu)) 892 + if (armv8pmu_event_is_chained(event)) 931 893 return armv8pmu_get_chain_idx(cpuc, cpu_pmu); 932 894 else 933 895 return armv8pmu_get_single_idx(cpuc, cpu_pmu); ··· 944 900 clear_bit(idx, cpuc->used_mask); 945 901 if (armv8pmu_event_is_chained(event)) 946 902 clear_bit(idx - 1, cpuc->used_mask); 903 + } 904 + 905 + static int armv8pmu_user_event_idx(struct perf_event *event) 906 + { 907 + if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) 908 + return 0; 909 + 910 + /* 911 + * We remap the cycle counter index to 32 to 912 + * match the offset applied to the rest of 913 + * the counter indices. 914 + */ 915 + if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) 916 + return ARMV8_IDX_CYCLE_COUNTER_USER; 917 + 918 + return event->hw.idx; 947 919 } 948 920 949 921 /* ··· 1057 997 1058 998 if (armv8pmu_event_is_64bit(event)) 1059 999 event->hw.flags |= ARMPMU_EVT_64BIT; 1000 + 1001 + /* 1002 + * User events must be allocated into a single counter, and so 1003 + * must not be chained. 1004 + * 1005 + * Most 64-bit events require long counter support, but 64-bit 1006 + * CPU_CYCLES events can be placed into the dedicated cycle 1007 + * counter when this is free. 1008 + */ 1009 + if (armv8pmu_event_want_user_access(event)) { 1010 + if (!(event->attach_state & PERF_ATTACH_TASK)) 1011 + return -EINVAL; 1012 + if (armv8pmu_event_is_64bit(event) && 1013 + (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && 1014 + !armv8pmu_has_long_event(armpmu)) 1015 + return -EOPNOTSUPP; 1016 + 1017 + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; 1018 + } 1060 1019 1061 1020 /* Only expose micro/arch events supported by this PMU */ 1062 1021 if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) ··· 1185 1106 return probe.present ? 
0 : -ENODEV; 1186 1107 } 1187 1108 1109 + static void armv8pmu_disable_user_access_ipi(void *unused) 1110 + { 1111 + armv8pmu_disable_user_access(); 1112 + } 1113 + 1114 + static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, 1115 + void *buffer, size_t *lenp, loff_t *ppos) 1116 + { 1117 + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 1118 + if (ret || !write || sysctl_perf_user_access) 1119 + return ret; 1120 + 1121 + on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1); 1122 + return 0; 1123 + } 1124 + 1188 1125 static struct ctl_table armv8_pmu_sysctl_table[] = { 1189 1126 { 1190 1127 .procname = "perf_user_access", 1191 1128 .data = &sysctl_perf_user_access, 1192 1129 .maxlen = sizeof(unsigned int), 1193 1130 .mode = 0644, 1194 - .proc_handler = proc_dointvec_minmax, 1131 + .proc_handler = armv8pmu_proc_user_access_handler, 1195 1132 .extra1 = SYSCTL_ZERO, 1196 1133 .extra2 = SYSCTL_ONE, 1197 1134 }, ··· 1236 1141 cpu_pmu->reset = armv8pmu_reset; 1237 1142 cpu_pmu->set_event_filter = armv8pmu_set_event_filter; 1238 1143 cpu_pmu->filter_match = armv8pmu_filter_match; 1144 + 1145 + cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; 1239 1146 1240 1147 cpu_pmu->name = name; 1241 1148 cpu_pmu->map_event = map_event; ··· 1415 1318 userpg->cap_user_time = 0; 1416 1319 userpg->cap_user_time_zero = 0; 1417 1320 userpg->cap_user_time_short = 0; 1321 + userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event); 1322 + 1323 + if (userpg->cap_user_rdpmc) { 1324 + if (event->hw.flags & ARMPMU_EVT_64BIT) 1325 + userpg->pmc_width = 64; 1326 + else 1327 + userpg->pmc_width = 32; 1328 + } 1418 1329 1419 1330 do { 1420 1331 rd = sched_clock_read_begin(&seq);