Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

arm64: Performance counters support

This patch adds support for the AArch64 performance counters.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

Authored by Will Deacon and committed by Catalin Marinas
03089688 5c423369
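As context for the commit message above: once this PMU driver is registered, the counters it exposes are reached through the generic perf_event_open(2) interface rather than any arm64-specific API. The snippet below is a minimal sketch (not part of this patch) of counting CPU cycles for a small workload; the event is mapped by the patch's armv8_pmuv3_perf_map table and placed on the dedicated cycle counter by armv8pmu_get_event_idx().

/* Minimal userspace sketch (not part of this patch): count CPU cycles
 * through the perf_event_open(2) interface that this PMU driver backs. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        /* glibc provides no wrapper, so invoke the syscall directly. */
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;  /* mapped to the cycle counter */
        attr.disabled = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* this task, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        for (volatile int i = 0; i < 1000000; i++)  /* workload under test */
                ;
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cycles: %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
}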

4 files changed, 1478 insertions(+)

arch/arm64/include/asm/perf_event.h (+22)
/*
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/* It's quiet around here... */

#endif
arch/arm64/include/asm/pmu.h (+82)
/*
 * Based on arch/arm/include/asm/pmu.h
 *
 * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __ASM_PMU_H
#define __ASM_PMU_H

#ifdef CONFIG_HW_PERF_EVENTS

/* The events for a given PMU register set. */
struct pmu_hw_events {
        /*
         * The events that are active on the PMU for the given index.
         */
        struct perf_event **events;

        /*
         * A 1 bit for an index indicates that the counter is being used for
         * an event. A 0 means that the counter can be used.
         */
        unsigned long *used_mask;

        /*
         * Hardware lock to serialize accesses to PMU registers. Needed for the
         * read/modify/write sequences.
         */
        raw_spinlock_t pmu_lock;
};

struct arm_pmu {
        struct pmu pmu;
        cpumask_t active_irqs;
        const char *name;
        irqreturn_t (*handle_irq)(int irq_num, void *dev);
        void (*enable)(struct hw_perf_event *evt, int idx);
        void (*disable)(struct hw_perf_event *evt, int idx);
        int (*get_event_idx)(struct pmu_hw_events *hw_events,
                             struct hw_perf_event *hwc);
        int (*set_event_filter)(struct hw_perf_event *evt,
                                struct perf_event_attr *attr);
        u32 (*read_counter)(int idx);
        void (*write_counter)(int idx, u32 val);
        void (*start)(void);
        void (*stop)(void);
        void (*reset)(void *);
        int (*map_event)(struct perf_event *event);
        int num_events;
        atomic_t active_events;
        struct mutex reserve_mutex;
        u64 max_period;
        struct platform_device *plat_device;
        struct pmu_hw_events *(*get_hw_events)(void);
};

#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))

int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);

u64 armpmu_event_update(struct perf_event *event,
                        struct hw_perf_event *hwc,
                        int idx);

int armpmu_event_set_period(struct perf_event *event,
                            struct hw_perf_event *hwc,
                            int idx);

#endif /* CONFIG_HW_PERF_EVENTS */
#endif /* __ASM_PMU_H */
arch/arm64/kernel/perf_event.c (+1368)
··· 1 + /* 2 + * PMU support 3 + * 4 + * Copyright (C) 2012 ARM Limited 5 + * Author: Will Deacon <will.deacon@arm.com> 6 + * 7 + * This code is based heavily on the ARMv7 perf event code. 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 + */ 21 + #define pr_fmt(fmt) "hw perfevents: " fmt 22 + 23 + #include <linux/bitmap.h> 24 + #include <linux/interrupt.h> 25 + #include <linux/kernel.h> 26 + #include <linux/export.h> 27 + #include <linux/perf_event.h> 28 + #include <linux/platform_device.h> 29 + #include <linux/spinlock.h> 30 + #include <linux/uaccess.h> 31 + 32 + #include <asm/cputype.h> 33 + #include <asm/irq.h> 34 + #include <asm/irq_regs.h> 35 + #include <asm/pmu.h> 36 + #include <asm/stacktrace.h> 37 + 38 + /* 39 + * ARMv8 supports a maximum of 32 events. 40 + * The cycle counter is included in this total. 41 + */ 42 + #define ARMPMU_MAX_HWEVENTS 32 43 + 44 + static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); 45 + static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); 46 + static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); 47 + 48 + #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) 49 + 50 + /* Set at runtime when we know what CPU type we are. */ 51 + static struct arm_pmu *cpu_pmu; 52 + 53 + int 54 + armpmu_get_max_events(void) 55 + { 56 + int max_events = 0; 57 + 58 + if (cpu_pmu != NULL) 59 + max_events = cpu_pmu->num_events; 60 + 61 + return max_events; 62 + } 63 + EXPORT_SYMBOL_GPL(armpmu_get_max_events); 64 + 65 + int perf_num_counters(void) 66 + { 67 + return armpmu_get_max_events(); 68 + } 69 + EXPORT_SYMBOL_GPL(perf_num_counters); 70 + 71 + #define HW_OP_UNSUPPORTED 0xFFFF 72 + 73 + #define C(_x) \ 74 + PERF_COUNT_HW_CACHE_##_x 75 + 76 + #define CACHE_OP_UNSUPPORTED 0xFFFF 77 + 78 + static int 79 + armpmu_map_cache_event(const unsigned (*cache_map) 80 + [PERF_COUNT_HW_CACHE_MAX] 81 + [PERF_COUNT_HW_CACHE_OP_MAX] 82 + [PERF_COUNT_HW_CACHE_RESULT_MAX], 83 + u64 config) 84 + { 85 + unsigned int cache_type, cache_op, cache_result, ret; 86 + 87 + cache_type = (config >> 0) & 0xff; 88 + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 89 + return -EINVAL; 90 + 91 + cache_op = (config >> 8) & 0xff; 92 + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 93 + return -EINVAL; 94 + 95 + cache_result = (config >> 16) & 0xff; 96 + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 97 + return -EINVAL; 98 + 99 + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; 100 + 101 + if (ret == CACHE_OP_UNSUPPORTED) 102 + return -ENOENT; 103 + 104 + return ret; 105 + } 106 + 107 + static int 108 + armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) 109 + { 110 + int mapping = (*event_map)[config]; 111 + return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; 112 + } 113 + 114 + static int 115 + armpmu_map_raw_event(u32 raw_event_mask, u64 config) 116 + { 117 + return (int)(config & raw_event_mask); 118 + } 119 + 120 + static int map_cpu_event(struct perf_event *event, 121 + const unsigned (*event_map)[PERF_COUNT_HW_MAX], 122 + const unsigned (*cache_map) 123 + [PERF_COUNT_HW_CACHE_MAX] 124 + [PERF_COUNT_HW_CACHE_OP_MAX] 125 + [PERF_COUNT_HW_CACHE_RESULT_MAX], 126 + u32 raw_event_mask) 127 + { 128 + u64 config = event->attr.config; 129 + 130 + switch (event->attr.type) { 131 + case PERF_TYPE_HARDWARE: 132 + return armpmu_map_event(event_map, config); 133 + case PERF_TYPE_HW_CACHE: 134 + return armpmu_map_cache_event(cache_map, config); 135 + case PERF_TYPE_RAW: 136 + return armpmu_map_raw_event(raw_event_mask, config); 137 + } 138 + 139 + return -ENOENT; 140 + } 141 + 142 + int 143 + armpmu_event_set_period(struct perf_event *event, 144 + struct hw_perf_event *hwc, 145 + int idx) 146 + { 147 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 148 + s64 left = local64_read(&hwc->period_left); 149 + s64 period = hwc->sample_period; 150 + int ret = 0; 151 + 152 + if (unlikely(left <= -period)) { 153 + left = period; 154 + local64_set(&hwc->period_left, left); 155 + hwc->last_period = period; 156 + ret = 1; 157 + } 158 + 159 + if (unlikely(left <= 0)) { 160 + left += period; 161 + local64_set(&hwc->period_left, left); 162 + hwc->last_period = period; 163 + ret = 1; 164 + } 165 + 166 + if (left > (s64)armpmu->max_period) 167 + left = armpmu->max_period; 168 + 169 + local64_set(&hwc->prev_count, (u64)-left); 170 + 171 + armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); 172 + 173 + perf_event_update_userpage(event); 174 + 175 + return ret; 176 + } 177 + 178 + u64 179 + armpmu_event_update(struct perf_event *event, 180 + struct hw_perf_event *hwc, 181 + int idx) 182 + { 183 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 184 + u64 delta, prev_raw_count, new_raw_count; 185 + 186 + again: 187 + prev_raw_count = local64_read(&hwc->prev_count); 188 + new_raw_count = armpmu->read_counter(idx); 189 + 190 + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 191 + new_raw_count) != prev_raw_count) 192 + goto again; 193 + 194 + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; 195 + 196 + local64_add(delta, &event->count); 197 + local64_sub(delta, &hwc->period_left); 198 + 199 + return new_raw_count; 200 + } 201 + 202 + static void 203 + armpmu_read(struct perf_event *event) 204 + { 205 + struct hw_perf_event *hwc = &event->hw; 206 + 207 + /* Don't read disabled counters! */ 208 + if (hwc->idx < 0) 209 + return; 210 + 211 + armpmu_event_update(event, hwc, hwc->idx); 212 + } 213 + 214 + static void 215 + armpmu_stop(struct perf_event *event, int flags) 216 + { 217 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 218 + struct hw_perf_event *hwc = &event->hw; 219 + 220 + /* 221 + * ARM pmu always has to update the counter, so ignore 222 + * PERF_EF_UPDATE, see comments in armpmu_start(). 223 + */ 224 + if (!(hwc->state & PERF_HES_STOPPED)) { 225 + armpmu->disable(hwc, hwc->idx); 226 + barrier(); /* why? 
*/ 227 + armpmu_event_update(event, hwc, hwc->idx); 228 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 229 + } 230 + } 231 + 232 + static void 233 + armpmu_start(struct perf_event *event, int flags) 234 + { 235 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 236 + struct hw_perf_event *hwc = &event->hw; 237 + 238 + /* 239 + * ARM pmu always has to reprogram the period, so ignore 240 + * PERF_EF_RELOAD, see the comment below. 241 + */ 242 + if (flags & PERF_EF_RELOAD) 243 + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 244 + 245 + hwc->state = 0; 246 + /* 247 + * Set the period again. Some counters can't be stopped, so when we 248 + * were stopped we simply disabled the IRQ source and the counter 249 + * may have been left counting. If we don't do this step then we may 250 + * get an interrupt too soon or *way* too late if the overflow has 251 + * happened since disabling. 252 + */ 253 + armpmu_event_set_period(event, hwc, hwc->idx); 254 + armpmu->enable(hwc, hwc->idx); 255 + } 256 + 257 + static void 258 + armpmu_del(struct perf_event *event, int flags) 259 + { 260 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 261 + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); 262 + struct hw_perf_event *hwc = &event->hw; 263 + int idx = hwc->idx; 264 + 265 + WARN_ON(idx < 0); 266 + 267 + armpmu_stop(event, PERF_EF_UPDATE); 268 + hw_events->events[idx] = NULL; 269 + clear_bit(idx, hw_events->used_mask); 270 + 271 + perf_event_update_userpage(event); 272 + } 273 + 274 + static int 275 + armpmu_add(struct perf_event *event, int flags) 276 + { 277 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 278 + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); 279 + struct hw_perf_event *hwc = &event->hw; 280 + int idx; 281 + int err = 0; 282 + 283 + perf_pmu_disable(event->pmu); 284 + 285 + /* If we don't have a space for the counter then finish early. */ 286 + idx = armpmu->get_event_idx(hw_events, hwc); 287 + if (idx < 0) { 288 + err = idx; 289 + goto out; 290 + } 291 + 292 + /* 293 + * If there is an event in the counter we are going to use then make 294 + * sure it is disabled. 295 + */ 296 + event->hw.idx = idx; 297 + armpmu->disable(hwc, idx); 298 + hw_events->events[idx] = event; 299 + 300 + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 301 + if (flags & PERF_EF_START) 302 + armpmu_start(event, PERF_EF_RELOAD); 303 + 304 + /* Propagate our changes to the userspace mapping. */ 305 + perf_event_update_userpage(event); 306 + 307 + out: 308 + perf_pmu_enable(event->pmu); 309 + return err; 310 + } 311 + 312 + static int 313 + validate_event(struct pmu_hw_events *hw_events, 314 + struct perf_event *event) 315 + { 316 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 317 + struct hw_perf_event fake_event = event->hw; 318 + struct pmu *leader_pmu = event->group_leader->pmu; 319 + 320 + if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) 321 + return 1; 322 + 323 + return armpmu->get_event_idx(hw_events, &fake_event) >= 0; 324 + } 325 + 326 + static int 327 + validate_group(struct perf_event *event) 328 + { 329 + struct perf_event *sibling, *leader = event->group_leader; 330 + struct pmu_hw_events fake_pmu; 331 + DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); 332 + 333 + /* 334 + * Initialise the fake PMU. We only need to populate the 335 + * used_mask for the purposes of validation. 
336 + */ 337 + memset(fake_used_mask, 0, sizeof(fake_used_mask)); 338 + fake_pmu.used_mask = fake_used_mask; 339 + 340 + if (!validate_event(&fake_pmu, leader)) 341 + return -EINVAL; 342 + 343 + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 344 + if (!validate_event(&fake_pmu, sibling)) 345 + return -EINVAL; 346 + } 347 + 348 + if (!validate_event(&fake_pmu, event)) 349 + return -EINVAL; 350 + 351 + return 0; 352 + } 353 + 354 + static void 355 + armpmu_release_hardware(struct arm_pmu *armpmu) 356 + { 357 + int i, irq, irqs; 358 + struct platform_device *pmu_device = armpmu->plat_device; 359 + 360 + irqs = min(pmu_device->num_resources, num_possible_cpus()); 361 + 362 + for (i = 0; i < irqs; ++i) { 363 + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) 364 + continue; 365 + irq = platform_get_irq(pmu_device, i); 366 + if (irq >= 0) 367 + free_irq(irq, armpmu); 368 + } 369 + } 370 + 371 + static int 372 + armpmu_reserve_hardware(struct arm_pmu *armpmu) 373 + { 374 + int i, err, irq, irqs; 375 + struct platform_device *pmu_device = armpmu->plat_device; 376 + 377 + if (!pmu_device) { 378 + pr_err("no PMU device registered\n"); 379 + return -ENODEV; 380 + } 381 + 382 + irqs = min(pmu_device->num_resources, num_possible_cpus()); 383 + if (irqs < 1) { 384 + pr_err("no irqs for PMUs defined\n"); 385 + return -ENODEV; 386 + } 387 + 388 + for (i = 0; i < irqs; ++i) { 389 + err = 0; 390 + irq = platform_get_irq(pmu_device, i); 391 + if (irq < 0) 392 + continue; 393 + 394 + /* 395 + * If we have a single PMU interrupt that we can't shift, 396 + * assume that we're running on a uniprocessor machine and 397 + * continue. Otherwise, continue without this interrupt. 398 + */ 399 + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { 400 + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", 401 + irq, i); 402 + continue; 403 + } 404 + 405 + err = request_irq(irq, armpmu->handle_irq, 406 + IRQF_NOBALANCING, 407 + "arm-pmu", armpmu); 408 + if (err) { 409 + pr_err("unable to request IRQ%d for ARM PMU counters\n", 410 + irq); 411 + armpmu_release_hardware(armpmu); 412 + return err; 413 + } 414 + 415 + cpumask_set_cpu(i, &armpmu->active_irqs); 416 + } 417 + 418 + return 0; 419 + } 420 + 421 + static void 422 + hw_perf_event_destroy(struct perf_event *event) 423 + { 424 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 425 + atomic_t *active_events = &armpmu->active_events; 426 + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; 427 + 428 + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { 429 + armpmu_release_hardware(armpmu); 430 + mutex_unlock(pmu_reserve_mutex); 431 + } 432 + } 433 + 434 + static int 435 + event_requires_mode_exclusion(struct perf_event_attr *attr) 436 + { 437 + return attr->exclude_idle || attr->exclude_user || 438 + attr->exclude_kernel || attr->exclude_hv; 439 + } 440 + 441 + static int 442 + __hw_perf_event_init(struct perf_event *event) 443 + { 444 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 445 + struct hw_perf_event *hwc = &event->hw; 446 + int mapping, err; 447 + 448 + mapping = armpmu->map_event(event); 449 + 450 + if (mapping < 0) { 451 + pr_debug("event %x:%llx not supported\n", event->attr.type, 452 + event->attr.config); 453 + return mapping; 454 + } 455 + 456 + /* 457 + * We don't assign an index until we actually place the event onto 458 + * hardware. Use -1 to signify that we haven't decided where to put it 459 + * yet. 
For SMP systems, each core has it's own PMU so we can't do any 460 + * clever allocation or constraints checking at this point. 461 + */ 462 + hwc->idx = -1; 463 + hwc->config_base = 0; 464 + hwc->config = 0; 465 + hwc->event_base = 0; 466 + 467 + /* 468 + * Check whether we need to exclude the counter from certain modes. 469 + */ 470 + if ((!armpmu->set_event_filter || 471 + armpmu->set_event_filter(hwc, &event->attr)) && 472 + event_requires_mode_exclusion(&event->attr)) { 473 + pr_debug("ARM performance counters do not support mode exclusion\n"); 474 + return -EPERM; 475 + } 476 + 477 + /* 478 + * Store the event encoding into the config_base field. 479 + */ 480 + hwc->config_base |= (unsigned long)mapping; 481 + 482 + if (!hwc->sample_period) { 483 + /* 484 + * For non-sampling runs, limit the sample_period to half 485 + * of the counter width. That way, the new counter value 486 + * is far less likely to overtake the previous one unless 487 + * you have some serious IRQ latency issues. 488 + */ 489 + hwc->sample_period = armpmu->max_period >> 1; 490 + hwc->last_period = hwc->sample_period; 491 + local64_set(&hwc->period_left, hwc->sample_period); 492 + } 493 + 494 + err = 0; 495 + if (event->group_leader != event) { 496 + err = validate_group(event); 497 + if (err) 498 + return -EINVAL; 499 + } 500 + 501 + return err; 502 + } 503 + 504 + static int armpmu_event_init(struct perf_event *event) 505 + { 506 + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); 507 + int err = 0; 508 + atomic_t *active_events = &armpmu->active_events; 509 + 510 + if (armpmu->map_event(event) == -ENOENT) 511 + return -ENOENT; 512 + 513 + event->destroy = hw_perf_event_destroy; 514 + 515 + if (!atomic_inc_not_zero(active_events)) { 516 + mutex_lock(&armpmu->reserve_mutex); 517 + if (atomic_read(active_events) == 0) 518 + err = armpmu_reserve_hardware(armpmu); 519 + 520 + if (!err) 521 + atomic_inc(active_events); 522 + mutex_unlock(&armpmu->reserve_mutex); 523 + } 524 + 525 + if (err) 526 + return err; 527 + 528 + err = __hw_perf_event_init(event); 529 + if (err) 530 + hw_perf_event_destroy(event); 531 + 532 + return err; 533 + } 534 + 535 + static void armpmu_enable(struct pmu *pmu) 536 + { 537 + struct arm_pmu *armpmu = to_arm_pmu(pmu); 538 + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); 539 + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); 540 + 541 + if (enabled) 542 + armpmu->start(); 543 + } 544 + 545 + static void armpmu_disable(struct pmu *pmu) 546 + { 547 + struct arm_pmu *armpmu = to_arm_pmu(pmu); 548 + armpmu->stop(); 549 + } 550 + 551 + static void __init armpmu_init(struct arm_pmu *armpmu) 552 + { 553 + atomic_set(&armpmu->active_events, 0); 554 + mutex_init(&armpmu->reserve_mutex); 555 + 556 + armpmu->pmu = (struct pmu) { 557 + .pmu_enable = armpmu_enable, 558 + .pmu_disable = armpmu_disable, 559 + .event_init = armpmu_event_init, 560 + .add = armpmu_add, 561 + .del = armpmu_del, 562 + .start = armpmu_start, 563 + .stop = armpmu_stop, 564 + .read = armpmu_read, 565 + }; 566 + } 567 + 568 + int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) 569 + { 570 + armpmu_init(armpmu); 571 + return perf_pmu_register(&armpmu->pmu, name, type); 572 + } 573 + 574 + /* 575 + * ARMv8 PMUv3 Performance Events handling code. 576 + * Common event types. 577 + */ 578 + enum armv8_pmuv3_perf_types { 579 + /* Required events. 
*/ 580 + ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00, 581 + ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03, 582 + ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04, 583 + ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, 584 + ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11, 585 + ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12, 586 + 587 + /* At least one of the following is required. */ 588 + ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08, 589 + ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B, 590 + 591 + /* Common architectural events. */ 592 + ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06, 593 + ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07, 594 + ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09, 595 + ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A, 596 + ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B, 597 + ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C, 598 + ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D, 599 + ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E, 600 + ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, 601 + ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C, 602 + 603 + /* Common microarchitectural events. */ 604 + ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01, 605 + ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02, 606 + ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05, 607 + ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13, 608 + ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14, 609 + ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15, 610 + ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16, 611 + ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17, 612 + ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18, 613 + ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19, 614 + ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A, 615 + ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, 616 + 617 + /* 618 + * This isn't an architected event. 619 + * We detect this event number and use the cycle counter instead. 620 + */ 621 + ARMV8_PMUV3_PERFCTR_CPU_CYCLES = 0xFF, 622 + }; 623 + 624 + /* PMUv3 HW events mapping. 
*/ 625 + static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { 626 + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, 627 + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, 628 + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, 629 + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, 630 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, 631 + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, 632 + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 633 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, 634 + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, 635 + }; 636 + 637 + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 638 + [PERF_COUNT_HW_CACHE_OP_MAX] 639 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 640 + [C(L1D)] = { 641 + [C(OP_READ)] = { 642 + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, 643 + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, 644 + }, 645 + [C(OP_WRITE)] = { 646 + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, 647 + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, 648 + }, 649 + [C(OP_PREFETCH)] = { 650 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 651 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 652 + }, 653 + }, 654 + [C(L1I)] = { 655 + [C(OP_READ)] = { 656 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 657 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 658 + }, 659 + [C(OP_WRITE)] = { 660 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 661 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 662 + }, 663 + [C(OP_PREFETCH)] = { 664 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 665 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 666 + }, 667 + }, 668 + [C(LL)] = { 669 + [C(OP_READ)] = { 670 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 671 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 672 + }, 673 + [C(OP_WRITE)] = { 674 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 675 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 676 + }, 677 + [C(OP_PREFETCH)] = { 678 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 679 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 680 + }, 681 + }, 682 + [C(DTLB)] = { 683 + [C(OP_READ)] = { 684 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 685 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 686 + }, 687 + [C(OP_WRITE)] = { 688 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 689 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 690 + }, 691 + [C(OP_PREFETCH)] = { 692 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 693 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 694 + }, 695 + }, 696 + [C(ITLB)] = { 697 + [C(OP_READ)] = { 698 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 699 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 700 + }, 701 + [C(OP_WRITE)] = { 702 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 703 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 704 + }, 705 + [C(OP_PREFETCH)] = { 706 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 707 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 708 + }, 709 + }, 710 + [C(BPU)] = { 711 + [C(OP_READ)] = { 712 + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, 713 + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, 714 + }, 715 + [C(OP_WRITE)] = { 716 + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, 717 + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, 718 + }, 719 + [C(OP_PREFETCH)] = { 720 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 721 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 722 + }, 723 + }, 724 + [C(NODE)] = { 725 + [C(OP_READ)] = { 726 + 
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 727 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 728 + }, 729 + [C(OP_WRITE)] = { 730 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 731 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 732 + }, 733 + [C(OP_PREFETCH)] = { 734 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 735 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 736 + }, 737 + }, 738 + }; 739 + 740 + /* 741 + * Perf Events' indices 742 + */ 743 + #define ARMV8_IDX_CYCLE_COUNTER 0 744 + #define ARMV8_IDX_COUNTER0 1 745 + #define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) 746 + 747 + #define ARMV8_MAX_COUNTERS 32 748 + #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) 749 + 750 + /* 751 + * ARMv8 low level PMU access 752 + */ 753 + 754 + /* 755 + * Perf Event to low level counters mapping 756 + */ 757 + #define ARMV8_IDX_TO_COUNTER(x) \ 758 + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) 759 + 760 + /* 761 + * Per-CPU PMCR: config reg 762 + */ 763 + #define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ 764 + #define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ 765 + #define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ 766 + #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ 767 + #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ 768 + #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ 769 + #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ 770 + #define ARMV8_PMCR_N_MASK 0x1f 771 + #define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ 772 + 773 + /* 774 + * PMOVSR: counters overflow flag status reg 775 + */ 776 + #define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */ 777 + #define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK 778 + 779 + /* 780 + * PMXEVTYPER: Event selection reg 781 + */ 782 + #define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ 783 + #define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ 784 + 785 + /* 786 + * Event filters for PMUv3 787 + */ 788 + #define ARMV8_EXCLUDE_EL1 (1 << 31) 789 + #define ARMV8_EXCLUDE_EL0 (1 << 30) 790 + #define ARMV8_INCLUDE_EL2 (1 << 27) 791 + 792 + static inline u32 armv8pmu_pmcr_read(void) 793 + { 794 + u32 val; 795 + asm volatile("mrs %0, pmcr_el0" : "=r" (val)); 796 + return val; 797 + } 798 + 799 + static inline void armv8pmu_pmcr_write(u32 val) 800 + { 801 + val &= ARMV8_PMCR_MASK; 802 + isb(); 803 + asm volatile("msr pmcr_el0, %0" :: "r" (val)); 804 + } 805 + 806 + static inline int armv8pmu_has_overflowed(u32 pmovsr) 807 + { 808 + return pmovsr & ARMV8_OVERFLOWED_MASK; 809 + } 810 + 811 + static inline int armv8pmu_counter_valid(int idx) 812 + { 813 + return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; 814 + } 815 + 816 + static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) 817 + { 818 + int ret = 0; 819 + u32 counter; 820 + 821 + if (!armv8pmu_counter_valid(idx)) { 822 + pr_err("CPU%u checking wrong counter %d overflow status\n", 823 + smp_processor_id(), idx); 824 + } else { 825 + counter = ARMV8_IDX_TO_COUNTER(idx); 826 + ret = pmnc & BIT(counter); 827 + } 828 + 829 + return ret; 830 + } 831 + 832 + static inline int armv8pmu_select_counter(int idx) 833 + { 834 + u32 counter; 835 + 836 + if (!armv8pmu_counter_valid(idx)) { 837 + pr_err("CPU%u selecting wrong PMNC counter %d\n", 838 + smp_processor_id(), idx); 839 + return -EINVAL; 840 + } 841 + 842 + counter = ARMV8_IDX_TO_COUNTER(idx); 843 + asm volatile("msr pmselr_el0, %0" :: "r" (counter)); 844 + isb(); 845 + 846 + return idx; 847 + } 848 + 
849 + static inline u32 armv8pmu_read_counter(int idx) 850 + { 851 + u32 value = 0; 852 + 853 + if (!armv8pmu_counter_valid(idx)) 854 + pr_err("CPU%u reading wrong counter %d\n", 855 + smp_processor_id(), idx); 856 + else if (idx == ARMV8_IDX_CYCLE_COUNTER) 857 + asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); 858 + else if (armv8pmu_select_counter(idx) == idx) 859 + asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); 860 + 861 + return value; 862 + } 863 + 864 + static inline void armv8pmu_write_counter(int idx, u32 value) 865 + { 866 + if (!armv8pmu_counter_valid(idx)) 867 + pr_err("CPU%u writing wrong counter %d\n", 868 + smp_processor_id(), idx); 869 + else if (idx == ARMV8_IDX_CYCLE_COUNTER) 870 + asm volatile("msr pmccntr_el0, %0" :: "r" (value)); 871 + else if (armv8pmu_select_counter(idx) == idx) 872 + asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); 873 + } 874 + 875 + static inline void armv8pmu_write_evtype(int idx, u32 val) 876 + { 877 + if (armv8pmu_select_counter(idx) == idx) { 878 + val &= ARMV8_EVTYPE_MASK; 879 + asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); 880 + } 881 + } 882 + 883 + static inline int armv8pmu_enable_counter(int idx) 884 + { 885 + u32 counter; 886 + 887 + if (!armv8pmu_counter_valid(idx)) { 888 + pr_err("CPU%u enabling wrong PMNC counter %d\n", 889 + smp_processor_id(), idx); 890 + return -EINVAL; 891 + } 892 + 893 + counter = ARMV8_IDX_TO_COUNTER(idx); 894 + asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); 895 + return idx; 896 + } 897 + 898 + static inline int armv8pmu_disable_counter(int idx) 899 + { 900 + u32 counter; 901 + 902 + if (!armv8pmu_counter_valid(idx)) { 903 + pr_err("CPU%u disabling wrong PMNC counter %d\n", 904 + smp_processor_id(), idx); 905 + return -EINVAL; 906 + } 907 + 908 + counter = ARMV8_IDX_TO_COUNTER(idx); 909 + asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); 910 + return idx; 911 + } 912 + 913 + static inline int armv8pmu_enable_intens(int idx) 914 + { 915 + u32 counter; 916 + 917 + if (!armv8pmu_counter_valid(idx)) { 918 + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", 919 + smp_processor_id(), idx); 920 + return -EINVAL; 921 + } 922 + 923 + counter = ARMV8_IDX_TO_COUNTER(idx); 924 + asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); 925 + return idx; 926 + } 927 + 928 + static inline int armv8pmu_disable_intens(int idx) 929 + { 930 + u32 counter; 931 + 932 + if (!armv8pmu_counter_valid(idx)) { 933 + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", 934 + smp_processor_id(), idx); 935 + return -EINVAL; 936 + } 937 + 938 + counter = ARMV8_IDX_TO_COUNTER(idx); 939 + asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); 940 + isb(); 941 + /* Clear the overflow flag in case an interrupt is pending. */ 942 + asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); 943 + isb(); 944 + return idx; 945 + } 946 + 947 + static inline u32 armv8pmu_getreset_flags(void) 948 + { 949 + u32 value; 950 + 951 + /* Read */ 952 + asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); 953 + 954 + /* Write to clear flags */ 955 + value &= ARMV8_OVSR_MASK; 956 + asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); 957 + 958 + return value; 959 + } 960 + 961 + static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) 962 + { 963 + unsigned long flags; 964 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 965 + 966 + /* 967 + * Enable counter and interrupt, and set the counter to count 968 + * the event that we're interested in. 
969 + */ 970 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 971 + 972 + /* 973 + * Disable counter 974 + */ 975 + armv8pmu_disable_counter(idx); 976 + 977 + /* 978 + * Set event (if destined for PMNx counters). 979 + */ 980 + armv8pmu_write_evtype(idx, hwc->config_base); 981 + 982 + /* 983 + * Enable interrupt for this counter 984 + */ 985 + armv8pmu_enable_intens(idx); 986 + 987 + /* 988 + * Enable counter 989 + */ 990 + armv8pmu_enable_counter(idx); 991 + 992 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 993 + } 994 + 995 + static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx) 996 + { 997 + unsigned long flags; 998 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 999 + 1000 + /* 1001 + * Disable counter and interrupt 1002 + */ 1003 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 1004 + 1005 + /* 1006 + * Disable counter 1007 + */ 1008 + armv8pmu_disable_counter(idx); 1009 + 1010 + /* 1011 + * Disable interrupt for this counter 1012 + */ 1013 + armv8pmu_disable_intens(idx); 1014 + 1015 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 1016 + } 1017 + 1018 + static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) 1019 + { 1020 + u32 pmovsr; 1021 + struct perf_sample_data data; 1022 + struct pmu_hw_events *cpuc; 1023 + struct pt_regs *regs; 1024 + int idx; 1025 + 1026 + /* 1027 + * Get and reset the IRQ flags 1028 + */ 1029 + pmovsr = armv8pmu_getreset_flags(); 1030 + 1031 + /* 1032 + * Did an overflow occur? 1033 + */ 1034 + if (!armv8pmu_has_overflowed(pmovsr)) 1035 + return IRQ_NONE; 1036 + 1037 + /* 1038 + * Handle the counter(s) overflow(s) 1039 + */ 1040 + regs = get_irq_regs(); 1041 + 1042 + cpuc = &__get_cpu_var(cpu_hw_events); 1043 + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 1044 + struct perf_event *event = cpuc->events[idx]; 1045 + struct hw_perf_event *hwc; 1046 + 1047 + /* Ignore if we don't have an event. */ 1048 + if (!event) 1049 + continue; 1050 + 1051 + /* 1052 + * We have a single interrupt for all counters. Check that 1053 + * each counter has overflowed before we process it. 1054 + */ 1055 + if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) 1056 + continue; 1057 + 1058 + hwc = &event->hw; 1059 + armpmu_event_update(event, hwc, idx); 1060 + perf_sample_data_init(&data, 0, hwc->last_period); 1061 + if (!armpmu_event_set_period(event, hwc, idx)) 1062 + continue; 1063 + 1064 + if (perf_event_overflow(event, &data, regs)) 1065 + cpu_pmu->disable(hwc, idx); 1066 + } 1067 + 1068 + /* 1069 + * Handle the pending perf events. 1070 + * 1071 + * Note: this call *must* be run with interrupts disabled. For 1072 + * platforms that can have the PMU interrupts raised as an NMI, this 1073 + * will not work. 
1074 + */ 1075 + irq_work_run(); 1076 + 1077 + return IRQ_HANDLED; 1078 + } 1079 + 1080 + static void armv8pmu_start(void) 1081 + { 1082 + unsigned long flags; 1083 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 1084 + 1085 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 1086 + /* Enable all counters */ 1087 + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); 1088 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 1089 + } 1090 + 1091 + static void armv8pmu_stop(void) 1092 + { 1093 + unsigned long flags; 1094 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 1095 + 1096 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 1097 + /* Disable all counters */ 1098 + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); 1099 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 1100 + } 1101 + 1102 + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, 1103 + struct hw_perf_event *event) 1104 + { 1105 + int idx; 1106 + unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; 1107 + 1108 + /* Always place a cycle counter into the cycle counter. */ 1109 + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { 1110 + if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) 1111 + return -EAGAIN; 1112 + 1113 + return ARMV8_IDX_CYCLE_COUNTER; 1114 + } 1115 + 1116 + /* 1117 + * For anything other than a cycle counter, try and use 1118 + * the events counters 1119 + */ 1120 + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { 1121 + if (!test_and_set_bit(idx, cpuc->used_mask)) 1122 + return idx; 1123 + } 1124 + 1125 + /* The counters are all in use. */ 1126 + return -EAGAIN; 1127 + } 1128 + 1129 + /* 1130 + * Add an event filter to a given event. This will only work for PMUv2 PMUs. 1131 + */ 1132 + static int armv8pmu_set_event_filter(struct hw_perf_event *event, 1133 + struct perf_event_attr *attr) 1134 + { 1135 + unsigned long config_base = 0; 1136 + 1137 + if (attr->exclude_idle) 1138 + return -EPERM; 1139 + if (attr->exclude_user) 1140 + config_base |= ARMV8_EXCLUDE_EL0; 1141 + if (attr->exclude_kernel) 1142 + config_base |= ARMV8_EXCLUDE_EL1; 1143 + if (!attr->exclude_hv) 1144 + config_base |= ARMV8_INCLUDE_EL2; 1145 + 1146 + /* 1147 + * Install the filter into config_base as this is used to 1148 + * construct the event type. 1149 + */ 1150 + event->config_base = config_base; 1151 + 1152 + return 0; 1153 + } 1154 + 1155 + static void armv8pmu_reset(void *info) 1156 + { 1157 + u32 idx, nb_cnt = cpu_pmu->num_events; 1158 + 1159 + /* The counter and interrupt enable registers are unknown at reset. */ 1160 + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) 1161 + armv8pmu_disable_event(NULL, idx); 1162 + 1163 + /* Initialize & Reset PMNC: C and P bits. */ 1164 + armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); 1165 + 1166 + /* Disable access from userspace. 
*/ 1167 + asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); 1168 + } 1169 + 1170 + static int armv8_pmuv3_map_event(struct perf_event *event) 1171 + { 1172 + return map_cpu_event(event, &armv8_pmuv3_perf_map, 1173 + &armv8_pmuv3_perf_cache_map, 0xFF); 1174 + } 1175 + 1176 + static struct arm_pmu armv8pmu = { 1177 + .handle_irq = armv8pmu_handle_irq, 1178 + .enable = armv8pmu_enable_event, 1179 + .disable = armv8pmu_disable_event, 1180 + .read_counter = armv8pmu_read_counter, 1181 + .write_counter = armv8pmu_write_counter, 1182 + .get_event_idx = armv8pmu_get_event_idx, 1183 + .start = armv8pmu_start, 1184 + .stop = armv8pmu_stop, 1185 + .reset = armv8pmu_reset, 1186 + .max_period = (1LLU << 32) - 1, 1187 + }; 1188 + 1189 + static u32 __init armv8pmu_read_num_pmnc_events(void) 1190 + { 1191 + u32 nb_cnt; 1192 + 1193 + /* Read the nb of CNTx counters supported from PMNC */ 1194 + nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; 1195 + 1196 + /* Add the CPU cycles counter and return */ 1197 + return nb_cnt + 1; 1198 + } 1199 + 1200 + static struct arm_pmu *__init armv8_pmuv3_pmu_init(void) 1201 + { 1202 + armv8pmu.name = "arm/armv8-pmuv3"; 1203 + armv8pmu.map_event = armv8_pmuv3_map_event; 1204 + armv8pmu.num_events = armv8pmu_read_num_pmnc_events(); 1205 + armv8pmu.set_event_filter = armv8pmu_set_event_filter; 1206 + return &armv8pmu; 1207 + } 1208 + 1209 + /* 1210 + * Ensure the PMU has sane values out of reset. 1211 + * This requires SMP to be available, so exists as a separate initcall. 1212 + */ 1213 + static int __init 1214 + cpu_pmu_reset(void) 1215 + { 1216 + if (cpu_pmu && cpu_pmu->reset) 1217 + return on_each_cpu(cpu_pmu->reset, NULL, 1); 1218 + return 0; 1219 + } 1220 + arch_initcall(cpu_pmu_reset); 1221 + 1222 + /* 1223 + * PMU platform driver and devicetree bindings. 
1224 + */ 1225 + static struct of_device_id armpmu_of_device_ids[] = { 1226 + {.compatible = "arm,armv8-pmuv3"}, 1227 + {}, 1228 + }; 1229 + 1230 + static int __devinit armpmu_device_probe(struct platform_device *pdev) 1231 + { 1232 + if (!cpu_pmu) 1233 + return -ENODEV; 1234 + 1235 + cpu_pmu->plat_device = pdev; 1236 + return 0; 1237 + } 1238 + 1239 + static struct platform_driver armpmu_driver = { 1240 + .driver = { 1241 + .name = "arm-pmu", 1242 + .of_match_table = armpmu_of_device_ids, 1243 + }, 1244 + .probe = armpmu_device_probe, 1245 + }; 1246 + 1247 + static int __init register_pmu_driver(void) 1248 + { 1249 + return platform_driver_register(&armpmu_driver); 1250 + } 1251 + device_initcall(register_pmu_driver); 1252 + 1253 + static struct pmu_hw_events *armpmu_get_cpu_events(void) 1254 + { 1255 + return &__get_cpu_var(cpu_hw_events); 1256 + } 1257 + 1258 + static void __init cpu_pmu_init(struct arm_pmu *armpmu) 1259 + { 1260 + int cpu; 1261 + for_each_possible_cpu(cpu) { 1262 + struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); 1263 + events->events = per_cpu(hw_events, cpu); 1264 + events->used_mask = per_cpu(used_mask, cpu); 1265 + raw_spin_lock_init(&events->pmu_lock); 1266 + } 1267 + armpmu->get_hw_events = armpmu_get_cpu_events; 1268 + } 1269 + 1270 + static int __init init_hw_perf_events(void) 1271 + { 1272 + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); 1273 + 1274 + switch ((dfr >> 8) & 0xf) { 1275 + case 0x1: /* PMUv3 */ 1276 + cpu_pmu = armv8_pmuv3_pmu_init(); 1277 + break; 1278 + } 1279 + 1280 + if (cpu_pmu) { 1281 + pr_info("enabled with %s PMU driver, %d counters available\n", 1282 + cpu_pmu->name, cpu_pmu->num_events); 1283 + cpu_pmu_init(cpu_pmu); 1284 + armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); 1285 + } else { 1286 + pr_info("no hardware support available\n"); 1287 + } 1288 + 1289 + return 0; 1290 + } 1291 + early_initcall(init_hw_perf_events); 1292 + 1293 + /* 1294 + * Callchain handling code. 1295 + */ 1296 + struct frame_tail { 1297 + struct frame_tail __user *fp; 1298 + unsigned long lr; 1299 + } __attribute__((packed)); 1300 + 1301 + /* 1302 + * Get the return address for a single stackframe and return a pointer to the 1303 + * next frame tail. 1304 + */ 1305 + static struct frame_tail __user * 1306 + user_backtrace(struct frame_tail __user *tail, 1307 + struct perf_callchain_entry *entry) 1308 + { 1309 + struct frame_tail buftail; 1310 + unsigned long err; 1311 + 1312 + /* Also check accessibility of one struct frame_tail beyond */ 1313 + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) 1314 + return NULL; 1315 + 1316 + pagefault_disable(); 1317 + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); 1318 + pagefault_enable(); 1319 + 1320 + if (err) 1321 + return NULL; 1322 + 1323 + perf_callchain_store(entry, buftail.lr); 1324 + 1325 + /* 1326 + * Frame pointers should strictly progress back up the stack 1327 + * (towards higher addresses). 1328 + */ 1329 + if (tail >= buftail.fp) 1330 + return NULL; 1331 + 1332 + return buftail.fp; 1333 + } 1334 + 1335 + void perf_callchain_user(struct perf_callchain_entry *entry, 1336 + struct pt_regs *regs) 1337 + { 1338 + struct frame_tail __user *tail; 1339 + 1340 + tail = (struct frame_tail __user *)regs->regs[29]; 1341 + 1342 + while (entry->nr < PERF_MAX_STACK_DEPTH && 1343 + tail && !((unsigned long)tail & 0xf)) 1344 + tail = user_backtrace(tail, entry); 1345 + } 1346 + 1347 + /* 1348 + * Gets called by walk_stackframe() for every stackframe. 
This will be called 1349 + * whist unwinding the stackframe and is like a subroutine return so we use 1350 + * the PC. 1351 + */ 1352 + static int callchain_trace(struct stackframe *frame, void *data) 1353 + { 1354 + struct perf_callchain_entry *entry = data; 1355 + perf_callchain_store(entry, frame->pc); 1356 + return 0; 1357 + } 1358 + 1359 + void perf_callchain_kernel(struct perf_callchain_entry *entry, 1360 + struct pt_regs *regs) 1361 + { 1362 + struct stackframe frame; 1363 + 1364 + frame.fp = regs->regs[29]; 1365 + frame.sp = regs->sp; 1366 + frame.pc = regs->pc; 1367 + walk_stackframe(&frame, callchain_trace, entry); 1368 + }
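One detail of the driver above that is easy to miss: armpmu_event_set_period() arms each 32-bit counter with the two's complement of the remaining period, so the counter wraps to zero (raising the overflow interrupt) after exactly that many events, and armpmu_event_update() recovers the elapsed count with a wrap-safe subtraction masked by max_period. The toy program below models only that arithmetic; it is an illustration, not kernel code, and the variable names are hypothetical.

/* Toy model (not kernel code) of how armpmu_event_set_period() arms a
 * 32-bit event counter: writing (u32)-left makes it wrap after 'left' events. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t left = 1000;                   /* events until the next sample */
        uint32_t counter = (uint32_t)-left;     /* value written to the hardware counter */
        uint32_t prev = counter;

        for (uint32_t i = 0; i < left; i++)
                counter++;                      /* hardware increments once per event */

        /* Wrap-safe delta, as in armpmu_event_update() with a 32-bit max_period. */
        uint32_t delta = counter - prev;

        printf("counter after %u events: %u (wrapped => overflow IRQ), delta = %u\n",
               left, counter, delta);
        return 0;
}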
tools/perf/perf.h (+6)
 #define CPUINFO_PROC "Processor"
 #endif

+#ifdef __aarch64__
+#include "../../arch/arm64/include/asm/unistd.h"
+#define rmb() asm volatile("dmb ld" ::: "memory")
+#define cpu_relax() asm volatile("yield" ::: "memory")
+#endif
+
 #ifdef __mips__
 #include "../../arch/mips/include/asm/unistd.h"
 #define rmb() asm volatile( \