Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

LoongArch: Add perf events support

The perf events infrastructure of LoongArch is very similar to that of the old
MIPS-based Loongson, so most of the code is derived from MIPS.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
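
As a quick illustration of what the patch enables (this sketch is not part of the patch, and the program is purely illustrative), a counter can be opened from userspace through the standard perf_event_open(2) interface; the PERF_TYPE_HARDWARE request below is accepted by loongarch_pmu_event_init() and translated via loongson_event_map in the new arch/loongarch/kernel/perf_event.c:

/*
 * Hedged sketch, not part of the patch: count CPU cycles for a short
 * workload on the new LoongArch PMU through perf_event_open(2).
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;		/* handled by loongarch_pmu_event_init() */
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* event 0x00 in loongson_event_map */
	attr.disabled = 1;
	attr.exclude_kernel = 1;		/* leaves CSR_PERFCTRL_PLV0 clear */

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %lld\n", count);

	close(fd);
	return 0;
}

With sampling enabled (attr.sample_period and attr.sample_type set), counter overflows are delivered through the pmu_handle_irq() handler added below.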

+987 -1
+2
arch/loongarch/Kconfig
 	select HAVE_NMI
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
+3 -1
arch/loongarch/include/asm/perf_event.h
 #ifndef __LOONGARCH_PERF_EVENT_H__
 #define __LOONGARCH_PERF_EVENT_H__
-/* Nothing to show here; the file is required by linux/perf_event.h. */
+
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+
 #endif /* __LOONGARCH_PERF_EVENT_H__ */
+40
arch/loongarch/include/uapi/asm/perf_regs.h
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_LOONGARCH_PERF_REGS_H
+#define _ASM_LOONGARCH_PERF_REGS_H
+
+enum perf_event_loongarch_regs {
+	PERF_REG_LOONGARCH_PC,
+	PERF_REG_LOONGARCH_R1,
+	PERF_REG_LOONGARCH_R2,
+	PERF_REG_LOONGARCH_R3,
+	PERF_REG_LOONGARCH_R4,
+	PERF_REG_LOONGARCH_R5,
+	PERF_REG_LOONGARCH_R6,
+	PERF_REG_LOONGARCH_R7,
+	PERF_REG_LOONGARCH_R8,
+	PERF_REG_LOONGARCH_R9,
+	PERF_REG_LOONGARCH_R10,
+	PERF_REG_LOONGARCH_R11,
+	PERF_REG_LOONGARCH_R12,
+	PERF_REG_LOONGARCH_R13,
+	PERF_REG_LOONGARCH_R14,
+	PERF_REG_LOONGARCH_R15,
+	PERF_REG_LOONGARCH_R16,
+	PERF_REG_LOONGARCH_R17,
+	PERF_REG_LOONGARCH_R18,
+	PERF_REG_LOONGARCH_R19,
+	PERF_REG_LOONGARCH_R20,
+	PERF_REG_LOONGARCH_R21,
+	PERF_REG_LOONGARCH_R22,
+	PERF_REG_LOONGARCH_R23,
+	PERF_REG_LOONGARCH_R24,
+	PERF_REG_LOONGARCH_R25,
+	PERF_REG_LOONGARCH_R26,
+	PERF_REG_LOONGARCH_R27,
+	PERF_REG_LOONGARCH_R28,
+	PERF_REG_LOONGARCH_R29,
+	PERF_REG_LOONGARCH_R30,
+	PERF_REG_LOONGARCH_R31,
+	PERF_REG_LOONGARCH_MAX,
+};
+#endif /* _ASM_LOONGARCH_PERF_REGS_H */
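
The uapi header only defines the register indices. As a hedged illustration (not from the patch; request_user_regs() is a hypothetical helper), a userspace sampler would set one bit per PERF_REG_LOONGARCH_* value in sample_regs_user; perf_reg_validate() in the new perf_regs.c below rejects any bit at or above PERF_REG_LOONGARCH_MAX:

#include <linux/perf_event.h>
#include <asm/perf_regs.h>	/* the enum added above */

/* Hypothetical helper: sample the user PC, ra (r1), sp (r3) and fp (r22). */
static void request_user_regs(struct perf_event_attr *attr)
{
	attr->sample_type |= PERF_SAMPLE_REGS_USER;
	attr->sample_regs_user = (1ULL << PERF_REG_LOONGARCH_PC) |
				 (1ULL << PERF_REG_LOONGARCH_R1) |
				 (1ULL << PERF_REG_LOONGARCH_R3) |
				 (1ULL << PERF_REG_LOONGARCH_R22);
}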
+2
arch/loongarch/kernel/Makefile
 obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
+
 CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS)
+887
arch/loongarch/kernel/perf_event.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Linux performance counter support for LoongArch. 4 + * 5 + * Copyright (C) 2022 Loongson Technology Corporation Limited 6 + * 7 + * Derived from MIPS: 8 + * Copyright (C) 2010 MIPS Technologies, Inc. 9 + * Copyright (C) 2011 Cavium Networks, Inc. 10 + * Author: Deng-Cheng Zhu 11 + */ 12 + 13 + #include <linux/cpumask.h> 14 + #include <linux/interrupt.h> 15 + #include <linux/smp.h> 16 + #include <linux/kernel.h> 17 + #include <linux/perf_event.h> 18 + #include <linux/uaccess.h> 19 + #include <linux/sched/task_stack.h> 20 + 21 + #include <asm/irq.h> 22 + #include <asm/irq_regs.h> 23 + #include <asm/stacktrace.h> 24 + #include <asm/unwind.h> 25 + 26 + /* 27 + * Get the return address for a single stackframe and return a pointer to the 28 + * next frame tail. 29 + */ 30 + static unsigned long 31 + user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) 32 + { 33 + unsigned long err; 34 + unsigned long __user *user_frame_tail; 35 + struct stack_frame buftail; 36 + 37 + user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); 38 + 39 + /* Also check accessibility of one struct frame_tail beyond */ 40 + if (!access_ok(user_frame_tail, sizeof(buftail))) 41 + return 0; 42 + 43 + pagefault_disable(); 44 + err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); 45 + pagefault_enable(); 46 + 47 + if (err || (unsigned long)user_frame_tail >= buftail.fp) 48 + return 0; 49 + 50 + perf_callchain_store(entry, buftail.ra); 51 + 52 + return buftail.fp; 53 + } 54 + 55 + void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 56 + struct pt_regs *regs) 57 + { 58 + unsigned long fp; 59 + 60 + if (perf_guest_state()) { 61 + /* We don't support guest os callchain now */ 62 + return; 63 + } 64 + 65 + perf_callchain_store(entry, regs->csr_era); 66 + 67 + fp = regs->regs[22]; 68 + 69 + while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) 70 + fp = user_backtrace(entry, fp); 71 + } 72 + 73 + void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 74 + struct pt_regs *regs) 75 + { 76 + struct unwind_state state; 77 + unsigned long addr; 78 + 79 + for (unwind_start(&state, current, regs); 80 + !unwind_done(&state); unwind_next_frame(&state)) { 81 + addr = unwind_get_return_address(&state); 82 + if (!addr || perf_callchain_store(entry, addr)) 83 + return; 84 + } 85 + } 86 + 87 + #define LOONGARCH_MAX_HWEVENTS 32 88 + 89 + struct cpu_hw_events { 90 + /* Array of events on this cpu. */ 91 + struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; 92 + 93 + /* 94 + * Set the bit (indexed by the counter number) when the counter 95 + * is used for an event. 96 + */ 97 + unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; 98 + 99 + /* 100 + * Software copy of the control register for each performance counter. 101 + */ 102 + unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; 103 + }; 104 + static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 105 + .saved_ctrl = {0}, 106 + }; 107 + 108 + /* The description of LoongArch performance events. */ 109 + struct loongarch_perf_event { 110 + unsigned int event_id; 111 + }; 112 + 113 + static struct loongarch_perf_event raw_event; 114 + static DEFINE_MUTEX(raw_event_mutex); 115 + 116 + #define C(x) PERF_COUNT_HW_CACHE_##x 117 + #define HW_OP_UNSUPPORTED 0xffffffff 118 + #define CACHE_OP_UNSUPPORTED 0xffffffff 119 + 120 + #define PERF_MAP_ALL_UNSUPPORTED \ 121 + [0 ... 
PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} 122 + 123 + #define PERF_CACHE_MAP_ALL_UNSUPPORTED \ 124 + [0 ... C(MAX) - 1] = { \ 125 + [0 ... C(OP_MAX) - 1] = { \ 126 + [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ 127 + }, \ 128 + } 129 + 130 + struct loongarch_pmu { 131 + u64 max_period; 132 + u64 valid_count; 133 + u64 overflow; 134 + const char *name; 135 + unsigned int num_counters; 136 + u64 (*read_counter)(unsigned int idx); 137 + void (*write_counter)(unsigned int idx, u64 val); 138 + const struct loongarch_perf_event *(*map_raw_event)(u64 config); 139 + const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; 140 + const struct loongarch_perf_event (*cache_event_map) 141 + [PERF_COUNT_HW_CACHE_MAX] 142 + [PERF_COUNT_HW_CACHE_OP_MAX] 143 + [PERF_COUNT_HW_CACHE_RESULT_MAX]; 144 + }; 145 + 146 + static struct loongarch_pmu loongarch_pmu; 147 + 148 + #define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) 149 + 150 + #define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ 151 + CSR_PERFCTRL_PLV1 | \ 152 + CSR_PERFCTRL_PLV2 | \ 153 + CSR_PERFCTRL_PLV3 | \ 154 + CSR_PERFCTRL_IE) 155 + 156 + #define M_PERFCTL_CONFIG_MASK 0x1f0000 157 + 158 + static void pause_local_counters(void); 159 + static void resume_local_counters(void); 160 + 161 + static u64 loongarch_pmu_read_counter(unsigned int idx) 162 + { 163 + u64 val = -1; 164 + 165 + switch (idx) { 166 + case 0: 167 + val = read_csr_perfcntr0(); 168 + break; 169 + case 1: 170 + val = read_csr_perfcntr1(); 171 + break; 172 + case 2: 173 + val = read_csr_perfcntr2(); 174 + break; 175 + case 3: 176 + val = read_csr_perfcntr3(); 177 + break; 178 + default: 179 + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 180 + return 0; 181 + } 182 + 183 + return val; 184 + } 185 + 186 + static void loongarch_pmu_write_counter(unsigned int idx, u64 val) 187 + { 188 + switch (idx) { 189 + case 0: 190 + write_csr_perfcntr0(val); 191 + return; 192 + case 1: 193 + write_csr_perfcntr1(val); 194 + return; 195 + case 2: 196 + write_csr_perfcntr2(val); 197 + return; 198 + case 3: 199 + write_csr_perfcntr3(val); 200 + return; 201 + default: 202 + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 203 + return; 204 + } 205 + } 206 + 207 + static unsigned int loongarch_pmu_read_control(unsigned int idx) 208 + { 209 + unsigned int val = -1; 210 + 211 + switch (idx) { 212 + case 0: 213 + val = read_csr_perfctrl0(); 214 + break; 215 + case 1: 216 + val = read_csr_perfctrl1(); 217 + break; 218 + case 2: 219 + val = read_csr_perfctrl2(); 220 + break; 221 + case 3: 222 + val = read_csr_perfctrl3(); 223 + break; 224 + default: 225 + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 226 + return 0; 227 + } 228 + 229 + return val; 230 + } 231 + 232 + static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) 233 + { 234 + switch (idx) { 235 + case 0: 236 + write_csr_perfctrl0(val); 237 + return; 238 + case 1: 239 + write_csr_perfctrl1(val); 240 + return; 241 + case 2: 242 + write_csr_perfctrl2(val); 243 + return; 244 + case 3: 245 + write_csr_perfctrl3(val); 246 + return; 247 + default: 248 + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 249 + return; 250 + } 251 + } 252 + 253 + static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) 254 + { 255 + int i; 256 + 257 + for (i = 0; i < loongarch_pmu.num_counters; i++) { 258 + if (!test_and_set_bit(i, cpuc->used_mask)) 259 + return i; 260 + } 261 + 262 + return -EAGAIN; 263 + } 
264 + 265 + static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) 266 + { 267 + unsigned int cpu; 268 + struct perf_event *event = container_of(evt, struct perf_event, hw); 269 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 270 + 271 + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 272 + 273 + /* Make sure interrupt enabled. */ 274 + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | 275 + (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; 276 + 277 + cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); 278 + 279 + /* 280 + * We do not actually let the counter run. Leave it until start(). 281 + */ 282 + pr_debug("Enabling perf counter for CPU%d\n", cpu); 283 + } 284 + 285 + static void loongarch_pmu_disable_event(int idx) 286 + { 287 + unsigned long flags; 288 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 289 + 290 + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 291 + 292 + local_irq_save(flags); 293 + cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & 294 + ~M_PERFCTL_COUNT_EVENT_WHENEVER; 295 + loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); 296 + local_irq_restore(flags); 297 + } 298 + 299 + static int loongarch_pmu_event_set_period(struct perf_event *event, 300 + struct hw_perf_event *hwc, 301 + int idx) 302 + { 303 + int ret = 0; 304 + u64 left = local64_read(&hwc->period_left); 305 + u64 period = hwc->sample_period; 306 + 307 + if (unlikely((left + period) & (1ULL << 63))) { 308 + /* left underflowed by more than period. */ 309 + left = period; 310 + local64_set(&hwc->period_left, left); 311 + hwc->last_period = period; 312 + ret = 1; 313 + } else if (unlikely((left + period) <= period)) { 314 + /* left underflowed by less than period. */ 315 + left += period; 316 + local64_set(&hwc->period_left, left); 317 + hwc->last_period = period; 318 + ret = 1; 319 + } 320 + 321 + if (left > loongarch_pmu.max_period) { 322 + left = loongarch_pmu.max_period; 323 + local64_set(&hwc->period_left, left); 324 + } 325 + 326 + local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); 327 + 328 + loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); 329 + 330 + perf_event_update_userpage(event); 331 + 332 + return ret; 333 + } 334 + 335 + static void loongarch_pmu_event_update(struct perf_event *event, 336 + struct hw_perf_event *hwc, 337 + int idx) 338 + { 339 + u64 delta; 340 + u64 prev_raw_count, new_raw_count; 341 + 342 + again: 343 + prev_raw_count = local64_read(&hwc->prev_count); 344 + new_raw_count = loongarch_pmu.read_counter(idx); 345 + 346 + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 347 + new_raw_count) != prev_raw_count) 348 + goto again; 349 + 350 + delta = new_raw_count - prev_raw_count; 351 + 352 + local64_add(delta, &event->count); 353 + local64_sub(delta, &hwc->period_left); 354 + } 355 + 356 + static void loongarch_pmu_start(struct perf_event *event, int flags) 357 + { 358 + struct hw_perf_event *hwc = &event->hw; 359 + 360 + if (flags & PERF_EF_RELOAD) 361 + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 362 + 363 + hwc->state = 0; 364 + 365 + /* Set the period for the event. */ 366 + loongarch_pmu_event_set_period(event, hwc, hwc->idx); 367 + 368 + /* Enable the event. 
*/ 369 + loongarch_pmu_enable_event(hwc, hwc->idx); 370 + } 371 + 372 + static void loongarch_pmu_stop(struct perf_event *event, int flags) 373 + { 374 + struct hw_perf_event *hwc = &event->hw; 375 + 376 + if (!(hwc->state & PERF_HES_STOPPED)) { 377 + /* We are working on a local event. */ 378 + loongarch_pmu_disable_event(hwc->idx); 379 + barrier(); 380 + loongarch_pmu_event_update(event, hwc, hwc->idx); 381 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 382 + } 383 + } 384 + 385 + static int loongarch_pmu_add(struct perf_event *event, int flags) 386 + { 387 + int idx, err = 0; 388 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 389 + struct hw_perf_event *hwc = &event->hw; 390 + 391 + perf_pmu_disable(event->pmu); 392 + 393 + /* To look for a free counter for this event. */ 394 + idx = loongarch_pmu_alloc_counter(cpuc, hwc); 395 + if (idx < 0) { 396 + err = idx; 397 + goto out; 398 + } 399 + 400 + /* 401 + * If there is an event in the counter we are going to use then 402 + * make sure it is disabled. 403 + */ 404 + event->hw.idx = idx; 405 + loongarch_pmu_disable_event(idx); 406 + cpuc->events[idx] = event; 407 + 408 + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 409 + if (flags & PERF_EF_START) 410 + loongarch_pmu_start(event, PERF_EF_RELOAD); 411 + 412 + /* Propagate our changes to the userspace mapping. */ 413 + perf_event_update_userpage(event); 414 + 415 + out: 416 + perf_pmu_enable(event->pmu); 417 + return err; 418 + } 419 + 420 + static void loongarch_pmu_del(struct perf_event *event, int flags) 421 + { 422 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 423 + struct hw_perf_event *hwc = &event->hw; 424 + int idx = hwc->idx; 425 + 426 + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 427 + 428 + loongarch_pmu_stop(event, PERF_EF_UPDATE); 429 + cpuc->events[idx] = NULL; 430 + clear_bit(idx, cpuc->used_mask); 431 + 432 + perf_event_update_userpage(event); 433 + } 434 + 435 + static void loongarch_pmu_read(struct perf_event *event) 436 + { 437 + struct hw_perf_event *hwc = &event->hw; 438 + 439 + /* Don't read disabled counters! 
*/ 440 + if (hwc->idx < 0) 441 + return; 442 + 443 + loongarch_pmu_event_update(event, hwc, hwc->idx); 444 + } 445 + 446 + static void loongarch_pmu_enable(struct pmu *pmu) 447 + { 448 + resume_local_counters(); 449 + } 450 + 451 + static void loongarch_pmu_disable(struct pmu *pmu) 452 + { 453 + pause_local_counters(); 454 + } 455 + 456 + static DEFINE_MUTEX(pmu_reserve_mutex); 457 + static atomic_t active_events = ATOMIC_INIT(0); 458 + 459 + static int get_pmc_irq(void) 460 + { 461 + struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); 462 + 463 + if (d) 464 + return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START); 465 + 466 + return -EINVAL; 467 + } 468 + 469 + static void reset_counters(void *arg); 470 + static int __hw_perf_event_init(struct perf_event *event); 471 + 472 + static void hw_perf_event_destroy(struct perf_event *event) 473 + { 474 + if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { 475 + on_each_cpu(reset_counters, NULL, 1); 476 + free_irq(get_pmc_irq(), &loongarch_pmu); 477 + mutex_unlock(&pmu_reserve_mutex); 478 + } 479 + } 480 + 481 + static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, 482 + struct perf_sample_data *data, struct pt_regs *regs) 483 + { 484 + struct perf_event *event = cpuc->events[idx]; 485 + struct hw_perf_event *hwc = &event->hw; 486 + 487 + loongarch_pmu_event_update(event, hwc, idx); 488 + data->period = event->hw.last_period; 489 + if (!loongarch_pmu_event_set_period(event, hwc, idx)) 490 + return; 491 + 492 + if (perf_event_overflow(event, data, regs)) 493 + loongarch_pmu_disable_event(idx); 494 + } 495 + 496 + static irqreturn_t pmu_handle_irq(int irq, void *dev) 497 + { 498 + int n; 499 + int handled = IRQ_NONE; 500 + uint64_t counter; 501 + struct pt_regs *regs; 502 + struct perf_sample_data data; 503 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 504 + 505 + /* 506 + * First we pause the local counters, so that when we are locked 507 + * here, the counters are all paused. When it gets locked due to 508 + * perf_disable(), the timer interrupt handler will be delayed. 509 + * 510 + * See also loongarch_pmu_start(). 511 + */ 512 + pause_local_counters(); 513 + 514 + regs = get_irq_regs(); 515 + 516 + perf_sample_data_init(&data, 0, 0); 517 + 518 + for (n = 0; n < loongarch_pmu.num_counters; n++) { 519 + if (test_bit(n, cpuc->used_mask)) { 520 + counter = loongarch_pmu.read_counter(n); 521 + if (counter & loongarch_pmu.overflow) { 522 + handle_associated_event(cpuc, n, &data, regs); 523 + handled = IRQ_HANDLED; 524 + } 525 + } 526 + } 527 + 528 + resume_local_counters(); 529 + 530 + /* 531 + * Do all the work for the pending perf events. We can do this 532 + * in here because the performance counter interrupt is a regular 533 + * interrupt, not NMI. 
534 + */ 535 + if (handled == IRQ_HANDLED) 536 + irq_work_run(); 537 + 538 + return handled; 539 + } 540 + 541 + static int loongarch_pmu_event_init(struct perf_event *event) 542 + { 543 + int r, irq; 544 + unsigned long flags; 545 + 546 + /* does not support taken branch sampling */ 547 + if (has_branch_stack(event)) 548 + return -EOPNOTSUPP; 549 + 550 + switch (event->attr.type) { 551 + case PERF_TYPE_RAW: 552 + case PERF_TYPE_HARDWARE: 553 + case PERF_TYPE_HW_CACHE: 554 + break; 555 + 556 + default: 557 + /* Init it to avoid false validate_group */ 558 + event->hw.event_base = 0xffffffff; 559 + return -ENOENT; 560 + } 561 + 562 + if (event->cpu >= 0 && !cpu_online(event->cpu)) 563 + return -ENODEV; 564 + 565 + irq = get_pmc_irq(); 566 + flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; 567 + if (!atomic_inc_not_zero(&active_events)) { 568 + mutex_lock(&pmu_reserve_mutex); 569 + if (atomic_read(&active_events) == 0) { 570 + r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu); 571 + if (r < 0) { 572 + mutex_unlock(&pmu_reserve_mutex); 573 + pr_warn("PMU IRQ request failed\n"); 574 + return -ENODEV; 575 + } 576 + } 577 + atomic_inc(&active_events); 578 + mutex_unlock(&pmu_reserve_mutex); 579 + } 580 + 581 + return __hw_perf_event_init(event); 582 + } 583 + 584 + static struct pmu pmu = { 585 + .pmu_enable = loongarch_pmu_enable, 586 + .pmu_disable = loongarch_pmu_disable, 587 + .event_init = loongarch_pmu_event_init, 588 + .add = loongarch_pmu_add, 589 + .del = loongarch_pmu_del, 590 + .start = loongarch_pmu_start, 591 + .stop = loongarch_pmu_stop, 592 + .read = loongarch_pmu_read, 593 + }; 594 + 595 + static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) 596 + { 597 + return (pev->event_id & 0xff); 598 + } 599 + 600 + static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) 601 + { 602 + const struct loongarch_perf_event *pev; 603 + 604 + pev = &(*loongarch_pmu.general_event_map)[idx]; 605 + 606 + if (pev->event_id == HW_OP_UNSUPPORTED) 607 + return ERR_PTR(-ENOENT); 608 + 609 + return pev; 610 + } 611 + 612 + static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) 613 + { 614 + unsigned int cache_type, cache_op, cache_result; 615 + const struct loongarch_perf_event *pev; 616 + 617 + cache_type = (config >> 0) & 0xff; 618 + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 619 + return ERR_PTR(-EINVAL); 620 + 621 + cache_op = (config >> 8) & 0xff; 622 + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 623 + return ERR_PTR(-EINVAL); 624 + 625 + cache_result = (config >> 16) & 0xff; 626 + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 627 + return ERR_PTR(-EINVAL); 628 + 629 + pev = &((*loongarch_pmu.cache_event_map) 630 + [cache_type] 631 + [cache_op] 632 + [cache_result]); 633 + 634 + if (pev->event_id == CACHE_OP_UNSUPPORTED) 635 + return ERR_PTR(-ENOENT); 636 + 637 + return pev; 638 + } 639 + 640 + static int validate_group(struct perf_event *event) 641 + { 642 + struct cpu_hw_events fake_cpuc; 643 + struct perf_event *sibling, *leader = event->group_leader; 644 + 645 + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); 646 + 647 + if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) 648 + return -EINVAL; 649 + 650 + for_each_sibling_event(sibling, leader) { 651 + if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) 652 + return -EINVAL; 653 + } 654 + 655 + if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) 656 + return 
-EINVAL; 657 + 658 + return 0; 659 + } 660 + 661 + static void reset_counters(void *arg) 662 + { 663 + int n; 664 + int counters = loongarch_pmu.num_counters; 665 + 666 + for (n = 0; n < counters; n++) { 667 + loongarch_pmu_write_control(n, 0); 668 + loongarch_pmu.write_counter(n, 0); 669 + } 670 + } 671 + 672 + static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = { 673 + PERF_MAP_ALL_UNSUPPORTED, 674 + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, 675 + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, 676 + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, 677 + [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, 678 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, 679 + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, 680 + }; 681 + 682 + static const struct loongarch_perf_event loongson_cache_map 683 + [PERF_COUNT_HW_CACHE_MAX] 684 + [PERF_COUNT_HW_CACHE_OP_MAX] 685 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 686 + PERF_CACHE_MAP_ALL_UNSUPPORTED, 687 + [C(L1D)] = { 688 + /* 689 + * Like some other architectures (e.g. ARM), the performance 690 + * counters don't differentiate between read and write 691 + * accesses/misses, so this isn't strictly correct, but it's the 692 + * best we can do. Writes and reads get combined. 693 + */ 694 + [C(OP_READ)] = { 695 + [C(RESULT_ACCESS)] = { 0x8 }, 696 + [C(RESULT_MISS)] = { 0x9 }, 697 + }, 698 + [C(OP_WRITE)] = { 699 + [C(RESULT_ACCESS)] = { 0x8 }, 700 + [C(RESULT_MISS)] = { 0x9 }, 701 + }, 702 + [C(OP_PREFETCH)] = { 703 + [C(RESULT_ACCESS)] = { 0xaa }, 704 + [C(RESULT_MISS)] = { 0xa9 }, 705 + }, 706 + }, 707 + [C(L1I)] = { 708 + [C(OP_READ)] = { 709 + [C(RESULT_ACCESS)] = { 0x6 }, 710 + [C(RESULT_MISS)] = { 0x7 }, 711 + }, 712 + }, 713 + [C(LL)] = { 714 + [C(OP_READ)] = { 715 + [C(RESULT_ACCESS)] = { 0xc }, 716 + [C(RESULT_MISS)] = { 0xd }, 717 + }, 718 + [C(OP_WRITE)] = { 719 + [C(RESULT_ACCESS)] = { 0xc }, 720 + [C(RESULT_MISS)] = { 0xd }, 721 + }, 722 + }, 723 + [C(ITLB)] = { 724 + [C(OP_READ)] = { 725 + [C(RESULT_MISS)] = { 0x3b }, 726 + }, 727 + }, 728 + [C(DTLB)] = { 729 + [C(OP_READ)] = { 730 + [C(RESULT_ACCESS)] = { 0x4 }, 731 + [C(RESULT_MISS)] = { 0x3c }, 732 + }, 733 + [C(OP_WRITE)] = { 734 + [C(RESULT_ACCESS)] = { 0x4 }, 735 + [C(RESULT_MISS)] = { 0x3c }, 736 + }, 737 + }, 738 + [C(BPU)] = { 739 + /* Using the same code for *HW_BRANCH* */ 740 + [C(OP_READ)] = { 741 + [C(RESULT_ACCESS)] = { 0x02 }, 742 + [C(RESULT_MISS)] = { 0x03 }, 743 + }, 744 + }, 745 + }; 746 + 747 + static int __hw_perf_event_init(struct perf_event *event) 748 + { 749 + int err; 750 + struct hw_perf_event *hwc = &event->hw; 751 + struct perf_event_attr *attr = &event->attr; 752 + const struct loongarch_perf_event *pev; 753 + 754 + /* Returning LoongArch event descriptor for generic perf event. */ 755 + if (PERF_TYPE_HARDWARE == event->attr.type) { 756 + if (event->attr.config >= PERF_COUNT_HW_MAX) 757 + return -EINVAL; 758 + pev = loongarch_pmu_map_general_event(event->attr.config); 759 + } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 760 + pev = loongarch_pmu_map_cache_event(event->attr.config); 761 + } else if (PERF_TYPE_RAW == event->attr.type) { 762 + /* We are working on the global raw event. */ 763 + mutex_lock(&raw_event_mutex); 764 + pev = loongarch_pmu.map_raw_event(event->attr.config); 765 + } else { 766 + /* The event type is not (yet) supported. 
*/ 767 + return -EOPNOTSUPP; 768 + } 769 + 770 + if (IS_ERR(pev)) { 771 + if (PERF_TYPE_RAW == event->attr.type) 772 + mutex_unlock(&raw_event_mutex); 773 + return PTR_ERR(pev); 774 + } 775 + 776 + /* 777 + * We allow max flexibility on how each individual counter shared 778 + * by the single CPU operates (the mode exclusion and the range). 779 + */ 780 + hwc->config_base = CSR_PERFCTRL_IE; 781 + 782 + hwc->event_base = loongarch_pmu_perf_event_encode(pev); 783 + if (PERF_TYPE_RAW == event->attr.type) 784 + mutex_unlock(&raw_event_mutex); 785 + 786 + if (!attr->exclude_user) { 787 + hwc->config_base |= CSR_PERFCTRL_PLV3; 788 + hwc->config_base |= CSR_PERFCTRL_PLV2; 789 + } 790 + if (!attr->exclude_kernel) { 791 + hwc->config_base |= CSR_PERFCTRL_PLV0; 792 + } 793 + if (!attr->exclude_hv) { 794 + hwc->config_base |= CSR_PERFCTRL_PLV1; 795 + } 796 + 797 + hwc->config_base &= M_PERFCTL_CONFIG_MASK; 798 + /* 799 + * The event can belong to another cpu. We do not assign a local 800 + * counter for it for now. 801 + */ 802 + hwc->idx = -1; 803 + hwc->config = 0; 804 + 805 + if (!hwc->sample_period) { 806 + hwc->sample_period = loongarch_pmu.max_period; 807 + hwc->last_period = hwc->sample_period; 808 + local64_set(&hwc->period_left, hwc->sample_period); 809 + } 810 + 811 + err = 0; 812 + if (event->group_leader != event) 813 + err = validate_group(event); 814 + 815 + event->destroy = hw_perf_event_destroy; 816 + 817 + if (err) 818 + event->destroy(event); 819 + 820 + return err; 821 + } 822 + 823 + static void pause_local_counters(void) 824 + { 825 + unsigned long flags; 826 + int ctr = loongarch_pmu.num_counters; 827 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 828 + 829 + local_irq_save(flags); 830 + do { 831 + ctr--; 832 + cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr); 833 + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & 834 + ~M_PERFCTL_COUNT_EVENT_WHENEVER); 835 + } while (ctr > 0); 836 + local_irq_restore(flags); 837 + } 838 + 839 + static void resume_local_counters(void) 840 + { 841 + int ctr = loongarch_pmu.num_counters; 842 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 843 + 844 + do { 845 + ctr--; 846 + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]); 847 + } while (ctr > 0); 848 + } 849 + 850 + static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) 851 + { 852 + raw_event.event_id = config & 0xff; 853 + 854 + return &raw_event; 855 + } 856 + 857 + static int __init init_hw_perf_events(void) 858 + { 859 + int counters; 860 + 861 + if (!cpu_has_pmp) 862 + return -ENODEV; 863 + 864 + pr_info("Performance counters: "); 865 + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; 866 + 867 + loongarch_pmu.num_counters = counters; 868 + loongarch_pmu.max_period = (1ULL << 63) - 1; 869 + loongarch_pmu.valid_count = (1ULL << 63) - 1; 870 + loongarch_pmu.overflow = 1ULL << 63; 871 + loongarch_pmu.name = "loongarch/loongson64"; 872 + loongarch_pmu.read_counter = loongarch_pmu_read_counter; 873 + loongarch_pmu.write_counter = loongarch_pmu_write_counter; 874 + loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; 875 + loongarch_pmu.general_event_map = &loongson_event_map; 876 + loongarch_pmu.cache_event_map = &loongson_cache_map; 877 + 878 + on_each_cpu(reset_counters, NULL, 1); 879 + 880 + pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", 881 + loongarch_pmu.name, counters, 64); 882 + 883 + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); 884 + 885 + return 0; 886 + } 
887 + early_initcall(init_hw_perf_events);
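
A hedged sketch (not part of the patch; setup_raw() and setup_cache() are hypothetical helpers) of the attr.config layouts accepted by __hw_perf_event_init() above: PERF_TYPE_RAW carries the hardware event id in the low byte (see loongarch_pmu_map_raw_event()), while PERF_TYPE_HW_CACHE packs cache type, operation and result into the three low bytes (see loongarch_pmu_map_cache_event()):

#include <linux/perf_event.h>

/* Raw event: attr.config low byte is the event id, e.g. 0x09 = L1D misses. */
static void setup_raw(struct perf_event_attr *attr, unsigned int event_id)
{
	attr->type = PERF_TYPE_RAW;
	attr->config = event_id & 0xff;		/* loongarch_pmu_map_raw_event() */
}

/* Cache event: matches the decoding in loongarch_pmu_map_cache_event(). */
static void setup_cache(struct perf_event_attr *attr)
{
	attr->type = PERF_TYPE_HW_CACHE;
	attr->config = (PERF_COUNT_HW_CACHE_L1D         <<  0) |
		       (PERF_COUNT_HW_CACHE_OP_READ     <<  8) |
		       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
}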
+53
arch/loongarch/kernel/perf_regs.c
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_32BIT
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+#else /* Must be CONFIG_64BIT */
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+	if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
+#endif /* CONFIG_32BIT */
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask)
+		return -EINVAL;
+	if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1))
+		return -EINVAL;
+	return 0;
+}
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX))
+		return 0;
+
+	if ((u32)idx == PERF_REG_LOONGARCH_PC)
+		return regs->csr_era;
+
+	return regs->regs[idx];
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
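
Finally, a hypothetical decoder (not from the patch; decode_user_regs() is an assumed helper, and the layout described here is the generic PERF_SAMPLE_REGS_USER record format rather than anything LoongArch-specific): the sample carries a u64 ABI word, the value returned by perf_reg_abi() above, followed by one u64 per bit set in the requested mask, in ascending bit order, each filled in from perf_reg_value():

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: walk the ABI word and the packed register values. */
static const uint64_t *decode_user_regs(const uint64_t *p, uint64_t mask)
{
	uint64_t abi = *p++;	/* PERF_SAMPLE_REGS_ABI_32 or _64 */
	int bit;

	if (abi == PERF_SAMPLE_REGS_ABI_NONE)
		return p;	/* no user registers were captured for this sample */

	for (bit = 0; bit < 64; bit++) {
		if (mask & (1ULL << bit))
			printf("reg %d = 0x%llx\n", bit,
			       (unsigned long long)*p++);
	}
	return p;		/* next field in the sample record */
}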