Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: perf: Add perf functionality for BMIPS5000

Add hardware performance counter support to kernel "perf" code for
BMIPS5000. The BMIPS5000 performance counters are similar to MIPS
MTI cores, so the changes were mostly made in perf_event_mipsxx.c
which is typically for MTI cores.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/4109/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

Authored by Al Cooper and committed by Ralf Baechle.
a7911a8f 399aaa25

+102 -1
+102 -1
arch/mips/kernel/perf_event_mipsxx.c
··· 28 28 #include <asm/time.h> /* For perf_irq */ 29 29 30 30 #define MIPS_MAX_HWEVENTS 4 31 + #define MIPS_TCS_PER_COUNTER 2 32 + #define MIPS_CPUID_TO_COUNTER_MASK (MIPS_TCS_PER_COUNTER - 1) 31 33 32 34 struct cpu_hw_events { 33 35 /* Array of events on this cpu. */ ··· 110 108 #define M_PERFCTL_INTERRUPT_ENABLE (1 << 4) 111 109 #define M_PERFCTL_EVENT(event) (((event) & 0x3ff) << 5) 112 110 #define M_PERFCTL_VPEID(vpe) ((vpe) << 16) 111 + 112 + #ifdef CONFIG_CPU_BMIPS5000 113 + #define M_PERFCTL_MT_EN(filter) 0 114 + #else /* !CONFIG_CPU_BMIPS5000 */ 113 115 #define M_PERFCTL_MT_EN(filter) ((filter) << 20) 116 + #endif /* CONFIG_CPU_BMIPS5000 */ 117 + 114 118 #define M_TC_EN_ALL M_PERFCTL_MT_EN(0) 115 119 #define M_TC_EN_VPE M_PERFCTL_MT_EN(1) 116 120 #define M_TC_EN_TC M_PERFCTL_MT_EN(2) 117 121 #define M_PERFCTL_TCID(tcid) ((tcid) << 22) 118 122 #define M_PERFCTL_WIDE (1 << 30) 119 123 #define M_PERFCTL_MORE (1 << 31) 124 + #define M_PERFCTL_TC (1 << 30) 120 125 121 126 #define M_PERFCTL_COUNT_EVENT_WHENEVER (M_PERFCTL_EXL | \ 122 127 M_PERFCTL_KERNEL | \ ··· 144 135 145 136 static DEFINE_RWLOCK(pmuint_rwlock); 146 137 138 + #if defined(CONFIG_CPU_BMIPS5000) 139 + #define vpe_id() (cpu_has_mipsmt_pertccounters ? \ 140 + 0 : (smp_processor_id() & MIPS_CPUID_TO_COUNTER_MASK)) 141 + #else 147 142 /* 148 143 * FIXME: For VSMP, vpe_id() is redefined for Perf-events, because 149 144 * cpu_data[cpuid].vpe_id reports 0 for _both_ CPUs. 150 145 */ 151 146 #define vpe_id() (cpu_has_mipsmt_pertccounters ? \ 152 - 0 : smp_processor_id()) 147 + 0 : smp_processor_id()) 148 + #endif 153 149 154 150 /* Copied from op_model_mipsxx.c */ 155 151 static unsigned int vpe_shift(void) ··· 348 334 (evt->config_base & M_PERFCTL_CONFIG_MASK) | 349 335 /* Make sure interrupt enabled. 
*/ 350 336 M_PERFCTL_INTERRUPT_ENABLE; 337 + if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) 338 + /* enable the counter for the calling thread */ 339 + cpuc->saved_ctrl[idx] |= 340 + (1 << (12 + vpe_id())) | M_PERFCTL_TC; 341 + 351 342 /* 352 343 * We do not actually let the counter run. Leave it until start(). 353 344 */ ··· 833 814 [PERF_COUNT_HW_BUS_CYCLES] = { 0x25, CNTR_ALL }, 834 815 }; 835 816 817 + static const struct mips_perf_event bmips5000_event_map 818 + [PERF_COUNT_HW_MAX] = { 819 + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN | CNTR_ODD, T }, 820 + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_EVEN | CNTR_ODD, T }, 821 + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x02, CNTR_ODD, T }, 822 + }; 823 + 836 824 /* 24K/34K/1004K cores can share the same cache event map. */ 837 825 static const struct mips_perf_event mipsxxcore_cache_map 838 826 [PERF_COUNT_HW_CACHE_MAX] ··· 988 962 [C(OP_WRITE)] = { 989 963 [C(RESULT_ACCESS)] = { 0x27, CNTR_EVEN, T }, 990 964 [C(RESULT_MISS)] = { 0x27, CNTR_ODD, T }, 965 + }, 966 + }, 967 + }; 968 + 969 + /* BMIPS5000 */ 970 + static const struct mips_perf_event bmips5000_cache_map 971 + [PERF_COUNT_HW_CACHE_MAX] 972 + [PERF_COUNT_HW_CACHE_OP_MAX] 973 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 974 + [C(L1D)] = { 975 + /* 976 + * Like some other architectures (e.g. ARM), the performance 977 + * counters don't differentiate between read and write 978 + * accesses/misses, so this isn't strictly correct, but it's the 979 + * best we can do. Writes and reads get combined. 
980 + */ 981 + [C(OP_READ)] = { 982 + [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, 983 + [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, 984 + }, 985 + [C(OP_WRITE)] = { 986 + [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, 987 + [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, 988 + }, 989 + }, 990 + [C(L1I)] = { 991 + [C(OP_READ)] = { 992 + [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, 993 + [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, 994 + }, 995 + [C(OP_WRITE)] = { 996 + [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, 997 + [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, 998 + }, 999 + [C(OP_PREFETCH)] = { 1000 + [C(RESULT_ACCESS)] = { 23, CNTR_EVEN, T }, 1001 + /* 1002 + * Note that MIPS has only "hit" events countable for 1003 + * the prefetch operation. 1004 + */ 1005 + }, 1006 + }, 1007 + [C(LL)] = { 1008 + [C(OP_READ)] = { 1009 + [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, 1010 + [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, 1011 + }, 1012 + [C(OP_WRITE)] = { 1013 + [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, 1014 + [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, 1015 + }, 1016 + }, 1017 + [C(BPU)] = { 1018 + /* Using the same code for *HW_BRANCH* */ 1019 + [C(OP_READ)] = { 1020 + [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, 1021 + }, 1022 + [C(OP_WRITE)] = { 1023 + [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, 991 1024 }, 992 1025 }, 993 1026 }; ··· 1325 1240 #define IS_RANGE_V_1004K_EVENT(r) ((r) == 47) 1326 1241 #endif 1327 1242 1243 + /* BMIPS5000 */ 1244 + #define IS_BOTH_COUNTERS_BMIPS5000_EVENT(b) \ 1245 + ((b) == 0 || (b) == 1) 1246 + 1247 + 1328 1248 /* 1329 1249 * User can use 0-255 raw events, where 0-127 for the events of even 1330 1250 * counters, and 128-255 for odd counters. Note that bit 7 is used to ··· 1400 1310 raw_event.range = T; 1401 1311 #endif 1402 1312 break; 1313 + case CPU_BMIPS5000: 1314 + if (IS_BOTH_COUNTERS_BMIPS5000_EVENT(base_id)) 1315 + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; 1316 + else 1317 + raw_event.cntr_mask = 1318 + raw_id > 127 ? 
CNTR_ODD : CNTR_EVEN; 1403 1319 } 1404 1320 1405 1321 return &raw_event; ··· 1516 1420 mipspmu.general_event_map = &octeon_event_map; 1517 1421 mipspmu.cache_event_map = &octeon_cache_map; 1518 1422 mipspmu.map_raw_event = octeon_pmu_map_raw_event; 1423 + break; 1424 + case CPU_BMIPS5000: 1425 + mipspmu.name = "BMIPS5000"; 1426 + mipspmu.general_event_map = &bmips5000_event_map; 1427 + mipspmu.cache_event_map = &bmips5000_cache_map; 1519 1428 break; 1520 1429 default: 1521 1430 pr_cont("Either hardware does not support performance "