Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux

Pull nds32 updates from Greentime Hu:

- Perf support

- Power management support

- FPU support

- Hardware prefetcher support

- Build error fixed

- Performance enhancement

* tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux:
nds32: support hardware prefetcher
nds32: Fix the items of hwcap_str ordering issue.
math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
math-emu/op-2.h: Use statement expressions to prevent negative constant shift
nds32: support denormalized result through FP emulator
nds32: Support FP emulation
nds32: nds32 FPU port
nds32: Remove duplicated include from pm.c
nds32: Power management for nds32
nds32: Add document for NDS32 PMU.
nds32: Add perf call-graph support.
nds32: Perf porting
nds32: Fix bug in bitfield.h
nds32: Fix gcc 8.0 compiler option incompatible.
nds32: Fill all TLB entries with kernel image mapping
nds32: Remove the redundant assignment

+4440 -88
+17
Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
··· 1 + * NDS32 Performance Monitor Units 2 + 3 + NDS32 core have a PMU for counting cpu and cache events like cache misses. 4 + The NDS32 PMU representation in the device tree should be done as under: 5 + 6 + Required properties: 7 + 8 + - compatible : 9 + "andestech,nds32v3-pmu" 10 + 11 + - interrupts : The interrupt number for NDS32 PMU is 13. 12 + 13 + Example: 14 + pmu{ 15 + compatible = "andestech,nds32v3-pmu"; 16 + interrupts = <13>; 17 + }
+12
arch/nds32/Kconfig
··· 28 28 select HANDLE_DOMAIN_IRQ 29 29 select HAVE_ARCH_TRACEHOOK 30 30 select HAVE_DEBUG_KMEMLEAK 31 + select HAVE_EXIT_THREAD 31 32 select HAVE_REGS_AND_STACK_ACCESS_API 33 + select HAVE_PERF_EVENTS 32 34 select IRQ_DOMAIN 33 35 select LOCKDEP_SUPPORT 34 36 select MODULES_USE_ELF_RELA ··· 92 90 93 91 menu "Kernel Features" 94 92 source "kernel/Kconfig.hz" 93 + endmenu 94 + 95 + menu "Power management options" 96 + config SYS_SUPPORTS_APM_EMULATION 97 + bool 98 + 99 + config ARCH_SUSPEND_POSSIBLE 100 + def_bool y 101 + 102 + source "kernel/power/Kconfig" 95 103 endmenu
+41
arch/nds32/Kconfig.cpu
··· 7 7 bool "Little endian" 8 8 default y 9 9 10 + config FPU 11 + bool "FPU support" 12 + default n 13 + help 14 + If FPU ISA is used in user space, this configuration shall be Y to 15 + enable required support in kernel such as fpu context switch and 16 + fpu exception handler. 17 + 18 + If no FPU ISA is used in user space, say N. 19 + 20 + config LAZY_FPU 21 + bool "lazy FPU support" 22 + depends on FPU 23 + default y 24 + help 25 + Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can 26 + enhance system performance by reducing the context switch 27 + frequency of the FPU register. 28 + 29 + For normal case, say Y. 30 + 31 + config SUPPORT_DENORMAL_ARITHMETIC 32 + bool "Denormal arithmetic support" 33 + depends on FPU 34 + default n 35 + help 36 + Say Y here to enable arithmetic of denormalized number. Enabling 37 + this feature can enhance the precision for tininess number. 38 + However, performance loss in floating point calculations is 39 + possibly significant due to additional FPU exception. 40 + 41 + If the calculated tolerance for tininess number is not critical, 42 + say N to prevent performance loss. 43 + 10 44 config HWZOL 11 45 bool "hardware zero overhead loop support" 12 46 depends on CPU_D10 || CPU_D15 ··· 176 142 help 177 143 Say Y here to enable L2 cache if your SoC are integrated with L2CC. 178 144 If unsure, say N. 145 + 146 + config HW_PRE 147 + bool "Enable hardware prefetcher" 148 + default y 149 + help 150 + Say Y here to enable hardware prefetcher feature. 151 + Only when CPU_VER.REV >= 0x09 can support. 179 152 180 153 menu "Memory configuration" 181 154
+5
arch/nds32/Makefile
··· 5 5 6 6 comma = , 7 7 8 + 8 9 ifdef CONFIG_FUNCTION_TRACER 9 10 arch-y += -malways-save-lp -mno-relax 10 11 endif 12 + 13 + # Avoid generating FPU instructions 14 + arch-y += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft 11 15 12 16 KBUILD_CFLAGS += $(call cc-option, -mno-sched-prolog-epilog) 13 17 KBUILD_CFLAGS += -mcmodel=large ··· 30 26 31 27 # If we have a machine-specific directory, then include it in the build. 32 28 core-y += arch/nds32/kernel/ arch/nds32/mm/ 29 + core-$(CONFIG_FPU) += arch/nds32/math-emu/ 33 30 libs-y += arch/nds32/lib/ 34 31 35 32 ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
+5
arch/nds32/boot/dts/ae3xx.dts
··· 82 82 interrupts = <18>; 83 83 }; 84 84 }; 85 + 86 + pmu { 87 + compatible = "andestech,nds32v3-pmu"; 88 + interrupts= <13>; 89 + }; 85 90 };
+1
arch/nds32/include/asm/Kbuild
··· 36 36 generic-y += kvm_para.h 37 37 generic-y += limits.h 38 38 generic-y += local.h 39 + generic-y += local64.h 39 40 generic-y += mm-arch-hooks.h 40 41 generic-y += mman.h 41 42 generic-y += parport.h
+23 -2
arch/nds32/include/asm/bitfield.h
··· 251 251 #define ITYPE_mskSTYPE ( 0xF << ITYPE_offSTYPE ) 252 252 #define ITYPE_mskCPID ( 0x3 << ITYPE_offCPID ) 253 253 254 + /* Additional definitions of ITYPE register for FPU */ 255 + #define FPU_DISABLE_EXCEPTION (0x1 << ITYPE_offSTYPE) 256 + #define FPU_EXCEPTION (0x2 << ITYPE_offSTYPE) 257 + #define FPU_CPID 0 /* FPU Co-Processor ID is 0 */ 258 + 254 259 #define NDS32_VECTOR_mskNONEXCEPTION 0x78 255 260 #define NDS32_VECTOR_offEXCEPTION 8 256 261 #define NDS32_VECTOR_offINTERRUPT 9 ··· 697 692 #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ 698 693 #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ 699 694 #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ 700 - #define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ 701 - #define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ 695 + #define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ 696 + #define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ 702 697 /* bit 28:31 reserved */ 703 698 704 699 #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) ··· 740 735 #define N13MISC_CTL_offRTP 1 /* Disable Return Target Predictor */ 741 736 #define N13MISC_CTL_offPTEPF 2 /* Disable HPTWK L2 PTE pefetch */ 742 737 #define N13MISC_CTL_offSP_SHADOW_EN 4 /* Enable shadow stack pointers */ 738 + #define MISC_CTL_offHWPRE 11 /* Enable HardWare PREFETCH */ 743 739 /* bit 6, 9:31 reserved */ 744 740 745 741 #define N13MISC_CTL_makBTB ( 0x1 << N13MISC_CTL_offBTB ) 746 742 #define N13MISC_CTL_makRTP ( 0x1 << N13MISC_CTL_offRTP ) 747 743 #define N13MISC_CTL_makPTEPF ( 0x1 << N13MISC_CTL_offPTEPF ) 748 744 #define N13MISC_CTL_makSP_SHADOW_EN ( 0x1 << N13MISC_CTL_offSP_SHADOW_EN ) 745 + #define MISC_CTL_makHWPRE_EN ( 0x1 << MISC_CTL_offHWPRE ) 749 746 747 + #ifdef CONFIG_HW_PRE 748 + #define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN) 749 + #else 750 750 #define MISC_init 
(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN) 751 + #endif 751 752 752 753 /****************************************************************************** 753 754 * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers) ··· 937 926 #define FPCSR_mskDNIT ( 0x1 << FPCSR_offDNIT ) 938 927 #define FPCSR_mskRIT ( 0x1 << FPCSR_offRIT ) 939 928 #define FPCSR_mskALL (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX) 929 + #define FPCSR_mskALLE_NO_UDFE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE) 940 930 #define FPCSR_mskALLE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE) 941 931 #define FPCSR_mskALLT (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT) 942 932 ··· 957 945 #define FPCFG_mskFMA ( 0x1 << FPCFG_offFMA ) 958 946 #define FPCFG_mskIMVER ( 0x1F << FPCFG_offIMVER ) 959 947 #define FPCFG_mskAVER ( 0x1F << FPCFG_offAVER ) 948 + 949 + /* 8 Single precision or 4 double precision registers are available */ 950 + #define SP8_DP4_reg 0 951 + /* 16 Single precision or 8 double precision registers are available */ 952 + #define SP16_DP8_reg 1 953 + /* 32 Single precision or 16 double precision registers are available */ 954 + #define SP32_DP16_reg 2 955 + /* 32 Single precision or 32 double precision registers are available */ 956 + #define SP32_DP32_reg 3 960 957 961 958 /****************************************************************************** 962 959 * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
+11
arch/nds32/include/asm/elf.h
··· 9 9 */ 10 10 11 11 #include <asm/ptrace.h> 12 + #include <asm/fpu.h> 12 13 13 14 typedef unsigned long elf_greg_t; 14 15 typedef unsigned long elf_freg_t[3]; ··· 160 159 161 160 #endif 162 161 162 + 163 + #if IS_ENABLED(CONFIG_FPU) 164 + #define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT) 165 + #else 166 + #define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0) 167 + #endif 168 + 163 169 #define ARCH_DLINFO \ 164 170 do { \ 171 + /* Optional FPU initialization */ \ 172 + FPU_AUX_ENT; \ 173 + \ 165 174 NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 166 175 (elf_addr_t)current->mm->context.vdso); \ 167 176 } while (0)
+126
arch/nds32/include/asm/fpu.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2005-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __ASM_NDS32_FPU_H 5 + #define __ASM_NDS32_FPU_H 6 + 7 + #if IS_ENABLED(CONFIG_FPU) 8 + #ifndef __ASSEMBLY__ 9 + #include <linux/sched/task_stack.h> 10 + #include <linux/preempt.h> 11 + #include <asm/ptrace.h> 12 + 13 + extern bool has_fpu; 14 + 15 + extern void save_fpu(struct task_struct *__tsk); 16 + extern void load_fpu(const struct fpu_struct *fpregs); 17 + extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs); 18 + extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu); 19 + 20 + #define test_tsk_fpu(regs) (regs->fucop_ctl & FUCOP_CTL_mskCP0EN) 21 + 22 + /* 23 + * Initially load the FPU with signalling NANS. This bit pattern 24 + * has the property that no matter whether considered as single or as 25 + * double precision, it still represents a signalling NAN. 26 + */ 27 + 28 + #define sNAN64 0xFFFFFFFFFFFFFFFFULL 29 + #define sNAN32 0xFFFFFFFFUL 30 + 31 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 32 + /* 33 + * Denormalized number is unsupported by nds32 FPU. Hence the operation 34 + * is treated as underflow cases when the final result is a denormalized 35 + * number. To enhance precision, underflow exception trap should be 36 + * enabled by default and kernel will re-execute it by fpu emulator 37 + * when getting underflow exception. 
38 + */ 39 + #define FPCSR_INIT FPCSR_mskUDFE 40 + #else 41 + #define FPCSR_INIT 0x0UL 42 + #endif 43 + 44 + extern const struct fpu_struct init_fpuregs; 45 + 46 + static inline void disable_ptreg_fpu(struct pt_regs *regs) 47 + { 48 + regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN; 49 + } 50 + 51 + static inline void enable_ptreg_fpu(struct pt_regs *regs) 52 + { 53 + regs->fucop_ctl |= FUCOP_CTL_mskCP0EN; 54 + } 55 + 56 + static inline void enable_fpu(void) 57 + { 58 + unsigned long fucop_ctl; 59 + 60 + fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN; 61 + __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL); 62 + __nds32__isb(); 63 + } 64 + 65 + static inline void disable_fpu(void) 66 + { 67 + unsigned long fucop_ctl; 68 + 69 + fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN; 70 + __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL); 71 + __nds32__isb(); 72 + } 73 + 74 + static inline void lose_fpu(void) 75 + { 76 + preempt_disable(); 77 + #if IS_ENABLED(CONFIG_LAZY_FPU) 78 + if (last_task_used_math == current) { 79 + last_task_used_math = NULL; 80 + #else 81 + if (test_tsk_fpu(task_pt_regs(current))) { 82 + #endif 83 + save_fpu(current); 84 + } 85 + disable_ptreg_fpu(task_pt_regs(current)); 86 + preempt_enable(); 87 + } 88 + 89 + static inline void own_fpu(void) 90 + { 91 + preempt_disable(); 92 + #if IS_ENABLED(CONFIG_LAZY_FPU) 93 + if (last_task_used_math != current) { 94 + if (last_task_used_math != NULL) 95 + save_fpu(last_task_used_math); 96 + load_fpu(&current->thread.fpu); 97 + last_task_used_math = current; 98 + } 99 + #else 100 + if (!test_tsk_fpu(task_pt_regs(current))) { 101 + load_fpu(&current->thread.fpu); 102 + } 103 + #endif 104 + enable_ptreg_fpu(task_pt_regs(current)); 105 + preempt_enable(); 106 + } 107 + 108 + #if !IS_ENABLED(CONFIG_LAZY_FPU) 109 + static inline void unlazy_fpu(struct task_struct *tsk) 110 + { 111 + preempt_disable(); 112 + if (test_tsk_fpu(task_pt_regs(tsk))) 113 + save_fpu(tsk); 114 + preempt_enable(); 115 + } 
116 + #endif /* !CONFIG_LAZY_FPU */ 117 + static inline void clear_fpu(struct pt_regs *regs) 118 + { 119 + preempt_disable(); 120 + if (test_tsk_fpu(regs)) 121 + disable_ptreg_fpu(regs); 122 + preempt_enable(); 123 + } 124 + #endif /* CONFIG_FPU */ 125 + #endif /* __ASSEMBLY__ */ 126 + #endif /* __ASM_NDS32_FPU_H */
+32
arch/nds32/include/asm/fpuemu.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2005-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __ARCH_NDS32_FPUEMU_H 5 + #define __ARCH_NDS32_FPUEMU_H 6 + 7 + /* 8 + * single precision 9 + */ 10 + 11 + void fadds(void *ft, void *fa, void *fb); 12 + void fsubs(void *ft, void *fa, void *fb); 13 + void fmuls(void *ft, void *fa, void *fb); 14 + void fdivs(void *ft, void *fa, void *fb); 15 + void fs2d(void *ft, void *fa); 16 + void fsqrts(void *ft, void *fa); 17 + void fnegs(void *ft, void *fa); 18 + int fcmps(void *ft, void *fa, void *fb, int cop); 19 + 20 + /* 21 + * double precision 22 + */ 23 + void faddd(void *ft, void *fa, void *fb); 24 + void fsubd(void *ft, void *fa, void *fb); 25 + void fmuld(void *ft, void *fa, void *fb); 26 + void fdivd(void *ft, void *fa, void *fb); 27 + void fsqrtd(void *ft, void *fa); 28 + void fd2s(void *ft, void *fa); 29 + void fnegd(void *ft, void *fa); 30 + int fcmpd(void *ft, void *fa, void *fb, int cop); 31 + 32 + #endif /* __ARCH_NDS32_FPUEMU_H */
+109
arch/nds32/include/asm/nds32_fpu_inst.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2005-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __NDS32_FPU_INST_H 5 + #define __NDS32_FPU_INST_H 6 + 7 + #define cop0_op 0x35 8 + 9 + /* 10 + * COP0 field of opcodes. 11 + */ 12 + #define fs1_op 0x0 13 + #define fs2_op 0x4 14 + #define fd1_op 0x8 15 + #define fd2_op 0xc 16 + 17 + /* 18 + * FS1 opcode. 19 + */ 20 + enum fs1 { 21 + fadds_op, fsubs_op, fcpynss_op, fcpyss_op, 22 + fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op, 23 + fnmadds_op, fnmsubs_op, 24 + fmuls_op = 0xc, fdivs_op, 25 + fs1_f2op_op = 0xf 26 + }; 27 + 28 + /* 29 + * FS1/F2OP opcode. 30 + */ 31 + enum fs1_f2 { 32 + fs2d_op, fsqrts_op, 33 + fui2s_op = 0x8, fsi2s_op = 0xc, 34 + fs2ui_op = 0x10, fs2ui_z_op = 0x14, 35 + fs2si_op = 0x18, fs2si_z_op = 0x1c 36 + }; 37 + 38 + /* 39 + * FS2 opcode. 40 + */ 41 + enum fs2 { 42 + fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op, 43 + fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op 44 + }; 45 + 46 + /* 47 + * FD1 opcode. 48 + */ 49 + enum fd1 { 50 + faddd_op, fsubd_op, fcpynsd_op, fcpysd_op, 51 + fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op, 52 + fnmaddd_op, fnmsubd_op, 53 + fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf 54 + }; 55 + 56 + /* 57 + * FD1/F2OP opcode. 58 + */ 59 + enum fd1_f2 { 60 + fd2s_op, fsqrtd_op, 61 + fui2d_op = 0x8, fsi2d_op = 0xc, 62 + fd2ui_op = 0x10, fd2ui_z_op = 0x14, 63 + fd2si_op = 0x18, fd2si_z_op = 0x1c 64 + }; 65 + 66 + /* 67 + * FD2 opcode. 
68 + */ 69 + enum fd2 { 70 + fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op, 71 + fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op 72 + }; 73 + 74 + #define NDS32Insn(x) x 75 + 76 + #define I_OPCODE_off 25 77 + #define NDS32Insn_OPCODE(x) (NDS32Insn(x) >> I_OPCODE_off) 78 + 79 + #define I_OPCODE_offRt 20 80 + #define I_OPCODE_mskRt (0x1fUL << I_OPCODE_offRt) 81 + #define NDS32Insn_OPCODE_Rt(x) \ 82 + ((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt) 83 + 84 + #define I_OPCODE_offRa 15 85 + #define I_OPCODE_mskRa (0x1fUL << I_OPCODE_offRa) 86 + #define NDS32Insn_OPCODE_Ra(x) \ 87 + ((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa) 88 + 89 + #define I_OPCODE_offRb 10 90 + #define I_OPCODE_mskRb (0x1fUL << I_OPCODE_offRb) 91 + #define NDS32Insn_OPCODE_Rb(x) \ 92 + ((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb) 93 + 94 + #define I_OPCODE_offbit1014 10 95 + #define I_OPCODE_mskbit1014 (0x1fUL << I_OPCODE_offbit1014) 96 + #define NDS32Insn_OPCODE_BIT1014(x) \ 97 + ((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014) 98 + 99 + #define I_OPCODE_offbit69 6 100 + #define I_OPCODE_mskbit69 (0xfUL << I_OPCODE_offbit69) 101 + #define NDS32Insn_OPCODE_BIT69(x) \ 102 + ((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69) 103 + 104 + #define I_OPCODE_offCOP0 0 105 + #define I_OPCODE_mskCOP0 (0x3fUL << I_OPCODE_offCOP0) 106 + #define NDS32Insn_OPCODE_COP0(x) \ 107 + ((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0) 108 + 109 + #endif /* __NDS32_FPU_INST_H */
+16
arch/nds32/include/asm/perf_event.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2008-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __ASM_PERF_EVENT_H 5 + #define __ASM_PERF_EVENT_H 6 + 7 + /* 8 + * This file is request by Perf, 9 + * please refer to tools/perf/design.txt for more details 10 + */ 11 + struct pt_regs; 12 + unsigned long perf_instruction_pointer(struct pt_regs *regs); 13 + unsigned long perf_misc_flags(struct pt_regs *regs); 14 + #define perf_misc_flags(regs) perf_misc_flags(regs) 15 + 16 + #endif
+386
arch/nds32/include/asm/pmu.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2008-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __ASM_PMU_H 5 + #define __ASM_PMU_H 6 + 7 + #include <linux/interrupt.h> 8 + #include <linux/perf_event.h> 9 + #include <asm/unistd.h> 10 + #include <asm/bitfield.h> 11 + 12 + /* Has special meaning for perf core implementation */ 13 + #define HW_OP_UNSUPPORTED 0x0 14 + #define C(_x) PERF_COUNT_HW_CACHE_##_x 15 + #define CACHE_OP_UNSUPPORTED 0x0 16 + 17 + /* Enough for both software and hardware defined events */ 18 + #define SOFTWARE_EVENT_MASK 0xFF 19 + 20 + #define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */ 21 + #define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36) 22 + #define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36) 23 + 24 + enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS }; 25 + 26 + u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1, 27 + PFM_CTL_mskOVF2 }; 28 + u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1, 29 + PFM_CTL_mskEN2 }; 30 + u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1, 31 + PFM_CTL_offSEL2 }; 32 + u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 }; 33 + u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 }; 34 + u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 }; 35 + u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 }; 36 + /* 37 + * Perf Events' indices 38 + */ 39 + #define NDS32_IDX_CYCLE_COUNTER 0 40 + #define NDS32_IDX_COUNTER0 1 41 + #define NDS32_IDX_COUNTER1 2 42 + 43 + /* The events for a given PMU register set. */ 44 + struct pmu_hw_events { 45 + /* 46 + * The events that are active on the PMU for the given index. 47 + */ 48 + struct perf_event *events[MAX_COUNTERS]; 49 + 50 + /* 51 + * A 1 bit for an index indicates that the counter is being used for 52 + * an event. A 0 means that the counter can be used. 
53 + */ 54 + unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)]; 55 + 56 + /* 57 + * Hardware lock to serialize accesses to PMU registers. Needed for the 58 + * read/modify/write sequences. 59 + */ 60 + raw_spinlock_t pmu_lock; 61 + }; 62 + 63 + struct nds32_pmu { 64 + struct pmu pmu; 65 + cpumask_t active_irqs; 66 + char *name; 67 + irqreturn_t (*handle_irq)(int irq_num, void *dev); 68 + void (*enable)(struct perf_event *event); 69 + void (*disable)(struct perf_event *event); 70 + int (*get_event_idx)(struct pmu_hw_events *hw_events, 71 + struct perf_event *event); 72 + int (*set_event_filter)(struct hw_perf_event *evt, 73 + struct perf_event_attr *attr); 74 + u32 (*read_counter)(struct perf_event *event); 75 + void (*write_counter)(struct perf_event *event, u32 val); 76 + void (*start)(struct nds32_pmu *nds32_pmu); 77 + void (*stop)(struct nds32_pmu *nds32_pmu); 78 + void (*reset)(void *data); 79 + int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler); 80 + void (*free_irq)(struct nds32_pmu *nds32_pmu); 81 + int (*map_event)(struct perf_event *event); 82 + int num_events; 83 + atomic_t active_events; 84 + u64 max_period; 85 + struct platform_device *plat_device; 86 + struct pmu_hw_events *(*get_hw_events)(void); 87 + }; 88 + 89 + #define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu)) 90 + 91 + int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type); 92 + 93 + u64 nds32_pmu_event_update(struct perf_event *event); 94 + 95 + int nds32_pmu_event_set_period(struct perf_event *event); 96 + 97 + /* 98 + * Common NDS32 SPAv3 event types 99 + * 100 + * Note: An implementation may not be able to count all of these events 101 + * but the encodings are considered to be `reserved' in the case that 102 + * they are not available. 103 + * 104 + * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as 105 + * NOT_SUPPORTED EVENT mapping in generic perf code. 106 + * You will need to deal it in the event writing implementation. 
107 + */ 108 + enum spav3_counter_0_perf_types { 109 + SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */ 110 + SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0, 111 + SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0, 112 + SPAV3_0_SEL_LAST /* counting symbol */ 113 + }; 114 + 115 + enum spav3_counter_1_perf_types { 116 + SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */ 117 + SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1, 118 + SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1, 119 + SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1, 120 + SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1, 121 + SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1, 122 + SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1, 123 + SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1, 124 + SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1, 125 + SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1, 126 + SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1, 127 + SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1, 128 + SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1, 129 + SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1, 130 + SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1, 131 + SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1, 132 + SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1, 133 + SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1, 134 + SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1, 135 + SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1, 136 + SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1, 137 + SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1, 138 + SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1, 139 + SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1, 140 + SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1, 141 + SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1, 142 + SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + 
PFM_OFFSET_MAGIC_1, 143 + SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1, 144 + SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1, 145 + SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1, 146 + SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1, 147 + SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1, 148 + SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1, 149 + SPAV3_1_SEL_LAST /* counting symbol */ 150 + }; 151 + 152 + enum spav3_counter_2_perf_types { 153 + SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */ 154 + SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2, 155 + SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2, 156 + SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2, 157 + SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT = 158 + 3 + PFM_OFFSET_MAGIC_2, 159 + SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2, 160 + SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2, 161 + SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2, 162 + SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2, 163 + SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2, 164 + SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2, 165 + SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2, 166 + SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2, 167 + SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2, 168 + SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2, 169 + SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2, 170 + SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2, 171 + SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2, 172 + SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2, 173 + SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2, 174 + SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2, 175 + SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2, 176 + SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2, 177 + 
SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2, 178 + SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2, 179 + SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2, 180 + SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2, 181 + SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2, 182 + SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2, 183 + SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2, 184 + SPAV3_2_SEL_LAST /* counting symbol */ 185 + }; 186 + 187 + /* Get converted event counter index */ 188 + static inline int get_converted_event_idx(unsigned long event) 189 + { 190 + int idx; 191 + 192 + if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) { 193 + idx = 0; 194 + } else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) { 195 + idx = 1; 196 + } else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) { 197 + idx = 2; 198 + } else { 199 + pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n"); 200 + return -EPERM; 201 + } 202 + 203 + return idx; 204 + } 205 + 206 + /* Get converted hardware event number */ 207 + static inline u32 get_converted_evet_hw_num(u32 event) 208 + { 209 + if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) 210 + event -= PFM_OFFSET_MAGIC_0; 211 + else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) 212 + event -= PFM_OFFSET_MAGIC_1; 213 + else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) 214 + event -= PFM_OFFSET_MAGIC_2; 215 + else if (event != 0) 216 + pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n"); 217 + 218 + return event; 219 + } 220 + 221 + /* 222 + * NDS32 HW events mapping 223 + * 224 + * The hardware events that we support. We do support cache operations but 225 + * we have harvard caches and no way to combine instruction and data 226 + * accesses/misses in hardware. 
227 + */ 228 + static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = { 229 + [PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES, 230 + [PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION, 231 + [PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS, 232 + [PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS, 233 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, 234 + [PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED, 235 + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 236 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, 237 + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, 238 + [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED 239 + }; 240 + 241 + static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 242 + [PERF_COUNT_HW_CACHE_OP_MAX] 243 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 244 + [C(L1D)] = { 245 + [C(OP_READ)] = { 246 + [C(RESULT_ACCESS)] = 247 + SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS, 248 + [C(RESULT_MISS)] = 249 + SPAV3_2_SEL_LOAD_DATA_CACHE_MISS, 250 + }, 251 + [C(OP_WRITE)] = { 252 + [C(RESULT_ACCESS)] = 253 + SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS, 254 + [C(RESULT_MISS)] = 255 + SPAV3_2_SEL_STORE_DATA_CACHE_MISS, 256 + }, 257 + [C(OP_PREFETCH)] = { 258 + [C(RESULT_ACCESS)] = 259 + CACHE_OP_UNSUPPORTED, 260 + [C(RESULT_MISS)] = 261 + CACHE_OP_UNSUPPORTED, 262 + }, 263 + }, 264 + [C(L1I)] = { 265 + [C(OP_READ)] = { 266 + [C(RESULT_ACCESS)] = 267 + SPAV3_1_SEL_CODE_CACHE_ACCESS, 268 + [C(RESULT_MISS)] = 269 + SPAV3_2_SEL_CODE_CACHE_MISS, 270 + }, 271 + [C(OP_WRITE)] = { 272 + [C(RESULT_ACCESS)] = 273 + SPAV3_1_SEL_CODE_CACHE_ACCESS, 274 + [C(RESULT_MISS)] = 275 + SPAV3_2_SEL_CODE_CACHE_MISS, 276 + }, 277 + [C(OP_PREFETCH)] = { 278 + [C(RESULT_ACCESS)] = 279 + CACHE_OP_UNSUPPORTED, 280 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 281 + }, 282 + }, 283 + /* TODO: L2CC */ 284 + [C(LL)] = { 285 + [C(OP_READ)] = { 286 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 287 + 
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 288 + }, 289 + [C(OP_WRITE)] = { 290 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 291 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 292 + }, 293 + [C(OP_PREFETCH)] = { 294 + [C(RESULT_ACCESS)] = 295 + CACHE_OP_UNSUPPORTED, 296 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 297 + }, 298 + }, 299 + /* NDS32 PMU does not support TLB read/write hit/miss, 300 + * However, it can count access/miss, which mixed with read and write. 301 + * Therefore, only READ counter will use it. 302 + * We do as possible as we can. 303 + */ 304 + [C(DTLB)] = { 305 + [C(OP_READ)] = { 306 + [C(RESULT_ACCESS)] = 307 + SPAV3_1_SEL_UDTLB_ACCESS, 308 + [C(RESULT_MISS)] = 309 + SPAV3_2_SEL_UDTLB_MISS, 310 + }, 311 + [C(OP_WRITE)] = { 312 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 313 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 314 + }, 315 + [C(OP_PREFETCH)] = { 316 + [C(RESULT_ACCESS)] = 317 + CACHE_OP_UNSUPPORTED, 318 + [C(RESULT_MISS)] = 319 + CACHE_OP_UNSUPPORTED, 320 + }, 321 + }, 322 + [C(ITLB)] = { 323 + [C(OP_READ)] = { 324 + [C(RESULT_ACCESS)] = 325 + SPAV3_1_SEL_UITLB_ACCESS, 326 + [C(RESULT_MISS)] = 327 + SPAV3_2_SEL_UITLB_MISS, 328 + }, 329 + [C(OP_WRITE)] = { 330 + [C(RESULT_ACCESS)] = 331 + CACHE_OP_UNSUPPORTED, 332 + [C(RESULT_MISS)] = 333 + CACHE_OP_UNSUPPORTED, 334 + }, 335 + [C(OP_PREFETCH)] = { 336 + [C(RESULT_ACCESS)] = 337 + CACHE_OP_UNSUPPORTED, 338 + [C(RESULT_MISS)] = 339 + CACHE_OP_UNSUPPORTED, 340 + }, 341 + }, 342 + [C(BPU)] = { /* What is BPU? */ 343 + [C(OP_READ)] = { 344 + [C(RESULT_ACCESS)] = 345 + CACHE_OP_UNSUPPORTED, 346 + [C(RESULT_MISS)] = 347 + CACHE_OP_UNSUPPORTED, 348 + }, 349 + [C(OP_WRITE)] = { 350 + [C(RESULT_ACCESS)] = 351 + CACHE_OP_UNSUPPORTED, 352 + [C(RESULT_MISS)] = 353 + CACHE_OP_UNSUPPORTED, 354 + }, 355 + [C(OP_PREFETCH)] = { 356 + [C(RESULT_ACCESS)] = 357 + CACHE_OP_UNSUPPORTED, 358 + [C(RESULT_MISS)] = 359 + CACHE_OP_UNSUPPORTED, 360 + }, 361 + }, 362 + [C(NODE)] = { /* What is NODE? 
*/ 363 + [C(OP_READ)] = { 364 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 365 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 366 + }, 367 + [C(OP_WRITE)] = { 368 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 369 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 370 + }, 371 + [C(OP_PREFETCH)] = { 372 + [C(RESULT_ACCESS)] = 373 + CACHE_OP_UNSUPPORTED, 374 + [C(RESULT_MISS)] = 375 + CACHE_OP_UNSUPPORTED, 376 + }, 377 + }, 378 + }; 379 + 380 + int nds32_pmu_map_event(struct perf_event *event, 381 + const unsigned int (*event_map)[PERF_COUNT_HW_MAX], 382 + const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX] 383 + [PERF_COUNT_HW_CACHE_OP_MAX] 384 + [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask); 385 + 386 + #endif /* __ASM_PMU_H */
+7
arch/nds32/include/asm/processor.h
··· 35 35 unsigned long address; 36 36 unsigned long trap_no; 37 37 unsigned long error_code; 38 + 39 + struct fpu_struct fpu; 38 40 }; 39 41 40 42 #define INIT_THREAD { } ··· 74 72 75 73 /* Free all resources held by a thread. */ 76 74 #define release_thread(thread) do { } while(0) 75 + #if IS_ENABLED(CONFIG_FPU) 76 + #if !IS_ENABLED(CONFIG_UNLAZU_FPU) 77 + extern struct task_struct *last_task_used_math; 78 + #endif 79 + #endif 77 80 78 81 /* Prepare to copy thread state - unlazy all lazy status */ 79 82 #define prepare_to_copy(tsk) do { } while (0)
+158
arch/nds32/include/asm/sfp-machine.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2005-2018 Andes Technology Corporation */ 3 + 4 + #include <asm/bitfield.h> 5 + 6 + #define _FP_W_TYPE_SIZE 32 7 + #define _FP_W_TYPE unsigned long 8 + #define _FP_WS_TYPE signed long 9 + #define _FP_I_TYPE long 10 + 11 + #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 12 + #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 13 + #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 14 + 15 + #define _FP_MUL_MEAT_S(R, X, Y) \ 16 + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) 17 + #define _FP_MUL_MEAT_D(R, X, Y) \ 18 + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) 19 + #define _FP_MUL_MEAT_Q(R, X, Y) \ 20 + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) 21 + 22 + #define _FP_MUL_MEAT_DW_S(R, X, Y) \ 23 + _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) 24 + #define _FP_MUL_MEAT_DW_D(R, X, Y) \ 25 + _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) 26 + 27 + #define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y) 28 + #define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv(D, R, X, Y) 29 + 30 + #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) 31 + #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 32 + #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 33 + #define _FP_NANSIGN_S 0 34 + #define _FP_NANSIGN_D 0 35 + #define _FP_NANSIGN_Q 0 36 + 37 + #define _FP_KEEPNANFRACP 1 38 + #define _FP_QNANNEGATEDP 0 39 + 40 + #define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ 41 + do { \ 42 + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ 43 + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \ 44 + R##_s = Y##_s; \ 45 + _FP_FRAC_COPY_##wc(R, Y); \ 46 + } else { \ 47 + R##_s = X##_s; \ 48 + _FP_FRAC_COPY_##wc(R, X); \ 49 + } \ 50 + R##_c = FP_CLS_NAN; \ 51 + } while (0) 52 + 53 + #define __FPU_FPCSR (current->thread.fpu.fpcsr) 54 + 55 + /* Obtain the current rounding mode. 
*/ 56 + #define FP_ROUNDMODE \ 57 + ({ \ 58 + __FPU_FPCSR & FPCSR_mskRM; \ 59 + }) 60 + 61 + #define FP_RND_NEAREST 0 62 + #define FP_RND_PINF 1 63 + #define FP_RND_MINF 2 64 + #define FP_RND_ZERO 3 65 + 66 + #define FP_EX_INVALID FPCSR_mskIVO 67 + #define FP_EX_DIVZERO FPCSR_mskDBZ 68 + #define FP_EX_OVERFLOW FPCSR_mskOVF 69 + #define FP_EX_UNDERFLOW FPCSR_mskUDF 70 + #define FP_EX_INEXACT FPCSR_mskIEX 71 + 72 + #define SF_CEQ 2 73 + #define SF_CLT 1 74 + #define SF_CGT 3 75 + #define SF_CUN 4 76 + 77 + #include <asm/byteorder.h> 78 + 79 + #ifdef __BIG_ENDIAN__ 80 + #define __BYTE_ORDER __BIG_ENDIAN 81 + #define __LITTLE_ENDIAN 0 82 + #else 83 + #define __BYTE_ORDER __LITTLE_ENDIAN 84 + #define __BIG_ENDIAN 0 85 + #endif 86 + 87 + #define abort() do { } while (0) 88 + #define umul_ppmm(w1, w0, u, v) \ 89 + do { \ 90 + UWtype __x0, __x1, __x2, __x3; \ 91 + UHWtype __ul, __vl, __uh, __vh; \ 92 + \ 93 + __ul = __ll_lowpart(u); \ 94 + __uh = __ll_highpart(u); \ 95 + __vl = __ll_lowpart(v); \ 96 + __vh = __ll_highpart(v); \ 97 + \ 98 + __x0 = (UWtype) __ul * __vl; \ 99 + __x1 = (UWtype) __ul * __vh; \ 100 + __x2 = (UWtype) __uh * __vl; \ 101 + __x3 = (UWtype) __uh * __vh; \ 102 + \ 103 + __x1 += __ll_highpart(__x0); \ 104 + __x1 += __x2; \ 105 + if (__x1 < __x2) \ 106 + __x3 += __ll_B; \ 107 + \ 108 + (w1) = __x3 + __ll_highpart(__x1); \ 109 + (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \ 110 + } while (0) 111 + 112 + #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 113 + do { \ 114 + UWtype __x; \ 115 + __x = (al) + (bl); \ 116 + (sh) = (ah) + (bh) + (__x < (al)); \ 117 + (sl) = __x; \ 118 + } while (0) 119 + 120 + #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 121 + do { \ 122 + UWtype __x; \ 123 + __x = (al) - (bl); \ 124 + (sh) = (ah) - (bh) - (__x > (al)); \ 125 + (sl) = __x; \ 126 + } while (0) 127 + 128 + #define udiv_qrnnd(q, r, n1, n0, d) \ 129 + do { \ 130 + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ 131 + __d1 = __ll_highpart(d); \ 132 + __d0 = 
__ll_lowpart(d); \ 133 + \ 134 + __r1 = (n1) % __d1; \ 135 + __q1 = (n1) / __d1; \ 136 + __m = (UWtype) __q1 * __d0; \ 137 + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ 138 + if (__r1 < __m) { \ 139 + __q1--, __r1 += (d); \ 140 + if (__r1 >= (d)) \ 141 + if (__r1 < __m) \ 142 + __q1--, __r1 += (d); \ 143 + } \ 144 + __r1 -= __m; \ 145 + __r0 = __r1 % __d1; \ 146 + __q0 = __r1 / __d1; \ 147 + __m = (UWtype) __q0 * __d0; \ 148 + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ 149 + if (__r0 < __m) { \ 150 + __q0--, __r0 += (d); \ 151 + if (__r0 >= (d)) \ 152 + if (__r0 < __m) \ 153 + __q0--, __r0 += (d); \ 154 + } \ 155 + __r0 -= __m; \ 156 + (q) = (UWtype) __q1 * __ll_B | __q0; \ 157 + (r) = __r0; \ 158 + } while (0)
+39
arch/nds32/include/asm/stacktrace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2008-2018 Andes Technology Corporation */ 3 + 4 + #ifndef __ASM_STACKTRACE_H 5 + #define __ASM_STACKTRACE_H 6 + 7 + /* Kernel callchain */ 8 + struct stackframe { 9 + unsigned long fp; 10 + unsigned long sp; 11 + unsigned long lp; 12 + }; 13 + 14 + /* 15 + * struct frame_tail: User callchain 16 + * IMPORTANT: 17 + * This struct is used for call-stack walking, 18 + * so the order and types of its members matter. 19 + * Do not use an array; it only stores sizeof(pointer) 20 + * 21 + * For details, refer to arch/arm/kernel/perf_event.c 22 + */ 23 + struct frame_tail { 24 + unsigned long stack_fp; 25 + unsigned long stack_lp; 26 + }; 27 + 28 + /* For user callchains built with optimize-for-size */ 29 + struct frame_tail_opt_size { 30 + unsigned long stack_r6; 31 + unsigned long stack_fp; 32 + unsigned long stack_gp; 33 + unsigned long stack_lp; 34 + }; 35 + 36 + extern void 37 + get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph); 38 + 39 + #endif /* __ASM_STACKTRACE_H */
+11
arch/nds32/include/asm/suspend.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (C) 2008-2017 Andes Technology Corporation 3 + 4 + #ifndef __ASM_NDS32_SUSPEND_H 5 + #define __ASM_NDS32_SUSPEND_H 6 + 7 + extern void suspend2ram(void); 8 + extern void cpu_resume(void); 9 + extern unsigned long wake_mask; 10 + 11 + #endif
+1
arch/nds32/include/asm/syscalls.h
··· 7 7 asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op); 8 8 asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len); 9 9 asmlinkage long sys_rt_sigreturn_wrapper(void); 10 + asmlinkage long sys_udftrap(int option); 10 11 11 12 #include <asm-generic/syscalls.h> 12 13
+7
arch/nds32/include/uapi/asm/auxvec.h
··· 4 4 #ifndef __ASM_AUXVEC_H 5 5 #define __ASM_AUXVEC_H 6 6 7 + /* 8 + * This entry gives some information about the FPU initialization 9 + * performed by the kernel. 10 + */ 11 + #define AT_FPUCW 18 /* Used FPU control word. */ 12 + 13 + 7 14 /* VDSO location */ 8 15 #define AT_SYSINFO_EHDR 33 9 16
+14
arch/nds32/include/uapi/asm/sigcontext.h
··· 9 9 * before the signal handler was invoked. Note: only add new entries 10 10 * to the end of the structure. 11 11 */ 12 + struct fpu_struct { 13 + unsigned long long fd_regs[32]; 14 + unsigned long fpcsr; 15 + /* 16 + * UDF_trap is used to recognize whether underflow trap is enabled 17 + * or not. When UDF_trap == 1, this process will be traped and then 18 + * get a SIGFPE signal when encountering an underflow exception. 19 + * UDF_trap is only modified through setfputrap syscall. Therefore, 20 + * UDF_trap needn't be saved or loaded to context in each context 21 + * switch. 22 + */ 23 + unsigned long UDF_trap; 24 + }; 12 25 13 26 struct zol_struct { 14 27 unsigned long nds32_lc; /* $LC */ ··· 67 54 unsigned long fault_address; 68 55 unsigned long used_math_flag; 69 56 /* FPU Registers */ 57 + struct fpu_struct fpu; 70 58 struct zol_struct zol; 71 59 }; 72 60
+13
arch/nds32/include/uapi/asm/udftrap.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2005-2018 Andes Technology Corporation */ 3 + #ifndef _ASM_SETFPUTRAP 4 + #define _ASM_SETFPUTRAP 5 + 6 + /* 7 + * Options for setfputrap system call 8 + */ 9 + #define DISABLE_UDFTRAP 0 /* disable underflow exception trap */ 10 + #define ENABLE_UDFTRAP 1 /* enable underflow exception trap */ 11 + #define GET_UDFTRAP 2 /* only get underflow exception trap status */ 12 + 13 + #endif /* _ASM_SETFPUTRAP */
+2
arch/nds32/include/uapi/asm/unistd.h
··· 9 9 10 10 /* Additional NDS32 specific syscalls. */ 11 11 #define __NR_cacheflush (__NR_arch_specific_syscall) 12 + #define __NR_udftrap (__NR_arch_specific_syscall + 1) 12 13 __SYSCALL(__NR_cacheflush, sys_cacheflush) 14 + __SYSCALL(__NR_udftrap, sys_udftrap)
+5 -2
arch/nds32/kernel/Makefile
··· 4 4 5 5 CPPFLAGS_vmlinux.lds := -DTEXTADDR=$(TEXTADDR) 6 6 AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR) 7 - 8 7 # Object file lists. 9 8 10 9 obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \ ··· 13 14 14 15 obj-$(CONFIG_MODULES) += nds32_ksyms.o module.o 15 16 obj-$(CONFIG_STACKTRACE) += stacktrace.o 17 + obj-$(CONFIG_FPU) += fpu.o 16 18 obj-$(CONFIG_OF) += devtree.o 17 19 obj-$(CONFIG_CACHE_L2) += atl2c.o 18 - 20 + obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o 21 + obj-$(CONFIG_PM) += pm.o sleep.o 19 22 extra-y := head.o vmlinux.lds 23 + 24 + CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp 20 25 21 26 22 27 obj-y += vdso/
+22 -2
arch/nds32/kernel/ex-entry.S
··· 7 7 #include <asm/errno.h> 8 8 #include <asm/asm-offsets.h> 9 9 #include <asm/page.h> 10 + #include <asm/fpu.h> 10 11 11 12 #ifdef CONFIG_HWZOL 12 13 .macro push_zol ··· 16 15 mfusr $r16, $LC 17 16 .endm 18 17 #endif 18 + .macro skip_save_fucop_ctl 19 + #if defined(CONFIG_FPU) 20 + skip_fucop_ctl: 21 + smw.adm $p0, [$sp], $p0, #0x1 22 + j fucop_ctl_done 23 + #endif 24 + .endm 19 25 20 26 .macro save_user_regs 21 - 27 + #if defined(CONFIG_FPU) 28 + sethi $p0, hi20(has_fpu) 29 + lbsi $p0, [$p0+lo12(has_fpu)] 30 + beqz $p0, skip_fucop_ctl 31 + mfsr $p0, $FUCOP_CTL 32 + smw.adm $p0, [$sp], $p0, #0x1 33 + bclr $p0, $p0, #FUCOP_CTL_offCP0EN 34 + mtsr $p0, $FUCOP_CTL 35 + fucop_ctl_done: 36 + /* move $SP to the bottom of pt_regs */ 37 + addi $sp, $sp, -FUCOP_CTL_OFFSET 38 + #else 22 39 smw.adm $sp, [$sp], $sp, #0x1 23 40 /* move $SP to the bottom of pt_regs */ 24 41 addi $sp, $sp, -OSP_OFFSET 42 + #endif 25 43 26 44 /* push $r0 ~ $r25 */ 27 45 smw.bim $r0, [$sp], $r25 ··· 99 79 .long eh_syscall !Syscall 100 80 .long asm_do_IRQ !IRQ 101 81 82 + skip_save_fucop_ctl 102 83 common_exception_handler: 103 84 save_user_regs 104 85 mfsr $p0, $ITYPE ··· 124 103 mtsr $r21, $PSW 125 104 dsb 126 105 jr $p1 127 - 128 106 /* syscall */ 129 107 1: 130 108 addi $p1, $p0, #-NDS32_VECTOR_offEXCEPTION
+11 -2
arch/nds32/kernel/ex-exit.S
··· 8 8 #include <asm/asm-offsets.h> 9 9 #include <asm/thread_info.h> 10 10 #include <asm/current.h> 11 + #include <asm/fpu.h> 11 12 12 13 13 14 ··· 23 22 .macro restore_user_regs_first 24 23 setgie.d 25 24 isb 26 - 25 + #if defined(CONFIG_FPU) 26 + addi $sp, $sp, OSP_OFFSET 27 + lmw.adm $r12, [$sp], $r25, #0x0 28 + sethi $p0, hi20(has_fpu) 29 + lbsi $p0, [$p0+lo12(has_fpu)] 30 + beqz $p0, 2f 31 + mtsr $r25, $FUCOP_CTL 32 + 2: 33 + #else 27 34 addi $sp, $sp, FUCOP_CTL_OFFSET 28 - 29 35 lmw.adm $r12, [$sp], $r24, #0x0 36 + #endif 30 37 mtsr $r12, $SP_USR 31 38 mtsr $r13, $IPC 32 39 #ifdef CONFIG_HWZOL
+5 -3
arch/nds32/kernel/ex-scall.S
··· 19 19 20 20 la $p0, __entry_task 21 21 sw $r1, [$p0] 22 - move $p1, $r0 23 - addi $p1, $p1, #THREAD_CPU_CONTEXT 22 + addi $p1, $r0, #THREAD_CPU_CONTEXT 24 23 smw.bi $r6, [$p1], $r14, #0xb ! push r6~r14, fp, lp, sp 25 24 move $r25, $r1 26 - addi $r1, $r1, #THREAD_CPU_CONTEXT 25 + #if defined(CONFIG_FPU) 26 + call _switch_fpu 27 + #endif 28 + addi $r1, $r25, #THREAD_CPU_CONTEXT 27 29 lmw.bi $r6, [$r1], $r14, #0xb ! pop r6~r14, fp, lp, sp 28 30 ret 29 31
+269
arch/nds32/kernel/fpu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <linux/sched.h> 5 + #include <linux/signal.h> 6 + #include <linux/sched/signal.h> 7 + #include <asm/processor.h> 8 + #include <asm/user.h> 9 + #include <asm/io.h> 10 + #include <asm/bitfield.h> 11 + #include <asm/fpu.h> 12 + 13 + const struct fpu_struct init_fpuregs = { 14 + .fd_regs = {[0 ... 31] = sNAN64}, 15 + .fpcsr = FPCSR_INIT, 16 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 17 + .UDF_trap = 0 18 + #endif 19 + }; 20 + 21 + void save_fpu(struct task_struct *tsk) 22 + { 23 + unsigned int fpcfg, fpcsr; 24 + 25 + enable_fpu(); 26 + fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG); 27 + switch (fpcfg) { 28 + case SP32_DP32_reg: 29 + asm volatile ("fsdi $fd31, [%0+0xf8]\n\t" 30 + "fsdi $fd30, [%0+0xf0]\n\t" 31 + "fsdi $fd29, [%0+0xe8]\n\t" 32 + "fsdi $fd28, [%0+0xe0]\n\t" 33 + "fsdi $fd27, [%0+0xd8]\n\t" 34 + "fsdi $fd26, [%0+0xd0]\n\t" 35 + "fsdi $fd25, [%0+0xc8]\n\t" 36 + "fsdi $fd24, [%0+0xc0]\n\t" 37 + "fsdi $fd23, [%0+0xb8]\n\t" 38 + "fsdi $fd22, [%0+0xb0]\n\t" 39 + "fsdi $fd21, [%0+0xa8]\n\t" 40 + "fsdi $fd20, [%0+0xa0]\n\t" 41 + "fsdi $fd19, [%0+0x98]\n\t" 42 + "fsdi $fd18, [%0+0x90]\n\t" 43 + "fsdi $fd17, [%0+0x88]\n\t" 44 + "fsdi $fd16, [%0+0x80]\n\t" 45 + : /* no output */ 46 + : "r" (&tsk->thread.fpu) 47 + : "memory"); 48 + /* fall through */ 49 + case SP32_DP16_reg: 50 + asm volatile ("fsdi $fd15, [%0+0x78]\n\t" 51 + "fsdi $fd14, [%0+0x70]\n\t" 52 + "fsdi $fd13, [%0+0x68]\n\t" 53 + "fsdi $fd12, [%0+0x60]\n\t" 54 + "fsdi $fd11, [%0+0x58]\n\t" 55 + "fsdi $fd10, [%0+0x50]\n\t" 56 + "fsdi $fd9, [%0+0x48]\n\t" 57 + "fsdi $fd8, [%0+0x40]\n\t" 58 + : /* no output */ 59 + : "r" (&tsk->thread.fpu) 60 + : "memory"); 61 + /* fall through */ 62 + case SP16_DP8_reg: 63 + asm volatile ("fsdi $fd7, [%0+0x38]\n\t" 64 + "fsdi $fd6, [%0+0x30]\n\t" 65 + "fsdi $fd5, [%0+0x28]\n\t" 66 + "fsdi $fd4, [%0+0x20]\n\t" 67 + : /* no output 
*/ 68 + : "r" (&tsk->thread.fpu) 69 + : "memory"); 70 + /* fall through */ 71 + case SP8_DP4_reg: 72 + asm volatile ("fsdi $fd3, [%1+0x18]\n\t" 73 + "fsdi $fd2, [%1+0x10]\n\t" 74 + "fsdi $fd1, [%1+0x8]\n\t" 75 + "fsdi $fd0, [%1+0x0]\n\t" 76 + "fmfcsr %0\n\t" 77 + "swi %0, [%1+0x100]\n\t" 78 + : "=&r" (fpcsr) 79 + : "r"(&tsk->thread.fpu) 80 + : "memory"); 81 + } 82 + disable_fpu(); 83 + } 84 + 85 + void load_fpu(const struct fpu_struct *fpregs) 86 + { 87 + unsigned int fpcfg, fpcsr; 88 + 89 + enable_fpu(); 90 + fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG); 91 + switch (fpcfg) { 92 + case SP32_DP32_reg: 93 + asm volatile ("fldi $fd31, [%0+0xf8]\n\t" 94 + "fldi $fd30, [%0+0xf0]\n\t" 95 + "fldi $fd29, [%0+0xe8]\n\t" 96 + "fldi $fd28, [%0+0xe0]\n\t" 97 + "fldi $fd27, [%0+0xd8]\n\t" 98 + "fldi $fd26, [%0+0xd0]\n\t" 99 + "fldi $fd25, [%0+0xc8]\n\t" 100 + "fldi $fd24, [%0+0xc0]\n\t" 101 + "fldi $fd23, [%0+0xb8]\n\t" 102 + "fldi $fd22, [%0+0xb0]\n\t" 103 + "fldi $fd21, [%0+0xa8]\n\t" 104 + "fldi $fd20, [%0+0xa0]\n\t" 105 + "fldi $fd19, [%0+0x98]\n\t" 106 + "fldi $fd18, [%0+0x90]\n\t" 107 + "fldi $fd17, [%0+0x88]\n\t" 108 + "fldi $fd16, [%0+0x80]\n\t" 109 + : /* no output */ 110 + : "r" (fpregs)); 111 + /* fall through */ 112 + case SP32_DP16_reg: 113 + asm volatile ("fldi $fd15, [%0+0x78]\n\t" 114 + "fldi $fd14, [%0+0x70]\n\t" 115 + "fldi $fd13, [%0+0x68]\n\t" 116 + "fldi $fd12, [%0+0x60]\n\t" 117 + "fldi $fd11, [%0+0x58]\n\t" 118 + "fldi $fd10, [%0+0x50]\n\t" 119 + "fldi $fd9, [%0+0x48]\n\t" 120 + "fldi $fd8, [%0+0x40]\n\t" 121 + : /* no output */ 122 + : "r" (fpregs)); 123 + /* fall through */ 124 + case SP16_DP8_reg: 125 + asm volatile ("fldi $fd7, [%0+0x38]\n\t" 126 + "fldi $fd6, [%0+0x30]\n\t" 127 + "fldi $fd5, [%0+0x28]\n\t" 128 + "fldi $fd4, [%0+0x20]\n\t" 129 + : /* no output */ 130 + : "r" (fpregs)); 131 + /* fall through */ 132 + case SP8_DP4_reg: 133 + asm volatile ("fldi $fd3, [%1+0x18]\n\t" 134 + "fldi $fd2, [%1+0x10]\n\t" 135 + "fldi $fd1, 
[%1+0x8]\n\t" 136 + "fldi $fd0, [%1+0x0]\n\t" 137 + "lwi %0, [%1+0x100]\n\t" 138 + "fmtcsr %0\n\t":"=&r" (fpcsr) 139 + : "r"(fpregs)); 140 + } 141 + disable_fpu(); 142 + } 143 + void store_fpu_for_suspend(void) 144 + { 145 + #ifdef CONFIG_LAZY_FPU 146 + if (last_task_used_math != NULL) 147 + save_fpu(last_task_used_math); 148 + last_task_used_math = NULL; 149 + #else 150 + if (!used_math()) 151 + return; 152 + unlazy_fpu(current); 153 + #endif 154 + clear_fpu(task_pt_regs(current)); 155 + } 156 + inline void do_fpu_context_switch(struct pt_regs *regs) 157 + { 158 + /* Enable to use FPU. */ 159 + 160 + if (!user_mode(regs)) { 161 + pr_err("BUG: FPU is used in kernel mode.\n"); 162 + BUG(); 163 + return; 164 + } 165 + 166 + enable_ptreg_fpu(regs); 167 + #ifdef CONFIG_LAZY_FPU //Lazy FPU is used 168 + if (last_task_used_math == current) 169 + return; 170 + if (last_task_used_math != NULL) 171 + /* Other processes fpu state, save away */ 172 + save_fpu(last_task_used_math); 173 + last_task_used_math = current; 174 + #endif 175 + if (used_math()) { 176 + load_fpu(&current->thread.fpu); 177 + } else { 178 + /* First time FPU user. 
*/ 179 + load_fpu(&init_fpuregs); 180 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 181 + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; 182 + #endif 183 + set_used_math(); 184 + } 185 + 186 + } 187 + 188 + inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo) 189 + { 190 + if (fpcsr & FPCSR_mskOVFT) 191 + *signo = FPE_FLTOVF; 192 + #ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC 193 + else if (fpcsr & FPCSR_mskUDFT) 194 + *signo = FPE_FLTUND; 195 + #endif 196 + else if (fpcsr & FPCSR_mskIVOT) 197 + *signo = FPE_FLTINV; 198 + else if (fpcsr & FPCSR_mskDBZT) 199 + *signo = FPE_FLTDIV; 200 + else if (fpcsr & FPCSR_mskIEXT) 201 + *signo = FPE_FLTRES; 202 + } 203 + 204 + inline void handle_fpu_exception(struct pt_regs *regs) 205 + { 206 + unsigned int fpcsr; 207 + int si_code = 0, si_signo = SIGFPE; 208 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 209 + unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT; 210 + #else 211 + unsigned long redo_except = FPCSR_mskDNIT; 212 + #endif 213 + 214 + lose_fpu(); 215 + fpcsr = current->thread.fpu.fpcsr; 216 + 217 + if (fpcsr & redo_except) { 218 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 219 + if (fpcsr & FPCSR_mskUDFT) 220 + current->thread.fpu.fpcsr &= ~FPCSR_mskIEX; 221 + #endif 222 + si_signo = do_fpuemu(regs, &current->thread.fpu); 223 + fpcsr = current->thread.fpu.fpcsr; 224 + if (!si_signo) 225 + goto done; 226 + } else if (fpcsr & FPCSR_mskRIT) { 227 + if (!user_mode(regs)) 228 + do_exit(SIGILL); 229 + si_signo = SIGILL; 230 + } 231 + 232 + 233 + switch (si_signo) { 234 + case SIGFPE: 235 + fill_sigfpe_signo(fpcsr, &si_code); 236 + break; 237 + case SIGILL: 238 + show_regs(regs); 239 + si_code = ILL_COPROC; 240 + break; 241 + case SIGBUS: 242 + si_code = BUS_ADRERR; 243 + break; 244 + default: 245 + break; 246 + } 247 + 248 + force_sig_fault(si_signo, si_code, 249 + (void __user *)instruction_pointer(regs), current); 250 + done: 251 + own_fpu(); 252 + } 253 + 254 + bool 
do_fpu_exception(unsigned int subtype, struct pt_regs *regs) 255 + { 256 + int done = true; 257 + /* Coprocessor disabled exception */ 258 + if (subtype == FPU_DISABLE_EXCEPTION) { 259 + preempt_disable(); 260 + do_fpu_context_switch(regs); 261 + preempt_enable(); 262 + } 263 + /* Coprocessor exception such as underflow and overflow */ 264 + else if (subtype == FPU_EXCEPTION) 265 + handle_fpu_exception(regs); 266 + else 267 + done = false; 268 + return done; 269 + }
+2 -11
arch/nds32/kernel/head.S
··· 123 123 andi $r0, $r0, MMU_CFG_mskTBS 124 124 srli $r6, $r6, MMU_CFG_offTBW 125 125 srli $r0, $r0, MMU_CFG_offTBS 126 - /* 127 - * we just map the kernel to the maximum way - 1 of tlb 128 - * reserver one way for UART VA mapping 129 - * it will cause page fault if UART mapping cover the kernel mapping 130 - * 131 - * direct mapping is not supported now. 132 - */ 133 - li $r2, 't' 134 - beqz $r6, __error ! MMU_CFG.TBW = 0 is direct mappin 126 + addi $r6, $r6, #0x1 ! MMU_CFG.TBW value -> meaning 135 127 addi $r0, $r0, #0x2 ! MMU_CFG.TBS value -> meaning 136 128 sll $r0, $r6, $r0 ! entries = k-way * n-set 137 129 mul $r6, $r0, $r5 ! max size = entries * page size 138 130 /* check kernel image size */ 139 131 la $r3, (_end - PAGE_OFFSET) 140 - li $r2, 's' 141 132 bgt $r3, $r6, __error 142 133 143 134 li $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr) ··· 151 160 #endif 152 161 mtsr $r3, $TLB_MISC 153 162 154 - mfsr $r0, $MISC_CTL ! Enable BTB and RTP and shadow sp 163 + mfsr $r0, $MISC_CTL ! Enable BTB, RTP, shadow sp, and HW_PRE 155 164 ori $r0, $r0, #MISC_init 156 165 mtsr $r0, $MISC_CTL 157 166
+1522
arch/nds32/kernel/perf_event_cpu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2008-2017 Andes Technology Corporation 4 + * 5 + * Reference ARMv7: Jean Pihet <jpihet@mvista.com> 6 + * 2010 (c) MontaVista Software, LLC. 7 + */ 8 + 9 + #include <linux/perf_event.h> 10 + #include <linux/bitmap.h> 11 + #include <linux/export.h> 12 + #include <linux/kernel.h> 13 + #include <linux/of.h> 14 + #include <linux/platform_device.h> 15 + #include <linux/slab.h> 16 + #include <linux/spinlock.h> 17 + #include <linux/pm_runtime.h> 18 + #include <linux/ftrace.h> 19 + #include <linux/uaccess.h> 20 + #include <linux/sched/clock.h> 21 + #include <linux/percpu-defs.h> 22 + 23 + #include <asm/pmu.h> 24 + #include <asm/irq_regs.h> 25 + #include <asm/nds32.h> 26 + #include <asm/stacktrace.h> 27 + #include <asm/perf_event.h> 28 + #include <nds32_intrinsic.h> 29 + 30 + /* Set at runtime when we know what CPU type we are. */ 31 + static struct nds32_pmu *cpu_pmu; 32 + 33 + static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); 34 + static void nds32_pmu_start(struct nds32_pmu *cpu_pmu); 35 + static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu); 36 + static struct platform_device_id cpu_pmu_plat_device_ids[] = { 37 + {.name = "nds32-pfm"}, 38 + {}, 39 + }; 40 + 41 + static int nds32_pmu_map_cache_event(const unsigned int (*cache_map) 42 + [PERF_COUNT_HW_CACHE_MAX] 43 + [PERF_COUNT_HW_CACHE_OP_MAX] 44 + [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config) 45 + { 46 + unsigned int cache_type, cache_op, cache_result, ret; 47 + 48 + cache_type = (config >> 0) & 0xff; 49 + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 50 + return -EINVAL; 51 + 52 + cache_op = (config >> 8) & 0xff; 53 + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 54 + return -EINVAL; 55 + 56 + cache_result = (config >> 16) & 0xff; 57 + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 58 + return -EINVAL; 59 + 60 + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; 61 + 62 + if (ret == CACHE_OP_UNSUPPORTED) 63 + return -ENOENT; 64 
+ 65 + return ret; 66 + } 67 + 68 + static int 69 + nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX], 70 + u64 config) 71 + { 72 + int mapping; 73 + 74 + if (config >= PERF_COUNT_HW_MAX) 75 + return -ENOENT; 76 + 77 + mapping = (*event_map)[config]; 78 + return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; 79 + } 80 + 81 + static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config) 82 + { 83 + int ev_type = (int)(config & raw_event_mask); 84 + int idx = config >> 8; 85 + 86 + switch (idx) { 87 + case 0: 88 + ev_type = PFM_OFFSET_MAGIC_0 + ev_type; 89 + if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE) 90 + return -ENOENT; 91 + break; 92 + case 1: 93 + ev_type = PFM_OFFSET_MAGIC_1 + ev_type; 94 + if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE) 95 + return -ENOENT; 96 + break; 97 + case 2: 98 + ev_type = PFM_OFFSET_MAGIC_2 + ev_type; 99 + if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE) 100 + return -ENOENT; 101 + break; 102 + default: 103 + return -ENOENT; 104 + } 105 + 106 + return ev_type; 107 + } 108 + 109 + int 110 + nds32_pmu_map_event(struct perf_event *event, 111 + const unsigned int (*event_map)[PERF_COUNT_HW_MAX], 112 + const unsigned int (*cache_map) 113 + [PERF_COUNT_HW_CACHE_MAX] 114 + [PERF_COUNT_HW_CACHE_OP_MAX] 115 + [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask) 116 + { 117 + u64 config = event->attr.config; 118 + 119 + switch (event->attr.type) { 120 + case PERF_TYPE_HARDWARE: 121 + return nds32_pmu_map_hw_event(event_map, config); 122 + case PERF_TYPE_HW_CACHE: 123 + return nds32_pmu_map_cache_event(cache_map, config); 124 + case PERF_TYPE_RAW: 125 + return nds32_pmu_map_raw_event(raw_event_mask, config); 126 + } 127 + 128 + return -ENOENT; 129 + } 130 + 131 + static int nds32_spav3_map_event(struct perf_event *event) 132 + { 133 + return nds32_pmu_map_event(event, &nds32_pfm_perf_map, 134 + &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK); 135 + } 136 + 137 + 
/*
 * Read PFM_CTL and clear all pending counter-overflow flags.  The OVF
 * bits are write-1-to-clear, so writing the read-back value with the
 * OVF bits set acknowledges every pending overflow.  Returns the value
 * read *before* clearing, so the caller can see which counters fired.
 */
static inline u32 nds32_pfm_getreset_flags(void)
{
	/* Read overflow status */
	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 old_val = val;

	/* Write overflow bit to clear status, and others keep it 0 */
	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];

	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);

	return old_val;
}

/* Non-zero if any of the three counters' overflow flags is set in @pfm. */
static inline int nds32_pfm_has_overflowed(u32 pfm)
{
	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];

	return pfm & ov_flag;
}

/* Non-zero if counter @idx (0..2) has its overflow flag set in @pfm. */
static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
{
	u32 mask = 0;

	switch (idx) {
	case 0:
		mask = PFM_CTL_OVF[0];
		break;
	case 1:
		mask = PFM_CTL_OVF[1];
		break;
	case 2:
		mask = PFM_CTL_OVF[2];
		break;
	default:
		pr_err("%s index wrong\n", __func__);
		break;
	}
	return pfm & mask;
}

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 *
 * The counter is programmed to (-left) so it overflows (and raises the
 * PMU interrupt) after exactly "left" more events.  Returns 1 when a
 * new sampling period was started, 0 otherwise.
 */
int nds32_pmu_event_set_period(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
	if (unlikely(period != hwc->last_period))
		left = period - (hwc->last_period - left);

	/* Far behind: restart from a full period. */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	/* Slightly behind: carry the remainder into the next period. */
	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)nds32_pmu->max_period)
		left = nds32_pmu->max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);

	perf_event_update_userpage(event);

	return ret;
}

/*
 * PMU overflow interrupt handler.  All three counters share one IRQ,
 * so every active event is checked against the latched overflow flags.
 * Counting is paused (stop/start) around the sample processing so the
 * handler itself is not measured.
 */
static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
{
	u32 pfm;
	struct perf_sample_data data;
	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
	struct pt_regs *regs;
	int idx;
	/*
	 * Get and reset the IRQ flags
	 */
	pfm = nds32_pfm_getreset_flags();

	/*
	 * Did an overflow occur?
	 */
	if (!nds32_pfm_has_overflowed(pfm))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	nds32_pmu_stop(cpu_pmu);
	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
			continue;

		hwc = &event->hw;
		nds32_pmu_event_update(event);
		perf_sample_data_init(&data, 0, hwc->last_period);
		if (!nds32_pmu_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			cpu_pmu->disable(event);
	}
	nds32_pmu_start(cpu_pmu);
	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

/* True when @idx names one of this PMU's counters. */
static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
{
	return ((idx >= 0) && (idx < cpu_pmu->num_events));
}

/*
 * Clear the enable bit of counter @idx in PFM_CTL.  The OVF bits are
 * write-1-to-clear, so they are masked out of the writeback to avoid
 * accidentally acknowledging pending overflows.
 */
static inline int nds32_pfm_disable_counter(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_EN[idx];
	val &= ~mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}

/*
 * Add an event filter to a given event.
 * Translates attr->exclude_user / attr->exclude_kernel into the per-
 * counter KU/KS control bits and ORs them into event->config_base.
 * Returns 0 (no unsupported filters exist on this PMU).
 */
static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
				      struct perf_event_attr *attr)
{
	unsigned long config_base = 0;
	int idx = event->idx;
	unsigned long no_kernel_tracing = 0;
	unsigned long no_user_tracing = 0;
	/* If index is -1, do not do anything */
	if (idx == -1)
		return 0;

	no_kernel_tracing = PFM_CTL_KS[idx];
	no_user_tracing = PFM_CTL_KU[idx];
	/*
	 * Default: enable both kernel and user mode tracing.
	 */
	if (attr->exclude_user)
		config_base |= no_user_tracing;

	if (attr->exclude_kernel)
		config_base |= no_kernel_tracing;

	/*
	 * Install the filter into config_base as this is used to
	 * construct the event type.
	 */
	event->config_base |= config_base;
	return 0;
}

/*
 * Program counter @idx to count event @evnum.  @evnum carries both the
 * mode-exclusion bits (KS/KU, copied into PFM_CTL) and, in its low
 * SOFTWARE_EVENT_MASK bits, the linearized event number, which is
 * converted back to the raw hardware encoding before being written to
 * the counter's SEL field.  OVF bits are masked from the writeback
 * because they are write-1-to-clear.
 */
static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
{
	u32 offset = 0;
	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 ev_mask = 0;
	u32 no_kernel_mask = 0;
	u32 no_user_mask = 0;
	u32 val;

	offset = PFM_CTL_OFFSEL[idx];
	/* Clear previous mode selection, and write new one */
	no_kernel_mask = PFM_CTL_KS[idx];
	no_user_mask = PFM_CTL_KU[idx];
	ori_val &= ~no_kernel_mask;
	ori_val &= ~no_user_mask;
	if (evnum & no_kernel_mask)
		ori_val |= no_kernel_mask;

	if (evnum & no_user_mask)
		ori_val |= no_user_mask;

	/* Clear previous event selection */
	ev_mask = PFM_CTL_SEL[idx];
	ori_val &= ~ev_mask;
	evnum &= SOFTWARE_EVENT_MASK;

	/* undo the linear mapping */
	evnum = get_converted_evet_hw_num(evnum);
	val = ori_val | (evnum << offset);
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
}

/* Set the enable bit of counter @idx (OVF bits masked: write-1-to-clear). */
static inline int nds32_pfm_enable_counter(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_EN[idx];
	val |= mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}

/* Enable the overflow interrupt for counter @idx. */
static inline int nds32_pfm_enable_intens(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_IE[idx];
	val |= mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}

/* Disable the overflow interrupt for counter @idx. */
static inline int nds32_pfm_disable_intens(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_IE[idx];
	val &= ~mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}

/* True if the event asks for any mode exclusion at all. */
static int event_requires_mode_exclusion(struct perf_event_attr *attr)
{
	/* Other modes NDS32 does not support */
	return attr->exclude_user || attr->exclude_kernel;
}

/*
 * Program and enable the hardware counter backing @event: select the
 * event, enable its overflow interrupt, then enable the counter, all
 * under the per-CPU pmu_lock.
 */
static void nds32_pmu_enable_event(struct perf_event *event)
{
	unsigned long flags;
	unsigned int evnum = 0;
	struct hw_perf_event *hwc = &event->hw;
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
		return;
	}

	/*
	 * Enable counter and interrupt, and set the counter to count
	 * the event that we're interested in.
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	nds32_pfm_disable_counter(idx);

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!cpu_pmu->set_event_filter ||
	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_notice
		("NDS32 performance counters do not support mode exclusion\n");
		hwc->config_base = 0;
	}
	/* Write event */
	evnum = hwc->config_base;
	nds32_pfm_write_evtsel(idx, evnum);

	/*
	 * Enable interrupt for this counter
	 */
	nds32_pfm_enable_intens(idx);

	/*
	 * Enable counter
	 */
	nds32_pfm_enable_counter(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

/*
 * Stop the hardware counter backing @event and mask its overflow
 * interrupt, under the per-CPU pmu_lock.
 */
static void nds32_pmu_disable_event(struct perf_event *event)
{
	unsigned long flags;
	struct hw_perf_event *hwc = &event->hw;
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
		return;
	}

	/*
	 * Disable counter and interrupt
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	nds32_pfm_disable_counter(idx);

	/*
	 * Disable interrupt for this counter
	 */
	nds32_pfm_disable_intens(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

/*
 * Read the raw value of the hardware counter backing @event
 * (PFMC0..PFMC2, selected by hwc->idx).  Returns 0 on a bad index.
 */
static inline u32 nds32_pmu_read_counter(struct perf_event *event)
{
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	u32 count = 0;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU reading wrong counter %d\n", idx);
	} else {
		switch (idx) {
		case PFMC0:
			count = __nds32__mfsr(NDS32_SR_PFMC0);
			break;
		case PFMC1:
			count = __nds32__mfsr(NDS32_SR_PFMC1);
			break;
		case PFMC2:
			count = __nds32__mfsr(NDS32_SR_PFMC2);
			break;
		default:
			pr_err
			    ("%s: CPU has no performance counters %d\n",
			     __func__, idx);
		}
	}
	return count;
}

/*
 * Write @value into the hardware counter backing @event (used to set
 * the next overflow period).
 */
static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
{
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU writing wrong counter %d\n", idx);
	} else {
		switch (idx) {
		case PFMC0:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
			break;
		case PFMC1:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
			break;
		case PFMC2:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
			break;
		default:
			pr_err
			    ("%s: CPU has no performance counters %d\n",
			     __func__, idx);
		}
	}
}

/*
 * Allocate a hardware counter for @event, marking it in cpuc->used_mask.
 * Returns the counter index or -EAGAIN when all suitable counters are
 * taken.
 */
static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
				   struct perf_event *event)
{
	int idx;
	struct hw_perf_event *hwc = &event->hw;
	/*
	 * Current implementation maps cycles, instruction count and cache-miss
	 * to specific counter.
	 * However, multiple of the 3 counters are able to count these events.
	 *
	 *
	 * SOFTWARE_EVENT_MASK mask for getting event num ,
	 * This is defined by Jia-Rung, you can change the policies.
	 * However, do not exceed 8 bits. This is hardware specific.
	 * The last number is SPAv3_2_SEL_LAST.
	 */
	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;

	idx = get_converted_event_idx(evtype);
	/*
	 * Try to get the counter for corresponding event
	 * (the preferred counter first, then any other that can also
	 * count this event).
	 */
	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
			return NDS32_IDX_COUNTER0;
		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
			return NDS32_IDX_COUNTER1;
	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
			return NDS32_IDX_COUNTER1;
		else if (!test_and_set_bit
			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
			return NDS32_IDX_CYCLE_COUNTER;
	} else {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
	}
	return -EAGAIN;
}

566 + */ 567 + unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK; 568 + 569 + idx = get_converted_event_idx(evtype); 570 + /* 571 + * Try to get the counter for correpsonding event 572 + */ 573 + if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { 574 + if (!test_and_set_bit(idx, cpuc->used_mask)) 575 + return idx; 576 + if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) 577 + return NDS32_IDX_COUNTER0; 578 + if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) 579 + return NDS32_IDX_COUNTER1; 580 + } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { 581 + if (!test_and_set_bit(idx, cpuc->used_mask)) 582 + return idx; 583 + else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) 584 + return NDS32_IDX_COUNTER1; 585 + else if (!test_and_set_bit 586 + (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) 587 + return NDS32_IDX_CYCLE_COUNTER; 588 + } else { 589 + if (!test_and_set_bit(idx, cpuc->used_mask)) 590 + return idx; 591 + } 592 + return -EAGAIN; 593 + } 594 + 595 + static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) 596 + { 597 + unsigned long flags; 598 + unsigned int val; 599 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 600 + 601 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 602 + 603 + /* Enable all counters , NDS PFM has 3 counters */ 604 + val = __nds32__mfsr(NDS32_SR_PFM_CTL); 605 + val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); 606 + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); 607 + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); 608 + 609 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 610 + } 611 + 612 + static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu) 613 + { 614 + unsigned long flags; 615 + unsigned int val; 616 + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); 617 + 618 + raw_spin_lock_irqsave(&events->pmu_lock, flags); 619 + 620 + /* Disable all counters , NDS PFM has 3 counters */ 621 + val = __nds32__mfsr(NDS32_SR_PFM_CTL); 622 + val &= ~(PFM_CTL_EN[0] | 
PFM_CTL_EN[1] | PFM_CTL_EN[2]); 623 + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); 624 + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); 625 + 626 + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 627 + } 628 + 629 + static void nds32_pmu_reset(void *info) 630 + { 631 + u32 val = 0; 632 + 633 + val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); 634 + __nds32__mtsr(val, NDS32_SR_PFM_CTL); 635 + __nds32__mtsr(0, NDS32_SR_PFM_CTL); 636 + __nds32__mtsr(0, NDS32_SR_PFMC0); 637 + __nds32__mtsr(0, NDS32_SR_PFMC1); 638 + __nds32__mtsr(0, NDS32_SR_PFMC2); 639 + } 640 + 641 + static void nds32_pmu_init(struct nds32_pmu *cpu_pmu) 642 + { 643 + cpu_pmu->handle_irq = nds32_pmu_handle_irq; 644 + cpu_pmu->enable = nds32_pmu_enable_event; 645 + cpu_pmu->disable = nds32_pmu_disable_event; 646 + cpu_pmu->read_counter = nds32_pmu_read_counter; 647 + cpu_pmu->write_counter = nds32_pmu_write_counter; 648 + cpu_pmu->get_event_idx = nds32_pmu_get_event_idx; 649 + cpu_pmu->start = nds32_pmu_start; 650 + cpu_pmu->stop = nds32_pmu_stop; 651 + cpu_pmu->reset = nds32_pmu_reset; 652 + cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */ 653 + }; 654 + 655 + static u32 nds32_read_num_pfm_events(void) 656 + { 657 + /* NDS32 SPAv3 PMU support 3 counter */ 658 + return 3; 659 + } 660 + 661 + static int device_pmu_init(struct nds32_pmu *cpu_pmu) 662 + { 663 + nds32_pmu_init(cpu_pmu); 664 + /* 665 + * This name should be devive-specific name, whatever you like :) 666 + * I think "PMU" will be a good generic name. 667 + */ 668 + cpu_pmu->name = "nds32v3-pmu"; 669 + cpu_pmu->map_event = nds32_spav3_map_event; 670 + cpu_pmu->num_events = nds32_read_num_pfm_events(); 671 + cpu_pmu->set_event_filter = nds32_pmu_set_event_filter; 672 + return 0; 673 + } 674 + 675 + /* 676 + * CPU PMU identification and probing. 
677 + */ 678 + static int probe_current_pmu(struct nds32_pmu *pmu) 679 + { 680 + int ret; 681 + 682 + get_cpu(); 683 + ret = -ENODEV; 684 + /* 685 + * If ther are various CPU types with its own PMU, initialize with 686 + * 687 + * the corresponding one 688 + */ 689 + device_pmu_init(pmu); 690 + put_cpu(); 691 + return ret; 692 + } 693 + 694 + static void nds32_pmu_enable(struct pmu *pmu) 695 + { 696 + struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); 697 + struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); 698 + int enabled = bitmap_weight(hw_events->used_mask, 699 + nds32_pmu->num_events); 700 + 701 + if (enabled) 702 + nds32_pmu->start(nds32_pmu); 703 + } 704 + 705 + static void nds32_pmu_disable(struct pmu *pmu) 706 + { 707 + struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); 708 + 709 + nds32_pmu->stop(nds32_pmu); 710 + } 711 + 712 + static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu) 713 + { 714 + nds32_pmu->free_irq(nds32_pmu); 715 + pm_runtime_put_sync(&nds32_pmu->plat_device->dev); 716 + } 717 + 718 + static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev) 719 + { 720 + struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev; 721 + int ret; 722 + u64 start_clock, finish_clock; 723 + 724 + start_clock = local_clock(); 725 + ret = nds32_pmu->handle_irq(irq, dev); 726 + finish_clock = local_clock(); 727 + 728 + perf_sample_event_took(finish_clock - start_clock); 729 + return ret; 730 + } 731 + 732 + static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu) 733 + { 734 + int err; 735 + struct platform_device *pmu_device = nds32_pmu->plat_device; 736 + 737 + if (!pmu_device) 738 + return -ENODEV; 739 + 740 + pm_runtime_get_sync(&pmu_device->dev); 741 + err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq); 742 + if (err) { 743 + nds32_pmu_release_hardware(nds32_pmu); 744 + return err; 745 + } 746 + 747 + return 0; 748 + } 749 + 750 + static int 751 + validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, 
752 + struct perf_event *event) 753 + { 754 + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); 755 + 756 + if (is_software_event(event)) 757 + return 1; 758 + 759 + if (event->pmu != pmu) 760 + return 0; 761 + 762 + if (event->state < PERF_EVENT_STATE_OFF) 763 + return 1; 764 + 765 + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) 766 + return 1; 767 + 768 + return nds32_pmu->get_event_idx(hw_events, event) >= 0; 769 + } 770 + 771 + static int validate_group(struct perf_event *event) 772 + { 773 + struct perf_event *sibling, *leader = event->group_leader; 774 + struct pmu_hw_events fake_pmu; 775 + DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS); 776 + /* 777 + * Initialize the fake PMU. We only need to populate the 778 + * used_mask for the purposes of validation. 779 + */ 780 + memset(fake_used_mask, 0, sizeof(fake_used_mask)); 781 + 782 + if (!validate_event(event->pmu, &fake_pmu, leader)) 783 + return -EINVAL; 784 + 785 + for_each_sibling_event(sibling, leader) { 786 + if (!validate_event(event->pmu, &fake_pmu, sibling)) 787 + return -EINVAL; 788 + } 789 + 790 + if (!validate_event(event->pmu, &fake_pmu, event)) 791 + return -EINVAL; 792 + 793 + return 0; 794 + } 795 + 796 + static int __hw_perf_event_init(struct perf_event *event) 797 + { 798 + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); 799 + struct hw_perf_event *hwc = &event->hw; 800 + int mapping; 801 + 802 + mapping = nds32_pmu->map_event(event); 803 + 804 + if (mapping < 0) { 805 + pr_debug("event %x:%llx not supported\n", event->attr.type, 806 + event->attr.config); 807 + return mapping; 808 + } 809 + 810 + /* 811 + * We don't assign an index until we actually place the event onto 812 + * hardware. Use -1 to signify that we haven't decided where to put it 813 + * yet. For SMP systems, each core has it's own PMU so we can't do any 814 + * clever allocation or constraints checking at this point. 
/*
 * pmu::event_init — reserve the PMU IRQ on the first active event
 * (refcounted via nds32_pmu->active_events) and run the second-stage
 * init.  Returns -ENOENT for configs this PMU cannot map so the core
 * can try another PMU.
 */
static int nds32_pmu_event_init(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	int err = 0;
	atomic_t *active_events = &nds32_pmu->active_events;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (nds32_pmu->map_event(event) == -ENOENT)
		return -ENOENT;

	/*
	 * 0 -> 1 transition must reserve the hardware (IRQ) first;
	 * inc_not_zero fails only when the count was 0.
	 */
	if (!atomic_inc_not_zero(active_events)) {
		if (atomic_read(active_events) == 0) {
			/* Register irq handler */
			err = nds32_pmu_reserve_hardware(nds32_pmu);
		}

		if (!err)
			atomic_inc(active_events);
	}

	if (err)
		return err;

	err = __hw_perf_event_init(event);

	return err;
}

/*
 * pmu::start — program a fresh period and enable the counter.
 */
static void nds32_start(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	/*
	 * NDS pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/* Set the period for the event. */
	nds32_pmu_event_set_period(event);

	nds32_pmu->enable(event);
}

/*
 * pmu::add — claim a hardware counter for @event and optionally start
 * it.  Returns a negative errno when no suitable counter is free.
 */
static int nds32_pmu_add(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	perf_pmu_disable(event->pmu);

	/* If we don't have a space for the counter then finish early. */
	idx = nds32_pmu->get_event_idx(hw_events, event);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	nds32_pmu->disable(event);
	hw_events->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		nds32_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

/*
 * Fold the hardware counter's progress since prev_count into
 * event->count / period_left.  The cmpxchg loop makes the update safe
 * against a concurrent update (e.g. from the overflow IRQ).
 * Returns the raw counter value that was read.
 */
u64 nds32_pmu_event_update(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = nds32_pmu->read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count) {
		goto again;
	}
	/*
	 * Whether overflow or not, "unsigned subtraction"
	 * will always get their delta
	 */
	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

/*
 * pmu::stop — disable the counter and fold in its final value.
 * NDS pmu always has to update the counter, so ignore
 * PERF_EF_UPDATE, see comments in nds32_start().
 */
static void nds32_stop(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		nds32_pmu->disable(event);
		nds32_pmu_event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

976 + */ 977 + if (!(hwc->state & PERF_HES_STOPPED)) { 978 + nds32_pmu->disable(event); 979 + nds32_pmu_event_update(event); 980 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 981 + } 982 + } 983 + 984 + static void nds32_pmu_del(struct perf_event *event, int flags) 985 + { 986 + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); 987 + struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); 988 + struct hw_perf_event *hwc = &event->hw; 989 + int idx = hwc->idx; 990 + 991 + nds32_stop(event, PERF_EF_UPDATE); 992 + hw_events->events[idx] = NULL; 993 + clear_bit(idx, hw_events->used_mask); 994 + 995 + perf_event_update_userpage(event); 996 + } 997 + 998 + static void nds32_pmu_read(struct perf_event *event) 999 + { 1000 + nds32_pmu_event_update(event); 1001 + } 1002 + 1003 + /* Please refer to SPAv3 for more hardware specific details */ 1004 + PMU_FORMAT_ATTR(event, "config:0-63"); 1005 + 1006 + static struct attribute *nds32_arch_formats_attr[] = { 1007 + &format_attr_event.attr, 1008 + NULL, 1009 + }; 1010 + 1011 + static struct attribute_group nds32_pmu_format_group = { 1012 + .name = "format", 1013 + .attrs = nds32_arch_formats_attr, 1014 + }; 1015 + 1016 + static ssize_t nds32_pmu_cpumask_show(struct device *dev, 1017 + struct device_attribute *attr, 1018 + char *buf) 1019 + { 1020 + return 0; 1021 + } 1022 + 1023 + static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL); 1024 + 1025 + static struct attribute *nds32_pmu_common_attrs[] = { 1026 + &dev_attr_cpus.attr, 1027 + NULL, 1028 + }; 1029 + 1030 + static struct attribute_group nds32_pmu_common_group = { 1031 + .attrs = nds32_pmu_common_attrs, 1032 + }; 1033 + 1034 + static const struct attribute_group *nds32_pmu_attr_groups[] = { 1035 + &nds32_pmu_format_group, 1036 + &nds32_pmu_common_group, 1037 + NULL, 1038 + }; 1039 + 1040 + static void nds32_init(struct nds32_pmu *nds32_pmu) 1041 + { 1042 + atomic_set(&nds32_pmu->active_events, 0); 1043 + 1044 + nds32_pmu->pmu = (struct pmu) { 
1045 + .pmu_enable = nds32_pmu_enable, 1046 + .pmu_disable = nds32_pmu_disable, 1047 + .attr_groups = nds32_pmu_attr_groups, 1048 + .event_init = nds32_pmu_event_init, 1049 + .add = nds32_pmu_add, 1050 + .del = nds32_pmu_del, 1051 + .start = nds32_start, 1052 + .stop = nds32_stop, 1053 + .read = nds32_pmu_read, 1054 + }; 1055 + } 1056 + 1057 + int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type) 1058 + { 1059 + nds32_init(nds32_pmu); 1060 + pm_runtime_enable(&nds32_pmu->plat_device->dev); 1061 + pr_info("enabled with %s PMU driver, %d counters available\n", 1062 + nds32_pmu->name, nds32_pmu->num_events); 1063 + return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type); 1064 + } 1065 + 1066 + static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) 1067 + { 1068 + return this_cpu_ptr(&cpu_hw_events); 1069 + } 1070 + 1071 + static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler) 1072 + { 1073 + int err, irq, irqs; 1074 + struct platform_device *pmu_device = cpu_pmu->plat_device; 1075 + 1076 + if (!pmu_device) 1077 + return -ENODEV; 1078 + 1079 + irqs = min(pmu_device->num_resources, num_possible_cpus()); 1080 + if (irqs < 1) { 1081 + pr_err("no irqs for PMUs defined\n"); 1082 + return -ENODEV; 1083 + } 1084 + 1085 + irq = platform_get_irq(pmu_device, 0); 1086 + err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm", 1087 + cpu_pmu); 1088 + if (err) { 1089 + pr_err("unable to request IRQ%d for NDS PMU counters\n", 1090 + irq); 1091 + return err; 1092 + } 1093 + return 0; 1094 + } 1095 + 1096 + static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu) 1097 + { 1098 + int irq; 1099 + struct platform_device *pmu_device = cpu_pmu->plat_device; 1100 + 1101 + irq = platform_get_irq(pmu_device, 0); 1102 + if (irq >= 0) 1103 + free_irq(irq, cpu_pmu); 1104 + } 1105 + 1106 + static void cpu_pmu_init(struct nds32_pmu *cpu_pmu) 1107 + { 1108 + int cpu; 1109 + struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); 1110 + 1111 + 
raw_spin_lock_init(&events->pmu_lock); 1112 + 1113 + cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; 1114 + cpu_pmu->request_irq = cpu_pmu_request_irq; 1115 + cpu_pmu->free_irq = cpu_pmu_free_irq; 1116 + 1117 + /* Ensure the PMU has sane values out of reset. */ 1118 + if (cpu_pmu->reset) 1119 + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); 1120 + } 1121 + 1122 + const static struct of_device_id cpu_pmu_of_device_ids[] = { 1123 + {.compatible = "andestech,nds32v3-pmu", 1124 + .data = device_pmu_init}, 1125 + {}, 1126 + }; 1127 + 1128 + static int cpu_pmu_device_probe(struct platform_device *pdev) 1129 + { 1130 + const struct of_device_id *of_id; 1131 + int (*init_fn)(struct nds32_pmu *nds32_pmu); 1132 + struct device_node *node = pdev->dev.of_node; 1133 + struct nds32_pmu *pmu; 1134 + int ret = -ENODEV; 1135 + 1136 + if (cpu_pmu) { 1137 + pr_notice("[perf] attempt to register multiple PMU devices!\n"); 1138 + return -ENOSPC; 1139 + } 1140 + 1141 + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); 1142 + if (!pmu) 1143 + return -ENOMEM; 1144 + 1145 + of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node); 1146 + if (node && of_id) { 1147 + init_fn = of_id->data; 1148 + ret = init_fn(pmu); 1149 + } else { 1150 + ret = probe_current_pmu(pmu); 1151 + } 1152 + 1153 + if (ret) { 1154 + pr_notice("[perf] failed to probe PMU!\n"); 1155 + goto out_free; 1156 + } 1157 + 1158 + cpu_pmu = pmu; 1159 + cpu_pmu->plat_device = pdev; 1160 + cpu_pmu_init(cpu_pmu); 1161 + ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW); 1162 + 1163 + if (!ret) 1164 + return 0; 1165 + 1166 + out_free: 1167 + pr_notice("[perf] failed to register PMU devices!\n"); 1168 + kfree(pmu); 1169 + return ret; 1170 + } 1171 + 1172 + static struct platform_driver cpu_pmu_driver = { 1173 + .driver = { 1174 + .name = "nds32-pfm", 1175 + .of_match_table = cpu_pmu_of_device_ids, 1176 + }, 1177 + .probe = cpu_pmu_device_probe, 1178 + .id_table = cpu_pmu_plat_device_ids, 1179 + }; 1180 + 1181 + static int __init 
register_pmu_driver(void) 1182 + { 1183 + int err = 0; 1184 + 1185 + err = platform_driver_register(&cpu_pmu_driver); 1186 + if (err) 1187 + pr_notice("[perf] PMU initialization failed\n"); 1188 + else 1189 + pr_notice("[perf] PMU initialization done\n"); 1190 + 1191 + return err; 1192 + } 1193 + 1194 + device_initcall(register_pmu_driver); 1195 + 1196 + /* 1197 + * References: arch/nds32/kernel/traps.c:__dump() 1198 + * You will need to know the NDS ABI first. 1199 + */ 1200 + static int unwind_frame_kernel(struct stackframe *frame) 1201 + { 1202 + int graph = 0; 1203 + #ifdef CONFIG_FRAME_POINTER 1204 + /* 0x3 means misalignment */ 1205 + if (!kstack_end((void *)frame->fp) && 1206 + !((unsigned long)frame->fp & 0x3) && 1207 + ((unsigned long)frame->fp >= TASK_SIZE)) { 1208 + /* 1209 + * The array index is based on the ABI, the below graph 1210 + * illustrate the reasons. 1211 + * Function call procedure: "smw" and "lmw" will always 1212 + * update SP and FP for you automatically. 1213 + * 1214 + * Stack Relative Address 1215 + * | | 0 1216 + * ---- 1217 + * |LP| <-- SP(before smw) <-- FP(after smw) -1 1218 + * ---- 1219 + * |FP| -2 1220 + * ---- 1221 + * | | <-- SP(after smw) -3 1222 + */ 1223 + frame->lp = ((unsigned long *)frame->fp)[-1]; 1224 + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; 1225 + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ 1226 + if (__kernel_text_address(frame->lp)) 1227 + frame->lp = ftrace_graph_ret_addr 1228 + (NULL, &graph, frame->lp, NULL); 1229 + 1230 + return 0; 1231 + } else { 1232 + return -EPERM; 1233 + } 1234 + #else 1235 + /* 1236 + * You can refer to arch/nds32/kernel/traps.c:__dump() 1237 + * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". 1238 + * And, the "sp" is not always correct. 
1239 + * 1240 + * Stack Relative Address 1241 + * | | 0 1242 + * ---- 1243 + * |LP| <-- SP(before smw) -1 1244 + * ---- 1245 + * | | <-- SP(after smw) -2 1246 + * ---- 1247 + */ 1248 + if (!kstack_end((void *)frame->sp)) { 1249 + frame->lp = ((unsigned long *)frame->sp)[1]; 1250 + /* TODO: How to deal with the value in first 1251 + * "sp" is not correct? 1252 + */ 1253 + if (__kernel_text_address(frame->lp)) 1254 + frame->lp = ftrace_graph_ret_addr 1255 + (tsk, &graph, frame->lp, NULL); 1256 + 1257 + frame->sp = ((unsigned long *)frame->sp) + 1; 1258 + 1259 + return 0; 1260 + } else { 1261 + return -EPERM; 1262 + } 1263 + #endif 1264 + } 1265 + 1266 + static void notrace 1267 + walk_stackframe(struct stackframe *frame, 1268 + int (*fn_record)(struct stackframe *, void *), 1269 + void *data) 1270 + { 1271 + while (1) { 1272 + int ret; 1273 + 1274 + if (fn_record(frame, data)) 1275 + break; 1276 + 1277 + ret = unwind_frame_kernel(frame); 1278 + if (ret < 0) 1279 + break; 1280 + } 1281 + } 1282 + 1283 + /* 1284 + * Gets called by walk_stackframe() for every stackframe. This will be called 1285 + * whist unwinding the stackframe and is like a subroutine return so we use 1286 + * the PC. 1287 + */ 1288 + static int callchain_trace(struct stackframe *fr, void *data) 1289 + { 1290 + struct perf_callchain_entry_ctx *entry = data; 1291 + 1292 + perf_callchain_store(entry, fr->lp); 1293 + return 0; 1294 + } 1295 + 1296 + /* 1297 + * Get the return address for a single stackframe and return a pointer to the 1298 + * next frame tail. 
1299 + */ 1300 + static unsigned long 1301 + user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) 1302 + { 1303 + struct frame_tail buftail; 1304 + unsigned long lp = 0; 1305 + unsigned long *user_frame_tail = 1306 + (unsigned long *)(fp - (unsigned long)sizeof(buftail)); 1307 + 1308 + /* Check accessibility of one struct frame_tail beyond */ 1309 + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail))) 1310 + return 0; 1311 + if (__copy_from_user_inatomic 1312 + (&buftail, user_frame_tail, sizeof(buftail))) 1313 + return 0; 1314 + 1315 + /* 1316 + * Refer to unwind_frame_kernel() for more illurstration 1317 + */ 1318 + lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ 1319 + fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ 1320 + perf_callchain_store(entry, lp); 1321 + return fp; 1322 + } 1323 + 1324 + static unsigned long 1325 + user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry, 1326 + unsigned long fp) 1327 + { 1328 + struct frame_tail_opt_size buftail; 1329 + unsigned long lp = 0; 1330 + 1331 + unsigned long *user_frame_tail = 1332 + (unsigned long *)(fp - (unsigned long)sizeof(buftail)); 1333 + 1334 + /* Check accessibility of one struct frame_tail beyond */ 1335 + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail))) 1336 + return 0; 1337 + if (__copy_from_user_inatomic 1338 + (&buftail, user_frame_tail, sizeof(buftail))) 1339 + return 0; 1340 + 1341 + /* 1342 + * Refer to unwind_frame_kernel() for more illurstration 1343 + */ 1344 + lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ 1345 + fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ 1346 + 1347 + perf_callchain_store(entry, lp); 1348 + return fp; 1349 + } 1350 + 1351 + /* 1352 + * This will be called when the target is in user mode 1353 + * This function will only be called when we use 1354 + * "PERF_SAMPLE_CALLCHAIN" in 1355 + * kernel/events/core.c:perf_prepare_sample() 1356 + * 1357 + * How to trigger 
perf_callchain_[user/kernel] : 1358 + * $ perf record -e cpu-clock --call-graph fp ./program 1359 + * $ perf report --call-graph 1360 + */ 1361 + unsigned long leaf_fp; 1362 + void 1363 + perf_callchain_user(struct perf_callchain_entry_ctx *entry, 1364 + struct pt_regs *regs) 1365 + { 1366 + unsigned long fp = 0; 1367 + unsigned long gp = 0; 1368 + unsigned long lp = 0; 1369 + unsigned long sp = 0; 1370 + unsigned long *user_frame_tail; 1371 + 1372 + leaf_fp = 0; 1373 + 1374 + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1375 + /* We don't support guest os callchain now */ 1376 + return; 1377 + } 1378 + 1379 + perf_callchain_store(entry, regs->ipc); 1380 + fp = regs->fp; 1381 + gp = regs->gp; 1382 + lp = regs->lp; 1383 + sp = regs->sp; 1384 + if (entry->nr < PERF_MAX_STACK_DEPTH && 1385 + (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) { 1386 + user_frame_tail = 1387 + (unsigned long *)(fp - (unsigned long)sizeof(fp)); 1388 + 1389 + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(fp))) 1390 + return; 1391 + 1392 + if (__copy_from_user_inatomic 1393 + (&leaf_fp, user_frame_tail, sizeof(fp))) 1394 + return; 1395 + 1396 + if (leaf_fp == lp) { 1397 + /* 1398 + * Maybe this is non leaf function 1399 + * with optimize for size, 1400 + * or maybe this is the function 1401 + * with optimize for size 1402 + */ 1403 + struct frame_tail buftail; 1404 + 1405 + user_frame_tail = 1406 + (unsigned long *)(fp - 1407 + (unsigned long)sizeof(buftail)); 1408 + 1409 + if (!access_ok 1410 + (VERIFY_READ, user_frame_tail, sizeof(buftail))) 1411 + return; 1412 + 1413 + if (__copy_from_user_inatomic 1414 + (&buftail, user_frame_tail, sizeof(buftail))) 1415 + return; 1416 + 1417 + if (buftail.stack_fp == gp) { 1418 + /* non leaf function with optimize 1419 + * for size condition 1420 + */ 1421 + struct frame_tail_opt_size buftail_opt_size; 1422 + 1423 + user_frame_tail = 1424 + (unsigned long *)(fp - (unsigned long) 1425 + sizeof(buftail_opt_size)); 1426 + 1427 + 
if (!access_ok(VERIFY_READ, user_frame_tail, 1428 + sizeof(buftail_opt_size))) 1429 + return; 1430 + 1431 + if (__copy_from_user_inatomic 1432 + (&buftail_opt_size, user_frame_tail, 1433 + sizeof(buftail_opt_size))) 1434 + return; 1435 + 1436 + perf_callchain_store(entry, lp); 1437 + fp = buftail_opt_size.stack_fp; 1438 + 1439 + while ((entry->nr < PERF_MAX_STACK_DEPTH) && 1440 + (unsigned long)fp && 1441 + !((unsigned long)fp & 0x7) && 1442 + fp > sp) { 1443 + sp = fp; 1444 + fp = user_backtrace_opt_size(entry, fp); 1445 + } 1446 + 1447 + } else { 1448 + /* this is the function 1449 + * without optimize for size 1450 + */ 1451 + fp = buftail.stack_fp; 1452 + perf_callchain_store(entry, lp); 1453 + while ((entry->nr < PERF_MAX_STACK_DEPTH) && 1454 + (unsigned long)fp && 1455 + !((unsigned long)fp & 0x7) && 1456 + fp > sp) { 1457 + sp = fp; 1458 + fp = user_backtrace(entry, fp); 1459 + } 1460 + } 1461 + } else { 1462 + /* this is leaf function */ 1463 + fp = leaf_fp; 1464 + perf_callchain_store(entry, lp); 1465 + 1466 + /* previous function callcahin */ 1467 + while ((entry->nr < PERF_MAX_STACK_DEPTH) && 1468 + (unsigned long)fp && 1469 + !((unsigned long)fp & 0x7) && fp > sp) { 1470 + sp = fp; 1471 + fp = user_backtrace(entry, fp); 1472 + } 1473 + } 1474 + return; 1475 + } 1476 + } 1477 + 1478 + /* This will be called when the target is in kernel mode */ 1479 + void 1480 + perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 1481 + struct pt_regs *regs) 1482 + { 1483 + struct stackframe fr; 1484 + 1485 + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1486 + /* We don't support guest os callchain now */ 1487 + return; 1488 + } 1489 + fr.fp = regs->fp; 1490 + fr.lp = regs->lp; 1491 + fr.sp = regs->sp; 1492 + walk_stackframe(&fr, callchain_trace, entry); 1493 + } 1494 + 1495 + unsigned long perf_instruction_pointer(struct pt_regs *regs) 1496 + { 1497 + /* However, NDS32 does not support virtualization */ 1498 + if (perf_guest_cbs && 
perf_guest_cbs->is_in_guest()) 1499 + return perf_guest_cbs->get_guest_ip(); 1500 + 1501 + return instruction_pointer(regs); 1502 + } 1503 + 1504 + unsigned long perf_misc_flags(struct pt_regs *regs) 1505 + { 1506 + int misc = 0; 1507 + 1508 + /* However, NDS32 does not support virtualization */ 1509 + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1510 + if (perf_guest_cbs->is_user_mode()) 1511 + misc |= PERF_RECORD_MISC_GUEST_USER; 1512 + else 1513 + misc |= PERF_RECORD_MISC_GUEST_KERNEL; 1514 + } else { 1515 + if (user_mode(regs)) 1516 + misc |= PERF_RECORD_MISC_USER; 1517 + else 1518 + misc |= PERF_RECORD_MISC_KERNEL; 1519 + } 1520 + 1521 + return misc; 1522 + }
+78
arch/nds32/kernel/pm.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2008-2017 Andes Technology Corporation 3 + 4 + #include <linux/init.h> 5 + #include <linux/suspend.h> 6 + #include <linux/device.h> 7 + #include <linux/printk.h> 8 + #include <asm/suspend.h> 9 + #include <nds32_intrinsic.h> 10 + 11 + unsigned int resume_addr; 12 + unsigned int *phy_addr_sp_tmp; 13 + 14 + static void nds32_suspend2ram(void) 15 + { 16 + pgd_t *pgdv; 17 + pud_t *pudv; 18 + pmd_t *pmdv; 19 + pte_t *ptev; 20 + 21 + pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) & 22 + L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume); 23 + 24 + pudv = pud_offset(pgdv, (unsigned int)cpu_resume); 25 + pmdv = pmd_offset(pudv, (unsigned int)cpu_resume); 26 + ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume); 27 + 28 + resume_addr = ((*ptev) & TLB_DATA_mskPPN) 29 + | ((unsigned int)cpu_resume & 0x00000fff); 30 + 31 + suspend2ram(); 32 + } 33 + 34 + static void nds32_suspend_cpu(void) 35 + { 36 + while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask)) 37 + __asm__ volatile ("standby no_wake_grant\n\t"); 38 + } 39 + 40 + static int nds32_pm_valid(suspend_state_t state) 41 + { 42 + switch (state) { 43 + case PM_SUSPEND_ON: 44 + case PM_SUSPEND_STANDBY: 45 + case PM_SUSPEND_MEM: 46 + return 1; 47 + default: 48 + return 0; 49 + } 50 + } 51 + 52 + static int nds32_pm_enter(suspend_state_t state) 53 + { 54 + pr_debug("%s:state:%d\n", __func__, state); 55 + switch (state) { 56 + case PM_SUSPEND_STANDBY: 57 + nds32_suspend_cpu(); 58 + return 0; 59 + case PM_SUSPEND_MEM: 60 + nds32_suspend2ram(); 61 + return 0; 62 + default: 63 + return -EINVAL; 64 + } 65 + } 66 + 67 + static const struct platform_suspend_ops nds32_pm_ops = { 68 + .valid = nds32_pm_valid, 69 + .enter = nds32_pm_enter, 70 + }; 71 + 72 + static int __init nds32_pm_init(void) 73 + { 74 + pr_debug("Enter %s\n", __func__); 75 + suspend_set_ops(&nds32_pm_ops); 76 + return 0; 77 + } 78 + late_initcall(nds32_pm_init);
+59 -5
arch/nds32/kernel/process.c
··· 9 9 #include <linux/uaccess.h> 10 10 #include <asm/elf.h> 11 11 #include <asm/proc-fns.h> 12 + #include <asm/fpu.h> 12 13 #include <linux/ptrace.h> 13 14 #include <linux/reboot.h> 14 15 15 - extern void setup_mm_for_reboot(char mode); 16 - #ifdef CONFIG_PROC_FS 17 - struct proc_dir_entry *proc_dir_cpu; 18 - EXPORT_SYMBOL(proc_dir_cpu); 16 + #if IS_ENABLED(CONFIG_LAZY_FPU) 17 + struct task_struct *last_task_used_math; 19 18 #endif 19 + 20 + extern void setup_mm_for_reboot(char mode); 20 21 21 22 extern inline void arch_reset(char mode) 22 23 { ··· 126 125 127 126 EXPORT_SYMBOL(show_regs); 128 127 128 + void exit_thread(struct task_struct *tsk) 129 + { 130 + #if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU) 131 + if (last_task_used_math == tsk) 132 + last_task_used_math = NULL; 133 + #endif 134 + } 135 + 129 136 void flush_thread(void) 130 137 { 138 + #if defined(CONFIG_FPU) 139 + clear_fpu(task_pt_regs(current)); 140 + clear_used_math(); 141 + # ifdef CONFIG_LAZY_FPU 142 + if (last_task_used_math == current) 143 + last_task_used_math = NULL; 144 + # endif 145 + #endif 131 146 } 132 147 133 148 DEFINE_PER_CPU(struct task_struct *, __entry_task); 134 149 135 150 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 136 151 int copy_thread(unsigned long clone_flags, unsigned long stack_start, 137 - unsigned long stk_sz, struct task_struct *p) 152 + unsigned long stk_sz, struct task_struct *p) 138 153 { 139 154 struct pt_regs *childregs = task_pt_regs(p); 140 155 ··· 176 159 p->thread.cpu_context.pc = (unsigned long)ret_from_fork; 177 160 p->thread.cpu_context.sp = (unsigned long)childregs; 178 161 162 + #if IS_ENABLED(CONFIG_FPU) 163 + if (used_math()) { 164 + # if !IS_ENABLED(CONFIG_LAZY_FPU) 165 + unlazy_fpu(current); 166 + # else 167 + preempt_disable(); 168 + if (last_task_used_math == current) 169 + save_fpu(current); 170 + preempt_enable(); 171 + # endif 172 + p->thread.fpu = current->thread.fpu; 173 + clear_fpu(task_pt_regs(p)); 174 + 
set_stopped_child_used_math(p); 175 + } 176 + #endif 177 + 179 178 #ifdef CONFIG_HWZOL 180 179 childregs->lb = 0; 181 180 childregs->le = 0; ··· 201 168 return 0; 202 169 } 203 170 171 + #if IS_ENABLED(CONFIG_FPU) 172 + struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next) 173 + { 174 + #if !IS_ENABLED(CONFIG_LAZY_FPU) 175 + unlazy_fpu(prev); 176 + #endif 177 + if (!(next->flags & PF_KTHREAD)) 178 + clear_fpu(task_pt_regs(next)); 179 + return prev; 180 + } 181 + #endif 182 + 204 183 /* 205 184 * fill in the fpe structure for a core dump... 206 185 */ 207 186 int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu) 208 187 { 209 188 int fpvalid = 0; 189 + #if IS_ENABLED(CONFIG_FPU) 190 + struct task_struct *tsk = current; 191 + 192 + fpvalid = tsk_used_math(tsk); 193 + if (fpvalid) { 194 + lose_fpu(); 195 + memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu)); 196 + } 197 + #endif 210 198 return fpvalid; 211 199 } 212 200
+19 -3
arch/nds32/kernel/setup.c
··· 15 15 #include <asm/proc-fns.h> 16 16 #include <asm/cache_info.h> 17 17 #include <asm/elf.h> 18 + #include <asm/fpu.h> 18 19 #include <nds32_intrinsic.h> 19 20 20 21 #define HWCAP_MFUSR_PC 0x000001 ··· 39 38 #define HWCAP_FPU_DP 0x040000 40 39 #define HWCAP_V2 0x080000 41 40 #define HWCAP_DX_REGS 0x100000 41 + #define HWCAP_HWPRE 0x200000 42 42 43 43 unsigned long cpu_id, cpu_rev, cpu_cfgid; 44 + bool has_fpu = false; 44 45 char cpu_series; 45 46 char *endianness = NULL; 46 47 ··· 73 70 "div", 74 71 "mac", 75 72 "l2c", 76 - "dx_regs", 73 + "fpu_dp", 77 74 "v2", 75 + "dx_regs", 76 + "hw_pre", 78 77 NULL, 79 78 }; 80 79 ··· 141 136 (aliasing_num - 1) << PAGE_SHIFT; 142 137 } 143 138 #endif 139 + #ifdef CONFIG_FPU 140 + /* Disable fpu and enable when it is used. */ 141 + if (has_fpu) 142 + disable_fpu(); 143 + #endif 144 144 } 145 145 146 146 static void __init setup_cpuinfo(void) ··· 190 180 if (cpu_cfgid & 0x0004) 191 181 elf_hwcap |= HWCAP_EXT2; 192 182 193 - if (cpu_cfgid & 0x0008) 183 + if (cpu_cfgid & 0x0008) { 194 184 elf_hwcap |= HWCAP_FPU; 195 - 185 + has_fpu = true; 186 + } 196 187 if (cpu_cfgid & 0x0010) 197 188 elf_hwcap |= HWCAP_STRING; 198 189 ··· 222 211 223 212 if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C) 224 213 elf_hwcap |= HWCAP_L2C; 214 + 215 + #ifdef CONFIG_HW_PRE 216 + if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN) 217 + elf_hwcap |= HWCAP_HWPRE; 218 + #endif 225 219 226 220 tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL); 227 221 if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE))
+61 -1
arch/nds32/kernel/signal.c
··· 12 12 #include <asm/cacheflush.h> 13 13 #include <asm/ucontext.h> 14 14 #include <asm/unistd.h> 15 + #include <asm/fpu.h> 15 16 16 17 #include <asm/ptrace.h> 17 18 #include <asm/vdso.h> ··· 21 20 struct siginfo info; 22 21 struct ucontext uc; 23 22 }; 23 + #if IS_ENABLED(CONFIG_FPU) 24 + static inline int restore_sigcontext_fpu(struct pt_regs *regs, 25 + struct sigcontext __user *sc) 26 + { 27 + struct task_struct *tsk = current; 28 + unsigned long used_math_flag; 29 + int ret = 0; 30 + 31 + clear_used_math(); 32 + __get_user_error(used_math_flag, &sc->used_math_flag, ret); 33 + 34 + if (!used_math_flag) 35 + return 0; 36 + set_used_math(); 37 + 38 + #if IS_ENABLED(CONFIG_LAZY_FPU) 39 + preempt_disable(); 40 + if (current == last_task_used_math) { 41 + last_task_used_math = NULL; 42 + disable_ptreg_fpu(regs); 43 + } 44 + preempt_enable(); 45 + #else 46 + clear_fpu(regs); 47 + #endif 48 + 49 + return __copy_from_user(&tsk->thread.fpu, &sc->fpu, 50 + sizeof(struct fpu_struct)); 51 + } 52 + 53 + static inline int setup_sigcontext_fpu(struct pt_regs *regs, 54 + struct sigcontext __user *sc) 55 + { 56 + struct task_struct *tsk = current; 57 + int ret = 0; 58 + 59 + __put_user_error(used_math(), &sc->used_math_flag, ret); 60 + 61 + if (!used_math()) 62 + return ret; 63 + 64 + preempt_disable(); 65 + #if IS_ENABLED(CONFIG_LAZY_FPU) 66 + if (last_task_used_math == tsk) 67 + save_fpu(last_task_used_math); 68 + #else 69 + unlazy_fpu(tsk); 70 + #endif 71 + ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu, 72 + sizeof(struct fpu_struct)); 73 + preempt_enable(); 74 + return ret; 75 + } 76 + #endif 24 77 25 78 static int restore_sigframe(struct pt_regs *regs, 26 79 struct rt_sigframe __user * sf) ··· 124 69 __get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err); 125 70 __get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err); 126 71 #endif 127 - 72 + #if IS_ENABLED(CONFIG_FPU) 73 + err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext); 74 + #endif 128 
75 /* 129 76 * Avoid sys_rt_sigreturn() restarting. 130 77 */ ··· 209 152 __put_user_error(regs->lc, &sf->uc.uc_mcontext.zol.nds32_lc, err); 210 153 __put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err); 211 154 __put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err); 155 + #endif 156 + #if IS_ENABLED(CONFIG_FPU) 157 + err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext); 212 158 #endif 213 159 214 160 __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
+131
arch/nds32/kernel/sleep.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2017 Andes Technology Corporation */ 3 + 4 + #include <asm/memory.h> 5 + 6 + .data 7 + .global sp_tmp 8 + sp_tmp: 9 + .long 10 + 11 + .text 12 + .globl suspend2ram 13 + .globl cpu_resume 14 + 15 + suspend2ram: 16 + pushm $r0, $r31 17 + #if defined(CONFIG_HWZOL) 18 + mfusr $r0, $lc 19 + mfusr $r1, $le 20 + mfusr $r2, $lb 21 + #endif 22 + mfsr $r3, $mr0 23 + mfsr $r4, $mr1 24 + mfsr $r5, $mr4 25 + mfsr $r6, $mr6 26 + mfsr $r7, $mr7 27 + mfsr $r8, $mr8 28 + mfsr $r9, $ir0 29 + mfsr $r10, $ir1 30 + mfsr $r11, $ir2 31 + mfsr $r12, $ir3 32 + mfsr $r13, $ir9 33 + mfsr $r14, $ir10 34 + mfsr $r15, $ir12 35 + mfsr $r16, $ir13 36 + mfsr $r17, $ir14 37 + mfsr $r18, $ir15 38 + pushm $r0, $r19 39 + #if defined(CONFIG_FPU) 40 + jal store_fpu_for_suspend 41 + #endif 42 + tlbop FlushAll 43 + isb 44 + 45 + // transfer $sp from va to pa 46 + sethi $r0, hi20(PAGE_OFFSET) 47 + ori $r0, $r0, lo12(PAGE_OFFSET) 48 + movi $r2, PHYS_OFFSET 49 + sub $r1, $sp, $r0 50 + add $r2, $r1, $r2 51 + 52 + // store pa($sp) to sp_tmp 53 + sethi $r1, hi20(sp_tmp) 54 + swi $r2, [$r1 + lo12(sp_tmp)] 55 + 56 + pushm $r16, $r25 57 + pushm $r29, $r30 58 + #ifdef CONFIG_CACHE_L2 59 + jal dcache_wb_all_level 60 + #else 61 + jal cpu_dcache_wb_all 62 + #endif 63 + popm $r29, $r30 64 + popm $r16, $r25 65 + 66 + // get wake_mask and loop in standby 67 + la $r1, wake_mask 68 + lwi $r1, [$r1] 69 + self_loop: 70 + standby wake_grant 71 + mfsr $r2, $ir15 72 + and $r2, $r1, $r2 73 + beqz $r2, self_loop 74 + 75 + // set ipc to resume address 76 + la $r1, resume_addr 77 + lwi $r1, [$r1] 78 + mtsr $r1, $ipc 79 + isb 80 + 81 + // reset psw, turn off the address translation 82 + li $r2, 0x7000a 83 + mtsr $r2, $ipsw 84 + isb 85 + 86 + iret 87 + cpu_resume: 88 + // translate the address of sp_tmp variable to pa 89 + la $r1, sp_tmp 90 + sethi $r0, hi20(PAGE_OFFSET) 91 + ori $r0, $r0, lo12(PAGE_OFFSET) 92 + movi $r2, PHYS_OFFSET 93 + sub $r1, $r1, $r0 94 + add 
$r1, $r1, $r2 95 + 96 + // access the sp_tmp to get stack pointer 97 + lwi $sp, [$r1] 98 + 99 + popm $r0, $r19 100 + #if defined(CONFIG_HWZOL) 101 + mtusr $r0, $lb 102 + mtusr $r1, $lc 103 + mtusr $r2, $le 104 + #endif 105 + mtsr $r3, $mr0 106 + mtsr $r4, $mr1 107 + mtsr $r5, $mr4 108 + mtsr $r6, $mr6 109 + mtsr $r7, $mr7 110 + mtsr $r8, $mr8 111 + // set original psw to ipsw 112 + mtsr $r9, $ir1 113 + 114 + mtsr $r11, $ir2 115 + mtsr $r12, $ir3 116 + 117 + // set ipc to RR 118 + la $r13, RR 119 + mtsr $r13, $ir9 120 + 121 + mtsr $r14, $ir10 122 + mtsr $r15, $ir12 123 + mtsr $r16, $ir13 124 + mtsr $r17, $ir14 125 + mtsr $r18, $ir15 126 + popm $r0, $r31 127 + 128 + isb 129 + iret 130 + RR: 131 + ret
+32
arch/nds32/kernel/sys_nds32.c
··· 6 6 7 7 #include <asm/cachectl.h> 8 8 #include <asm/proc-fns.h> 9 + #include <asm/udftrap.h> 10 + #include <asm/fpu.h> 9 11 10 12 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, 11 13 unsigned long, prot, unsigned long, flags, ··· 49 47 cpu_cache_wbinval_range_check(vma, start, end, flushi, wbd); 50 48 51 49 return 0; 50 + } 51 + 52 + SYSCALL_DEFINE1(udftrap, int, option) 53 + { 54 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 55 + int old_udftrap; 56 + 57 + if (!used_math()) { 58 + load_fpu(&init_fpuregs); 59 + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; 60 + set_used_math(); 61 + } 62 + 63 + old_udftrap = current->thread.fpu.UDF_trap; 64 + switch (option) { 65 + case DISABLE_UDFTRAP: 66 + current->thread.fpu.UDF_trap = 0; 67 + break; 68 + case ENABLE_UDFTRAP: 69 + current->thread.fpu.UDF_trap = FPCSR_mskUDFE; 70 + break; 71 + case GET_UDFTRAP: 72 + break; 73 + default: 74 + return -EINVAL; 75 + } 76 + return old_udftrap; 77 + #else 78 + return -ENOTSUPP; 79 + #endif 52 80 }
+16
arch/nds32/kernel/traps.c
··· 12 12 13 13 #include <asm/proc-fns.h> 14 14 #include <asm/unistd.h> 15 + #include <asm/fpu.h> 15 16 16 17 #include <linux/ptrace.h> 17 18 #include <nds32_intrinsic.h> ··· 358 357 } else if (type == ETYPE_RESERVED_INSTRUCTION) { 359 358 /* Reserved instruction */ 360 359 do_revinsn(regs); 360 + } else if (type == ETYPE_COPROCESSOR) { 361 + /* Coprocessor */ 362 + #if IS_ENABLED(CONFIG_FPU) 363 + unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST); 364 + unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID); 365 + 366 + if ((cpid == FPU_CPID) && 367 + (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) { 368 + unsigned int subtype = (itype & ITYPE_mskSTYPE); 369 + 370 + if (true == do_fpu_exception(subtype, regs)) 371 + return; 372 + } 373 + #endif 374 + unhandled_exceptions(entry, addr, type, regs); 361 375 } else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) { 362 376 /* trap, used on v3 EDM target debugging workaround */ 363 377 /*
+7
arch/nds32/math-emu/Makefile
··· 1 + # 2 + # Makefile for the Linux/nds32 kernel FPU emulation. 3 + # 4 + 5 + obj-y := fpuemu.o \ 6 + fdivd.o fmuld.o fsubd.o faddd.o fs2d.o fsqrtd.o fcmpd.o fnegs.o \ 7 + fdivs.o fmuls.o fsubs.o fadds.o fd2s.o fsqrts.o fcmps.o fnegd.o
+24
arch/nds32/math-emu/faddd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + void faddd(void *ft, void *fa, void *fb) 9 + { 10 + FP_DECL_D(A); 11 + FP_DECL_D(B); 12 + FP_DECL_D(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_DP(A, fa); 16 + FP_UNPACK_DP(B, fb); 17 + 18 + FP_ADD_D(R, A, B); 19 + 20 + FP_PACK_DP(ft, R); 21 + 22 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 23 + 24 + }
+24
arch/nds32/math-emu/fadds.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fadds(void *ft, void *fa, void *fb) 9 + { 10 + FP_DECL_S(A); 11 + FP_DECL_S(B); 12 + FP_DECL_S(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_SP(A, fa); 16 + FP_UNPACK_SP(B, fb); 17 + 18 + FP_ADD_S(R, A, B); 19 + 20 + FP_PACK_SP(ft, R); 21 + 22 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 23 + 24 + }
+24
arch/nds32/math-emu/fcmpd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <asm/sfp-machine.h> 4 + #include <math-emu/soft-fp.h> 5 + #include <math-emu/double.h> 6 + int fcmpd(void *ft, void *fa, void *fb, int cmpop) 7 + { 8 + FP_DECL_D(A); 9 + FP_DECL_D(B); 10 + FP_DECL_EX; 11 + long cmp; 12 + 13 + FP_UNPACK_DP(A, fa); 14 + FP_UNPACK_DP(B, fb); 15 + 16 + FP_CMP_D(cmp, A, B, SF_CUN); 17 + cmp += 2; 18 + if (cmp == SF_CGT) 19 + *(long *)ft = 0; 20 + else 21 + *(long *)ft = (cmp & cmpop) ? 1 : 0; 22 + 23 + return 0; 24 + }
+24
arch/nds32/math-emu/fcmps.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <asm/sfp-machine.h> 4 + #include <math-emu/soft-fp.h> 5 + #include <math-emu/single.h> 6 + int fcmps(void *ft, void *fa, void *fb, int cmpop) 7 + { 8 + FP_DECL_S(A); 9 + FP_DECL_S(B); 10 + FP_DECL_EX; 11 + long cmp; 12 + 13 + FP_UNPACK_SP(A, fa); 14 + FP_UNPACK_SP(B, fb); 15 + 16 + FP_CMP_S(cmp, A, B, SF_CUN); 17 + cmp += 2; 18 + if (cmp == SF_CGT) 19 + *(int *)ft = 0x0; 20 + else 21 + *(int *)ft = (cmp & cmpop) ? 0x1 : 0x0; 22 + 23 + return 0; 24 + }
+22
arch/nds32/math-emu/fd2s.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/double.h> 7 + #include <math-emu/single.h> 8 + #include <math-emu/soft-fp.h> 9 + void fd2s(void *ft, void *fa) 10 + { 11 + FP_DECL_D(A); 12 + FP_DECL_S(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_DP(A, fa); 16 + 17 + FP_CONV(S, D, 1, 2, R, A); 18 + 19 + FP_PACK_SP(ft, R); 20 + 21 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 22 + }
+27
arch/nds32/math-emu/fdivd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <linux/uaccess.h> 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + 9 + void fdivd(void *ft, void *fa, void *fb) 10 + { 11 + FP_DECL_D(A); 12 + FP_DECL_D(B); 13 + FP_DECL_D(R); 14 + FP_DECL_EX; 15 + 16 + FP_UNPACK_DP(A, fa); 17 + FP_UNPACK_DP(B, fb); 18 + 19 + if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) 20 + FP_SET_EXCEPTION(FP_EX_DIVZERO); 21 + 22 + FP_DIV_D(R, A, B); 23 + 24 + FP_PACK_DP(ft, R); 25 + 26 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 27 + }
+26
arch/nds32/math-emu/fdivs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fdivs(void *ft, void *fa, void *fb) 9 + { 10 + FP_DECL_S(A); 11 + FP_DECL_S(B); 12 + FP_DECL_S(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_SP(A, fa); 16 + FP_UNPACK_SP(B, fb); 17 + 18 + if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) 19 + FP_SET_EXCEPTION(FP_EX_DIVZERO); 20 + 21 + FP_DIV_S(R, A, B); 22 + 23 + FP_PACK_SP(ft, R); 24 + 25 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 26 + }
+23
arch/nds32/math-emu/fmuld.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + void fmuld(void *ft, void *fa, void *fb) 9 + { 10 + FP_DECL_D(A); 11 + FP_DECL_D(B); 12 + FP_DECL_D(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_DP(A, fa); 16 + FP_UNPACK_DP(B, fb); 17 + 18 + FP_MUL_D(R, A, B); 19 + 20 + FP_PACK_DP(ft, R); 21 + 22 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 23 + }
+23
arch/nds32/math-emu/fmuls.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fmuls(void *ft, void *fa, void *fb) 9 + { 10 + FP_DECL_S(A); 11 + FP_DECL_S(B); 12 + FP_DECL_S(R); 13 + FP_DECL_EX; 14 + 15 + FP_UNPACK_SP(A, fa); 16 + FP_UNPACK_SP(B, fb); 17 + 18 + FP_MUL_S(R, A, B); 19 + 20 + FP_PACK_SP(ft, R); 21 + 22 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 23 + }
+21
arch/nds32/math-emu/fnegd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + void fnegd(void *ft, void *fa) 9 + { 10 + FP_DECL_D(A); 11 + FP_DECL_D(R); 12 + FP_DECL_EX; 13 + 14 + FP_UNPACK_DP(A, fa); 15 + 16 + FP_NEG_D(R, A); 17 + 18 + FP_PACK_DP(ft, R); 19 + 20 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 21 + }
+21
arch/nds32/math-emu/fnegs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fnegs(void *ft, void *fa) 9 + { 10 + FP_DECL_S(A); 11 + FP_DECL_S(R); 12 + FP_DECL_EX; 13 + 14 + FP_UNPACK_SP(A, fa); 15 + 16 + FP_NEG_S(R, A); 17 + 18 + FP_PACK_SP(ft, R); 19 + 20 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 21 + }
+357
arch/nds32/math-emu/fpuemu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <asm/bitfield.h> 5 + #include <asm/uaccess.h> 6 + #include <asm/sfp-machine.h> 7 + #include <asm/fpuemu.h> 8 + #include <asm/nds32_fpu_inst.h> 9 + 10 + #define DPFROMREG(dp, x) (dp = (void *)((unsigned long *)fpu_reg + 2*x)) 11 + #ifdef __NDS32_EL__ 12 + #define SPFROMREG(sp, x)\ 13 + ((sp) = (void *)((unsigned long *)fpu_reg + (x^1))) 14 + #else 15 + #define SPFROMREG(sp, x) ((sp) = (void *)((unsigned long *)fpu_reg + x)) 16 + #endif 17 + 18 + #define DEF3OP(name, p, f1, f2) \ 19 + void fpemu_##name##p(void *ft, void *fa, void *fb) \ 20 + { \ 21 + f1(fa, fa, fb); \ 22 + f2(ft, ft, fa); \ 23 + } 24 + 25 + #define DEF3OPNEG(name, p, f1, f2, f3) \ 26 + void fpemu_##name##p(void *ft, void *fa, void *fb) \ 27 + { \ 28 + f1(fa, fa, fb); \ 29 + f2(ft, ft, fa); \ 30 + f3(ft, ft); \ 31 + } 32 + DEF3OP(fmadd, s, fmuls, fadds); 33 + DEF3OP(fmsub, s, fmuls, fsubs); 34 + DEF3OP(fmadd, d, fmuld, faddd); 35 + DEF3OP(fmsub, d, fmuld, fsubd); 36 + DEF3OPNEG(fnmadd, s, fmuls, fadds, fnegs); 37 + DEF3OPNEG(fnmsub, s, fmuls, fsubs, fnegs); 38 + DEF3OPNEG(fnmadd, d, fmuld, faddd, fnegd); 39 + DEF3OPNEG(fnmsub, d, fmuld, fsubd, fnegd); 40 + 41 + static const unsigned char cmptab[8] = { 42 + SF_CEQ, 43 + SF_CEQ, 44 + SF_CLT, 45 + SF_CLT, 46 + SF_CLT | SF_CEQ, 47 + SF_CLT | SF_CEQ, 48 + SF_CUN, 49 + SF_CUN 50 + }; 51 + 52 + enum ARGTYPE { 53 + S1S = 1, 54 + S2S, 55 + S1D, 56 + CS, 57 + D1D, 58 + D2D, 59 + D1S, 60 + CD 61 + }; 62 + union func_t { 63 + void (*t)(void *ft, void *fa, void *fb); 64 + void (*b)(void *ft, void *fa); 65 + }; 66 + /* 67 + * Emulate a single FPU arithmetic instruction. 
68 + */ 69 + static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn) 70 + { 71 + int rfmt; /* resulting format */ 72 + union func_t func; 73 + int ftype = 0; 74 + 75 + switch (rfmt = NDS32Insn_OPCODE_COP0(insn)) { 76 + case fs1_op:{ 77 + switch (NDS32Insn_OPCODE_BIT69(insn)) { 78 + case fadds_op: 79 + func.t = fadds; 80 + ftype = S2S; 81 + break; 82 + case fsubs_op: 83 + func.t = fsubs; 84 + ftype = S2S; 85 + break; 86 + case fmadds_op: 87 + func.t = fpemu_fmadds; 88 + ftype = S2S; 89 + break; 90 + case fmsubs_op: 91 + func.t = fpemu_fmsubs; 92 + ftype = S2S; 93 + break; 94 + case fnmadds_op: 95 + func.t = fpemu_fnmadds; 96 + ftype = S2S; 97 + break; 98 + case fnmsubs_op: 99 + func.t = fpemu_fnmsubs; 100 + ftype = S2S; 101 + break; 102 + case fmuls_op: 103 + func.t = fmuls; 104 + ftype = S2S; 105 + break; 106 + case fdivs_op: 107 + func.t = fdivs; 108 + ftype = S2S; 109 + break; 110 + case fs1_f2op_op: 111 + switch (NDS32Insn_OPCODE_BIT1014(insn)) { 112 + case fs2d_op: 113 + func.b = fs2d; 114 + ftype = S1D; 115 + break; 116 + case fsqrts_op: 117 + func.b = fsqrts; 118 + ftype = S1S; 119 + break; 120 + default: 121 + return SIGILL; 122 + } 123 + break; 124 + default: 125 + return SIGILL; 126 + } 127 + break; 128 + } 129 + case fs2_op: 130 + switch (NDS32Insn_OPCODE_BIT69(insn)) { 131 + case fcmpeqs_op: 132 + case fcmpeqs_e_op: 133 + case fcmplts_op: 134 + case fcmplts_e_op: 135 + case fcmples_op: 136 + case fcmples_e_op: 137 + case fcmpuns_op: 138 + case fcmpuns_e_op: 139 + ftype = CS; 140 + break; 141 + default: 142 + return SIGILL; 143 + } 144 + break; 145 + case fd1_op:{ 146 + switch (NDS32Insn_OPCODE_BIT69(insn)) { 147 + case faddd_op: 148 + func.t = faddd; 149 + ftype = D2D; 150 + break; 151 + case fsubd_op: 152 + func.t = fsubd; 153 + ftype = D2D; 154 + break; 155 + case fmaddd_op: 156 + func.t = fpemu_fmaddd; 157 + ftype = D2D; 158 + break; 159 + case fmsubd_op: 160 + func.t = fpemu_fmsubd; 161 + ftype = D2D; 162 + break; 163 + case fnmaddd_op: 
164 + func.t = fpemu_fnmaddd; 165 + ftype = D2D; 166 + break; 167 + case fnmsubd_op: 168 + func.t = fpemu_fnmsubd; 169 + ftype = D2D; 170 + break; 171 + case fmuld_op: 172 + func.t = fmuld; 173 + ftype = D2D; 174 + break; 175 + case fdivd_op: 176 + func.t = fdivd; 177 + ftype = D2D; 178 + break; 179 + case fd1_f2op_op: 180 + switch (NDS32Insn_OPCODE_BIT1014(insn)) { 181 + case fd2s_op: 182 + func.b = fd2s; 183 + ftype = D1S; 184 + break; 185 + case fsqrtd_op: 186 + func.b = fsqrtd; 187 + ftype = D1D; 188 + break; 189 + default: 190 + return SIGILL; 191 + } 192 + break; 193 + default: 194 + return SIGILL; 195 + 196 + } 197 + break; 198 + } 199 + 200 + case fd2_op: 201 + switch (NDS32Insn_OPCODE_BIT69(insn)) { 202 + case fcmpeqd_op: 203 + case fcmpeqd_e_op: 204 + case fcmpltd_op: 205 + case fcmpltd_e_op: 206 + case fcmpled_op: 207 + case fcmpled_e_op: 208 + case fcmpund_op: 209 + case fcmpund_e_op: 210 + ftype = CD; 211 + break; 212 + default: 213 + return SIGILL; 214 + } 215 + break; 216 + 217 + default: 218 + return SIGILL; 219 + } 220 + 221 + switch (ftype) { 222 + case S1S:{ 223 + void *ft, *fa; 224 + 225 + SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 226 + SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 227 + func.b(ft, fa); 228 + break; 229 + } 230 + case S2S:{ 231 + void *ft, *fa, *fb; 232 + 233 + SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 234 + SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 235 + SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn)); 236 + func.t(ft, fa, fb); 237 + break; 238 + } 239 + case S1D:{ 240 + void *ft, *fa; 241 + 242 + DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 243 + SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 244 + func.b(ft, fa); 245 + break; 246 + } 247 + case CS:{ 248 + unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn); 249 + void *ft, *fa, *fb; 250 + 251 + SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 252 + SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 253 + SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn)); 254 + if (cmpop < 0x8) { 255 + cmpop = cmptab[cmpop]; 256 + 
fcmps(ft, fa, fb, cmpop); 257 + } else 258 + return SIGILL; 259 + break; 260 + } 261 + case D1D:{ 262 + void *ft, *fa; 263 + 264 + DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 265 + DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 266 + func.b(ft, fa); 267 + break; 268 + } 269 + case D2D:{ 270 + void *ft, *fa, *fb; 271 + 272 + DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 273 + DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 274 + DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn)); 275 + func.t(ft, fa, fb); 276 + break; 277 + } 278 + case D1S:{ 279 + void *ft, *fa; 280 + 281 + SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 282 + DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 283 + func.b(ft, fa); 284 + break; 285 + } 286 + case CD:{ 287 + unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn); 288 + void *ft, *fa, *fb; 289 + 290 + SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn)); 291 + DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn)); 292 + DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn)); 293 + if (cmpop < 0x8) { 294 + cmpop = cmptab[cmpop]; 295 + fcmpd(ft, fa, fb, cmpop); 296 + } else 297 + return SIGILL; 298 + break; 299 + } 300 + default: 301 + return SIGILL; 302 + } 303 + 304 + /* 305 + * If an exception is required, generate a tidy SIGFPE exception. 
306 + */ 307 + #if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) 308 + if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDFE) || 309 + ((fpu_reg->fpcsr & FPCSR_mskUDF) && (fpu_reg->UDF_trap))) 310 + #else 311 + if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE) 312 + #endif 313 + return SIGFPE; 314 + return 0; 315 + } 316 + 317 + 318 + int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu) 319 + { 320 + unsigned long insn = 0, addr = regs->ipc; 321 + unsigned long emulpc, contpc; 322 + unsigned char *pc = (void *)&insn; 323 + char c; 324 + int i = 0, ret; 325 + 326 + for (i = 0; i < 4; i++) { 327 + if (__get_user(c, (unsigned char *)addr++)) 328 + return SIGBUS; 329 + *pc++ = c; 330 + } 331 + 332 + insn = be32_to_cpu(insn); 333 + 334 + emulpc = regs->ipc; 335 + contpc = regs->ipc + 4; 336 + 337 + if (NDS32Insn_OPCODE(insn) != cop0_op) 338 + return SIGILL; 339 + switch (NDS32Insn_OPCODE_COP0(insn)) { 340 + case fs1_op: 341 + case fs2_op: 342 + case fd1_op: 343 + case fd2_op: 344 + { 345 + /* a real fpu computation instruction */ 346 + ret = fpu_emu(fpu, insn); 347 + if (!ret) 348 + regs->ipc = contpc; 349 + } 350 + break; 351 + 352 + default: 353 + return SIGILL; 354 + } 355 + 356 + return ret; 357 + }
+23
arch/nds32/math-emu/fs2d.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <linux/uaccess.h> 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/double.h> 7 + #include <math-emu/single.h> 8 + #include <math-emu/soft-fp.h> 9 + 10 + void fs2d(void *ft, void *fa) 11 + { 12 + FP_DECL_S(A); 13 + FP_DECL_D(R); 14 + FP_DECL_EX; 15 + 16 + FP_UNPACK_SP(A, fa); 17 + 18 + FP_CONV(D, S, 2, 1, R, A); 19 + 20 + FP_PACK_DP(ft, R); 21 + 22 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 23 + }
+21
arch/nds32/math-emu/fsqrtd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <linux/uaccess.h> 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + void fsqrtd(void *ft, void *fa) 9 + { 10 + FP_DECL_D(A); 11 + FP_DECL_D(R); 12 + FP_DECL_EX; 13 + 14 + FP_UNPACK_DP(A, fa); 15 + 16 + FP_SQRT_D(R, A); 17 + 18 + FP_PACK_DP(ft, R); 19 + 20 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 21 + }
+21
arch/nds32/math-emu/fsqrts.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + 4 + #include <linux/uaccess.h> 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fsqrts(void *ft, void *fa) 9 + { 10 + FP_DECL_S(A); 11 + FP_DECL_S(R); 12 + FP_DECL_EX; 13 + 14 + FP_UNPACK_SP(A, fa); 15 + 16 + FP_SQRT_S(R, A); 17 + 18 + FP_PACK_SP(ft, R); 19 + 20 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 21 + }
+27
arch/nds32/math-emu/fsubd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/double.h> 8 + void fsubd(void *ft, void *fa, void *fb) 9 + { 10 + 11 + FP_DECL_D(A); 12 + FP_DECL_D(B); 13 + FP_DECL_D(R); 14 + FP_DECL_EX; 15 + 16 + FP_UNPACK_DP(A, fa); 17 + FP_UNPACK_DP(B, fb); 18 + 19 + if (B_c != FP_CLS_NAN) 20 + B_s ^= 1; 21 + 22 + FP_ADD_D(R, A, B); 23 + 24 + FP_PACK_DP(ft, R); 25 + 26 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 27 + }
+27
arch/nds32/math-emu/fsubs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2018 Andes Technology Corporation 3 + #include <linux/uaccess.h> 4 + 5 + #include <asm/sfp-machine.h> 6 + #include <math-emu/soft-fp.h> 7 + #include <math-emu/single.h> 8 + void fsubs(void *ft, void *fa, void *fb) 9 + { 10 + 11 + FP_DECL_S(A); 12 + FP_DECL_S(B); 13 + FP_DECL_S(R); 14 + FP_DECL_EX; 15 + 16 + FP_UNPACK_SP(A, fa); 17 + FP_UNPACK_SP(B, fb); 18 + 19 + if (B_c != FP_CLS_NAN) 20 + B_s ^= 1; 21 + 22 + FP_ADD_S(R, A, B); 23 + 24 + FP_PACK_SP(ft, R); 25 + 26 + __FPU_FPCSR |= FP_CUR_EXCEPTIONS; 27 + }
+5 -1
arch/nds32/mm/Makefile
··· 4 4 5 5 obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o 6 6 obj-$(CONFIG_HIGHMEM) += highmem.o 7 - CFLAGS_proc-n13.o += -fomit-frame-pointer 7 + 8 + ifdef CONFIG_FUNCTION_TRACER 9 + CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE) 10 + endif 11 + CFLAGS_proc.o += -fomit-frame-pointer
+9 -4
arch/nds32/mm/fault.c
··· 9 9 #include <linux/init.h> 10 10 #include <linux/hardirq.h> 11 11 #include <linux/uaccess.h> 12 + #include <linux/perf_event.h> 12 13 13 14 #include <asm/pgtable.h> 14 15 #include <asm/tlbflush.h> ··· 170 169 mask = VM_EXEC; 171 170 else { 172 171 mask = VM_READ | VM_WRITE; 173 - if (vma->vm_flags & VM_WRITE) 174 - flags |= FAULT_FLAG_WRITE; 175 172 } 176 173 } else if (entry == ENTRY_TLB_MISC) { 177 174 switch (error_code & ITYPE_mskETYPE) { ··· 230 231 * attempt. If we go through a retry, it is extremely likely that the 231 232 * page will be found in page cache at that point. 232 233 */ 234 + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); 233 235 if (flags & FAULT_FLAG_ALLOW_RETRY) { 234 - if (fault & VM_FAULT_MAJOR) 236 + if (fault & VM_FAULT_MAJOR) { 235 237 tsk->maj_flt++; 236 - else 238 + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 239 + 1, regs, addr); 240 + } else { 237 241 tsk->min_flt++; 242 + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 243 + 1, regs, addr); 244 + } 238 245 if (fault & VM_FAULT_RETRY) { 239 246 flags &= ~FAULT_FLAG_ALLOW_RETRY; 240 247 flags |= FAULT_FLAG_TRIED;
+31
drivers/irqchip/irq-ativic32.c
··· 10 10 #include <linux/irqchip.h> 11 11 #include <nds32_intrinsic.h> 12 12 13 + unsigned long wake_mask; 14 + 13 15 static void ativic32_ack_irq(struct irq_data *data) 14 16 { 15 17 __nds32__mtsr_dsb(BIT(data->hwirq), NDS32_SR_INT_PEND2); ··· 29 27 __nds32__mtsr_dsb(int_mask2 | (BIT(data->hwirq)), NDS32_SR_INT_MASK2); 30 28 } 31 29 30 + static int nointc_set_wake(struct irq_data *data, unsigned int on) 31 + { 32 + unsigned long int_mask = __nds32__mfsr(NDS32_SR_INT_MASK); 33 + static unsigned long irq_orig_bit; 34 + u32 bit = 1 << data->hwirq; 35 + 36 + if (on) { 37 + if (int_mask & bit) 38 + __assign_bit(data->hwirq, &irq_orig_bit, true); 39 + else 40 + __assign_bit(data->hwirq, &irq_orig_bit, false); 41 + 42 + __assign_bit(data->hwirq, &int_mask, true); 43 + __assign_bit(data->hwirq, &wake_mask, true); 44 + 45 + } else { 46 + if (!(irq_orig_bit & bit)) 47 + __assign_bit(data->hwirq, &int_mask, false); 48 + 49 + __assign_bit(data->hwirq, &wake_mask, false); 50 + __assign_bit(data->hwirq, &irq_orig_bit, false); 51 + } 52 + 53 + __nds32__mtsr_dsb(int_mask, NDS32_SR_INT_MASK); 54 + 55 + return 0; 56 + } 57 + 32 58 static struct irq_chip ativic32_chip = { 33 59 .name = "ativic32", 34 60 .irq_ack = ativic32_ack_irq, 35 61 .irq_mask = ativic32_mask_irq, 36 62 .irq_unmask = ativic32_unmask_irq, 63 + .irq_set_wake = nointc_set_wake, 37 64 }; 38 65 39 66 static unsigned int __initdata nivic_map[6] = { 6, 2, 10, 16, 24, 32 };
+46 -51
include/math-emu/op-2.h
··· 31 31 #define _FP_FRAC_HIGH_2(X) (X##_f1) 32 32 #define _FP_FRAC_LOW_2(X) (X##_f0) 33 33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w) 34 + #define _FP_FRAC_SLL_2(X, N) ( \ 35 + (void) (((N) < _FP_W_TYPE_SIZE) \ 36 + ? ({ \ 37 + if (__builtin_constant_p(N) && (N) == 1) { \ 38 + X##_f1 = X##_f1 + X##_f1 + \ 39 + (((_FP_WS_TYPE) (X##_f0)) < 0); \ 40 + X##_f0 += X##_f0; \ 41 + } else { \ 42 + X##_f1 = X##_f1 << (N) | X##_f0 >> \ 43 + (_FP_W_TYPE_SIZE - (N)); \ 44 + X##_f0 <<= (N); \ 45 + } \ 46 + 0; \ 47 + }) \ 48 + : ({ \ 49 + X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 50 + X##_f0 = 0; \ 51 + }))) 34 52 35 - #define _FP_FRAC_SLL_2(X,N) \ 36 - do { \ 37 - if ((N) < _FP_W_TYPE_SIZE) \ 38 - { \ 39 - if (__builtin_constant_p(N) && (N) == 1) \ 40 - { \ 41 - X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ 42 - X##_f0 += X##_f0; \ 43 - } \ 44 - else \ 45 - { \ 46 - X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ 47 - X##_f0 <<= (N); \ 48 - } \ 49 - } \ 50 - else \ 51 - { \ 52 - X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 53 - X##_f0 = 0; \ 54 - } \ 55 - } while (0) 56 53 57 - #define _FP_FRAC_SRL_2(X,N) \ 58 - do { \ 59 - if ((N) < _FP_W_TYPE_SIZE) \ 60 - { \ 61 - X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 62 - X##_f1 >>= (N); \ 63 - } \ 64 - else \ 65 - { \ 66 - X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 67 - X##_f1 = 0; \ 68 - } \ 69 - } while (0) 54 + #define _FP_FRAC_SRL_2(X, N) ( \ 55 + (void) (((N) < _FP_W_TYPE_SIZE) \ 56 + ? ({ \ 57 + X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 58 + X##_f1 >>= (N); \ 59 + }) \ 60 + : ({ \ 61 + X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 62 + X##_f1 = 0; \ 63 + }))) 64 + 70 65 71 66 /* Right shift with sticky-lsb. */ 72 - #define _FP_FRAC_SRS_2(X,N,sz) \ 73 - do { \ 74 - if ((N) < _FP_W_TYPE_SIZE) \ 75 - { \ 76 - X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ 77 - (__builtin_constant_p(N) && (N) == 1 \ 78 - ? 
X##_f0 & 1 \ 79 - : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 80 - X##_f1 >>= (N); \ 81 - } \ 82 - else \ 83 - { \ 84 - X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ 85 - (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \ 86 - X##_f1 = 0; \ 87 - } \ 88 - } while (0) 67 + #define _FP_FRAC_SRS_2(X, N, sz) ( \ 68 + (void) (((N) < _FP_W_TYPE_SIZE) \ 69 + ? ({ \ 70 + X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \ 71 + | (__builtin_constant_p(N) && (N) == 1 \ 72 + ? X##_f0 & 1 \ 73 + : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 74 + X##_f1 >>= (N); \ 75 + }) \ 76 + : ({ \ 77 + X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \ 78 + | ((((N) == _FP_W_TYPE_SIZE \ 79 + ? 0 \ 80 + : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \ 81 + | X##_f0) != 0)); \ 82 + X##_f1 = 0; \ 83 + }))) 89 84 90 85 #define _FP_FRAC_ADDI_2(X,I) \ 91 86 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
+1 -1
include/math-emu/soft-fp.h
··· 138 138 _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ 139 139 } while (0) 140 140 141 - #define _FP_ROUND_ZERO(wc, X) 0 141 + #define _FP_ROUND_ZERO(wc, X) (void)0 142 142 143 143 #define _FP_ROUND_PINF(wc, X) \ 144 144 do { \
+2
tools/include/asm/barrier.h
··· 24 24 #include "../../arch/ia64/include/asm/barrier.h" 25 25 #elif defined(__xtensa__) 26 26 #include "../../arch/xtensa/include/asm/barrier.h" 27 + #elif defined(__nds32__) 28 + #include "../../arch/nds32/include/asm/barrier.h" 27 29 #else 28 30 #include <asm-generic/barrier.h> 29 31 #endif
+1
tools/perf/arch/nds32/Build
··· 1 + libperf-y += util/
+1
tools/perf/arch/nds32/util/Build
··· 1 + libperf-y += header.o
+29
tools/perf/arch/nds32/util/header.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2005-2017 Andes Technology Corporation 3 + 4 + #include <stdio.h> 5 + #include <stdlib.h> 6 + #include <api/fs/fs.h> 7 + #include "header.h" 8 + 9 + #define STR_LEN 1024 10 + 11 + char *get_cpuid_str(struct perf_pmu *pmu) 12 + { 13 + /* In nds32, we only have one cpu */ 14 + char *buf = NULL; 15 + struct cpu_map *cpus; 16 + const char *sysfs = sysfs__mountpoint(); 17 + 18 + if (!sysfs || !pmu || !pmu->cpus) 19 + return NULL; 20 + 21 + buf = malloc(STR_LEN); 22 + if (!buf) 23 + return NULL; 24 + 25 + cpus = cpu_map__get(pmu->cpus); 26 + sprintf(buf, "0x%x", cpus->nr - 1); 27 + cpu_map__put(cpus); 28 + return buf; 29 + }
+15
tools/perf/pmu-events/arch/nds32/mapfile.csv
··· 1 + # Format: 2 + # MIDR,Version,JSON/file/pathname,Type 3 + # 4 + # where 5 + # MIDR Processor version 6 + # Variant[23:20] and Revision [3:0] should be zero. 7 + # Version could be used to track version of JSON file 8 + # but currently unused. 9 + # JSON/file/pathname is the path to JSON file, relative 10 + # to tools/perf/pmu-events/arch/nds32/. 11 + # Type is core, uncore etc 12 + # 13 + # 14 + #Family-model,Version,Filename,EventType 15 + 0x0,v3,n13,core
+290
tools/perf/pmu-events/arch/nds32/n13/atcpmu.json
··· 1 + [ 2 + { 3 + "PublicDescription": "Conditional branch", 4 + "EventCode": "0x102", 5 + "EventName": "cond_br", 6 + "BriefDescription": "V3 Conditional branch" 7 + }, 8 + { 9 + "PublicDescription": "Taken conditional branches", 10 + "EventCode": "0x103", 11 + "EventName": "taken_cond_br", 12 + "BriefDescription": "V3 Taken Conditional branch" 13 + }, 14 + { 15 + "PublicDescription": "Prefetch Instruction", 16 + "EventCode": "0x104", 17 + "EventName": "prefetch_inst", 18 + "BriefDescription": "V3 Prefetch Instruction" 19 + }, 20 + { 21 + "PublicDescription": "RET Inst", 22 + "EventCode": "0x105", 23 + "EventName": "ret_inst", 24 + "BriefDescription": "V3 RET Inst" 25 + }, 26 + { 27 + "PublicDescription": "JR(non-RET) instructions", 28 + "EventCode": "0x106", 29 + "EventName": "jr_inst", 30 + "BriefDescription": "V3 JR(non-RET) instructions" 31 + }, 32 + { 33 + "PublicDescription": "JAL/JRAL instructions", 34 + "EventCode": "0x107", 35 + "EventName": "jal_jral_inst", 36 + "BriefDescription": "V3 JAL/JRAL instructions" 37 + }, 38 + { 39 + "PublicDescription": "NOP instructions", 40 + "EventCode": "0x108", 41 + "EventName": "nop_inst", 42 + "BriefDescription": "V3 NOP instructions" 43 + }, 44 + { 45 + "PublicDescription": "SCW instructions", 46 + "EventCode": "0x109", 47 + "EventName": "scw_inst", 48 + "BriefDescription": "V3 SCW instructions" 49 + }, 50 + { 51 + "PublicDescription": "ISB/DSB instructions", 52 + "EventCode": "0x10a", 53 + "EventName": "isb_dsb_inst", 54 + "BriefDescription": "V3 ISB/DSB instructions" 55 + }, 56 + { 57 + "PublicDescription": "CCTL instructions", 58 + "EventCode": "0x10b", 59 + "EventName": "cctl_inst", 60 + "BriefDescription": "V3 CCTL instructions" 61 + }, 62 + { 63 + "PublicDescription": "Taken Interrupts", 64 + "EventCode": "0x10c", 65 + "EventName": "taken_interrupts", 66 + "BriefDescription": "V3 Taken Interrupts" 67 + }, 68 + { 69 + "PublicDescription": "Loads Completed", 70 + "EventCode": "0x10d", 71 + "EventName": 
"load_completed", 72 + "BriefDescription": "V3 Loads Completed" 73 + }, 74 + { 75 + "PublicDescription": "uITLB accesses", 76 + "EventCode": "0x10e", 77 + "EventName": "uitlb_access", 78 + "BriefDescription": "V3 uITLB accesses" 79 + }, 80 + { 81 + "PublicDescription": "uDTLB accesses", 82 + "EventCode": "0x10f", 83 + "EventName": "udtlb_access", 84 + "BriefDescription": "V3 uDTLB accesses" 85 + }, 86 + { 87 + "PublicDescription": "MTLB accesses", 88 + "EventCode": "0x110", 89 + "EventName": "mtlb_access", 90 + "BriefDescription": "V3 MTLB accesses" 91 + }, 92 + { 93 + "PublicDescription": "DATA_DEPENDENCY_STALL_CYCLES", 94 + "EventCode": "0x112", 95 + "EventName": "data_dependency_stall", 96 + "BriefDescription": "V3 DATA_DEPENDENCY_STALL_CYCLES" 97 + }, 98 + { 99 + "PublicDescription": "DATA_CACHE_MISS_STALL_CYCLES", 100 + "EventCode": "0x113", 101 + "EventName": "dcache_miss_stall", 102 + "BriefDescription": "V3 DATA_CACHE_MISS_STALL_CYCLES" 103 + }, 104 + { 105 + "PublicDescription": "ILM access", 106 + "EventCode": "0x118", 107 + "EventName": "ilm_access", 108 + "BriefDescription": "V3 ILM accesses" 109 + }, 110 + { 111 + "PublicDescription": "LSU BIU CYCLES", 112 + "EventCode": "0x119", 113 + "EventName": "lsu_biu_cycles", 114 + "BriefDescription": "V3 LSU BIU CYCLES" 115 + }, 116 + { 117 + "PublicDescription": "HPTWK BIU CYCLES", 118 + "EventCode": "0x11a", 119 + "EventName": "hptwk_biu_cycles", 120 + "BriefDescription": "V3 HPTWK BIU CYCLES" 121 + }, 122 + { 123 + "PublicDescription": "DMA BIU CYCLES", 124 + "EventCode": "0x11b", 125 + "EventName": "dma_biu_cycles", 126 + "BriefDescription": "V3 DMA BIU CYCLES" 127 + }, 128 + { 129 + "PublicDescription": "CODE CACHE FILL BIU CYCLES", 130 + "EventCode": "0x11c", 131 + "EventName": "icache_fill_biu_cycles", 132 + "BriefDescription": "V3 CODE CACHE FILL BIU CYCLES" 133 + }, 134 + { 135 + "PublicDescription": "LEGAL UNALIGN DCACHE ACCESS", 136 + "EventCode": "0x11d", 137 + "EventName": 
"legal_unalined_dcache_access", 138 + "BriefDescription": "V3 LEGAL UNALIGN DCACHE ACCESS" 139 + }, 140 + { 141 + "PublicDescription": "PUSH25 instructions", 142 + "EventCode": "0x11e", 143 + "EventName": "push25_inst", 144 + "BriefDescription": "V3 PUSH25 instructions" 145 + }, 146 + { 147 + "PublicDescription": "SYSCALL instructions", 148 + "EventCode": "0x11f", 149 + "EventName": "syscall_inst", 150 + "BriefDescription": "V3 SYSCALL instructions" 151 + }, 152 + { 153 + "PublicDescription": "conditional branch miss", 154 + "EventCode": "0x202", 155 + "EventName": "cond_br_miss", 156 + "BriefDescription": "V3 conditional branch miss" 157 + }, 158 + { 159 + "PublicDescription": "taken conditional branch miss", 160 + "EventCode": "0x203", 161 + "EventName": "taken_cond_br_miss", 162 + "BriefDescription": "V3 taken conditional branch miss" 163 + }, 164 + { 165 + "PublicDescription": "Prefetch Instructions with cache hit", 166 + "EventCode": "0x204", 167 + "EventName": "prefetch_icache_hit", 168 + "BriefDescription": "V3 Prefetch Instructions with cache hit" 169 + }, 170 + { 171 + "PublicDescription": "RET mispredict", 172 + "EventCode": "0x205", 173 + "EventName": "ret_mispredict", 174 + "BriefDescription": "V3 RET mispredict" 175 + }, 176 + { 177 + "PublicDescription": "Immediate J instructions", 178 + "EventCode": "0x206", 179 + "EventName": "imm_j_inst", 180 + "BriefDescription": "V3 Immediate J instructions" 181 + }, 182 + { 183 + "PublicDescription": "Multiply instructions", 184 + "EventCode": "0x207", 185 + "EventName": "mul_inst", 186 + "BriefDescription": "V3 Multiply instructions" 187 + }, 188 + { 189 + "PublicDescription": "16 bits instructions", 190 + "EventCode": "0x208", 191 + "EventName": "sixteen_bits_inst", 192 + "BriefDescription": "V3 16 bits instructions" 193 + }, 194 + { 195 + "PublicDescription": "Failed SCW instructions", 196 + "EventCode": "0x209", 197 + "EventName": "fail_scw_inst", 198 + "BriefDescription": "V3 Failed SCW instructions" 199 + 
}, 200 + { 201 + "PublicDescription": "ld-after-st conflict replays", 202 + "EventCode": "0x20a", 203 + "EventName": "ld_af_st_conflict", 204 + "BriefDescription": "V3 ld-after-st conflict replays" 205 + }, 206 + { 207 + "PublicDescription": "Exception taken", 208 + "EventCode": "0x20c", 209 + "EventName": "exception_taken", 210 + "BriefDescription": "V3 Exception taken" 211 + }, 212 + { 213 + "PublicDescription": "Stores completed", 214 + "EventCode": "0x20d", 215 + "EventName": "store_completed", 216 + "BriefDescription": "V3 Stores completed" 217 + }, 218 + { 219 + "PublicDescription": "uITLB miss", 220 + "EventCode": "0x20e", 221 + "EventName": "uitlb_miss", 222 + "BriefDescription": "V3 uITLB miss" 223 + }, 224 + { 225 + "PublicDescription": "uDTLB miss", 226 + "EventCode": "0x20f", 227 + "EventName": "udtlb_miss", 228 + "BriefDescription": "V3 uDTLB miss" 229 + }, 230 + { 231 + "PublicDescription": "MTLB miss", 232 + "EventCode": "0x210", 233 + "EventName": "mtlb_miss", 234 + "BriefDescription": "V3 MTLB miss" 235 + }, 236 + { 237 + "PublicDescription": "Empty instructions queue stall cycles", 238 + "EventCode": "0x212", 239 + "EventName": "empty_inst_q_stall", 240 + "BriefDescription": "V3 Empty instructions queue stall cycles" 241 + }, 242 + { 243 + "PublicDescription": "Data write back", 244 + "EventCode": "0x213", 245 + "EventName": "data_wb", 246 + "BriefDescription": "V3 Data write back" 247 + }, 248 + { 249 + "PublicDescription": "DLM access", 250 + "EventCode": "0x218", 251 + "EventName": "dlm_access", 252 + "BriefDescription": "V3 DLM access" 253 + }, 254 + { 255 + "PublicDescription": "LSU BIU request", 256 + "EventCode": "0x219", 257 + "EventName": "lsu_biu_req", 258 + "BriefDescription": "V3 LSU BIU request" 259 + }, 260 + { 261 + "PublicDescription": "HPTWK BIU request", 262 + "EventCode": "0x21a", 263 + "EventName": "hptwk_biu_req", 264 + "BriefDescription": "V3 HPTWK BIU request" 265 + }, 266 + { 267 + "PublicDescription": "DMA BIU request", 
268 + "EventCode": "0x21b", 269 + "EventName": "dma_biu_req", 270 + "BriefDescription": "V3 DMA BIU request" 271 + }, 272 + { 273 + "PublicDescription": "Icache fill BIU request", 274 + "EventCode": "0x21c", 275 + "EventName": "icache_fill_biu_req", 276 + "BriefDescription": "V3 Icache fill BIU request" 277 + }, 278 + { 279 + "PublicDescription": "External events", 280 + "EventCode": "0x21d", 281 + "EventName": "external_events", 282 + "BriefDescription": "V3 External events" 283 + }, 284 + { 285 + "PublicDescription": "POP25 instructions", 286 + "EventCode": "0x21e", 287 + "EventName": "pop25_inst", 288 + "BriefDescription": "V3 POP25 instructions" 289 + } 290 + ]