Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[ARM] 4583/1: ARMv7: Add VFPv3 support

This patch adds support for VFPv3 (the kernel currently supports only
VFPv2). The main difference is that VFPv3 provides 32 double-precision
registers, compared to 16 in VFPv2.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Authored by Catalin Marinas and committed by Russell King
25ebee02 c98929c0

+60 -12
+6 -1
arch/arm/Kconfig
··· 951 951 952 952 config VFP 953 953 bool "VFP-format floating point maths" 954 - depends on CPU_V6 || CPU_ARM926T 954 + depends on CPU_V6 || CPU_ARM926T || CPU_V7 955 955 help 956 956 Say Y to include VFP support code in the kernel. This is needed 957 957 if your hardware includes a VFP unit. ··· 960 960 release notes and additional status information. 961 961 962 962 Say N if your target does not have VFP hardware. 963 + 964 + config VFPv3 965 + bool 966 + depends on VFP 967 + default y if CPU_V7 963 968 964 969 endmenu 965 970
+4
arch/arm/vfp/vfp.h
··· 265 265 * which returns (double)0.0. This is useful for the compare with 266 266 * zero instructions. 267 267 */ 268 + #ifdef CONFIG_VFPv3 269 + #define VFP_REG_ZERO 32 270 + #else 268 271 #define VFP_REG_ZERO 16 272 + #endif 269 273 extern u64 vfp_get_double(unsigned int reg); 270 274 extern void vfp_put_double(u64 val, unsigned int reg); 271 275
+18 -4
arch/arm/vfp/vfphw.S
··· 99 99 DBGSTR1 "save old state %p", r4 100 100 cmp r4, #0 101 101 beq no_old_VFP_process 102 + VFPFSTMIA r4, r5 @ save the working registers 102 103 VFPFMRX r5, FPSCR @ current status 103 104 tst r1, #FPEXC_EX @ is there additional state to save? 104 105 VFPFMRX r6, FPINST, NE @ FPINST (only if FPEXC.EX is set) 105 106 tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? 106 107 VFPFMRX r8, FPINST2, NE @ FPINST2 if needed (and present) 107 - VFPFSTMIA r4 @ save the working registers 108 108 stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 109 109 @ and point r4 at the word at the 110 110 @ start of the register dump ··· 114 114 DBGSTR1 "load state %p", r10 115 115 str r10, [r3, r11, lsl #2] @ update the last_VFP_context pointer 116 116 @ Load the saved state back into the VFP 117 - VFPFLDMIA r10 @ reload the working registers while 117 + VFPFLDMIA r10, r5 @ reload the working registers while 118 118 @ FPEXC is in a safe state 119 119 ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 120 120 tst r1, #FPEXC_EX @ is there additional state to restore? ··· 174 174 @ r0 - save location 175 175 @ r1 - FPEXC 176 176 DBGSTR1 "save VFP state %p", r0 177 + VFPFSTMIA r0, r2 @ save the working registers 177 178 VFPFMRX r2, FPSCR @ current status 178 179 tst r1, #FPEXC_EX @ is there additional state to save? 179 180 VFPFMRX r3, FPINST, NE @ FPINST (only if FPEXC.EX is set) 180 181 tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? 
181 182 VFPFMRX r12, FPINST2, NE @ FPINST2 if needed (and present) 182 - VFPFSTMIA r0 @ save the working registers 183 183 stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 184 184 mov pc, lr 185 185 #endif ··· 217 217 fmrrd r0, r1, d\dr 218 218 mov pc, lr 219 219 .endr 220 + #ifdef CONFIG_VFPv3 221 + @ d16 - d31 registers 222 + .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 223 + mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr 224 + mov pc, lr 225 + .endr 226 + #endif 220 227 221 - @ virtual register 16 for compare with zero 228 + @ virtual register 16 (or 32 if VFPv3) for compare with zero 222 229 mov r0, #0 223 230 mov r1, #0 224 231 mov pc, lr ··· 238 231 fmdrr d\dr, r0, r1 239 232 mov pc, lr 240 233 .endr 234 + #ifdef CONFIG_VFPv3 235 + @ d16 - d31 registers; the u64 value arrives in r0:r1, exactly as for d0 - d15 above 236 + .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 237 + mcrr p11, 3, r0, r1, c\dr @ fmdrr d\dr, r0, r1 238 + mov pc, lr 239 + .endr 240 + #endif
+3 -3
arch/arm/vfp/vfpinstr.h
··· 52 52 #define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) 53 53 54 54 #define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) 55 - #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12) 55 + #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) 56 56 #define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) 57 - #define vfp_get_dm(inst) ((inst & 0x0000000f)) 57 + #define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) 58 58 #define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) 59 - #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16) 59 + #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) 60 60 61 61 #define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) 62 62
+7 -2
include/asm-arm/fpstate.h
··· 17 17 /* 18 18 * VFP storage area has: 19 19 * - FPEXC, FPSCR, FPINST and FPINST2. 20 - * - 16 double precision data registers 21 - * - an implementation-dependant word of state for FLDMX/FSTMX 20 + * - 16 or 32 double precision data registers 21 + * - an implementation-dependant word of state for FLDMX/FSTMX (pre-ARMv6) 22 22 * 23 23 * FPEXC will always be non-zero once the VFP has been used in this process. 24 24 */ 25 25 26 26 struct vfp_hard_struct { 27 + #ifdef CONFIG_VFPv3 28 + __u64 fpregs[32]; 29 + #else 27 30 __u64 fpregs[16]; 31 + #endif 28 32 #if __LINUX_ARM_ARCH__ < 6 29 33 __u32 fpmx_state; 30 34 #endif ··· 39 35 */ 40 36 __u32 fpinst; 41 37 __u32 fpinst2; 38 + 42 39 #ifdef CONFIG_SMP 43 40 __u32 cpu; 44 41 #endif
+6
include/asm-arm/vfp.h
··· 7 7 8 8 #define FPSID cr0 9 9 #define FPSCR cr1 10 + #define MVFR1 cr6 11 + #define MVFR0 cr7 10 12 #define FPEXC cr8 11 13 #define FPINST cr9 12 14 #define FPINST2 cr10 ··· 71 69 #define FPSCR_UFC (1<<3) 72 70 #define FPSCR_IXC (1<<4) 73 71 #define FPSCR_IDC (1<<7) 72 + 73 + /* MVFR0 bits */ 74 + #define MVFR0_A_SIMD_BIT (0) 75 + #define MVFR0_A_SIMD_MASK (0xf << MVFR0_A_SIMD_BIT) 74 76 75 77 /* Bit patterns for decoding the packaged operation descriptors */ 76 78 #define VFPOPDESC_LENGTH_BIT (9)
+16 -2
include/asm-arm/vfpmacros.h
··· 15 15 .endm 16 16 17 17 @ read all the working registers back into the VFP 18 - .macro VFPFLDMIA, base 18 + .macro VFPFLDMIA, base, tmp 19 19 #if __LINUX_ARM_ARCH__ < 6 20 20 LDC p11, cr0, [\base],#33*4 @ FLDMIAX \base!, {d0-d15} 21 21 #else 22 22 LDC p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d0-d15} 23 23 #endif 24 + #ifdef CONFIG_VFPv3 25 + VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 26 + and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field 27 + cmp \tmp, #2 @ 32 x 64bit registers? 28 + ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} 29 + addne \base, \base, #32*4 @ step over unused register space 30 + #endif 24 31 .endm 25 32 26 33 @ write all the working registers out of the VFP 27 - .macro VFPFSTMIA, base 34 + .macro VFPFSTMIA, base, tmp 28 35 #if __LINUX_ARM_ARCH__ < 6 29 36 STC p11, cr0, [\base],#33*4 @ FSTMIAX \base!, {d0-d15} 30 37 #else 31 38 STC p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d0-d15} 39 + #endif 40 + #ifdef CONFIG_VFPv3 41 + VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 42 + and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field 43 + cmp \tmp, #2 @ 32 x 64bit registers? 44 + stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} 45 + addne \base, \base, #32*4 @ step over unused register space 32 46 #endif 33 47 .endm