Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: Floating point and SIMD

This patch adds support for saving and restoring the FP/ASIMD register
bank during context switch, and for FP exception handling, which raises
SIGFPE. There are 32 128-bit registers, and context switching is
currently done non-lazily. Benchmarks on real hardware are required
before implementing lazy FP state saving/restoring.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

+250
+64
arch/arm64/include/asm/fpsimd.h
··· 1 + /* 2 + * Copyright (C) 2012 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + #ifndef __ASM_FP_H 17 + #define __ASM_FP_H 18 + 19 + #include <asm/ptrace.h> 20 + 21 + #ifndef __ASSEMBLY__ 22 + 23 + /* 24 + * FP/SIMD storage area has: 25 + * - FPSR and FPCR 26 + * - 32 128-bit data registers 27 + * 28 + * Note that user_fp forms a prefix of this structure, which is relied 29 + * upon in the ptrace FP/SIMD accessors. struct user_fpsimd_state must 30 + * form a prefix of struct fpsimd_state. 31 + */ 32 + struct fpsimd_state { 33 + union { 34 + struct user_fpsimd_state user_fpsimd; 35 + struct { 36 + __uint128_t vregs[32]; 37 + u32 fpsr; 38 + u32 fpcr; 39 + }; 40 + }; 41 + }; 42 + 43 + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 44 + /* Masks for extracting the FPSR and FPCR from the FPSCR */ 45 + #define VFP_FPSCR_STAT_MASK 0xf800009f 46 + #define VFP_FPSCR_CTRL_MASK 0x07f79f00 47 + /* 48 + * The VFP state has 32x64-bit registers and a single 32-bit 49 + * control/status register. 50 + */ 51 + #define VFP_STATE_SIZE ((32 * 8) + 4) 52 + #endif 53 + 54 + struct task_struct; 55 + 56 + extern void fpsimd_save_state(struct fpsimd_state *state); 57 + extern void fpsimd_load_state(struct fpsimd_state *state); 58 + 59 + extern void fpsimd_thread_switch(struct task_struct *next); 60 + extern void fpsimd_flush_thread(void); 61 + 62 + #endif 63 + 64 + #endif
+80
arch/arm64/kernel/entry-fpsimd.S
/*
 * FP/SIMD state saving and restoring
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/linkage.h>

#include <asm/assembler.h>

/*
 * fpsimd_save_state: write the FP/SIMD register bank to memory.
 *
 * x0 - pointer to struct fpsimd_state
 *
 * q0-q31 go to 32 consecutive 16-byte slots starting at x0, followed
 * by FPSR and FPCR as two 32-bit words.  x8 is used as scratch, and x0
 * is left advanced by 16 * 30 bytes by the final pair store.
 */
ENTRY(fpsimd_save_state)
	stp	q0, q1, [x0, #16 * 0]
	stp	q2, q3, [x0, #16 * 2]
	stp	q4, q5, [x0, #16 * 4]
	stp	q6, q7, [x0, #16 * 6]
	stp	q8, q9, [x0, #16 * 8]
	stp	q10, q11, [x0, #16 * 10]
	stp	q12, q13, [x0, #16 * 12]
	stp	q14, q15, [x0, #16 * 14]
	stp	q16, q17, [x0, #16 * 16]
	stp	q18, q19, [x0, #16 * 18]
	stp	q20, q21, [x0, #16 * 20]
	stp	q22, q23, [x0, #16 * 22]
	stp	q24, q25, [x0, #16 * 24]
	stp	q26, q27, [x0, #16 * 26]
	stp	q28, q29, [x0, #16 * 28]
	stp	q30, q31, [x0, #16 * 30]!	// writeback: x0 += 16 * 30
	mrs	x8, fpsr
	str	w8, [x0, #16 * 2]		// original base + 16 * 32
	mrs	x8, fpcr
	str	w8, [x0, #16 * 2 + 4]		// original base + 16 * 32 + 4
	ret
ENDPROC(fpsimd_save_state)

/*
 * fpsimd_load_state: read the FP/SIMD register bank back from memory.
 *
 * x0 - pointer to struct fpsimd_state
 *
 * Mirror image of fpsimd_save_state: q0-q31 are read from the 32
 * 16-byte slots at x0, then FPSR and FPCR are written from the two
 * 32-bit words that follow.  x8 and x9 are used as scratch, and x0 is
 * left advanced by 16 * 30 bytes by the final pair load.
 */
ENTRY(fpsimd_load_state)
	ldp	q0, q1, [x0, #16 * 0]
	ldp	q2, q3, [x0, #16 * 2]
	ldp	q4, q5, [x0, #16 * 4]
	ldp	q6, q7, [x0, #16 * 6]
	ldp	q8, q9, [x0, #16 * 8]
	ldp	q10, q11, [x0, #16 * 10]
	ldp	q12, q13, [x0, #16 * 12]
	ldp	q14, q15, [x0, #16 * 14]
	ldp	q16, q17, [x0, #16 * 16]
	ldp	q18, q19, [x0, #16 * 18]
	ldp	q20, q21, [x0, #16 * 20]
	ldp	q22, q23, [x0, #16 * 22]
	ldp	q24, q25, [x0, #16 * 24]
	ldp	q26, q27, [x0, #16 * 26]
	ldp	q28, q29, [x0, #16 * 28]
	ldp	q30, q31, [x0, #16 * 30]!	// writeback: x0 += 16 * 30
	ldr	w8, [x0, #16 * 2]		// original base + 16 * 32
	msr	fpsr, x8
	ldr	w9, [x0, #16 * 2 + 4]		// original base + 16 * 32 + 4
	msr	fpcr, x9
	ret
ENDPROC(fpsimd_load_state)
+106
arch/arm64/kernel/fpsimd.c
··· 1 + /* 2 + * FP/SIMD context switching and fault handling 3 + * 4 + * Copyright (C) 2012 ARM Ltd. 5 + * Author: Catalin Marinas <catalin.marinas@arm.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 + */ 19 + 20 + #include <linux/kernel.h> 21 + #include <linux/init.h> 22 + #include <linux/sched.h> 23 + #include <linux/signal.h> 24 + 25 + #include <asm/fpsimd.h> 26 + #include <asm/cputype.h> 27 + 28 + #define FPEXC_IOF (1 << 0) 29 + #define FPEXC_DZF (1 << 1) 30 + #define FPEXC_OFF (1 << 2) 31 + #define FPEXC_UFF (1 << 3) 32 + #define FPEXC_IXF (1 << 4) 33 + #define FPEXC_IDF (1 << 7) 34 + 35 + /* 36 + * Trapped FP/ASIMD access. 37 + */ 38 + void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) 39 + { 40 + /* TODO: implement lazy context saving/restoring */ 41 + WARN_ON(1); 42 + } 43 + 44 + /* 45 + * Raise a SIGFPE for the current process. 
46 + */ 47 + void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) 48 + { 49 + siginfo_t info; 50 + unsigned int si_code = 0; 51 + 52 + if (esr & FPEXC_IOF) 53 + si_code = FPE_FLTINV; 54 + else if (esr & FPEXC_DZF) 55 + si_code = FPE_FLTDIV; 56 + else if (esr & FPEXC_OFF) 57 + si_code = FPE_FLTOVF; 58 + else if (esr & FPEXC_UFF) 59 + si_code = FPE_FLTUND; 60 + else if (esr & FPEXC_IXF) 61 + si_code = FPE_FLTRES; 62 + 63 + memset(&info, 0, sizeof(info)); 64 + info.si_signo = SIGFPE; 65 + info.si_code = si_code; 66 + info.si_addr = (void __user *)instruction_pointer(regs); 67 + 68 + send_sig_info(SIGFPE, &info, current); 69 + } 70 + 71 + void fpsimd_thread_switch(struct task_struct *next) 72 + { 73 + /* check if not kernel threads */ 74 + if (current->mm) 75 + fpsimd_save_state(&current->thread.fpsimd_state); 76 + if (next->mm) 77 + fpsimd_load_state(&next->thread.fpsimd_state); 78 + } 79 + 80 + void fpsimd_flush_thread(void) 81 + { 82 + memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); 83 + fpsimd_load_state(&current->thread.fpsimd_state); 84 + } 85 + 86 + /* 87 + * FP/SIMD support code initialisation. 88 + */ 89 + static int __init fpsimd_init(void) 90 + { 91 + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); 92 + 93 + if (pfr & (0xf << 16)) { 94 + pr_notice("Floating-point is not implemented\n"); 95 + return 0; 96 + } 97 + elf_hwcap |= HWCAP_FP; 98 + 99 + if (pfr & (0xf << 20)) 100 + pr_notice("Advanced SIMD is not implemented\n"); 101 + else 102 + elf_hwcap |= HWCAP_ASIMD; 103 + 104 + return 0; 105 + } 106 + late_initcall(fpsimd_init);