Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: add support for kernel mode NEON in interrupt context

This patch modifies kernel_neon_begin() and kernel_neon_end() so that
they may be called from any context. To address the case where only
a couple of registers are needed, kernel_neon_begin_partial(u32) is
introduced, which takes as its parameter the number 'n' of bottom NEON
q-registers required. To mark the end of such a partial section, the
regular kernel_neon_end() should be used.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

+109 -15
+15
arch/arm64/include/asm/fpsimd.h
··· 41 41 unsigned int cpu; 42 42 }; 43 43 44 + /* 45 + * Struct for stacking the bottom 'n' FP/SIMD registers. 46 + */ 47 + struct fpsimd_partial_state { 48 + u32 fpsr; 49 + u32 fpcr; 50 + u32 num_regs; 51 + __uint128_t vregs[32]; 52 + }; 53 + 54 + 44 55 #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 45 56 /* Masks for extracting the FPSR and FPCR from the FPSCR */ 46 57 #define VFP_FPSCR_STAT_MASK 0xf800009f ··· 76 65 extern void fpsimd_update_current_state(struct fpsimd_state *state); 77 66 78 67 extern void fpsimd_flush_task_state(struct task_struct *target); 68 + 69 + extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, 70 + u32 num_regs); 71 + extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); 79 72 80 73 #endif 81 74
+35
arch/arm64/include/asm/fpsimdmacros.h
··· 62 62 ldr w\tmpnr, [\state, #16 * 2 + 4] 63 63 msr fpcr, x\tmpnr 64 64 .endm 65 + 66 + .altmacro 67 + .macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 68 + mrs x\tmpnr1, fpsr 69 + str w\numnr, [\state, #8] 70 + mrs x\tmpnr2, fpcr 71 + stp w\tmpnr1, w\tmpnr2, [\state] 72 + adr x\tmpnr1, 0f 73 + add \state, \state, x\numnr, lsl #4 74 + sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 75 + br x\tmpnr1 76 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 77 + .irp qb, %(qa + 1) 78 + stp q\qa, q\qb, [\state, # -16 * \qa - 16] 79 + .endr 80 + .endr 81 + 0: 82 + .endm 83 + 84 + .macro fpsimd_restore_partial state, tmpnr1, tmpnr2 85 + ldp w\tmpnr1, w\tmpnr2, [\state] 86 + msr fpsr, x\tmpnr1 87 + msr fpcr, x\tmpnr2 88 + adr x\tmpnr1, 0f 89 + ldr w\tmpnr2, [\state, #8] 90 + add \state, \state, x\tmpnr2, lsl #4 91 + sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 92 + br x\tmpnr1 93 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 94 + .irp qb, %(qa + 1) 95 + ldp q\qa, q\qb, [\state, # -16 * \qa - 16] 96 + .endr 97 + .endr 98 + 0: 99 + .endm
+5 -1
arch/arm64/include/asm/neon.h
··· 8 8 * published by the Free Software Foundation. 9 9 */ 10 10 11 + #include <linux/types.h> 12 + 11 13 #define cpu_has_neon() (1) 12 14 13 - void kernel_neon_begin(void); 15 + #define kernel_neon_begin() kernel_neon_begin_partial(32) 16 + 17 + void kernel_neon_begin_partial(u32 num_regs); 14 18 void kernel_neon_end(void);
+24
arch/arm64/kernel/entry-fpsimd.S
··· 41 41 fpsimd_restore x0, 8 42 42 ret 43 43 ENDPROC(fpsimd_load_state) 44 + 45 + #ifdef CONFIG_KERNEL_MODE_NEON 46 + 47 + /* 48 + * Save the bottom n FP registers. 49 + * 50 + * x0 - pointer to struct fpsimd_partial_state 51 + */ 52 + ENTRY(fpsimd_save_partial_state) 53 + fpsimd_save_partial x0, 1, 8, 9 54 + ret 55 + ENDPROC(fpsimd_save_partial_state) 56 + 57 + /* 58 + * Load the bottom n FP registers. 59 + * 60 + * x0 - pointer to struct fpsimd_partial_state 61 + */ 62 + ENTRY(fpsimd_load_partial_state) 63 + fpsimd_restore_partial x0, 8, 9 64 + ret 65 + ENDPROC(fpsimd_load_partial_state) 66 + 67 + #endif
+30 -14
arch/arm64/kernel/fpsimd.c
··· 218 218 219 219 #ifdef CONFIG_KERNEL_MODE_NEON 220 220 221 + static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); 222 + static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); 223 + 221 224 /* 222 225 * Kernel-side NEON support functions 223 226 */ 224 - void kernel_neon_begin(void) 227 + void kernel_neon_begin_partial(u32 num_regs) 225 228 { 226 - /* Avoid using the NEON in interrupt context */ 227 - BUG_ON(in_interrupt()); 228 - preempt_disable(); 229 + if (in_interrupt()) { 230 + struct fpsimd_partial_state *s = this_cpu_ptr( 231 + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); 229 232 230 - /* 231 - * Save the userland FPSIMD state if we have one and if we haven't done 232 - * so already. Clear fpsimd_last_state to indicate that there is no 233 - * longer userland FPSIMD state in the registers. 234 - */ 235 - if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) 236 - fpsimd_save_state(&current->thread.fpsimd_state); 237 - this_cpu_write(fpsimd_last_state, NULL); 233 + BUG_ON(num_regs > 32); 234 + fpsimd_save_partial_state(s, roundup(num_regs, 2)); 235 + } else { 236 + /* 237 + * Save the userland FPSIMD state if we have one and if we 238 + * haven't done so already. Clear fpsimd_last_state to indicate 239 + * that there is no longer userland FPSIMD state in the 240 + * registers. 241 + */ 242 + preempt_disable(); 243 + if (current->mm && 244 + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) 245 + fpsimd_save_state(&current->thread.fpsimd_state); 246 + this_cpu_write(fpsimd_last_state, NULL); 247 + } 238 248 } 239 - EXPORT_SYMBOL(kernel_neon_begin); 249 + EXPORT_SYMBOL(kernel_neon_begin_partial); 240 250 241 251 void kernel_neon_end(void) 242 252 { 243 - preempt_enable(); 253 + if (in_interrupt()) { 254 + struct fpsimd_partial_state *s = this_cpu_ptr( 255 + in_irq() ? 
&hardirq_fpsimdstate : &softirq_fpsimdstate); 256 + fpsimd_load_partial_state(s); 257 + } else { 258 + preempt_enable(); 259 + } 244 260 } 245 261 EXPORT_SYMBOL(kernel_neon_end); 246 262