Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: defer reloading a task's FPSIMD state to userland resume

If a task gets scheduled out and back in again and nothing has touched
its FPSIMD state in the meantime, there is really no reason to reload
it from memory. Similarly, repeated calls to kernel_neon_begin() and
kernel_neon_end() will preserve and restore the FPSIMD state every time.

This patch defers the FPSIMD state restore to the last possible moment,
i.e., right before the task returns to userland. If a task does not return to
userland at all (for any reason), the existing FPSIMD state is preserved
and may be reused by the owning task if it gets scheduled in again on the
same CPU.

This patch adds two more functions to abstract away from straight FPSIMD
register file saves and restores:
- fpsimd_restore_current_state -> ensure current's FPSIMD state is loaded
- fpsimd_flush_task_state -> invalidate live copies of a task's FPSIMD state

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

+143 -18
+5
arch/arm64/include/asm/fpsimd.h
··· 37 37 u32 fpcr; 38 38 }; 39 39 }; 40 + /* the id of the last cpu to have restored this state */ 41 + unsigned int cpu; 40 42 }; 41 43 42 44 #if defined(__KERNEL__) && defined(CONFIG_COMPAT) ··· 61 59 extern void fpsimd_flush_thread(void); 62 60 63 61 extern void fpsimd_preserve_current_state(void); 62 + extern void fpsimd_restore_current_state(void); 64 63 extern void fpsimd_update_current_state(struct fpsimd_state *state); 64 + 65 + extern void fpsimd_flush_task_state(struct task_struct *target); 65 66 66 67 #endif 67 68
+3 -1
arch/arm64/include/asm/thread_info.h
··· 100 100 #define TIF_SIGPENDING 0 101 101 #define TIF_NEED_RESCHED 1 102 102 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ 103 + #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ 103 104 #define TIF_SYSCALL_TRACE 8 104 105 #define TIF_POLLING_NRFLAG 16 105 106 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ ··· 113 112 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 114 113 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) 115 114 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 115 + #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) 116 116 #define _TIF_32BIT (1 << TIF_32BIT) 117 117 118 118 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ 119 - _TIF_NOTIFY_RESUME) 119 + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) 120 120 121 121 #endif /* __KERNEL__ */ 122 122 #endif /* __ASM_THREAD_INFO_H */
+1 -1
arch/arm64/kernel/entry.S
··· 576 576 str x0, [sp, #S_X0] // returned x0 577 577 work_pending: 578 578 tbnz x1, #TIF_NEED_RESCHED, work_resched 579 - /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ 579 + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ 580 580 ldr x2, [sp, #S_PSTATE] 581 581 mov x0, sp // 'regs' 582 582 tst x2, #PSR_MODE_MASK // user mode regs?
+128 -16
arch/arm64/kernel/fpsimd.c
··· 35 35 #define FPEXC_IDF (1 << 7) 36 36 37 37 /* 38 + * In order to reduce the number of times the FPSIMD state is needlessly saved 39 + * and restored, we need to keep track of two things: 40 + * (a) for each task, we need to remember which CPU was the last one to have 41 + * the task's FPSIMD state loaded into its FPSIMD registers; 42 + * (b) for each CPU, we need to remember which task's userland FPSIMD state has 43 + * been loaded into its FPSIMD registers most recently, or whether it has 44 + * been used to perform kernel mode NEON in the meantime. 45 + * 46 + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to 47 + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), 48 + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the 49 + * address of the userland FPSIMD state of the task that was loaded onto the CPU 50 + * the most recently, or NULL if kernel mode NEON has been performed after that. 51 + * 52 + * With this in place, we no longer have to restore the next FPSIMD state right 53 + * when switching between tasks. Instead, we can defer this check to userland 54 + * resume, at which time we verify whether the CPU's fpsimd_last_state and the 55 + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we 56 + * can omit the FPSIMD restore. 57 + * 58 + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to 59 + * indicate whether or not the userland FPSIMD state of the current task is 60 + * present in the registers. The flag is set unless the FPSIMD registers of this 61 + * CPU currently contain the most recent userland FPSIMD state of the current 62 + * task. 
63 + * 64 + * For a certain task, the sequence may look something like this: 65 + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field 66 + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu 67 + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is 68 + * cleared, otherwise it is set; 69 + * 70 + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's 71 + * userland FPSIMD state is copied from memory to the registers, the task's 72 + * fpsimd_state.cpu field is set to the id of the current CPU, the current 73 + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the 74 + * TIF_FOREIGN_FPSTATE flag is cleared; 75 + * 76 + * - the task executes an ordinary syscall; upon return to userland, the 77 + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is 78 + * restored; 79 + * 80 + * - the task executes a syscall which executes some NEON instructions; this is 81 + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD 82 + * register contents to memory, clears the fpsimd_last_state per-cpu variable 83 + * and sets the TIF_FOREIGN_FPSTATE flag; 84 + * 85 + * - the task gets preempted after kernel_neon_end() is called; as we have not 86 + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so 87 + * whatever is in the FPSIMD registers is not saved to memory, but discarded. 88 + */ 89 + static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); 90 + 91 + /* 38 92 * Trapped FP/ASIMD access. 39 93 */ 40 94 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) ··· 126 72 127 73 void fpsimd_thread_switch(struct task_struct *next) 128 74 { 129 - /* check if not kernel threads */ 130 - if (current->mm) 75 + /* 76 + * Save the current FPSIMD state to memory, but only if whatever is in 77 + * the registers is in fact the most recent userland FPSIMD state of 78 + * 'current'. 
79 + */ 80 + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) 131 81 fpsimd_save_state(&current->thread.fpsimd_state); 132 - if (next->mm) 133 - fpsimd_load_state(&next->thread.fpsimd_state); 82 + 83 + if (next->mm) { 84 + /* 85 + * If we are switching to a task whose most recent userland 86 + * FPSIMD state is already in the registers of *this* cpu, 87 + * we can skip loading the state from memory. Otherwise, set 88 + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded 89 + * upon the next return to userland. 90 + */ 91 + struct fpsimd_state *st = &next->thread.fpsimd_state; 92 + 93 + if (__this_cpu_read(fpsimd_last_state) == st 94 + && st->cpu == smp_processor_id()) 95 + clear_ti_thread_flag(task_thread_info(next), 96 + TIF_FOREIGN_FPSTATE); 97 + else 98 + set_ti_thread_flag(task_thread_info(next), 99 + TIF_FOREIGN_FPSTATE); 100 + } 134 101 } 135 102 136 103 void fpsimd_flush_thread(void) 137 104 { 138 - preempt_disable(); 139 105 memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); 140 - fpsimd_load_state(&current->thread.fpsimd_state); 141 - preempt_enable(); 106 + set_thread_flag(TIF_FOREIGN_FPSTATE); 142 107 } 143 108 144 109 /* 145 - * Save the userland FPSIMD state of 'current' to memory 110 + * Save the userland FPSIMD state of 'current' to memory, but only if the state 111 + * currently held in the registers does in fact belong to 'current' 146 112 */ 147 113 void fpsimd_preserve_current_state(void) 148 114 { 149 115 preempt_disable(); 150 - fpsimd_save_state(&current->thread.fpsimd_state); 116 + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) 117 + fpsimd_save_state(&current->thread.fpsimd_state); 151 118 preempt_enable(); 152 119 } 153 120 154 121 /* 155 - * Load an updated userland FPSIMD state for 'current' from memory 122 + * Load the userland FPSIMD state of 'current' from memory, but only if the 123 + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD 124 + * state of 'current' 125 + */ 126 + 
void fpsimd_restore_current_state(void) 127 + { 128 + preempt_disable(); 129 + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { 130 + struct fpsimd_state *st = &current->thread.fpsimd_state; 131 + 132 + fpsimd_load_state(st); 133 + this_cpu_write(fpsimd_last_state, st); 134 + st->cpu = smp_processor_id(); 135 + } 136 + preempt_enable(); 137 + } 138 + 139 + /* 140 + * Load an updated userland FPSIMD state for 'current' from memory and set the 141 + * flag that indicates that the FPSIMD register contents are the most recent 142 + * FPSIMD state of 'current' 156 143 */ 157 144 void fpsimd_update_current_state(struct fpsimd_state *state) 158 145 { 159 146 preempt_disable(); 160 147 fpsimd_load_state(state); 148 + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { 149 + struct fpsimd_state *st = &current->thread.fpsimd_state; 150 + 151 + this_cpu_write(fpsimd_last_state, st); 152 + st->cpu = smp_processor_id(); 153 + } 161 154 preempt_enable(); 155 + } 156 + 157 + /* 158 + * Invalidate live CPU copies of task t's FPSIMD state 159 + */ 160 + void fpsimd_flush_task_state(struct task_struct *t) 161 + { 162 + t->thread.fpsimd_state.cpu = NR_CPUS; 162 163 } 163 164 164 165 #ifdef CONFIG_KERNEL_MODE_NEON ··· 227 118 BUG_ON(in_interrupt()); 228 119 preempt_disable(); 229 120 230 - if (current->mm) 121 + /* 122 + * Save the userland FPSIMD state if we have one and if we haven't done 123 + * so already. Clear fpsimd_last_state to indicate that there is no 124 + * longer userland FPSIMD state in the registers. 
125 + */ 126 + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) 231 127 fpsimd_save_state(&current->thread.fpsimd_state); 128 + this_cpu_write(fpsimd_last_state, NULL); 232 129 } 233 130 EXPORT_SYMBOL(kernel_neon_begin); 234 131 235 132 void kernel_neon_end(void) 236 133 { 237 - if (current->mm) 238 - fpsimd_load_state(&current->thread.fpsimd_state); 239 - 240 134 preempt_enable(); 241 135 } 242 136 EXPORT_SYMBOL(kernel_neon_end); ··· 252 140 { 253 141 switch (cmd) { 254 142 case CPU_PM_ENTER: 255 - if (current->mm) 143 + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) 256 144 fpsimd_save_state(&current->thread.fpsimd_state); 257 145 break; 258 146 case CPU_PM_EXIT: 259 147 if (current->mm) 260 - fpsimd_load_state(&current->thread.fpsimd_state); 148 + set_thread_flag(TIF_FOREIGN_FPSTATE); 261 149 break; 262 150 case CPU_PM_ENTER_FAILED: 263 151 default:
+2
arch/arm64/kernel/ptrace.c
··· 517 517 return ret; 518 518 519 519 target->thread.fpsimd_state.user_fpsimd = newstate; 520 + fpsimd_flush_task_state(target); 520 521 return ret; 521 522 } 522 523 ··· 765 764 uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; 766 765 } 767 766 767 + fpsimd_flush_task_state(target); 768 768 return ret; 769 769 } 770 770
+4
arch/arm64/kernel/signal.c
··· 413 413 clear_thread_flag(TIF_NOTIFY_RESUME); 414 414 tracehook_notify_resume(regs); 415 415 } 416 + 417 + if (thread_flags & _TIF_FOREIGN_FPSTATE) 418 + fpsimd_restore_current_state(); 419 + 416 420 }