Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack

Commit aefbab8e77eb16b5 ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")

added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, resulting
in non-negligible bloat of task_struct, and the resulting memory
overhead may impact performance on systems with many processes.

This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.

So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), which either record it in the task struct, or use it
directly to preserve the task's kernel mode FP/SIMD state when running
in softirq context. Passing the address to both functions, and checking the
addresses for consistency ensures that callers of the updated bare
begin/end API use it in a manner that is consistent with the new context
switch semantics.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

+55 -21
+2 -2
arch/arm64/include/asm/fpu.h
··· 15 15 { 16 16 BUG_ON(!in_task()); 17 17 preempt_disable(); 18 - kernel_neon_begin(); 18 + kernel_neon_begin(NULL); 19 19 } 20 20 21 21 static inline void kernel_fpu_end(void) 22 22 { 23 - kernel_neon_end(); 23 + kernel_neon_end(NULL); 24 24 preempt_enable(); 25 25 } 26 26
+2 -2
arch/arm64/include/asm/neon.h
··· 13 13 14 14 #define cpu_has_neon() system_supports_fpsimd() 15 15 16 - void kernel_neon_begin(void); 17 - void kernel_neon_end(void); 16 + void kernel_neon_begin(struct user_fpsimd_state *); 17 + void kernel_neon_end(struct user_fpsimd_state *); 18 18 19 19 #endif /* ! __ASM_NEON_H */
+6 -1
arch/arm64/include/asm/processor.h
··· 172 172 unsigned long fault_code; /* ESR_EL1 value */ 173 173 struct debug_info debug; /* debugging */ 174 174 175 - struct user_fpsimd_state kernel_fpsimd_state; 175 + /* 176 + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the 177 + * address of a caller provided buffer that will be used to preserve a 178 + * task's kernel mode FPSIMD state while it is scheduled out. 179 + */ 180 + struct user_fpsimd_state *kernel_fpsimd_state; 176 181 unsigned int kernel_fpsimd_cpu; 177 182 #ifdef CONFIG_ARM64_PTR_AUTH 178 183 struct ptrauth_keys_user keys_user;
+5 -2
arch/arm64/include/asm/simd.h
··· 43 43 44 44 #endif /* ! CONFIG_KERNEL_MODE_NEON */ 45 45 46 - DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end()) 46 + DEFINE_LOCK_GUARD_1(ksimd, 47 + struct user_fpsimd_state, 48 + kernel_neon_begin(_T->lock), 49 + kernel_neon_end(_T->lock)) 47 50 48 - #define scoped_ksimd() scoped_guard(ksimd) 51 + #define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){}) 49 52 50 53 #endif
+40 -14
arch/arm64/kernel/fpsimd.c
··· 1489 1489 * Elide the load if this CPU holds the most recent kernel mode 1490 1490 * FPSIMD context of the current task. 1491 1491 */ 1492 - if (last->st == &task->thread.kernel_fpsimd_state && 1492 + if (last->st == task->thread.kernel_fpsimd_state && 1493 1493 task->thread.kernel_fpsimd_cpu == smp_processor_id()) 1494 1494 return; 1495 1495 1496 - fpsimd_load_state(&task->thread.kernel_fpsimd_state); 1496 + fpsimd_load_state(task->thread.kernel_fpsimd_state); 1497 1497 } 1498 1498 1499 1499 static void fpsimd_save_kernel_state(struct task_struct *task) 1500 1500 { 1501 1501 struct cpu_fp_state cpu_fp_state = { 1502 - .st = &task->thread.kernel_fpsimd_state, 1502 + .st = task->thread.kernel_fpsimd_state, 1503 1503 .to_save = FP_STATE_FPSIMD, 1504 1504 }; 1505 1505 1506 - fpsimd_save_state(&task->thread.kernel_fpsimd_state); 1506 + BUG_ON(!cpu_fp_state.st); 1507 + 1508 + fpsimd_save_state(task->thread.kernel_fpsimd_state); 1507 1509 fpsimd_bind_state_to_cpu(&cpu_fp_state); 1508 1510 1509 1511 task->thread.kernel_fpsimd_cpu = smp_processor_id(); ··· 1776 1774 void fpsimd_flush_task_state(struct task_struct *t) 1777 1775 { 1778 1776 t->thread.fpsimd_cpu = NR_CPUS; 1777 + t->thread.kernel_fpsimd_state = NULL; 1779 1778 /* 1780 1779 * If we don't support fpsimd, bail out after we have 1781 1780 * reset the fpsimd_cpu for this task and clear the ··· 1836 1833 * 1837 1834 * The caller may freely use the FPSIMD registers until kernel_neon_end() is 1838 1835 * called. 1836 + * 1837 + * Unless called from non-preemptible task context, @state must point to a 1838 + * caller provided buffer that will be used to preserve the task's kernel mode 1839 + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel 1840 + * mode FPSIMD occurring in softirq context. May be %NULL otherwise. 
1839 1841 */ 1840 - void kernel_neon_begin(void) 1842 + void kernel_neon_begin(struct user_fpsimd_state *state) 1841 1843 { 1842 1844 if (WARN_ON(!system_supports_fpsimd())) 1843 1845 return; 1846 + 1847 + WARN_ON((preemptible() || in_serving_softirq()) && !state); 1844 1848 1845 1849 BUG_ON(!may_use_simd()); 1846 1850 ··· 1856 1846 /* Save unsaved fpsimd state, if any: */ 1857 1847 if (test_thread_flag(TIF_KERNEL_FPSTATE)) { 1858 1848 BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); 1859 - fpsimd_save_kernel_state(current); 1849 + fpsimd_save_state(state); 1860 1850 } else { 1861 1851 fpsimd_save_user_state(); 1862 1852 ··· 1877 1867 * mode in task context. So in this case, setting the flag here 1878 1868 * is always appropriate. 1879 1869 */ 1880 - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) 1870 + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) { 1871 + /* 1872 + * Record the caller provided buffer as the kernel mode 1873 + * FP/SIMD buffer for this task, so that the state can 1874 + * be preserved and restored on a context switch. 1875 + */ 1876 + WARN_ON(current->thread.kernel_fpsimd_state != NULL); 1877 + current->thread.kernel_fpsimd_state = state; 1881 1878 set_thread_flag(TIF_KERNEL_FPSTATE); 1879 + } 1882 1880 } 1883 1881 1884 1882 /* Invalidate any task state remaining in the fpsimd regs: */ ··· 1904 1886 * 1905 1887 * The caller must not use the FPSIMD registers after this function is called, 1906 1888 * unless kernel_neon_begin() is called again in the meantime. 1889 + * 1890 + * The value of @state must match the value passed to the preceding call to 1891 + * kernel_neon_begin(). 1907 1892 */ 1908 - void kernel_neon_end(void) 1893 + void kernel_neon_end(struct user_fpsimd_state *state) 1909 1894 { 1910 1895 if (!system_supports_fpsimd()) 1896 + return; 1897 + 1898 + if (!test_thread_flag(TIF_KERNEL_FPSTATE)) 1911 1899 return; 1912 1900 1913 1901 /* ··· 1921 1897 * the task context kernel mode FPSIMD state. 
This can only happen when 1922 1898 * running in softirq context on non-PREEMPT_RT. 1923 1899 */ 1924 - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && 1925 - test_thread_flag(TIF_KERNEL_FPSTATE)) 1926 - fpsimd_load_kernel_state(current); 1927 - else 1900 + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) { 1901 + fpsimd_load_state(state); 1902 + } else { 1928 1903 clear_thread_flag(TIF_KERNEL_FPSTATE); 1904 + WARN_ON(current->thread.kernel_fpsimd_state != state); 1905 + current->thread.kernel_fpsimd_state = NULL; 1906 + } 1929 1907 } 1930 1908 EXPORT_SYMBOL_GPL(kernel_neon_end); 1931 1909 ··· 1963 1937 WARN_ON(preemptible()); 1964 1938 1965 1939 if (may_use_simd()) { 1966 - kernel_neon_begin(); 1940 + kernel_neon_begin(&efi_fpsimd_state); 1967 1941 } else { 1968 1942 /* 1969 1943 * If !efi_sve_state, SVE can't be in use yet and doesn't need ··· 2012 1986 return; 2013 1987 2014 1988 if (!efi_fpsimd_state_used) { 2015 - kernel_neon_end(); 1989 + kernel_neon_end(&efi_fpsimd_state); 2016 1990 } else { 2017 1991 if (system_supports_sve() && efi_sve_state_used) { 2018 1992 bool ffr = true;