Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64/sve: Skip flushing Z registers with 128 bit vectors

When the SVE vector length is 128 bits, there are no bits in the Z
registers which are not shared with the V registers, so we can skip
flushing the Z registers when zeroing state not shared with FPSIMD. This
results in a minor performance improvement.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512151131.27877-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>

Authored by Mark Brown and committed by Will Deacon.
ad4711f9 c9f6890b

+15 -5
+1 -1
arch/arm64/include/asm/fpsimd.h
··· 69 69 extern void sve_save_state(void *state, u32 *pfpsr); 70 70 extern void sve_load_state(void const *state, u32 const *pfpsr, 71 71 unsigned long vq_minus_1); 72 - extern void sve_flush_live(void); 72 + extern void sve_flush_live(unsigned long vq_minus_1); 73 73 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, 74 74 unsigned long vq_minus_1); 75 75 extern unsigned int sve_get_vl(void);
+10 -2
arch/arm64/kernel/entry-fpsimd.S
··· 69 69 ret 70 70 SYM_FUNC_END(sve_load_from_fpsimd_state) 71 71 72 - /* Zero all SVE registers but the first 128-bits of each vector */ 72 + /* 73 + * Zero all SVE registers but the first 128-bits of each vector 74 + * 75 + * VQ must already be configured by caller, any further updates of VQ 76 + * will need to ensure that the register state remains valid. 77 + * 78 + * x0 = VQ - 1 79 + */ 73 80 SYM_FUNC_START(sve_flush_live) 81 + cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state 74 82 sve_flush_z 75 - sve_flush_p_ffr 83 + 1: sve_flush_p_ffr 76 84 ret 77 85 SYM_FUNC_END(sve_flush_live) 78 86
+4 -2
arch/arm64/kernel/fpsimd.c
··· 957 957 * disabling the trap, otherwise update our in-memory copy. 958 958 */ 959 959 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { 960 - sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); 961 - sve_flush_live(); 960 + unsigned long vq_minus_one = 961 + sve_vq_from_vl(current->thread.sve_vl) - 1; 962 + sve_set_vq(vq_minus_one); 963 + sve_flush_live(vq_minus_one); 962 964 fpsimd_bind_task_to_cpu(); 963 965 } else { 964 966 fpsimd_to_sve(current);