Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64/sme: Implement ZA context switching

Allocate space for storing ZA on first access to SME and use that to save
and restore ZA state when context switching. We do this by using the vector
form of the LDR and STR ZA instructions; these do not require streaming
mode, and carry implementation recommendations that they avoid contention
issues in shared SMCU implementations.

Since ZA is architecturally guaranteed to be zeroed when enabled, we do not
need to explicitly zero ZA: either we will be restoring from a saved copy,
or we will be trapping on first use of SME, so we know that ZA must be disabled.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-16-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Authored by Mark Brown and committed by Catalin Marinas
0033cd93 af7167d6

+66 -9
+4 -1
arch/arm64/include/asm/fpsimd.h
··· 47 47 48 48 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, 49 49 void *sve_state, unsigned int sve_vl, 50 - unsigned int sme_vl, u64 *svcr); 50 + void *za_state, unsigned int sme_vl, 51 + u64 *svcr); 51 52 52 53 extern void fpsimd_flush_task_state(struct task_struct *target); 53 54 extern void fpsimd_save_and_flush_cpu_state(void); ··· 91 90 extern unsigned int sve_get_vl(void); 92 91 extern void sve_set_vq(unsigned long vq_minus_1); 93 92 extern void sme_set_vq(unsigned long vq_minus_1); 93 + extern void za_save_state(void *state); 94 + extern void za_load_state(void const *state); 94 95 95 96 struct arm64_cpu_capabilities; 96 97 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+22
arch/arm64/include/asm/fpsimdmacros.h
··· 319 319 ldr w\nxtmp, [\xpfpsr, #4] 320 320 msr fpcr, x\nxtmp 321 321 .endm 322 + 323 + .macro sme_save_za nxbase, xvl, nw 324 + mov w\nw, #0 325 + 326 + 423: 327 + _sme_str_zav \nw, \nxbase 328 + add x\nxbase, x\nxbase, \xvl 329 + add x\nw, x\nw, #1 330 + cmp \xvl, x\nw 331 + bne 423b 332 + .endm 333 + 334 + .macro sme_load_za nxbase, xvl, nw 335 + mov w\nw, #0 336 + 337 + 423: 338 + _sme_ldr_zav \nw, \nxbase 339 + add x\nxbase, x\nxbase, \xvl 340 + add x\nw, x\nw, #1 341 + cmp \xvl, x\nw 342 + bne 423b 343 + .endm
+3
arch/arm64/include/asm/kvm_host.h
··· 295 295 296 296 struct kvm_vcpu_arch { 297 297 struct kvm_cpu_context ctxt; 298 + 299 + /* Guest floating point state */ 298 300 void *sve_state; 299 301 unsigned int sve_max_vl; 302 + u64 svcr; 300 303 301 304 /* Stage 2 paging state used by the hardware on next switch */ 302 305 struct kvm_s2_mmu *hw_mmu;
+1
arch/arm64/include/asm/processor.h
··· 154 154 155 155 unsigned int fpsimd_cpu; 156 156 void *sve_state; /* SVE registers, if any */ 157 + void *za_state; /* ZA register, if any */ 157 158 unsigned int vl[ARM64_VEC_MAX]; /* vector length */ 158 159 unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ 159 160 unsigned long fault_address; /* fault info */
+22
arch/arm64/kernel/entry-fpsimd.S
··· 99 99 ret 100 100 SYM_FUNC_END(sme_set_vq) 101 101 102 + /* 103 + * Save the SME state 104 + * 105 + * x0 - pointer to buffer for state 106 + */ 107 + SYM_FUNC_START(za_save_state) 108 + _sme_rdsvl 1, 1 // x1 = VL/8 109 + sme_save_za 0, x1, 12 110 + ret 111 + SYM_FUNC_END(za_save_state) 112 + 113 + /* 114 + * Load the SME state 115 + * 116 + * x0 - pointer to buffer for state 117 + */ 118 + SYM_FUNC_START(za_load_state) 119 + _sme_rdsvl 1, 1 // x1 = VL/8 120 + sme_load_za 0, x1, 12 121 + ret 122 + SYM_FUNC_END(za_load_state) 123 + 102 124 #endif /* CONFIG_ARM64_SME */
+13 -7
arch/arm64/kernel/fpsimd.c
··· 121 121 struct fpsimd_last_state_struct { 122 122 struct user_fpsimd_state *st; 123 123 void *sve_state; 124 + void *za_state; 124 125 u64 *svcr; 125 126 unsigned int sve_vl; 126 127 unsigned int sme_vl; ··· 388 387 if (system_supports_sme()) { 389 388 unsigned long sme_vl = task_get_sme_vl(current); 390 389 390 + /* Ensure VL is set up for restoring data */ 391 391 if (test_thread_flag(TIF_SME)) 392 392 sme_set_vq(sve_vq_from_vl(sme_vl) - 1); 393 393 394 394 write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); 395 + 396 + if (thread_za_enabled(&current->thread)) 397 + za_load_state(current->thread.za_state); 395 398 396 399 if (thread_sm_enabled(&current->thread)) { 397 400 restore_sve_regs = true; ··· 446 441 u64 *svcr = last->svcr; 447 442 *svcr = read_sysreg_s(SYS_SVCR_EL0); 448 443 449 - if (thread_za_enabled(&current->thread)) { 450 - /* ZA state managment is not implemented yet */ 451 - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); 452 - return; 453 - } 444 + *svcr = read_sysreg_s(SYS_SVCR_EL0); 445 + 446 + if (*svcr & SYS_SVCR_EL0_ZA_MASK) 447 + za_save_state(last->za_state); 454 448 455 449 /* If we are in streaming mode override regular SVE. 
*/ 456 450 if (*svcr & SYS_SVCR_EL0_SM_MASK) { ··· 1487 1483 WARN_ON(!system_supports_fpsimd()); 1488 1484 last->st = &current->thread.uw.fpsimd_state; 1489 1485 last->sve_state = current->thread.sve_state; 1486 + last->za_state = current->thread.za_state; 1490 1487 last->sve_vl = task_get_sve_vl(current); 1491 1488 last->sme_vl = task_get_sme_vl(current); 1492 1489 last->svcr = &current->thread.svcr; ··· 1505 1500 } 1506 1501 1507 1502 void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, 1508 - unsigned int sve_vl, unsigned int sme_vl, 1509 - u64 *svcr) 1503 + unsigned int sve_vl, void *za_state, 1504 + unsigned int sme_vl, u64 *svcr) 1510 1505 { 1511 1506 struct fpsimd_last_state_struct *last = 1512 1507 this_cpu_ptr(&fpsimd_last_state); ··· 1517 1512 last->st = st; 1518 1513 last->svcr = svcr; 1519 1514 last->sve_state = sve_state; 1515 + last->za_state = za_state; 1520 1516 last->sve_vl = sve_vl; 1521 1517 last->sme_vl = sme_vl; 1522 1518 }
+1 -1
arch/arm64/kvm/fpsimd.c
··· 116 116 fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, 117 117 vcpu->arch.sve_state, 118 118 vcpu->arch.sve_max_vl, 119 - 0, NULL); 119 + NULL, 0, &vcpu->arch.svcr); 120 120 121 121 clear_thread_flag(TIF_FOREIGN_FPSTATE); 122 122 update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));