Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly

Restoration of the host IA32_SPEC_CTRL value is probably too late
with respect to the return thunk training sequence.

With respect to the user/kernel boundary, AMD says, "If software chooses
to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
exit), software should set STIBP to 1 before executing the return thunk
training sequence." I assume the same requirements apply to the guest/host
boundary. The return thunk training sequence is in vmenter.S, quite close
to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
IA32_SPEC_CTRL value is not restored until much later.

To avoid this, move the restoration of host SPEC_CTRL to assembly and,
for consistency, move the restoration of the guest SPEC_CTRL as well.
This is not particularly difficult, apart from some care to cover both
32- and 64-bit, and to share code between SEV-ES and normal vmentry.

Cc: stable@vger.kernel.org
Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+136 -38
+3 -10
arch/x86/kernel/cpu/bugs.c
··· 196 196 } 197 197 198 198 /* 199 - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is 200 - * done in vmenter.S. 199 + * NOTE: This function is *only* called for SVM, since Intel uses 200 + * MSR_IA32_SPEC_CTRL for SSBD. 201 201 */ 202 202 void 203 203 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) 204 204 { 205 - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); 205 + u64 guestval, hostval; 206 206 struct thread_info *ti = current_thread_info(); 207 - 208 - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { 209 - if (hostval != guestval) { 210 - msrval = setguest ? guestval : hostval; 211 - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); 212 - } 213 - } 214 207 215 208 /* 216 209 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+1
arch/x86/kvm/kvm-asm-offsets.c
··· 16 16 BLANK(); 17 17 OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); 18 18 OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); 19 + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); 19 20 OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); 20 21 OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); 21 22 OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa);
+14 -23
arch/x86/kvm/svm/svm.c
··· 720 720 u32 offset; 721 721 u32 *msrpm; 722 722 723 + /* 724 + * For non-nested case: 725 + * If the L01 MSR bitmap does not intercept the MSR, then we need to 726 + * save it. 727 + * 728 + * For nested case: 729 + * If the L02 MSR bitmap does not intercept the MSR, then we need to 730 + * save it. 731 + */ 723 732 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: 724 733 to_svm(vcpu)->msrpm; 725 734 ··· 3910 3901 return EXIT_FASTPATH_NONE; 3911 3902 } 3912 3903 3913 - static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) 3904 + static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) 3914 3905 { 3915 3906 struct vcpu_svm *svm = to_svm(vcpu); 3916 3907 3917 3908 guest_state_enter_irqoff(); 3918 3909 3919 3910 if (sev_es_guest(vcpu->kvm)) 3920 - __svm_sev_es_vcpu_run(svm); 3911 + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); 3921 3912 else 3922 - __svm_vcpu_run(svm); 3913 + __svm_vcpu_run(svm, spec_ctrl_intercepted); 3923 3914 3924 3915 guest_state_exit_irqoff(); 3925 3916 } ··· 3927 3918 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) 3928 3919 { 3929 3920 struct vcpu_svm *svm = to_svm(vcpu); 3921 + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); 3930 3922 3931 3923 trace_kvm_entry(vcpu); 3932 3924 ··· 3986 3976 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) 3987 3977 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); 3988 3978 3989 - svm_vcpu_enter_exit(vcpu); 3990 - 3991 - /* 3992 - * We do not use IBRS in the kernel. If this vCPU has used the 3993 - * SPEC_CTRL MSR it may have left it on; save the value and 3994 - * turn it off. This is much more efficient than blindly adding 3995 - * it to the atomic save/restore list. Especially as the former 3996 - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. 3997 - * 3998 - * For non-nested case: 3999 - * If the L01 MSR bitmap does not intercept the MSR, then we need to 4000 - * save it. 
4001 - * 4002 - * For nested case: 4003 - * If the L02 MSR bitmap does not intercept the MSR, then we need to 4004 - * save it. 4005 - */ 4006 - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && 4007 - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) 4008 - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); 3979 + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); 4009 3980 4010 3981 if (!sev_es_guest(vcpu->kvm)) 4011 3982 reload_tss(vcpu);
+2 -2
arch/x86/kvm/svm/svm.h
··· 682 682 683 683 /* vmenter.S */ 684 684 685 - void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); 686 - void __svm_vcpu_run(struct vcpu_svm *svm); 685 + void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); 686 + void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); 687 687 688 688 #endif
+116 -3
arch/x86/kvm/svm/vmenter.S
··· 32 32 33 33 .section .noinstr.text, "ax" 34 34 35 + .macro RESTORE_GUEST_SPEC_CTRL 36 + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ 37 + ALTERNATIVE_2 "", \ 38 + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ 39 + "", X86_FEATURE_V_SPEC_CTRL 40 + 801: 41 + .endm 42 + .macro RESTORE_GUEST_SPEC_CTRL_BODY 43 + 800: 44 + /* 45 + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the 46 + * host's, write the MSR. This is kept out-of-line so that the common 47 + * case does not have to jump. 48 + * 49 + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, 50 + * there must not be any returns or indirect branches between this code 51 + * and vmentry. 52 + */ 53 + movl SVM_spec_ctrl(%_ASM_DI), %eax 54 + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax 55 + je 801b 56 + mov $MSR_IA32_SPEC_CTRL, %ecx 57 + xor %edx, %edx 58 + wrmsr 59 + jmp 801b 60 + .endm 61 + 62 + .macro RESTORE_HOST_SPEC_CTRL 63 + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ 64 + ALTERNATIVE_2 "", \ 65 + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ 66 + "", X86_FEATURE_V_SPEC_CTRL 67 + 901: 68 + .endm 69 + .macro RESTORE_HOST_SPEC_CTRL_BODY 70 + 900: 71 + /* Same for after vmexit. */ 72 + mov $MSR_IA32_SPEC_CTRL, %ecx 73 + 74 + /* 75 + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, 76 + * if it was not intercepted during guest execution. 77 + */ 78 + cmpb $0, (%_ASM_SP) 79 + jnz 998f 80 + rdmsr 81 + movl %eax, SVM_spec_ctrl(%_ASM_DI) 82 + 998: 83 + 84 + /* Now restore the host value of the MSR if different from the guest's. */
85 + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax 86 + cmp SVM_spec_ctrl(%_ASM_DI), %eax 87 + je 901b 88 + xor %edx, %edx 89 + wrmsr 90 + jmp 901b 91 + .endm 92 + 93 + 35 94 /** 36 95 * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode 37 96 * @svm: struct vcpu_svm * 97 + * @spec_ctrl_intercepted: bool 38 98 */ 39 99 SYM_FUNC_START(__svm_vcpu_run) 40 100 push %_ASM_BP ··· 114 54 * order compared to when they are needed. 115 55 */ 116 56 57 + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ 58 + push %_ASM_ARG2 59 + 117 60 /* Needed to restore access to percpu variables. */ 118 61 __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) 119 62 120 - /* Save @svm. */ 63 + /* Finally save @svm. */ 121 64 push %_ASM_ARG1 122 65 123 66 .ifnc _ASM_ARG1, _ASM_DI 124 - /* Move @svm to RDI. */ 67 + /* 68 + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX 69 + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. 70 + */ 125 71 mov %_ASM_ARG1, %_ASM_DI 126 72 .endif 73 + 74 + /* Clobbers RAX, RCX, RDX. */ 75 + RESTORE_GUEST_SPEC_CTRL 127 76 128 77 /* 129 78 * Use a single vmcb (vmcb01 because it's always valid) for ··· 211 142 FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE 212 143 #endif 213 144 145 + /* Clobbers RAX, RCX, RDX. */ 146 + RESTORE_HOST_SPEC_CTRL 147 + 214 148 /* 215 149 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be 216 150 * untrained as soon as we exit the VM and are back to the ··· 249 177 xor %r15d, %r15d 250 178 #endif 251 179 180 + /* "Pop" @spec_ctrl_intercepted. */
181 + pop %_ASM_BX 182 + 252 183 pop %_ASM_BX 253 184 254 185 #ifdef CONFIG_X86_64 ··· 265 190 #endif 266 191 pop %_ASM_BP 267 192 RET 193 + 194 + RESTORE_GUEST_SPEC_CTRL_BODY 195 + RESTORE_HOST_SPEC_CTRL_BODY 268 196 269 197 10: cmpb $0, kvm_rebooting 270 198 jne 2b ··· 292 214 /** 293 215 * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode 294 216 * @svm: struct vcpu_svm * 217 + * @spec_ctrl_intercepted: bool 295 218 */ 296 219 SYM_FUNC_START(__svm_sev_es_vcpu_run) 297 220 push %_ASM_BP ··· 307 228 #endif 308 229 push %_ASM_BX 309 230 231 + /* 232 + * Save variables needed after vmexit on the stack, in inverse 233 + * order compared to when they are needed. 234 + */ 235 + 236 + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ 237 + push %_ASM_ARG2 238 + 239 + /* Save @svm. */ 240 + push %_ASM_ARG1 241 + 242 + .ifnc _ASM_ARG1, _ASM_DI 243 + /* 244 + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX 245 + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. 246 + */ 247 + mov %_ASM_ARG1, %_ASM_DI 248 + .endif 249 + 250 + /* Clobbers RAX, RCX, RDX. */ 251 + RESTORE_GUEST_SPEC_CTRL 252 + 310 253 /* Get svm->current_vmcb->pa into RAX. */ 311 - mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX 254 + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX 312 255 mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX 313 256 314 257 /* Enter guest mode */ ··· 340 239 341 240 2: cli 342 241 242 + /* Pop @svm to RDI, guest registers have been saved already. */ 243 + pop %_ASM_DI 244 + 343 245 #ifdef CONFIG_RETPOLINE 344 246 /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ 345 247 FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE 346 248 #endif 249 + 250 + /* Clobbers RAX, RCX, RDX. */ 251 + RESTORE_HOST_SPEC_CTRL 347 252 348 253 /* 349 254 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ··· 359 252 * from the kernel. 360 253 */ 361 254 UNTRAIN_RET 255 + 256 + /* "Pop" @spec_ctrl_intercepted. */
257 + pop %_ASM_BX 362 258 363 259 pop %_ASM_BX 364 260 ··· 376 266 #endif 377 267 pop %_ASM_BP 378 268 RET 269 + 270 + RESTORE_GUEST_SPEC_CTRL_BODY 271 + RESTORE_HOST_SPEC_CTRL_BODY 379 272 380 273 3: cmpb $0, kvm_rebooting 381 274 jne 2b