Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:
- Lazy FPSIMD switching fixes
- Really disable compat ioctls on architectures that don't want them
- Disable compat on arm64 (it was never implemented...)
- Rely on architectural requirements for GICV on GICv3
- Detect bad alignments in unmap_stage2_range

x86:
- Add nested VM-entry checks to avoid a broken error recovery path
- Minor documentation fix"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: fix KVM_CAP_HYPERV_TLBFLUSH paragraph number
kvm: vmx: Nested VM-entry prereqs for event inj.
KVM: arm64: Prevent KVM_COMPAT from being selected
KVM: Enforce error in ioctl for compat tasks when !KVM_COMPAT
KVM: arm/arm64: add WARN_ON if size is not PAGE_SIZE aligned in unmap_stage2_range
KVM: arm64: Avoid mistaken attempts to save SVE state for vcpus
KVM: arm64/sve: Fix SVE trap restoration for non-current tasks
KVM: arm64: Don't mask softirq with IRQs disabled in vcpu_put()
arm64: Introduce sysreg_clear_set()
KVM: arm/arm64: Drop resource size check for GICV window

+131 -26
+1 -1
Documentation/virtual/kvm/api.txt
@@ -4610,7 +4610,7 @@
 reset, migration and nested KVM for branch prediction blocking. The stfle
 facility 82 should not be provided to the guest without this capability.
 
-8.14 KVM_CAP_HYPERV_TLBFLUSH
+8.18 KVM_CAP_HYPERV_TLBFLUSH
 
 Architectures: x86
 
+1
arch/arm64/include/asm/kvm_host.h
@@ -306,6 +306,7 @@
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
 #define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
+#define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 
+11
arch/arm64/include/asm/sysreg.h
@@ -728,6 +728,17 @@
 	asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
 } while (0)
 
+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do {			\
+	u64 __scs_val = read_sysreg(sysreg);				\
+	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);		\
+	if (__scs_new != __scs_val)					\
+		write_sysreg(__scs_new, sysreg);			\
+} while (0)
+
 static inline void config_sctlr_el1(u32 clear, u32 set)
 {
 	u32 val;
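sysreg_clear_set() clears first and sets second, so a bit named in both masks
ends up set, and the register write is skipped entirely when the value would
not change. A stand-alone sketch of the same read-modify-write pattern, with a
plain variable standing in for the system register (illustrative only, not
kernel API):

    #include <stdint.h>
    #include <stdio.h>

    /* Clear-then-set: when the masks overlap, set wins. */
    static uint64_t clear_set(uint64_t val, uint64_t clear, uint64_t set)
    {
            return (val & ~clear) | set;
    }

    int main(void)
    {
            uint64_t reg = 0xf0;

            /* Clear bit 4, set bit 0: 0xf0 -> 0xe1. */
            printf("0x%llx\n", (unsigned long long)clear_set(reg, 1 << 4, 1 << 0));
            /* Overlapping masks: bit 4 stays set because set is applied last. */
            printf("0x%llx\n", (unsigned long long)clear_set(reg, 1 << 4, 1 << 4));
            return 0;
    }

The change-detection in the kernel macro additionally avoids a potentially
expensive system-register write when nothing would change.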
+27 -9
arch/arm64/kvm/fpsimd.c
@@ -5,13 +5,14 @@
  * Copyright 2018 Arm Limited
  * Author: Dave Martin <Dave.Martin@arm.com>
  */
-#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
 
 /*
  * Called on entry to KVM_RUN unless this vcpu previously ran at least
@@ -62,10 +61,16 @@
 {
 	BUG_ON(!current->mm);
 
-	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+			      KVM_ARM64_HOST_SVE_IN_USE |
+			      KVM_ARM64_HOST_SVE_ENABLED);
 	vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
 	if (test_thread_flag(TIF_SVE))
 		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
 
 /*
@@ -99,19 +92,30 @@
  */
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
-	local_bh_disable();
+	unsigned long flags;
 
-	update_thread_flag(TIF_SVE,
-			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+	local_irq_save(flags);
 
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		/* Clean guest FP state to memory and invalidate cpu view */
 		fpsimd_save();
 		fpsimd_flush_cpu_state();
-	} else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		/* Ensure user trap controls are correctly restored */
-		fpsimd_bind_task_to_cpu();
+	} else if (system_supports_sve()) {
+		/*
+		 * The FPSIMD/SVE state in the CPU has not been touched, and we
+		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+		 * for EL0. To avoid spurious traps, restore the trap state
+		 * seen by kvm_arch_vcpu_load_fp():
+		 */
+		if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+		else
+			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
 	}
 
-	local_bh_enable();
+	update_thread_flag(TIF_SVE,
+			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+	local_irq_restore(flags);
 }
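The fix pairs a snapshot in kvm_arch_vcpu_load_fp() (KVM_ARM64_HOST_SVE_ENABLED
records CPACR_EL1.ZEN_EL0EN) with a replay in kvm_arch_vcpu_put_fp(), because
the Hyp code resets CPACR_EL1 behind the host's back. A toy user-space model of
that snapshot/replay pairing, with plain variables standing in for the register
and the vcpu flags word (names are illustrative, not kernel API):

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_HOST_SVE_ENABLED	(1u << 4)

    static uint32_t cpacr_zen_el0en;	/* stands in for CPACR_EL1.ZEN_EL0EN */
    static uint32_t vcpu_flags;

    static void load_fp(void)		/* snapshot the host trap state */
    {
            vcpu_flags &= ~FLAG_HOST_SVE_ENABLED;
            if (cpacr_zen_el0en)
                    vcpu_flags |= FLAG_HOST_SVE_ENABLED;
    }

    static void hyp_run(void)		/* Hyp resets the trap controls */
    {
            cpacr_zen_el0en = 0;
    }

    static void put_fp(void)		/* replay the snapshot */
    {
            cpacr_zen_el0en = !!(vcpu_flags & FLAG_HOST_SVE_ENABLED);
    }

    int main(void)
    {
            cpacr_zen_el0en = 1;	/* host had SVE enabled for EL0 */
            load_fp();
            hyp_run();
            put_fp();
            printf("EL0 SVE enable restored: %u\n", (unsigned)cpacr_zen_el0en);
            return 0;
    }

Without the replay, EL0 would take a spurious SVE trap after every vcpu_put()
on a host that had SVE enabled.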
+3
arch/x86/include/asm/vmx.h
@@ -114,6 +114,7 @@
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
+#define VMX_MISC_ZERO_LEN_INS			0x40000000
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING		0x00000001
@@ -352,11 +351,13 @@
 #define VECTORING_INFO_VALID_MASK	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR		(0 << 8)	/* external interrupt */
+#define INTR_TYPE_RESERVED		(1 << 8)	/* reserved */
 #define INTR_TYPE_NMI_INTR		(2 << 8)	/* NMI */
 #define INTR_TYPE_HARD_EXCEPTION	(3 << 8)	/* processor exception */
 #define INTR_TYPE_SOFT_INTR		(4 << 8)	/* software interrupt */
 #define INTR_TYPE_PRIV_SW_EXCEPTION	(5 << 8)	/* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION	(6 << 8)	/* software exception */
+#define INTR_TYPE_OTHER_EVENT		(7 << 8)	/* other event */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI		0x00000001
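The two new encodings complete the 3-bit interruption type field of the
VM-entry interruption-information word, whose layout per the Intel SDM is:
bits 7:0 vector, bits 10:8 type, bit 11 deliver-error-code, bit 31 valid. A
stand-alone decode of one such word, with the mask values copied from vmx.h:

    #include <stdint.h>
    #include <stdio.h>

    /* Mask values as defined in arch/x86/include/asm/vmx.h. */
    #define INTR_INFO_VECTOR_MASK		0xff
    #define INTR_INFO_INTR_TYPE_MASK	0x700
    #define INTR_INFO_DELIVER_CODE_MASK	0x800
    #define INTR_INFO_VALID_MASK		0x80000000u

    int main(void)
    {
            /* #GP (vector 13), hardware exception (type 3), with error code */
            uint32_t intr_info = 13 | (3 << 8) | 0x800 | 0x80000000u;

            printf("vector %u, type %u, error code %s, valid %s\n",
                   (unsigned)(intr_info & INTR_INFO_VECTOR_MASK),
                   (unsigned)((intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8),
                   intr_info & INTR_INFO_DELIVER_CODE_MASK ? "yes" : "no",
                   intr_info & INTR_INFO_VALID_MASK ? "yes" : "no");
            return 0;
    }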
+67
arch/x86/kvm/vmx.c
@@ -1705,6 +1705,17 @@
 		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
 }
 
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+			CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11630,6 +11619,62 @@
 	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
 		return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+
+	/*
+	 * From the Intel SDM, volume 3:
+	 * Fields relevant to VM-entry event injection must be set properly.
+	 * These fields are the VM-entry interruption-information field, the
+	 * VM-entry exception error code, and the VM-entry instruction length.
+	 */
+	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+		u32 intr_info = vmcs12->vm_entry_intr_info_field;
+		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+		bool should_have_error_code;
+		bool urg = nested_cpu_has2(vmcs12,
+					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+		/* VM-entry interruption-info field: interruption type */
+		if (intr_type == INTR_TYPE_RESERVED ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT &&
+		     !nested_cpu_supports_monitor_trap_flag(vcpu)))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: vector */
+		if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+		    (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: deliver error code */
+		should_have_error_code =
+			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+			x86_exception_has_error_code(vector);
+		if (has_error_code != should_have_error_code)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry exception error code */
+		if (has_error_code &&
+		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: reserved bits */
+		if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry instruction length */
+		switch (intr_type) {
+		case INTR_TYPE_SOFT_EXCEPTION:
+		case INTR_TYPE_SOFT_INTR:
+		case INTR_TYPE_PRIV_SW_EXCEPTION:
+			if ((vmcs12->vm_entry_instruction_len > 15) ||
+			    (vmcs12->vm_entry_instruction_len == 0 &&
+			     !nested_cpu_has_zero_length_injection(vcpu)))
+				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+		}
+	}
 
 	return 0;
 }
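Most of the new prereqs are simple range checks on the decoded fields; failing
any of them makes the nested VM-entry fail cleanly with
VMXERR_ENTRY_INVALID_CONTROL_FIELD instead of taking the broken error recovery
path. A stand-alone restatement of just the vector checks, runnable outside the
kernel (type codes written out numerically; NMI_VECTOR is 2 as in the x86
headers):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NMI_VECTOR 2

    static bool vector_valid(uint32_t type, uint8_t vector)
    {
            if (type == 2 && vector != NMI_VECTOR)	/* NMI must use vector 2 */
                    return false;
            if (type == 3 && vector > 31)		/* HW exceptions are 0..31 */
                    return false;
            if (type == 7 && vector != 0)		/* "other event" (MTF) is 0 */
                    return false;
            return true;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   vector_valid(2, 2),		/* valid NMI: 1 */
                   vector_valid(3, 32),		/* bogus exception vector: 0 */
                   vector_valid(7, 1));		/* bogus MTF vector: 0 */
            return 0;
    }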
+9
arch/x86/kvm/x86.h
@@ -110,6 +110,15 @@
 #endif
 }
 
+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+			BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+			BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+	return (1U << vector) & exception_has_error_code;
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
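The helper encodes, as a 32-bit bitmap, which architectural exceptions push an
error code. The same trick with the vector numbers written out (#DF=8, #TS=10,
#NP=11, #SS=12, #GP=13, #PF=14, #AC=17), as a stand-alone program:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool exception_has_error_code(unsigned int vector)
    {
            const uint32_t mask = (1u << 8) | (1u << 10) | (1u << 11) |
                                  (1u << 12) | (1u << 13) | (1u << 14) |
                                  (1u << 17);

            /* Bound the shift; the kernel caller guarantees vector <= 31. */
            return vector < 32 && ((1u << vector) & mask);
    }

    int main(void)
    {
            printf("#PF(14): %d, #UD(6): %d\n",
                   exception_has_error_code(14),	/* 1: pushes error code */
                   exception_has_error_code(6));	/* 0: no error code */
            return 0;
    }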
+1 -1
virt/kvm/Kconfig
@@ -47,7 +47,7 @@
 
 config KVM_COMPAT
        def_bool y
-       depends on KVM && COMPAT && !S390
+       depends on KVM && COMPAT && !(S390 || ARM64)
 
 config HAVE_KVM_IRQ_BYPASS
        bool
+2
virt/kvm/arm/mmu.c
@@ -297,6 +297,8 @@
 	phys_addr_t next;
 
 	assert_spin_locked(&kvm->mmu_lock);
+	WARN_ON(size & ~PAGE_MASK);
+
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		/*
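The WARN_ON relies on size & ~PAGE_MASK being nonzero exactly when size is not
a multiple of the page size. A stand-alone check of that identity, assuming 4K
pages (the kernel value follows PAGE_SHIFT):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096ul
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
            uint64_t sizes[] = { 4096, 8192, 4095, 12288 + 512 };

            for (unsigned i = 0; i < 4; i++)
                    printf("size %llu -> %s\n", (unsigned long long)sizes[i],
                           (sizes[i] & ~PAGE_MASK) ? "WARN (unaligned)" : "ok");
            return 0;
    }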
-5
virt/kvm/arm/vgic/vgic-v3.c
··· 617 pr_warn("GICV physical address 0x%llx not page aligned\n", 618 (unsigned long long)info->vcpu.start); 619 kvm_vgic_global_state.vcpu_base = 0; 620 - } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { 621 - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", 622 - (unsigned long long)resource_size(&info->vcpu), 623 - PAGE_SIZE); 624 - kvm_vgic_global_state.vcpu_base = 0; 625 } else { 626 kvm_vgic_global_state.vcpu_base = info->vcpu.start; 627 kvm_vgic_global_state.can_emulate_gicv2 = true;
··· 617 pr_warn("GICV physical address 0x%llx not page aligned\n", 618 (unsigned long long)info->vcpu.start); 619 kvm_vgic_global_state.vcpu_base = 0; 620 } else { 621 kvm_vgic_global_state.vcpu_base = info->vcpu.start; 622 kvm_vgic_global_state.can_emulate_gicv2 = true;
+9 -10
virt/kvm/kvm_main.c
@@ -116,6 +116,11 @@
 #ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
 				  unsigned long arg);
+#define KVM_COMPAT(c)	.compat_ioctl	= (c)
+#else
+static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
+				unsigned long arg) { return -EINVAL; }
+#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
@@ -2401,11 +2396,9 @@
 static struct file_operations kvm_vcpu_fops = {
 	.release	= kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl	= kvm_vcpu_compat_ioctl,
-#endif
 	.mmap		= kvm_vcpu_mmap,
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_vcpu_compat_ioctl),
 };
 
 /*
@@ -2828,9 +2825,7 @@
 static const struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl = kvm_device_ioctl,
-#endif
 	.release = kvm_device_release,
+	KVM_COMPAT(kvm_device_ioctl),
 };
 
 struct kvm_device *kvm_device_from_filp(struct file *filp)
@@ -3166,10 +3165,8 @@
 static struct file_operations kvm_vm_fops = {
 	.release	= kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl	= kvm_vm_compat_ioctl,
-#endif
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_vm_compat_ioctl),
 };
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
@@ -3259,7 +3260,7 @@
 static struct file_operations kvm_chardev_ops = {
 	.unlocked_ioctl	= kvm_dev_ioctl,
-	.compat_ioctl	= kvm_dev_ioctl,
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_dev_ioctl),
 };
 
 static struct miscdevice kvm_dev = {
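KVM_COMPAT() expands to a designated initializer, so each fops table names its
compat handler exactly once and the !CONFIG_KVM_COMPAT fallback (every compat
ioctl fails with -EINVAL) lives in one place. The same pattern in miniature,
with an illustrative struct rather than the kernel's file_operations; toggle
HAVE_COMPAT to see both expansions:

    #include <stdio.h>

    #define HAVE_COMPAT 0

    struct file_ops {
            long (*ioctl)(unsigned int cmd);
            long (*compat_ioctl)(unsigned int cmd);
    };

    static long real_ioctl(unsigned int cmd) { return (long)cmd; }

    #if HAVE_COMPAT
    #define MY_COMPAT(c)	.compat_ioctl = (c)
    #else
    /* Central fallback: all compat ioctls fail with -EINVAL (-22). */
    static long no_compat_ioctl(unsigned int cmd) { return -22; }
    #define MY_COMPAT(c)	.compat_ioctl = no_compat_ioctl
    #endif

    static struct file_ops ops = {
            .ioctl = real_ioctl,
            MY_COMPAT(real_ioctl),
    };

    int main(void)
    {
            printf("compat ioctl returns %ld\n", ops.compat_ioctl(42));
            return 0;
    }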