Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:
- Lazy FPSIMD switching fixes
- Really disable compat ioctls on architectures that don't want them
- Disable compat on arm64 (it was never implemented...)
- Rely on architectural requirements for GICV on GICv3
- Detect bad alignments in unmap_stage2_range

x86:
- Add nested VM-entry checks to avoid a broken error recovery path
- Minor documentation fix"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: fix KVM_CAP_HYPERV_TLBFLUSH paragraph number
kvm: vmx: Nested VM-entry prereqs for event inj.
KVM: arm64: Prevent KVM_COMPAT from being selected
KVM: Enforce error in ioctl for compat tasks when !KVM_COMPAT
KVM: arm/arm64: add WARN_ON if size is not PAGE_SIZE aligned in unmap_stage2_range
KVM: arm64: Avoid mistaken attempts to save SVE state for vcpus
KVM: arm64/sve: Fix SVE trap restoration for non-current tasks
KVM: arm64: Don't mask softirq with IRQs disabled in vcpu_put()
arm64: Introduce sysreg_clear_set()
KVM: arm/arm64: Drop resource size check for GICV window

11 files changed, +131 -26
Documentation/virtual/kvm/api.txt (+1 -1)
···
 reset, migration and nested KVM for branch prediction blocking. The stfle
 facility 82 should not be provided to the guest without this capability.

-8.14 KVM_CAP_HYPERV_TLBFLUSH
+8.18 KVM_CAP_HYPERV_TLBFLUSH

 Architectures: x86
arch/arm64/include/asm/kvm_host.h (+1)
···
 #define KVM_ARM64_FP_ENABLED       (1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST          (1 << 2) /* host FP regs loaded */
 #define KVM_ARM64_HOST_SVE_IN_USE  (1 << 3) /* backup for host TIF_SVE */
+#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */

 #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
arch/arm64/include/asm/sysreg.h (+11)
···
     asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
 } while (0)

+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do {            \
+    u64 __scs_val = read_sysreg(sysreg);                     \
+    u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);     \
+    if (__scs_new != __scs_val)                              \
+        write_sysreg(__scs_new, sysreg);                     \
+} while (0)
+
 static inline void config_sctlr_el1(u32 clear, u32 set)
 {
     u32 val;
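For reference, the clear-then-set semantics in isolation (a minimal userspace sketch, not code from this series; the starting CPACR value is made up and only the bit arithmetic is modelled):

  #include <stdint.h>
  #include <stdio.h>

  /* Same bit manipulation as sysreg_clear_set(): clear first, then set;
   * all other bits are preserved. */
  static uint64_t clear_set(uint64_t val, uint64_t clear, uint64_t set)
  {
      return (val & ~clear) | set;
  }

  int main(void)
  {
      const uint64_t zen_el0en = 1ULL << 16; /* bit 16: CPACR_EL1_ZEN_EL0EN */
      uint64_t cpacr = 0x300000;             /* made-up starting value */

      cpacr = clear_set(cpacr, 0, zen_el0en);   /* enable:  0x310000 */
      printf("enabled:  %#llx\n", (unsigned long long)cpacr);
      cpacr = clear_set(cpacr, zen_el0en, 0);   /* disable: 0x300000 */
      printf("disabled: %#llx\n", (unsigned long long)cpacr);
      return 0;
  }

The real macro additionally skips the write when the computed value is unchanged, avoiding a needless system-register write.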
arch/arm64/kvm/fpsimd.c (+27 -9)
···
  * Copyright 2018 Arm Limited
  * Author: Dave Martin <Dave.Martin@arm.com>
  */
-#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>

 /*
  * Called on entry to KVM_RUN unless this vcpu previously ran at least
···
 {
     BUG_ON(!current->mm);

-    vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+    vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+                          KVM_ARM64_HOST_SVE_IN_USE |
+                          KVM_ARM64_HOST_SVE_ENABLED);
     vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
     if (test_thread_flag(TIF_SVE))
         vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+    if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+        vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }

 /*
···
  */
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
-    local_bh_disable();
+    unsigned long flags;

-    update_thread_flag(TIF_SVE,
-                       vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+    local_irq_save(flags);

     if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
         /* Clean guest FP state to memory and invalidate cpu view */
         fpsimd_save();
         fpsimd_flush_cpu_state();
-    } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-        /* Ensure user trap controls are correctly restored */
-        fpsimd_bind_task_to_cpu();
+    } else if (system_supports_sve()) {
+        /*
+         * The FPSIMD/SVE state in the CPU has not been touched, and we
+         * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+         * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+         * for EL0. To avoid spurious traps, restore the trap state
+         * seen by kvm_arch_vcpu_load_fp():
+         */
+        if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+            sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+        else
+            sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
     }

-    local_bh_enable();
+    update_thread_flag(TIF_SVE,
+                       vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+    local_irq_restore(flags);
 }
arch/x86/include/asm/vmx.h (+3)
···
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA              0x00000020
 #define VMX_MISC_ACTIVITY_HLT               0x00000040
+#define VMX_MISC_ZERO_LEN_INS               0x40000000

 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING           0x00000001
···
 #define VECTORING_INFO_VALID_MASK       INTR_INFO_VALID_MASK

 #define INTR_TYPE_EXT_INTR          (0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED          (1 << 8) /* reserved */
 #define INTR_TYPE_NMI_INTR          (2 << 8) /* NMI */
 #define INTR_TYPE_HARD_EXCEPTION    (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR         (4 << 8) /* software interrupt */
 #define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION    (6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT       (7 << 8) /* other event */

 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI        0x00000001
arch/x86/kvm/vmx.c (+67)
···
         MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
 }

+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+    return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+    return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+        CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
     return vmcs12->cpu_based_vm_exec_control & bit;
···
         !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
         !nested_cr3_valid(vcpu, vmcs12->host_cr3))
         return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+
+    /*
+     * From the Intel SDM, volume 3:
+     * Fields relevant to VM-entry event injection must be set properly.
+     * These fields are the VM-entry interruption-information field, the
+     * VM-entry exception error code, and the VM-entry instruction length.
+     */
+    if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+        u32 intr_info = vmcs12->vm_entry_intr_info_field;
+        u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+        u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+        bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+        bool should_have_error_code;
+        bool urg = nested_cpu_has2(vmcs12,
+                                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+        bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+        /* VM-entry interruption-info field: interruption type */
+        if (intr_type == INTR_TYPE_RESERVED ||
+            (intr_type == INTR_TYPE_OTHER_EVENT &&
+             !nested_cpu_supports_monitor_trap_flag(vcpu)))
+            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+        /* VM-entry interruption-info field: vector */
+        if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+            (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+            (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+        /* VM-entry interruption-info field: deliver error code */
+        should_have_error_code =
+            intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+            x86_exception_has_error_code(vector);
+        if (has_error_code != should_have_error_code)
+            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+        /* VM-entry exception error code */
+        if (has_error_code &&
+            vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+        /* VM-entry interruption-info field: reserved bits */
+        if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+        /* VM-entry instruction length */
+        switch (intr_type) {
+        case INTR_TYPE_SOFT_EXCEPTION:
+        case INTR_TYPE_SOFT_INTR:
+        case INTR_TYPE_PRIV_SW_EXCEPTION:
+            if ((vmcs12->vm_entry_instruction_len > 15) ||
+                (vmcs12->vm_entry_instruction_len == 0 &&
+                 !nested_cpu_has_zero_length_injection(vcpu)))
+                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+        }
+    }

     return 0;
 }
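To make the new checks concrete, here is a minimal userspace sketch (not KVM code) of just the type/vector rules above, using the SDM field layout: bits 7:0 vector, bits 10:8 type, bit 11 deliver-error-code, bit 31 valid:

  #include <stdint.h>
  #include <stdio.h>

  #define INTR_VECTOR(info) ((info) & 0xffu)
  #define INTR_TYPE(info)   (((info) >> 8) & 0x7u)

  enum { TYPE_NMI = 2, TYPE_HARD_EXCEPTION = 3, NMI_VECTOR = 2 };

  /* Models only the NMI and hard-exception vector rules from the hunk. */
  static int vector_rules_ok(uint32_t info)
  {
      if (INTR_TYPE(info) == TYPE_NMI && INTR_VECTOR(info) != NMI_VECTOR)
          return 0;
      if (INTR_TYPE(info) == TYPE_HARD_EXCEPTION && INTR_VECTOR(info) > 31)
          return 0;
      return 1;
  }

  int main(void)
  {
      /* valid: inject #GP (vector 13), hard exception, error code, valid bit */
      uint32_t gp  = (1u << 31) | (1u << 11) | (TYPE_HARD_EXCEPTION << 8) | 13;
      /* invalid: NMI type must use vector 2 */
      uint32_t bad = (1u << 31) | (TYPE_NMI << 8) | 9;

      printf("#GP injection: %s\n", vector_rules_ok(gp) ? "ok" : "VM-entry fails");
      printf("NMI/vector 9:  %s\n", vector_rules_ok(bad) ? "ok" : "VM-entry fails");
      return 0;
  }

A well-formed #GP injection passes, while a malformed one now fails VM-entry with VMXERR_ENTRY_INVALID_CONTROL_FIELD up front instead of reaching the broken error recovery path.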
arch/x86/kvm/x86.h (+9)
···
 #endif
 }

+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+    static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+            BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+            BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+    return (1U << vector) & exception_has_error_code;
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
     return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
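The helper encodes the architectural rule that only #DF (8), #TS (10), #NP (11), #SS (12), #GP (13), #PF (14) and #AC (17) push an error code. A standalone model (plain C, vector numbers hard-coded for illustration):

  #include <stdio.h>

  /* Same lookup as x86_exception_has_error_code(), with the vector
   * numbers written out instead of the *_VECTOR constants. */
  static int has_error_code(unsigned int vector)
  {
      const unsigned int mask = (1u << 8) | (1u << 10) | (1u << 11) |
                                (1u << 12) | (1u << 13) | (1u << 14) |
                                (1u << 17);

      return !!((1u << vector) & mask);
  }

  int main(void)
  {
      printf("#PF (14): %d\n", has_error_code(14)); /* 1 */
      printf("#UD (6):  %d\n", has_error_code(6));  /* 0 */
      return 0;
  }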
virt/kvm/Kconfig (+1 -1)
···

 config KVM_COMPAT
        def_bool y
-       depends on KVM && COMPAT && !S390
+       depends on KVM && COMPAT && !(S390 || ARM64)

 config HAVE_KVM_IRQ_BYPASS
        bool
virt/kvm/arm/mmu.c (+2)
···
     phys_addr_t next;

     assert_spin_locked(&kvm->mmu_lock);
+    WARN_ON(size & ~PAGE_MASK);
+
     pgd = kvm->arch.pgd + stage2_pgd_index(addr);
     do {
         /*
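The expression `size & ~PAGE_MASK` is nonzero exactly when size is not a multiple of the page size, which is what the new WARN_ON detects. A standalone illustration, assuming 4 KiB pages:

  #include <stdio.h>

  int main(void)
  {
      const unsigned long page_size = 4096;             /* assumed PAGE_SIZE */
      const unsigned long page_mask = ~(page_size - 1); /* PAGE_MASK */
      const unsigned long sizes[] = { 0x1000, 0x2000, 0x1200 };

      for (int i = 0; i < 3; i++)
          printf("size %#lx: %s\n", sizes[i],
                 (sizes[i] & ~page_mask) ? "WARN (unaligned)" : "ok");
      return 0;
  }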
virt/kvm/arm/vgic/vgic-v3.c (-5)
···
         pr_warn("GICV physical address 0x%llx not page aligned\n",
                 (unsigned long long)info->vcpu.start);
         kvm_vgic_global_state.vcpu_base = 0;
-    } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
-        pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-                (unsigned long long)resource_size(&info->vcpu),
-                PAGE_SIZE);
-        kvm_vgic_global_state.vcpu_base = 0;
     } else {
         kvm_vgic_global_state.vcpu_base = info->vcpu.start;
         kvm_vgic_global_state.can_emulate_gicv2 = true;
virt/kvm/kvm_main.c (+9 -10)
···
 #ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
                                   unsigned long arg);
+#define KVM_COMPAT(c)   .compat_ioctl = (c)
+#else
+static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
+                                unsigned long arg) { return -EINVAL; }
+#define KVM_COMPAT(c)   .compat_ioctl = kvm_no_compat_ioctl
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
···
 static struct file_operations kvm_vcpu_fops = {
     .release        = kvm_vcpu_release,
     .unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-    .compat_ioctl   = kvm_vcpu_compat_ioctl,
-#endif
     .mmap           = kvm_vcpu_mmap,
     .llseek         = noop_llseek,
+    KVM_COMPAT(kvm_vcpu_compat_ioctl),
 };

 /*
···

 static const struct file_operations kvm_device_fops = {
     .unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-    .compat_ioctl = kvm_device_ioctl,
-#endif
     .release = kvm_device_release,
+    KVM_COMPAT(kvm_device_ioctl),
 };

 struct kvm_device *kvm_device_from_filp(struct file *filp)
···
 static struct file_operations kvm_vm_fops = {
     .release        = kvm_vm_release,
     .unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-    .compat_ioctl   = kvm_vm_compat_ioctl,
-#endif
     .llseek         = noop_llseek,
+    KVM_COMPAT(kvm_vm_compat_ioctl),
 };

 static int kvm_dev_ioctl_create_vm(unsigned long type)
···

 static struct file_operations kvm_chardev_ops = {
     .unlocked_ioctl = kvm_dev_ioctl,
-    .compat_ioctl   = kvm_dev_ioctl,
     .llseek         = noop_llseek,
+    KVM_COMPAT(kvm_dev_ioctl),
 };

 static struct miscdevice kvm_dev = {
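The pattern here: a single KVM_COMPAT() macro wires .compat_ioctl either to the real handler or to a stub returning -EINVAL, so compat (32-bit) tasks always hit an explicit error rather than an unset handler, with no #ifdef at each use site. A minimal userspace model of the same trick (names are illustrative, not KVM's):

  #include <stdio.h>

  struct ops {
      long (*compat_ioctl)(int arg);
  };

  #ifdef HAVE_COMPAT
  #define COMPAT(c)   .compat_ioctl = (c)
  #else
  /* Stand-in for kvm_no_compat_ioctl(): always fail with -EINVAL (-22). */
  static long no_compat(int arg) { (void)arg; return -22; }
  #define COMPAT(c)   .compat_ioctl = no_compat
  #endif

  long real_compat(int arg) { return arg; }

  static struct ops vcpu_ops = {
      COMPAT(real_compat),
  };

  int main(void)
  {
      /* Built without -DHAVE_COMPAT this prints -22; with it, 7. */
      printf("compat_ioctl(7) -> %ld\n", vcpu_ops.compat_ioctl(7));
      return 0;
  }

Built with -DHAVE_COMPAT the call reaches real_compat(); built without it, every ops table automatically falls back to the failing stub.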