Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: VMX: Enable bus lock VM exit

A virtual machine can exploit bus locks to degrade the performance of
the system. A bus lock can be caused by a split-locked access to
writeback (WB) memory or by using locks on uncacheable (UC) memory. A
bus lock is typically >1000 cycles slower than an atomic operation
within a cache line. It also disrupts performance on other cores (which
must wait for the bus lock to be released before their memory
operations can complete).

To address the threat, bus lock VM exit is introduced to notify the VMM
when a bus lock was acquired, allowing it to enforce throttling or other
policy based mitigations.

A VMM can enable VM exit due to bus locks by setting a new "Bus Lock
Detection" VM-execution control (bit 30 of the Secondary Processor-based
VM-execution controls). If delivery of this VM exit was preempted by a
higher priority VM exit (e.g. EPT misconfiguration, EPT violation, APIC
access VM exit, APIC write VM exit, exception bitmap exiting), bit 26 of
the exit reason field in the VMCS is set to 1.

In the current implementation, KVM exposes this capability through
KVM_CAP_X86_BUS_LOCK_EXIT. The user can get the supported mode bitmap
(i.e. off and exit) and enable it explicitly (disabled by default). If
bus locks in the guest are detected by KVM, exit to user space even when
the current exit reason is handled by KVM internally. Set a new flag
KVM_RUN_X86_BUS_LOCK in vcpu->run->flags to inform user space that a
bus lock was detected in the guest.

Documentation for bus lock VM exit is now available in the latest "Intel
Architecture Instruction Set Extensions Programming Reference".

Document Link:
https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Message-Id: <20201106090315.18606-4-chenyi.qiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Chenyi Qiang and committed by
Paolo Bonzini
fe6b6bc8 15aad3be

+83 -4
+7
arch/x86/include/asm/kvm_host.h
··· 52 52 #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ 53 53 KVM_DIRTY_LOG_INITIALLY_SET) 54 54 55 + #define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ 56 + KVM_BUS_LOCK_DETECTION_EXIT) 57 + 55 58 /* x86-specific vcpu->requests bit members */ 56 59 #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) 57 60 #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) ··· 999 996 struct msr_bitmap_range ranges[16]; 1000 997 } msr_filter; 1001 998 999 + bool bus_lock_detection_enabled; 1000 + 1002 1001 struct kvm_pmu_event_filter *pmu_event_filter; 1003 1002 struct task_struct *nx_lpage_recovery_thread; 1004 1003 ··· 1423 1418 extern u64 kvm_max_tsc_scaling_ratio; 1424 1419 /* 1ull << kvm_tsc_scaling_ratio_frac_bits */ 1425 1420 extern u64 kvm_default_tsc_scaling_ratio; 1421 + /* bus lock detection supported? */ 1422 + extern bool kvm_has_bus_lock_exit; 1426 1423 1427 1424 extern u64 kvm_mce_cap_supported; 1428 1425
+1
arch/x86/include/asm/vmx.h
··· 73 73 #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) 74 74 #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) 75 75 #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) 76 + #define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) 76 77 77 78 #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) 78 79 #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
+1
arch/x86/include/asm/vmxfeatures.h
··· 83 83 #define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ 84 84 #define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ 85 85 #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ 86 + #define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ 86 87 87 88 #endif /* _ASM_X86_VMXFEATURES_H */
+1
arch/x86/include/uapi/asm/kvm.h
··· 112 112 #define KVM_NR_IRQCHIPS 3 113 113 114 114 #define KVM_RUN_X86_SMM (1 << 0) 115 + #define KVM_RUN_X86_BUS_LOCK (1 << 1) 115 116 116 117 /* for KVM_GET_REGS and KVM_SET_REGS */ 117 118 struct kvm_regs {
+3 -1
arch/x86/include/uapi/asm/vmx.h
··· 89 89 #define EXIT_REASON_XRSTORS 64 90 90 #define EXIT_REASON_UMWAIT 67 91 91 #define EXIT_REASON_TPAUSE 68 92 + #define EXIT_REASON_BUS_LOCK 74 92 93 93 94 #define VMX_EXIT_REASONS \ 94 95 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ ··· 151 150 { EXIT_REASON_XSAVES, "XSAVES" }, \ 152 151 { EXIT_REASON_XRSTORS, "XRSTORS" }, \ 153 152 { EXIT_REASON_UMWAIT, "UMWAIT" }, \ 154 - { EXIT_REASON_TPAUSE, "TPAUSE" } 153 + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ 154 + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } 155 155 156 156 #define VMX_EXIT_REASON_FLAGS \ 157 157 { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
+6
arch/x86/kvm/vmx/capabilities.h
··· 262 262 SECONDARY_EXEC_TSC_SCALING; 263 263 } 264 264 265 + static inline bool cpu_has_vmx_bus_lock_detection(void) 266 + { 267 + return vmcs_config.cpu_based_2nd_exec_ctrl & 268 + SECONDARY_EXEC_BUS_LOCK_DETECTION; 269 + } 270 + 265 271 static inline bool cpu_has_vmx_apicv(void) 266 272 { 267 273 return cpu_has_vmx_apic_register_virt() &&
+35 -2
arch/x86/kvm/vmx/vmx.c
··· 2428 2428 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | 2429 2429 SECONDARY_EXEC_PT_USE_GPA | 2430 2430 SECONDARY_EXEC_PT_CONCEAL_VMX | 2431 - SECONDARY_EXEC_ENABLE_VMFUNC; 2431 + SECONDARY_EXEC_ENABLE_VMFUNC | 2432 + SECONDARY_EXEC_BUS_LOCK_DETECTION; 2432 2433 if (cpu_has_sgx()) 2433 2434 opt2 |= SECONDARY_EXEC_ENCLS_EXITING; 2434 2435 if (adjust_vmx_controls(min2, opt2, ··· 4270 4269 vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG, 4271 4270 ENABLE_USR_WAIT_PAUSE, false); 4272 4271 4272 + if (!vcpu->kvm->arch.bus_lock_detection_enabled) 4273 + exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; 4274 + 4273 4275 vmx->secondary_exec_control = exec_control; 4274 4276 } 4275 4277 ··· 5604 5600 return 1; 5605 5601 } 5606 5602 5603 + static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) 5604 + { 5605 + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; 5606 + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; 5607 + return 0; 5608 + } 5609 + 5607 5610 /* 5608 5611 * The exit handlers return 1 if the exit was handled fully and guest execution 5609 5612 * may resume. Otherwise they set the kvm_run parameter to indicate what needs ··· 5667 5656 [EXIT_REASON_VMFUNC] = handle_vmx_instruction, 5668 5657 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, 5669 5658 [EXIT_REASON_ENCLS] = handle_encls, 5659 + [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, 5670 5660 }; 5671 5661 5672 5662 static const int kvm_vmx_max_exit_handlers = ··· 5920 5908 * The guest has exited. See if we can fix it or if we need userspace 5921 5909 * assistance. 
5922 5910 */ 5923 - static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) 5911 + static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) 5924 5912 { 5925 5913 struct vcpu_vmx *vmx = to_vmx(vcpu); 5926 5914 union vmx_exit_reason exit_reason = vmx->exit_reason; ··· 6071 6059 vcpu->run->internal.data[0] = exit_reason.full; 6072 6060 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; 6073 6061 return 0; 6062 + } 6063 + 6064 + static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) 6065 + { 6066 + int ret = __vmx_handle_exit(vcpu, exit_fastpath); 6067 + 6068 + /* 6069 + * Even when current exit reason is handled by KVM internally, we 6070 + * still need to exit to user space when bus lock detected to inform 6071 + * that there is a bus lock in guest. 6072 + */ 6073 + if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { 6074 + if (ret > 0) 6075 + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; 6076 + 6077 + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; 6078 + return 0; 6079 + } 6080 + return ret; 6074 6081 } 6075 6082 6076 6083 /* ··· 7842 7811 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; 7843 7812 kvm_tsc_scaling_ratio_frac_bits = 48; 7844 7813 } 7814 + 7815 + kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); 7845 7816 7846 7817 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 7847 7818
+1 -1
arch/x86/kvm/vmx/vmx.h
··· 83 83 u32 reserved23 : 1; 84 84 u32 reserved24 : 1; 85 85 u32 reserved25 : 1; 86 - u32 reserved26 : 1; 86 + u32 bus_lock_detected : 1; 87 87 u32 enclave_mode : 1; 88 88 u32 smi_pending_mtf : 1; 89 89 u32 smi_from_vmx_root : 1;
+23
arch/x86/kvm/x86.c
··· 136 136 EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); 137 137 u64 __read_mostly kvm_default_tsc_scaling_ratio; 138 138 EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); 139 + bool __read_mostly kvm_has_bus_lock_exit; 140 + EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit); 139 141 140 142 /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ 141 143 static u32 __read_mostly tsc_tolerance_ppm = 250; ··· 3845 3843 case KVM_CAP_STEAL_TIME: 3846 3844 r = sched_info_on(); 3847 3845 break; 3846 + case KVM_CAP_X86_BUS_LOCK_EXIT: 3847 + if (kvm_has_bus_lock_exit) 3848 + r = KVM_BUS_LOCK_DETECTION_OFF | 3849 + KVM_BUS_LOCK_DETECTION_EXIT; 3850 + else 3851 + r = 0; 3852 + break; 3848 3853 default: 3849 3854 break; 3850 3855 } ··· 5305 5296 break; 5306 5297 case KVM_CAP_X86_USER_SPACE_MSR: 5307 5298 kvm->arch.user_space_msr_mask = cap->args[0]; 5299 + r = 0; 5300 + break; 5301 + case KVM_CAP_X86_BUS_LOCK_EXIT: 5302 + r = -EINVAL; 5303 + if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE) 5304 + break; 5305 + 5306 + if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) && 5307 + (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)) 5308 + break; 5309 + 5310 + if (kvm_has_bus_lock_exit && 5311 + cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT) 5312 + kvm->arch.bus_lock_detection_enabled = true; 5308 5313 r = 0; 5309 5314 break; 5310 5315 default:
+5
include/uapi/linux/kvm.h
··· 252 252 #define KVM_EXIT_X86_WRMSR 30 253 253 #define KVM_EXIT_DIRTY_RING_FULL 31 254 254 #define KVM_EXIT_AP_RESET_HOLD 32 255 + #define KVM_EXIT_X86_BUS_LOCK 33 255 256 256 257 /* For KVM_EXIT_INTERNAL_ERROR */ 257 258 /* Emulate instruction failed. */ ··· 1059 1058 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 1060 1059 #define KVM_CAP_SYS_HYPERV_CPUID 191 1061 1060 #define KVM_CAP_DIRTY_LOG_RING 192 1061 + #define KVM_CAP_X86_BUS_LOCK_EXIT 193 1062 1062 1063 1063 #ifdef KVM_CAP_IRQ_ROUTING 1064 1064 ··· 1775 1773 __u32 slot; 1776 1774 __u64 offset; 1777 1775 }; 1776 + 1777 + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) 1778 + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) 1778 1779 1779 1780 #endif /* __LINUX_KVM_H */