Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: APIC: avoid instruction emulation for EOI writes

Instruction emulation for EOI writes can be skipped, since a sane
guest simply uses MOV instead of string operations. This is a nice
improvement when the guest doesn't support x2apic or Hyper-V EOI.

With a single VM, a ~8% bandwidth improvement is observed
(7.4Gbps -> 8Gbps), by saving ~5% of cycles from EOI emulation.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
<Based on earlier work from>:
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

authored by

Kevin Tian and committed by
Avi Kivity
58fbbf26 45133eca

+43
+12
arch/x86/include/asm/vmx.h
··· 350 350 #define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ 351 351 352 352 353 + /* 354 + * Exit Qualifications for APIC-Access 355 + */ 356 + #define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */ 357 + #define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */ 358 + #define TYPE_LINEAR_APIC_INST_READ (0 << 12) 359 + #define TYPE_LINEAR_APIC_INST_WRITE (1 << 12) 360 + #define TYPE_LINEAR_APIC_INST_FETCH (2 << 12) 361 + #define TYPE_LINEAR_APIC_EVENT (3 << 12) 362 + #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) 363 + #define TYPE_PHYSICAL_APIC_INST (15 << 12) 364 + 353 365 /* segment AR */ 354 366 #define SEGMENT_AR_L_MASK (1 << 13) 355 367
+9
arch/x86/kvm/lapic.c
··· 864 864 return 0; 865 865 } 866 866 867 + void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 868 + { 869 + struct kvm_lapic *apic = vcpu->arch.apic; 870 + 871 + if (apic) 872 + apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 873 + } 874 + EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 875 + 867 876 void kvm_free_lapic(struct kvm_vcpu *vcpu) 868 877 { 869 878 if (!vcpu->arch.apic)
+1
arch/x86/kvm/lapic.h
··· 26 26 void kvm_lapic_reset(struct kvm_vcpu *vcpu); 27 27 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); 28 28 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); 29 + void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); 29 30 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); 30 31 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); 31 32 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
+21
arch/x86/kvm/vmx.c
··· 71 71 static int __read_mostly yield_on_hlt = 1; 72 72 module_param(yield_on_hlt, bool, S_IRUGO); 73 73 74 + static int __read_mostly fasteoi = 1; 75 + module_param(fasteoi, bool, S_IRUGO); 76 + 74 77 /* 75 78 * If nested=1, nested virtualization is supported, i.e., guests may use 76 79 * VMX and be a hypervisor for its own guests. If nested=0, guests may not ··· 4543 4540 4544 4541 static int handle_apic_access(struct kvm_vcpu *vcpu) 4545 4542 { 4543 + if (likely(fasteoi)) { 4544 + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 4545 + int access_type, offset; 4546 + 4547 + access_type = exit_qualification & APIC_ACCESS_TYPE; 4548 + offset = exit_qualification & APIC_ACCESS_OFFSET; 4549 + /* 4550 + * Sane guest uses MOV to write EOI, with written value 4551 + * not cared. So make a short-circuit here by avoiding 4552 + * heavy instruction emulation. 4553 + */ 4554 + if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && 4555 + (offset == APIC_EOI)) { 4556 + kvm_lapic_set_eoi(vcpu); 4557 + skip_emulated_instruction(vcpu); 4558 + return 1; 4559 + } 4560 + } 4546 4561 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 4547 4562 } 4548 4563