Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)

Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's
IPI virtualization support, but only for AMD. Although the APM describes
the ICR as a single 64-bit register and never documents otherwise, AMD CPUs
store the 64-bit ICR as two separate 32-bit values in ICR and ICR2. When
IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled,
KVM needs to match CPU behavior as some ICR writes will be handled by
the CPU, not by KVM.

Add a kvm_x86_ops knob to control the underlying format used by the CPU to
store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether
or not x2AVIC is enabled. If KVM is handling all ICR writes, the storage
format for x2APIC mode doesn't matter, and having the behavior follow AMD
versus Intel will provide better test coverage and ease debugging.

Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
Cc: stable@vger.kernel.org
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

+36 -12
+2
arch/x86/include/asm/kvm_host.h
··· 1727 1727 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 1728 1728 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 1729 1729 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 1730 + 1731 + const bool x2apic_icr_is_split; 1730 1732 const unsigned long required_apicv_inhibits; 1731 1733 bool allow_apicv_in_x2apic_without_x2apic_virtualization; 1732 1734 void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
+30 -12
arch/x86/kvm/lapic.c
··· 2471 2471 data &= ~APIC_ICR_BUSY; 2472 2472 2473 2473 kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); 2474 - kvm_lapic_set_reg64(apic, APIC_ICR, data); 2474 + if (kvm_x86_ops.x2apic_icr_is_split) { 2475 + kvm_lapic_set_reg(apic, APIC_ICR, data); 2476 + kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); 2477 + } else { 2478 + kvm_lapic_set_reg64(apic, APIC_ICR, data); 2479 + } 2475 2480 trace_kvm_apic_write(APIC_ICR, data); 2476 2481 return 0; 2482 + } 2483 + 2484 + static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) 2485 + { 2486 + if (kvm_x86_ops.x2apic_icr_is_split) 2487 + return (u64)kvm_lapic_get_reg(apic, APIC_ICR) | 2488 + (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32; 2489 + 2490 + return kvm_lapic_get_reg64(apic, APIC_ICR); 2477 2491 } 2478 2492 2479 2493 /* emulate APIC access in a trap manner */ ··· 2507 2493 * maybe-unecessary write, and both are in the noise anyways. 2508 2494 */ 2509 2495 if (apic_x2apic_mode(apic) && offset == APIC_ICR) 2510 - WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); 2496 + WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic))); 2511 2497 else 2512 2498 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); 2513 2499 } ··· 3027 3013 3028 3014 /* 3029 3015 * In x2APIC mode, the LDR is fixed and based on the id. And 3030 - * ICR is internally a single 64-bit register, but needs to be 3031 - * split to ICR+ICR2 in userspace for backwards compatibility. 3016 + * if the ICR is _not_ split, ICR is internally a single 64-bit 3017 + * register, but needs to be split to ICR+ICR2 in userspace for 3018 + * backwards compatibility. 
3032 3019 */ 3033 - if (set) { 3020 + if (set) 3034 3021 *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); 3035 3022 3036 - icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | 3037 - (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; 3038 - __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); 3039 - } else { 3040 - icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); 3041 - __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); 3023 + if (!kvm_x86_ops.x2apic_icr_is_split) { 3024 + if (set) { 3025 + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | 3026 + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; 3027 + __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); 3028 + } else { 3029 + icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); 3030 + __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); 3031 + } 3042 3032 } 3043 3033 } 3044 3034 ··· 3240 3222 u32 low; 3241 3223 3242 3224 if (reg == APIC_ICR) { 3243 - *data = kvm_lapic_get_reg64(apic, APIC_ICR); 3225 + *data = kvm_x2apic_icr_read(apic); 3244 3226 return 0; 3245 3227 } 3246 3228
+2
arch/x86/kvm/svm/svm.c
··· 5053 5053 .enable_nmi_window = svm_enable_nmi_window, 5054 5054 .enable_irq_window = svm_enable_irq_window, 5055 5055 .update_cr8_intercept = svm_update_cr8_intercept, 5056 + 5057 + .x2apic_icr_is_split = true, 5056 5058 .set_virtual_apic_mode = avic_refresh_virtual_apic_mode, 5057 5059 .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, 5058 5060 .apicv_post_state_restore = avic_apicv_post_state_restore,
+2
arch/x86/kvm/vmx/main.c
··· 89 89 .enable_nmi_window = vmx_enable_nmi_window, 90 90 .enable_irq_window = vmx_enable_irq_window, 91 91 .update_cr8_intercept = vmx_update_cr8_intercept, 92 + 93 + .x2apic_icr_is_split = false, 92 94 .set_virtual_apic_mode = vmx_set_virtual_apic_mode, 93 95 .set_apic_access_page_addr = vmx_set_apic_access_page_addr, 94 96 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,