Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: SVM: Generate GA log IRQs only if the associated vCPU is blocking

Configure IRTEs to generate GA log interrupts for device posted IRQs that hit
non-running vCPUs if and only if the target vCPU is blocking, i.e.
actually needs a wake event. If the vCPU has exited to userspace or was
preempted, generating GA log entries and interrupts is wasteful and
unnecessary, as the vCPU will be re-loaded and/or scheduled back in
irrespective of the GA log notification (avic_ga_log_notifier() is just a
fancy wrapper for kvm_vcpu_wake_up()).

Use a should-be-zero bit in the vCPU's Physical APIC ID Table Entry to
track whether or not the vCPU's associated IRTEs are configured to
generate GA logs, but only set the synthetic bit in KVM's "cache", i.e.
never set the should-be-zero bit in tables that are used by hardware.
Use a synthetic bit instead of a dedicated boolean to minimize the odds
of messing up the locking, i.e. so that all the existing rules that apply
to avic_physical_id_entry for IS_RUNNING are reused verbatim for
GA_LOG_INTR.

Note, because KVM (by design) "puts" AVIC state in a "pre-blocking"
phase, using kvm_vcpu_is_blocking() to track the need for notifications
isn't a viable option.

Link: https://lore.kernel.org/r/20250611224604.313496-63-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

+58 -12
+7
arch/x86/include/asm/svm.h
··· 252 252 #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 253 253 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) 254 254 255 + /* 256 + * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible 257 + * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate 258 + * GA log interrupts to wake the vCPU (because it's blocking or about to block). 259 + */ 260 + #define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61) 261 + 255 262 #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) 256 263 #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12) 257 264 #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+51 -12
arch/x86/kvm/svm/avic.c
··· 789 789 pi_data.cpu = entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; 790 790 } else { 791 791 pi_data.cpu = -1; 792 - pi_data.ga_log_intr = true; 792 + pi_data.ga_log_intr = entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR; 793 793 } 794 794 795 795 ret = irq_set_vcpu_affinity(host_irq, &pi_data); ··· 826 826 827 827 /* 828 828 * No unique action is required to deal with a vCPU that stops/starts 829 - * running, as IRTEs are configured to generate GALog interrupts at all 830 - * times. 829 + * running. A vCPU that starts running by definition stops blocking as 830 + * well, and a vCPU that stops running can't have been blocking, i.e. 831 + * doesn't need to toggle GALogIntr. 831 832 */ 832 833 AVIC_START_RUNNING = 0, 833 834 AVIC_STOP_RUNNING = 0, 835 + 836 + /* 837 + * When a vCPU starts blocking, KVM needs to set the GALogIntr flag 838 + * in all associated IRTEs so that KVM can wake the vCPU if an IRQ is 839 + * sent to the vCPU. 840 + */ 841 + AVIC_START_BLOCKING = BIT(1), 834 842 }; 835 843 836 844 static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, 837 845 enum avic_vcpu_action action) 838 846 { 847 + bool ga_log_intr = (action & AVIC_START_BLOCKING); 839 848 struct vcpu_svm *svm = to_svm(vcpu); 840 849 struct kvm_kernel_irqfd *irqfd; 841 850 ··· 861 852 void *data = irqfd->irq_bypass_data; 862 853 863 854 if (!(action & AVIC_TOGGLE_ON_OFF)) 864 - WARN_ON_ONCE(amd_iommu_update_ga(data, cpu, true)); 855 + WARN_ON_ONCE(amd_iommu_update_ga(data, cpu, ga_log_intr)); 865 856 else if (cpu >= 0) 866 - WARN_ON_ONCE(amd_iommu_activate_guest_mode(data, cpu, true)); 857 + WARN_ON_ONCE(amd_iommu_activate_guest_mode(data, cpu, ga_log_intr)); 867 858 else 868 859 WARN_ON_ONCE(amd_iommu_deactivate_guest_mode(data)); 869 860 } ··· 898 889 entry = svm->avic_physical_id_entry; 899 890 WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); 900 891 901 - entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; 892 + entry &= 
~(AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK | 893 + AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR); 902 894 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); 903 895 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; 904 896 ··· 960 950 961 951 avic_update_iommu_vcpu_affinity(vcpu, -1, action); 962 952 953 + WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR); 954 + 955 + /* 956 + * Keep the previous APIC ID in the entry so that a rogue doorbell from 957 + * hardware is at least restricted to a CPU associated with the vCPU. 958 + */ 963 959 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; 964 - svm->avic_physical_id_entry = entry; 965 960 966 961 if (enable_ipiv) 967 962 WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry); 963 + 964 + /* 965 + * Note! Don't set AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR in the table as 966 + * it's a synthetic flag that usurps an unused should-be-zero bit. 967 + */ 968 + if (action & AVIC_START_BLOCKING) 969 + entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR; 970 + 971 + svm->avic_physical_id_entry = entry; 968 972 969 973 spin_unlock_irqrestore(&svm->ir_list_lock, flags); 970 974 } ··· 994 970 */ 995 971 u64 entry = to_svm(vcpu)->avic_physical_id_entry; 996 972 997 - /* Nothing to do if IsRunning == '0' due to vCPU blocking. */ 998 - if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) 999 - return; 973 + /* 974 + * Nothing to do if IsRunning == '0' due to vCPU blocking, i.e. if the 975 + * vCPU is preempted while it's in the process of blocking. WARN if the 976 + * vCPU wasn't running and isn't blocking, KVM shouldn't attempt to put 977 + * the AVIC if it wasn't previously loaded. 978 + */ 979 + if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) { 980 + if (WARN_ON_ONCE(!kvm_vcpu_is_blocking(vcpu))) 981 + return; 1000 982 1001 - __avic_vcpu_put(vcpu, AVIC_STOP_RUNNING); 983 + /* 984 + * The vCPU was preempted while blocking, ensure its IRTEs are 985 + * configured to generate GA Log Interrupts. 
986 + */ 987 + if (!(WARN_ON_ONCE(!(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR)))) 988 + return; 989 + } 990 + 991 + __avic_vcpu_put(vcpu, kvm_vcpu_is_blocking(vcpu) ? AVIC_START_BLOCKING : 992 + AVIC_STOP_RUNNING); 1002 993 } 1003 994 1004 995 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu) ··· 1079 1040 * CPU and cause noisy neighbor problems if the VM is sending interrupts 1080 1041 * to the vCPU while it's scheduled out. 1081 1042 */ 1082 - __avic_vcpu_put(vcpu, AVIC_STOP_RUNNING); 1043 + __avic_vcpu_put(vcpu, AVIC_START_BLOCKING); 1083 1044 } 1084 1045 1085 1046 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)