KVM: x86: Reset IRTE to host control if *new* route isn't postable

Restore an IRTE back to host control (remapped or posted MSI mode) if the
*new* GSI route prevents posting the IRQ directly to a vCPU, regardless of
the GSI routing type. Updating the IRTE if and only if the new GSI is an
MSI leaves a stale IRTE posting to a vCPU whenever the new route is not an
MSI.

The dangling IRTE can result in interrupts being incorrectly delivered to
the guest, and in the worst case scenario can result in use-after-free,
e.g. if the VM is torn down, but the underlying host IRQ isn't freed.

Fixes: efc644048ecd ("KVM: x86: Update IRTE for posted-interrupts")
Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250404193923.1413163-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by Sean Christopherson and committed by Paolo Bonzini 9bcac97d 7537deda

+41 -45
+31 -27
arch/x86/kvm/svm/avic.c
··· 896 896 { 897 897 struct kvm_kernel_irq_routing_entry *e; 898 898 struct kvm_irq_routing_table *irq_rt; 899 + bool enable_remapped_mode = true; 899 900 int idx, ret = 0; 900 901 901 902 if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) ··· 933 932 kvm_vcpu_apicv_active(&svm->vcpu)) { 934 933 struct amd_iommu_pi_data pi; 935 934 935 + enable_remapped_mode = false; 936 + 936 937 /* Try to enable guest_mode in IRTE */ 937 938 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & 938 939 AVIC_HPA_MASK); ··· 953 950 */ 954 951 if (!ret && pi.is_guest_mode) 955 952 svm_ir_list_add(svm, &pi); 956 - } else { 957 - /* Use legacy mode in IRTE */ 958 - struct amd_iommu_pi_data pi; 959 - 960 - /** 961 - * Here, pi is used to: 962 - * - Tell IOMMU to use legacy mode for this interrupt. 963 - * - Retrieve ga_tag of prior interrupt remapping data. 964 - */ 965 - pi.prev_ga_tag = 0; 966 - pi.is_guest_mode = false; 967 - ret = irq_set_vcpu_affinity(host_irq, &pi); 968 - 969 - /** 970 - * Check if the posted interrupt was previously 971 - * setup with the guest_mode by checking if the ga_tag 972 - * was cached. If so, we need to clean up the per-vcpu 973 - * ir_list. 974 - */ 975 - if (!ret && pi.prev_ga_tag) { 976 - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); 977 - struct kvm_vcpu *vcpu; 978 - 979 - vcpu = kvm_get_vcpu_by_id(kvm, id); 980 - if (vcpu) 981 - svm_ir_list_del(to_svm(vcpu), &pi); 982 - } 983 953 } 984 954 985 955 if (!ret && svm) { ··· 968 992 } 969 993 970 994 ret = 0; 995 + if (enable_remapped_mode) { 996 + /* Use legacy mode in IRTE */ 997 + struct amd_iommu_pi_data pi; 998 + 999 + /** 1000 + * Here, pi is used to: 1001 + * - Tell IOMMU to use legacy mode for this interrupt. 1002 + * - Retrieve ga_tag of prior interrupt remapping data. 
1003 + */ 1004 + pi.prev_ga_tag = 0; 1005 + pi.is_guest_mode = false; 1006 + ret = irq_set_vcpu_affinity(host_irq, &pi); 1007 + 1008 + /** 1009 + * Check if the posted interrupt was previously 1010 + * setup with the guest_mode by checking if the ga_tag 1011 + * was cached. If so, we need to clean up the per-vcpu 1012 + * ir_list. 1013 + */ 1014 + if (!ret && pi.prev_ga_tag) { 1015 + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); 1016 + struct kvm_vcpu *vcpu; 1017 + 1018 + vcpu = kvm_get_vcpu_by_id(kvm, id); 1019 + if (vcpu) 1020 + svm_ir_list_del(to_svm(vcpu), &pi); 1021 + } 1022 + } 971 1023 out: 972 1024 srcu_read_unlock(&kvm->irq_srcu, idx); 973 1025 return ret;
+10 -18
arch/x86/kvm/vmx/posted_intr.c
··· 297 297 { 298 298 struct kvm_kernel_irq_routing_entry *e; 299 299 struct kvm_irq_routing_table *irq_rt; 300 + bool enable_remapped_mode = true; 300 301 struct kvm_lapic_irq irq; 301 302 struct kvm_vcpu *vcpu; 302 303 struct vcpu_data vcpu_info; ··· 336 335 337 336 kvm_set_msi_irq(kvm, e, &irq); 338 337 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || 339 - !kvm_irq_is_postable(&irq)) { 340 - /* 341 - * Make sure the IRTE is in remapped mode if 342 - * we don't handle it in posted mode. 343 - */ 344 - ret = irq_set_vcpu_affinity(host_irq, NULL); 345 - if (ret < 0) { 346 - printk(KERN_INFO 347 - "failed to back to remapped mode, irq: %u\n", 348 - host_irq); 349 - goto out; 350 - } 351 - 338 + !kvm_irq_is_postable(&irq)) 352 339 continue; 353 - } 354 340 355 341 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); 356 342 vcpu_info.vector = irq.vector; ··· 345 357 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, 346 358 vcpu_info.vector, vcpu_info.pi_desc_addr, set); 347 359 348 - if (set) 349 - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); 350 - else 351 - ret = irq_set_vcpu_affinity(host_irq, NULL); 360 + if (!set) 361 + continue; 352 362 363 + enable_remapped_mode = false; 364 + 365 + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); 353 366 if (ret < 0) { 354 367 printk(KERN_INFO "%s: failed to update PI IRTE\n", 355 368 __func__); 356 369 goto out; 357 370 } 358 371 } 372 + 373 + if (enable_remapped_mode) 374 + ret = irq_set_vcpu_affinity(host_irq, NULL); 359 375 360 376 ret = 0; 361 377 out: