KVM: x86: Reset IRTE to host control if *new* route isn't postable

Restore an IRTE to host control (remapped or posted MSI mode) if the
*new* GSI route prevents posting the IRQ directly to a vCPU, regardless of
the GSI routing type.  Updating the IRTE if and only if the new GSI is an
MSI results in KVM leaving an IRTE posting to a vCPU when the new route is
not an MSI.

The dangling IRTE can result in interrupts being incorrectly delivered to
the guest, and in the worst-case scenario can result in a use-after-free,
e.g. if the VM is torn down but the underlying host IRQ isn't freed.

Fixes: efc644048ecd ("KVM: x86: Update IRTE for posted-interrupts")
Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250404193923.1413163-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by Sean Christopherson and committed by Paolo Bonzini 9bcac97d 7537deda

+41 -45
+31 -27
arch/x86/kvm/svm/avic.c
··· 896 { 897 struct kvm_kernel_irq_routing_entry *e; 898 struct kvm_irq_routing_table *irq_rt; 899 int idx, ret = 0; 900 901 if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) ··· 933 kvm_vcpu_apicv_active(&svm->vcpu)) { 934 struct amd_iommu_pi_data pi; 935 936 /* Try to enable guest_mode in IRTE */ 937 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & 938 AVIC_HPA_MASK); ··· 953 */ 954 if (!ret && pi.is_guest_mode) 955 svm_ir_list_add(svm, &pi); 956 - } else { 957 - /* Use legacy mode in IRTE */ 958 - struct amd_iommu_pi_data pi; 959 - 960 - /** 961 - * Here, pi is used to: 962 - * - Tell IOMMU to use legacy mode for this interrupt. 963 - * - Retrieve ga_tag of prior interrupt remapping data. 964 - */ 965 - pi.prev_ga_tag = 0; 966 - pi.is_guest_mode = false; 967 - ret = irq_set_vcpu_affinity(host_irq, &pi); 968 - 969 - /** 970 - * Check if the posted interrupt was previously 971 - * setup with the guest_mode by checking if the ga_tag 972 - * was cached. If so, we need to clean up the per-vcpu 973 - * ir_list. 974 - */ 975 - if (!ret && pi.prev_ga_tag) { 976 - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); 977 - struct kvm_vcpu *vcpu; 978 - 979 - vcpu = kvm_get_vcpu_by_id(kvm, id); 980 - if (vcpu) 981 - svm_ir_list_del(to_svm(vcpu), &pi); 982 - } 983 } 984 985 if (!ret && svm) { ··· 968 } 969 970 ret = 0; 971 out: 972 srcu_read_unlock(&kvm->irq_srcu, idx); 973 return ret;
··· 896 { 897 struct kvm_kernel_irq_routing_entry *e; 898 struct kvm_irq_routing_table *irq_rt; 899 + bool enable_remapped_mode = true; 900 int idx, ret = 0; 901 902 if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) ··· 932 kvm_vcpu_apicv_active(&svm->vcpu)) { 933 struct amd_iommu_pi_data pi; 934 935 + enable_remapped_mode = false; 936 + 937 /* Try to enable guest_mode in IRTE */ 938 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & 939 AVIC_HPA_MASK); ··· 950 */ 951 if (!ret && pi.is_guest_mode) 952 svm_ir_list_add(svm, &pi); 953 } 954 955 if (!ret && svm) { ··· 992 } 993 994 ret = 0; 995 + if (enable_remapped_mode) { 996 + /* Use legacy mode in IRTE */ 997 + struct amd_iommu_pi_data pi; 998 + 999 + /** 1000 + * Here, pi is used to: 1001 + * - Tell IOMMU to use legacy mode for this interrupt. 1002 + * - Retrieve ga_tag of prior interrupt remapping data. 1003 + */ 1004 + pi.prev_ga_tag = 0; 1005 + pi.is_guest_mode = false; 1006 + ret = irq_set_vcpu_affinity(host_irq, &pi); 1007 + 1008 + /** 1009 + * Check if the posted interrupt was previously 1010 + * setup with the guest_mode by checking if the ga_tag 1011 + * was cached. If so, we need to clean up the per-vcpu 1012 + * ir_list. 1013 + */ 1014 + if (!ret && pi.prev_ga_tag) { 1015 + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); 1016 + struct kvm_vcpu *vcpu; 1017 + 1018 + vcpu = kvm_get_vcpu_by_id(kvm, id); 1019 + if (vcpu) 1020 + svm_ir_list_del(to_svm(vcpu), &pi); 1021 + } 1022 + } 1023 out: 1024 srcu_read_unlock(&kvm->irq_srcu, idx); 1025 return ret;
+10 -18
arch/x86/kvm/vmx/posted_intr.c
··· 297 { 298 struct kvm_kernel_irq_routing_entry *e; 299 struct kvm_irq_routing_table *irq_rt; 300 struct kvm_lapic_irq irq; 301 struct kvm_vcpu *vcpu; 302 struct vcpu_data vcpu_info; ··· 336 337 kvm_set_msi_irq(kvm, e, &irq); 338 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || 339 - !kvm_irq_is_postable(&irq)) { 340 - /* 341 - * Make sure the IRTE is in remapped mode if 342 - * we don't handle it in posted mode. 343 - */ 344 - ret = irq_set_vcpu_affinity(host_irq, NULL); 345 - if (ret < 0) { 346 - printk(KERN_INFO 347 - "failed to back to remapped mode, irq: %u\n", 348 - host_irq); 349 - goto out; 350 - } 351 - 352 continue; 353 - } 354 355 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); 356 vcpu_info.vector = irq.vector; ··· 345 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, 346 vcpu_info.vector, vcpu_info.pi_desc_addr, set); 347 348 - if (set) 349 - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); 350 - else 351 - ret = irq_set_vcpu_affinity(host_irq, NULL); 352 353 if (ret < 0) { 354 printk(KERN_INFO "%s: failed to update PI IRTE\n", 355 __func__); 356 goto out; 357 } 358 } 359 360 ret = 0; 361 out:
··· 297 { 298 struct kvm_kernel_irq_routing_entry *e; 299 struct kvm_irq_routing_table *irq_rt; 300 + bool enable_remapped_mode = true; 301 struct kvm_lapic_irq irq; 302 struct kvm_vcpu *vcpu; 303 struct vcpu_data vcpu_info; ··· 335 336 kvm_set_msi_irq(kvm, e, &irq); 337 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || 338 + !kvm_irq_is_postable(&irq)) 339 continue; 340 341 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); 342 vcpu_info.vector = irq.vector; ··· 357 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, 358 vcpu_info.vector, vcpu_info.pi_desc_addr, set); 359 360 + if (!set) 361 + continue; 362 363 + enable_remapped_mode = false; 364 + 365 + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); 366 if (ret < 0) { 367 printk(KERN_INFO "%s: failed to update PI IRTE\n", 368 __func__); 369 goto out; 370 } 371 } 372 + 373 + if (enable_remapped_mode) 374 + ret = irq_set_vcpu_affinity(host_irq, NULL); 375 376 ret = 0; 377 out: