Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iommu/amd: Fix IOMMU interrupt generation in X2APIC mode

The AMD IOMMU has two modes for generating its own interrupts.

The first is very much based on PCI MSI, and can be configured by Linux
precisely that way. But like legacy unmapped PCI MSI it's limited to
8 bits of APIC ID.

The second method does not use PCI MSI at all in hardware, and instead
configures the INTCAPXT registers in the IOMMU directly with the APIC ID
and vector.

In the latter case, the IOMMU driver would still use pci_enable_msi(),
read back (through MMIO) the MSI message that Linux wrote to the PCI MSI
table, then swizzle those bits into the appropriate register.

Historically, this worked because __irq_msi_compose_msg() would silently
generate an invalid MSI message with the high bits of the APIC ID in the
high bits of the MSI address. That hack was intended only for the Intel
IOMMU, and I recently enforced that, introducing a warning in
__irq_msi_compose_msg() if it was invoked with an APIC ID above 255.

Fix the AMD IOMMU not to depend on that hack any more, by having its own
irqdomain and directly putting the bits from the irq_cfg into the right
place in its ->activate() method.

Fixes: 47bea873cf80 ("x86/msi: Only use high bits of MSI address for DMAR unit")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/05e3a5ba317f5ff48d2f8356f19e617f8b9d23a4.camel@infradead.org

authored by

David Woodhouse and committed by
Thomas Gleixner
d1adcfbb 2df985f5

+141 -67
+1
arch/x86/include/asm/hw_irq.h
··· 39 39 X86_IRQ_ALLOC_TYPE_PCI_MSI, 40 40 X86_IRQ_ALLOC_TYPE_PCI_MSIX, 41 41 X86_IRQ_ALLOC_TYPE_DMAR, 42 + X86_IRQ_ALLOC_TYPE_AMDVI, 42 43 X86_IRQ_ALLOC_TYPE_UV, 43 44 }; 44 45
+140 -67
drivers/iommu/amd/init.c
··· 16 16 #include <linux/syscore_ops.h> 17 17 #include <linux/interrupt.h> 18 18 #include <linux/msi.h> 19 + #include <linux/irq.h> 19 20 #include <linux/amd-iommu.h> 20 21 #include <linux/export.h> 21 22 #include <linux/kmemleak.h> ··· 1558 1557 break; 1559 1558 } 1560 1559 1561 - /* 1562 - * Note: Since iommu_update_intcapxt() leverages 1563 - * the IOMMU MMIO access to MSI capability block registers 1564 - * for MSI address lo/hi/data, we need to check both 1565 - * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support. 1566 - */ 1567 - if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && 1568 - (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) 1560 + if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1569 1561 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1570 1562 break; 1571 1563 default: ··· 1975 1981 } __attribute__ ((packed)); 1976 1982 1977 1983 /* 1978 - * Setup the IntCapXT registers with interrupt routing information 1979 - * based on the PCI MSI capability block registers, accessed via 1980 - * MMIO MSI address low/hi and MSI data registers. 1984 + * There isn't really any need to mask/unmask at the irqchip level because 1985 + * the 64-bit INTCAPXT registers can be updated atomically without tearing 1986 + * when the affinity is being updated. 
1981 1987 */ 1982 - static void iommu_update_intcapxt(struct amd_iommu *iommu) 1988 + static void intcapxt_unmask_irq(struct irq_data *data) 1983 1989 { 1984 - struct msi_msg msg; 1990 + } 1991 + 1992 + static void intcapxt_mask_irq(struct irq_data *data) 1993 + { 1994 + } 1995 + 1996 + static struct irq_chip intcapxt_controller; 1997 + 1998 + static int intcapxt_irqdomain_activate(struct irq_domain *domain, 1999 + struct irq_data *irqd, bool reserve) 2000 + { 2001 + struct amd_iommu *iommu = irqd->chip_data; 2002 + struct irq_cfg *cfg = irqd_cfg(irqd); 1985 2003 union intcapxt xt; 1986 - u32 destid; 1987 - 1988 - msg.address_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); 1989 - msg.address_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); 1990 - msg.data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); 1991 - 1992 - destid = x86_msi_msg_get_destid(&msg, x2apic_enabled()); 1993 2004 1994 2005 xt.capxt = 0ULL; 1995 - xt.dest_mode_logical = msg.arch_data.dest_mode_logical; 1996 - xt.vector = msg.arch_data.vector; 1997 - xt.destid_0_23 = destid & GENMASK(23, 0); 1998 - xt.destid_24_31 = destid >> 24; 2006 + xt.dest_mode_logical = apic->dest_mode_logical; 2007 + xt.vector = cfg->vector; 2008 + xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2009 + xt.destid_24_31 = cfg->dest_apicid >> 24; 1999 2010 2000 2011 /** 2001 2012 * Current IOMMU implemtation uses the same IRQ for all ··· 2009 2010 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2010 2011 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2011 2012 writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2013 + return 0; 2012 2014 } 2013 2015 2014 - static void _irq_notifier_notify(struct irq_affinity_notify *notify, 2015 - const cpumask_t *mask) 2016 + static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2017 + struct irq_data *irqd) 2016 2018 { 2017 - struct amd_iommu *iommu; 2018 - 2019 - for_each_iommu(iommu) { 2020 - if (iommu->dev->irq 
== notify->irq) { 2021 - iommu_update_intcapxt(iommu); 2022 - break; 2023 - } 2024 - } 2019 + intcapxt_mask_irq(irqd); 2025 2020 } 2026 2021 2027 - static void _irq_notifier_release(struct kref *ref) 2022 + 2023 + static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2024 + unsigned int nr_irqs, void *arg) 2028 2025 { 2029 - } 2026 + struct irq_alloc_info *info = arg; 2027 + int i, ret; 2030 2028 2031 - static int iommu_init_intcapxt(struct amd_iommu *iommu) 2032 - { 2033 - int ret; 2034 - struct irq_affinity_notify *notify = &iommu->intcapxt_notify; 2029 + if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2030 + return -EINVAL; 2035 2031 2036 - /** 2037 - * IntCapXT requires XTSup=1 and MsiCapMmioSup=1, 2038 - * which can be inferred from amd_iommu_xt_mode. 2039 - */ 2040 - if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) 2041 - return 0; 2042 - 2043 - /** 2044 - * Also, we need to setup notifier to update the IntCapXT registers 2045 - * whenever the irq affinity is changed from user-space. 
2046 - */ 2047 - notify->irq = iommu->dev->irq; 2048 - notify->notify = _irq_notifier_notify, 2049 - notify->release = _irq_notifier_release, 2050 - ret = irq_set_affinity_notifier(iommu->dev->irq, notify); 2051 - if (ret) { 2052 - pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", 2053 - iommu->devid, iommu->dev->irq); 2032 + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2033 + if (ret < 0) 2054 2034 return ret; 2035 + 2036 + for (i = virq; i < virq + nr_irqs; i++) { 2037 + struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2038 + 2039 + irqd->chip = &intcapxt_controller; 2040 + irqd->chip_data = info->data; 2041 + __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2055 2042 } 2056 2043 2057 - iommu_update_intcapxt(iommu); 2058 - iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2059 2044 return ret; 2060 2045 } 2061 2046 2062 - static int iommu_init_msi(struct amd_iommu *iommu) 2047 + static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2048 + unsigned int nr_irqs) 2049 + { 2050 + irq_domain_free_irqs_top(domain, virq, nr_irqs); 2051 + } 2052 + 2053 + static int intcapxt_set_affinity(struct irq_data *irqd, 2054 + const struct cpumask *mask, bool force) 2055 + { 2056 + struct irq_data *parent = irqd->parent_data; 2057 + int ret; 2058 + 2059 + ret = parent->chip->irq_set_affinity(parent, mask, force); 2060 + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2061 + return ret; 2062 + 2063 + return intcapxt_irqdomain_activate(irqd->domain, irqd, false); 2064 + } 2065 + 2066 + static struct irq_chip intcapxt_controller = { 2067 + .name = "IOMMU-MSI", 2068 + .irq_unmask = intcapxt_unmask_irq, 2069 + .irq_mask = intcapxt_mask_irq, 2070 + .irq_ack = irq_chip_ack_parent, 2071 + .irq_retrigger = irq_chip_retrigger_hierarchy, 2072 + .irq_set_affinity = intcapxt_set_affinity, 2073 + .flags = IRQCHIP_SKIP_SET_WAKE, 2074 + }; 2075 + 2076 + static const struct irq_domain_ops intcapxt_domain_ops = { 2077 + 
.alloc = intcapxt_irqdomain_alloc, 2078 + .free = intcapxt_irqdomain_free, 2079 + .activate = intcapxt_irqdomain_activate, 2080 + .deactivate = intcapxt_irqdomain_deactivate, 2081 + }; 2082 + 2083 + 2084 + static struct irq_domain *iommu_irqdomain; 2085 + 2086 + static struct irq_domain *iommu_get_irqdomain(void) 2087 + { 2088 + struct fwnode_handle *fn; 2089 + 2090 + /* No need for locking here (yet) as the init is single-threaded */ 2091 + if (iommu_irqdomain) 2092 + return iommu_irqdomain; 2093 + 2094 + fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2095 + if (!fn) 2096 + return NULL; 2097 + 2098 + iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2099 + fn, &intcapxt_domain_ops, 2100 + NULL); 2101 + if (!iommu_irqdomain) 2102 + irq_domain_free_fwnode(fn); 2103 + 2104 + return iommu_irqdomain; 2105 + } 2106 + 2107 + static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2108 + { 2109 + struct irq_domain *domain; 2110 + struct irq_alloc_info info; 2111 + int irq, ret; 2112 + 2113 + domain = iommu_get_irqdomain(); 2114 + if (!domain) 2115 + return -ENXIO; 2116 + 2117 + init_irq_alloc_info(&info, NULL); 2118 + info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2119 + info.data = iommu; 2120 + 2121 + irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); 2122 + if (irq < 0) { 2123 + irq_domain_remove(domain); 2124 + return irq; 2125 + } 2126 + 2127 + ret = request_threaded_irq(irq, amd_iommu_int_handler, 2128 + amd_iommu_int_thread, 0, "AMD-Vi", iommu); 2129 + if (ret) { 2130 + irq_domain_free_irqs(irq, 1); 2131 + irq_domain_remove(domain); 2132 + return ret; 2133 + } 2134 + 2135 + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2136 + return 0; 2137 + } 2138 + 2139 + static int iommu_init_irq(struct amd_iommu *iommu) 2063 2140 { 2064 2141 int ret; 2065 2142 2066 2143 if (iommu->int_enabled) 2067 2144 goto enable_faults; 2068 2145 2069 - if (iommu->dev->msi_cap) 2146 + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2147 + ret = 
iommu_setup_intcapxt(iommu); 2148 + else if (iommu->dev->msi_cap) 2070 2149 ret = iommu_setup_msi(iommu); 2071 2150 else 2072 2151 ret = -ENODEV; ··· 2153 2076 return ret; 2154 2077 2155 2078 enable_faults: 2156 - ret = iommu_init_intcapxt(iommu); 2157 - if (ret) 2158 - return ret; 2159 - 2160 2079 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2161 2080 2162 2081 if (iommu->ppr_log != NULL) ··· 2775 2702 int ret = 0; 2776 2703 2777 2704 for_each_iommu(iommu) { 2778 - ret = iommu_init_msi(iommu); 2705 + ret = iommu_init_irq(iommu); 2779 2706 if (ret) 2780 2707 goto out; 2781 2708 }