Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI/MSI: Support allocating virtual MSI interrupts

For NTB devices, we want to be able to trigger MSI interrupts
through a memory window. In these cases we may want to use
more interrupts than the NTB PCI device has available in its MSI-X
table.

We allow for this by creating a new 'virtual' interrupt. These
interrupts are allocated as usual but are not programmed into the
MSI-X table (as there may not be space for them).

The MSI address and data will then be handled through an NTB MSI library
introduced later in this series.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>

authored by

Logan Gunthorpe and committed by
Jon Mason
d7cc609f a944ccc3

+62 -9
+45 -9
drivers/pci/msi.c
··· 192 192 193 193 static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) 194 194 { 195 + if (desc->msi_attrib.is_virtual) 196 + return NULL; 197 + 195 198 return desc->mask_base + 196 199 desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; 197 200 } ··· 209 206 u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) 210 207 { 211 208 u32 mask_bits = desc->masked; 209 + void __iomem *desc_addr; 212 210 213 211 if (pci_msi_ignore_mask) 212 + return 0; 213 + desc_addr = pci_msix_desc_addr(desc); 214 + if (!desc_addr) 214 215 return 0; 215 216 216 217 mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; 217 218 if (flag) 218 219 mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; 219 - writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); 220 + 221 + writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 220 222 221 223 return mask_bits; 222 224 } ··· 281 273 if (entry->msi_attrib.is_msix) { 282 274 void __iomem *base = pci_msix_desc_addr(entry); 283 275 276 + if (!base) { 277 + WARN_ON(1); 278 + return; 279 + } 280 + 284 281 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 285 282 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 286 283 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); ··· 316 303 } else if (entry->msi_attrib.is_msix) { 317 304 void __iomem *base = pci_msix_desc_addr(entry); 318 305 306 + if (!base) 307 + goto skip; 308 + 319 309 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 320 310 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 321 311 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); ··· 343 327 msg->data); 344 328 } 345 329 } 330 + 331 + skip: 346 332 entry->msg = *msg; 333 + 334 + if (entry->write_msi_msg) 335 + entry->write_msi_msg(entry, entry->write_msi_msg_data); 336 + 347 337 } 348 338 349 339 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) ··· 572 550 573 551 entry->msi_attrib.is_msix = 0; 574 552 entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 553 + 
entry->msi_attrib.is_virtual = 0; 575 554 entry->msi_attrib.entry_nr = 0; 576 555 entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); 577 556 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ ··· 697 674 struct irq_affinity_desc *curmsk, *masks = NULL; 698 675 struct msi_desc *entry; 699 676 int ret, i; 677 + int vec_count = pci_msix_vec_count(dev); 700 678 701 679 if (affd) 702 680 masks = irq_create_affinity_masks(nvec, affd); ··· 720 696 entry->msi_attrib.entry_nr = entries[i].entry; 721 697 else 722 698 entry->msi_attrib.entry_nr = i; 699 + 700 + entry->msi_attrib.is_virtual = 701 + entry->msi_attrib.entry_nr >= vec_count; 702 + 723 703 entry->msi_attrib.default_irq = dev->irq; 724 704 entry->mask_base = base; 725 705 ··· 742 714 { 743 715 struct msi_desc *entry; 744 716 int i = 0; 717 + void __iomem *desc_addr; 745 718 746 719 for_each_pci_msi_entry(entry, dev) { 747 720 if (entries) 748 721 entries[i++].vector = entry->irq; 749 - entry->masked = readl(pci_msix_desc_addr(entry) + 750 - PCI_MSIX_ENTRY_VECTOR_CTRL); 722 + 723 + desc_addr = pci_msix_desc_addr(entry); 724 + if (desc_addr) 725 + entry->masked = readl(desc_addr + 726 + PCI_MSIX_ENTRY_VECTOR_CTRL); 727 + else 728 + entry->masked = 0; 729 + 751 730 msix_mask_irq(entry, 1); 752 731 } 753 732 } ··· 967 932 EXPORT_SYMBOL(pci_msix_vec_count); 968 933 969 934 static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, 970 - int nvec, struct irq_affinity *affd) 935 + int nvec, struct irq_affinity *affd, int flags) 971 936 { 972 937 int nr_entries; 973 938 int i, j; ··· 978 943 nr_entries = pci_msix_vec_count(dev); 979 944 if (nr_entries < 0) 980 945 return nr_entries; 981 - if (nvec > nr_entries) 946 + if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) 982 947 return nr_entries; 983 948 984 949 if (entries) { ··· 1114 1079 1115 1080 static int __pci_enable_msix_range(struct pci_dev *dev, 1116 1081 struct msix_entry *entries, int minvec, 1117 - int maxvec, 
struct irq_affinity *affd) 1082 + int maxvec, struct irq_affinity *affd, 1083 + int flags) 1118 1084 { 1119 1085 int rc, nvec = maxvec; 1120 1086 ··· 1132 1096 return -ENOSPC; 1133 1097 } 1134 1098 1135 - rc = __pci_enable_msix(dev, entries, nvec, affd); 1099 + rc = __pci_enable_msix(dev, entries, nvec, affd, flags); 1136 1100 if (rc == 0) 1137 1101 return nvec; 1138 1102 ··· 1163 1127 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, 1164 1128 int minvec, int maxvec) 1165 1129 { 1166 - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL); 1130 + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); 1167 1131 } 1168 1132 EXPORT_SYMBOL(pci_enable_msix_range); 1169 1133 ··· 1203 1167 1204 1168 if (flags & PCI_IRQ_MSIX) { 1205 1169 msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, 1206 - max_vecs, affd); 1170 + max_vecs, affd, flags); 1207 1171 if (msix_vecs > 0) 1208 1172 return msix_vecs; 1209 1173 }
+8
include/linux/msi.h
··· 64 64 * @msg: The last set MSI message cached for reuse 65 65 * @affinity: Optional pointer to a cpu affinity mask for this descriptor 66 66 * 67 + * @write_msi_msg: Callback that may be called when the MSI message 68 + * address or data changes 69 + * @write_msi_msg_data: Data parameter for the callback. 70 + * 67 71 * @masked: [PCI MSI/X] Mask bits 68 72 * @is_msix: [PCI MSI/X] True if MSI-X 69 73 * @multiple: [PCI MSI/X] log2 num of messages allocated ··· 94 90 const void *iommu_cookie; 95 91 #endif 96 92 93 + void (*write_msi_msg)(struct msi_desc *entry, void *data); 94 + void *write_msi_msg_data; 95 + 97 96 union { 98 97 /* PCI MSI/X specific data */ 99 98 struct { ··· 107 100 u8 multi_cap : 3; 108 101 u8 maskbit : 1; 109 102 u8 is_64 : 1; 103 + u8 is_virtual : 1; 110 104 u16 entry_nr; 111 105 unsigned default_irq; 112 106 } msi_attrib;
+9
include/linux/pci.h
··· 1362 1362 #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ 1363 1363 #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ 1364 1364 #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ 1365 + 1366 + /* 1367 + * Virtual interrupts allow for more interrupts to be allocated 1368 + * than the device has interrupts for. These are not programmed 1369 + * into the device's MSI-X table and must be handled by some 1370 + * other driver means. 1371 + */ 1372 + #define PCI_IRQ_VIRTUAL (1 << 4) 1373 + 1365 1374 #define PCI_IRQ_ALL_TYPES \ 1366 1375 (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) 1367 1376