Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI MSI: Add support for multiple MSI

Add the new API pci_enable_msi_block() to allow drivers to
request multiple MSI and reimplement pci_enable_msi in terms of
pci_enable_msi_block. Ensure that the architecture back ends don't
have to know about multiple MSI.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Authored by Matthew Wilcox and committed by Jesse Barnes
1c8d7b0a f2440d9a

+116 -41
+39 -6
Documentation/PCI/MSI-HOWTO.txt
··· 94 94 since enabling MSIs disables the pin-based IRQ and the driver will not 95 95 receive interrupts on the old interrupt. 96 96 97 - 4.2.2 pci_disable_msi 97 + 4.2.2 pci_enable_msi_block 98 + 99 + int pci_enable_msi_block(struct pci_dev *dev, int count) 100 + 101 + This variation on the above call allows a device driver to request multiple 102 + MSIs. The MSI specification only allows interrupts to be allocated in 103 + powers of two, up to a maximum of 2^5 (32). 104 + 105 + If this function returns 0, it has succeeded in allocating at least as many 106 + interrupts as the driver requested (it may have allocated more in order 107 + to satisfy the power-of-two requirement). In this case, the function 108 + enables MSI on this device and updates dev->irq to be the lowest of 109 + the new interrupts assigned to it. The other interrupts assigned to 110 + the device are in the range dev->irq to dev->irq + count - 1. 111 + 112 + If this function returns a negative number, it indicates an error and 113 + the driver should not attempt to request any more MSI interrupts for 114 + this device. If this function returns a positive number, it will be 115 + less than 'count' and indicate the number of interrupts that could have 116 + been allocated. In neither case will the irq value have been 117 + updated, nor will the device have been switched into MSI mode. 118 + 119 + The device driver must decide what action to take if 120 + pci_enable_msi_block() returns a value less than the number asked for. 121 + Some devices can make use of fewer interrupts than the maximum they 122 + request; in this case the driver should call pci_enable_msi_block() 123 + again. Note that it is not guaranteed to succeed, even when the 124 + 'count' has been reduced to the value returned from a previous call to 125 + pci_enable_msi_block(). 
This is because there are multiple constraints 126 + on the number of vectors that can be allocated; pci_enable_msi_block() 127 + will return as soon as it finds any constraint that doesn't allow the 128 + call to succeed. 129 + 130 + 4.2.3 pci_disable_msi 98 131 99 132 void pci_disable_msi(struct pci_dev *dev) 100 133 101 - This function should be used to undo the effect of pci_enable_msi(). 102 - Calling it restores dev->irq to the pin-based interrupt number and frees 103 - the previously allocated message signaled interrupt(s). The interrupt 104 - may subsequently be assigned to another device, so drivers should not 105 - cache the value of dev->irq. 134 + This function should be used to undo the effect of pci_enable_msi() or 135 + pci_enable_msi_block(). Calling it restores dev->irq to the pin-based 136 + interrupt number and frees the previously allocated message signaled 137 + interrupt(s). The interrupt may subsequently be assigned to another 138 + device, so drivers should not cache the value of dev->irq. 106 139 107 140 A device driver must always call free_irq() on the interrupt(s) 108 141 for which it has called request_irq() before calling this function.
+4
arch/powerpc/kernel/msi.c
··· 19 19 return -ENOSYS; 20 20 } 21 21 22 + /* PowerPC doesn't support multiple MSI yet */ 23 + if (type == PCI_CAP_ID_MSI && nvec > 1) 24 + return 1; 25 + 22 26 if (ppc_md.msi_check_device) { 23 27 pr_debug("msi: Using platform check routine.\n"); 24 28 return ppc_md.msi_check_device(dev, nvec, type);
+4
arch/x86/kernel/io_apic.c
··· 3510 3510 int index = 0; 3511 3511 #endif 3512 3512 3513 + /* x86 doesn't support multiple MSI yet */ 3514 + if (type == PCI_CAP_ID_MSI && nvec > 1) 3515 + return 1; 3516 + 3513 3517 irq_want = nr_irqs_gsi; 3514 3518 sub_handle = 0; 3515 3519 list_for_each_entry(msidesc, &dev->msi_list, list) {
+64 -27
drivers/pci/msi.c
··· 40 40 struct msi_desc *entry; 41 41 int ret; 42 42 43 + /* 44 + * If an architecture wants to support multiple MSI, it needs to 45 + * override arch_setup_msi_irqs() 46 + */ 47 + if (type == PCI_CAP_ID_MSI && nvec > 1) 48 + return 1; 49 + 43 50 list_for_each_entry(entry, &dev->msi_list, list) { 44 51 ret = arch_setup_msi_irq(dev, entry); 45 52 if (ret < 0) ··· 65 58 struct msi_desc *entry; 66 59 67 60 list_for_each_entry(entry, &dev->msi_list, list) { 68 - if (entry->irq != 0) 69 - arch_teardown_msi_irq(entry->irq); 61 + int i, nvec; 62 + if (entry->irq == 0) 63 + continue; 64 + nvec = 1 << entry->msi_attrib.multiple; 65 + for (i = 0; i < nvec; i++) 66 + arch_teardown_msi_irq(entry->irq + i); 70 67 } 71 68 } 72 69 #endif ··· 174 163 msix_mask_irq(desc, flag); 175 164 readl(desc->mask_base); /* Flush write to device */ 176 165 } else { 177 - msi_mask_irq(desc, 1, flag); 166 + unsigned offset = irq - desc->dev->irq; 167 + msi_mask_irq(desc, 1 << offset, flag << offset); 178 168 } 179 169 } 180 170 ··· 241 229 } else { 242 230 struct pci_dev *dev = entry->dev; 243 231 int pos = entry->msi_attrib.pos; 232 + u16 msgctl; 233 + 234 + pci_read_config_word(dev, msi_control_reg(pos), &msgctl); 235 + msgctl &= ~PCI_MSI_FLAGS_QSIZE; 236 + msgctl |= entry->msi_attrib.multiple << 4; 237 + pci_write_config_word(dev, msi_control_reg(pos), msgctl); 244 238 245 239 pci_write_config_dword(dev, msi_lower_address_reg(pos), 246 240 msg->address_lo); ··· 309 291 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); 310 292 msi_mask_irq(entry, msi_capable_mask(control), entry->masked); 311 293 control &= ~PCI_MSI_FLAGS_QSIZE; 312 - control |= PCI_MSI_FLAGS_ENABLE; 294 + control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; 313 295 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); 314 296 } 315 297 ··· 350 332 /** 351 333 * msi_capability_init - configure device's MSI capability structure 352 334 * @dev: pointer to the pci_dev data structure of MSI device 
function 335 + * @nvec: number of interrupts to allocate 353 336 * 354 - * Setup the MSI capability structure of device function with a single 355 - * MSI irq, regardless of device function is capable of handling 356 - * multiple messages. A return of zero indicates the successful setup 357 - * of an entry zero with the new MSI irq or non-zero for otherwise. 358 - **/ 359 - static int msi_capability_init(struct pci_dev *dev) 337 + * Setup the MSI capability structure of the device with the requested 338 + * number of interrupts. A return value of zero indicates the successful 339 + * setup of an entry with the new MSI irq. A negative return value indicates 340 + * an error, and a positive return value indicates the number of interrupts 341 + * which could have been allocated. 342 + */ 343 + static int msi_capability_init(struct pci_dev *dev, int nvec) 360 344 { 361 345 struct msi_desc *entry; 362 346 int pos, ret; ··· 391 371 list_add_tail(&entry->list, &dev->msi_list); 392 372 393 373 /* Configure MSI capability structure */ 394 - ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI); 374 + ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 395 375 if (ret) { 396 376 msi_free_irqs(dev); 397 377 return ret; ··· 544 524 } 545 525 546 526 /** 547 - * pci_enable_msi - configure device's MSI capability structure 548 - * @dev: pointer to the pci_dev data structure of MSI device function 527 + * pci_enable_msi_block - configure device's MSI capability structure 528 + * @dev: device to configure 529 + * @nvec: number of interrupts to configure 549 530 * 550 - * Setup the MSI capability structure of device function with 551 - * a single MSI irq upon its software driver call to request for 552 - * MSI mode enabled on its hardware device function. A return of zero 553 - * indicates the successful setup of an entry zero with the new MSI 554 - * irq or non-zero for otherwise. 
555 - **/ 556 - int pci_enable_msi(struct pci_dev* dev) 531 + * Allocate IRQs for a device with the MSI capability. 532 + * This function returns a negative errno if an error occurs. If it 533 + * is unable to allocate the number of interrupts requested, it returns 534 + * the number of interrupts it might be able to allocate. If it successfully 535 + * allocates at least the number of interrupts requested, it returns 0 and 536 + * updates the @dev's irq member to the lowest new interrupt number; the 537 + * other interrupt numbers allocated to this device are consecutive. 538 + */ 539 + int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) 557 540 { 558 - int status; 541 + int status, pos, maxvec; 542 + u16 msgctl; 559 543 560 - status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); 544 + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); 545 + if (!pos) 546 + return -EINVAL; 547 + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 548 + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); 549 + if (nvec > maxvec) 550 + return maxvec; 551 + 552 + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); 561 553 if (status) 562 554 return status; 563 555 564 556 WARN_ON(!!dev->msi_enabled); 565 557 566 - /* Check whether driver already requested for MSI-X irqs */ 558 + /* Check whether driver already requested MSI-X irqs */ 567 559 if (dev->msix_enabled) { 568 560 dev_info(&dev->dev, "can't enable MSI " 569 561 "(MSI-X already enabled)\n"); 570 562 return -EINVAL; 571 563 } 572 - status = msi_capability_init(dev); 564 + 565 + status = msi_capability_init(dev, nvec); 573 566 return status; 574 567 } 575 - EXPORT_SYMBOL(pci_enable_msi); 568 + EXPORT_SYMBOL(pci_enable_msi_block); 576 569 577 570 void pci_msi_shutdown(struct pci_dev *dev) 578 571 { ··· 632 599 struct msi_desc *entry, *tmp; 633 600 634 601 list_for_each_entry(entry, &dev->msi_list, list) { 635 - if (entry->irq) 636 - BUG_ON(irq_has_action(entry->irq)); 602 + int i, nvec; 603 + if 
(!entry->irq) 604 + continue; 605 + nvec = 1 << entry->msi_attrib.multiple; 606 + for (i = 0; i < nvec; i++) 607 + BUG_ON(irq_has_action(entry->irq + i)); 637 608 } 638 609 639 610 arch_teardown_msi_irqs(dev);
-6
drivers/pci/msi.h
··· 20 20 #define msi_mask_bits_reg(base, is64bit) \ 21 21 ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) 22 22 #define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE 23 - #define multi_msi_capable(control) \ 24 - (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) 25 - #define multi_msi_enable(control, num) \ 26 - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); 27 23 #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) 28 24 #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) 29 - #define msi_enable(control, num) multi_msi_enable(control, num); \ 30 - control |= PCI_MSI_FLAGS_ENABLE 31 25 32 26 #define msix_table_offset_reg(base) (base + 0x04) 33 27 #define msix_pba_offset_reg(base) (base + 0x08)
+1
include/linux/msi.h
··· 21 21 struct msi_desc { 22 22 struct { 23 23 __u8 is_msix : 1; 24 + __u8 multiple: 3; /* log2 number of messages */ 24 25 __u8 maskbit : 1; /* mask-pending bit supported ? */ 25 26 __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ 26 27 __u8 pos; /* Location of the msi capability */
+4 -2
include/linux/pci.h
··· 789 789 790 790 791 791 #ifndef CONFIG_PCI_MSI 792 - static inline int pci_enable_msi(struct pci_dev *dev) 792 + static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) 793 793 { 794 794 return -1; 795 795 } ··· 824 824 return 0; 825 825 } 826 826 #else 827 - extern int pci_enable_msi(struct pci_dev *dev); 827 + extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); 828 828 extern void pci_msi_shutdown(struct pci_dev *dev); 829 829 extern void pci_disable_msi(struct pci_dev *dev); 830 830 extern int pci_msix_table_size(struct pci_dev *dev); ··· 845 845 #else 846 846 extern int pcie_aspm_enabled(void); 847 847 #endif 848 + 849 + #define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) 848 850 849 851 #ifdef CONFIG_HT_IRQ 850 852 /* The functions a driver should call */