Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/MSI: Fix race condition in tearing down MSI interrupts

This fixes a race which can result in the same virtual IRQ number
being assigned to two different MSI interrupts. The most visible
consequence of that is usually a warning and stack trace from the
sysfs code about an attempt to create a duplicate entry in sysfs.

The race happens when one CPU (say CPU 0) is disposing of an MSI
while another CPU (say CPU 1) is setting up an MSI. CPU 0 calls
(for example) pnv_teardown_msi_irqs(), which calls
msi_bitmap_free_hwirqs() to indicate that the MSI (i.e. its
hardware IRQ number) is no longer in use. Then, before CPU 0 gets
to calling irq_dispose_mapping() to free up the virtual IRQ number,
CPU 1 comes in and calls msi_bitmap_alloc_hwirqs() to allocate an
MSI, and gets the same hardware IRQ number that CPU 0 just freed.
CPU 1 then calls irq_create_mapping() to get a virtual IRQ number,
which sees that there is currently a mapping for that hardware IRQ
number and returns the corresponding virtual IRQ number (which is
the same virtual IRQ number that CPU 0 was using). CPU 0 then
calls irq_dispose_mapping() and frees that virtual IRQ number.
Now, if another CPU comes along and calls irq_create_mapping(), it
is likely to get the virtual IRQ number that was just freed,
resulting in the same virtual IRQ number apparently being used for
two different hardware interrupts.

To fix this race, we just move the call to msi_bitmap_free_hwirqs()
to after the call to irq_dispose_mapping(). Since virq_to_hw()
doesn't work for the virtual IRQ number after irq_dispose_mapping()
has been called, we need to call it before irq_dispose_mapping() and
remember the result for the msi_bitmap_free_hwirqs() call.

The pattern of calling msi_bitmap_free_hwirqs() before
irq_dispose_mapping() appears in 5 places under arch/powerpc, and
appears to have originated in commit 05af7bd2d75e ("[POWERPC] MPIC
U3/U4 MSI backend") from 2007.

Fixes: 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend")
Cc: stable@vger.kernel.org # v2.6.22+
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Paul Mackerras and committed by
Michael Ellerman
e297c939 b855d45d

+15 -10
+3 -2
arch/powerpc/platforms/pasemi/msi.c
··· 63 63 static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) 64 64 { 65 65 struct msi_desc *entry; 66 + irq_hw_number_t hwirq; 66 67 67 68 pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); 68 69 ··· 71 70 if (entry->irq == NO_IRQ) 72 71 continue; 73 72 73 + hwirq = virq_to_hw(entry->irq); 74 74 irq_set_msi_desc(entry->irq, NULL); 75 - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, 76 - virq_to_hw(entry->irq), ALLOC_CHUNK); 77 75 irq_dispose_mapping(entry->irq); 76 + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); 78 77 } 79 78 80 79 return;
+3 -2
arch/powerpc/platforms/powernv/pci.c
··· 99 99 struct pci_controller *hose = pci_bus_to_host(pdev->bus); 100 100 struct pnv_phb *phb = hose->private_data; 101 101 struct msi_desc *entry; 102 + irq_hw_number_t hwirq; 102 103 103 104 if (WARN_ON(!phb)) 104 105 return; ··· 107 106 for_each_pci_msi_entry(entry, pdev) { 108 107 if (entry->irq == NO_IRQ) 109 108 continue; 109 + hwirq = virq_to_hw(entry->irq); 110 110 irq_set_msi_desc(entry->irq, NULL); 111 - msi_bitmap_free_hwirqs(&phb->msi_bmp, 112 - virq_to_hw(entry->irq) - phb->msi_base, 1); 113 111 irq_dispose_mapping(entry->irq); 112 + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); 114 113 } 115 114 } 116 115 #endif /* CONFIG_PCI_MSI */
+3 -2
arch/powerpc/sysdev/fsl_msi.c
··· 128 128 { 129 129 struct msi_desc *entry; 130 130 struct fsl_msi *msi_data; 131 + irq_hw_number_t hwirq; 131 132 132 133 for_each_pci_msi_entry(entry, pdev) { 133 134 if (entry->irq == NO_IRQ) 134 135 continue; 136 + hwirq = virq_to_hw(entry->irq); 135 137 msi_data = irq_get_chip_data(entry->irq); 136 138 irq_set_msi_desc(entry->irq, NULL); 137 - msi_bitmap_free_hwirqs(&msi_data->bitmap, 138 - virq_to_hw(entry->irq), 1); 139 139 irq_dispose_mapping(entry->irq); 140 + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); 140 141 } 141 142 142 143 return;
+3 -2
arch/powerpc/sysdev/mpic_u3msi.c
··· 107 107 static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) 108 108 { 109 109 struct msi_desc *entry; 110 + irq_hw_number_t hwirq; 110 111 111 112 for_each_pci_msi_entry(entry, pdev) { 112 113 if (entry->irq == NO_IRQ) 113 114 continue; 114 115 116 + hwirq = virq_to_hw(entry->irq); 115 117 irq_set_msi_desc(entry->irq, NULL); 116 - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, 117 - virq_to_hw(entry->irq), 1); 118 118 irq_dispose_mapping(entry->irq); 119 + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); 119 120 } 120 121 121 122 return;
+3 -2
arch/powerpc/sysdev/ppc4xx_msi.c
··· 124 124 { 125 125 struct msi_desc *entry; 126 126 struct ppc4xx_msi *msi_data = &ppc4xx_msi; 127 + irq_hw_number_t hwirq; 127 128 128 129 dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); 129 130 130 131 for_each_pci_msi_entry(entry, dev) { 131 132 if (entry->irq == NO_IRQ) 132 133 continue; 134 + hwirq = virq_to_hw(entry->irq); 133 135 irq_set_msi_desc(entry->irq, NULL); 134 - msi_bitmap_free_hwirqs(&msi_data->bitmap, 135 - virq_to_hw(entry->irq), 1); 136 136 irq_dispose_mapping(entry->irq); 137 + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); 137 138 } 138 139 } 139 140