Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfio-v4.7-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Hide INTx on certain known broken devices (Alex Williamson)

- Additional backdoor reset detection (Alex Williamson)

- Remove unused iommudata reference (Alexey Kardashevskiy)

- Use cfg_size to avoid probing extended config space (Alexey
Kardashevskiy)

* tag 'vfio-v4.7-rc1' of git://github.com/awilliam/linux-vfio:
vfio_pci: Test for extended capabilities if config space > 256 bytes
vfio_iommu_spapr_tce: Remove unneeded iommu_group_get_iommudata
vfio/pci: Add test for BAR restore
vfio/pci: Hide broken INTx support from user

+84 -20
+45 -10
drivers/vfio/pci/vfio_pci.c
··· 113 113 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); 114 114 static void vfio_pci_disable(struct vfio_pci_device *vdev); 115 115 116 + /* 117 + * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND 118 + * _and_ the ability detect when the device is asserting INTx via PCI_STATUS. 119 + * If a device implements the former but not the latter we would typically 120 + * expect broken_intx_masking be set and require an exclusive interrupt. 121 + * However since we do have control of the device's ability to assert INTx, 122 + * we can instead pretend that the device does not implement INTx, virtualizing 123 + * the pin register to report zero and maintaining DisINTx set on the host. 124 + */ 125 + static bool vfio_pci_nointx(struct pci_dev *pdev) 126 + { 127 + switch (pdev->vendor) { 128 + case PCI_VENDOR_ID_INTEL: 129 + switch (pdev->device) { 130 + /* All i40e (XL710/X710) 10/20/40GbE NICs */ 131 + case 0x1572: 132 + case 0x1574: 133 + case 0x1580 ... 0x1581: 134 + case 0x1583 ... 0x1589: 135 + case 0x37d0 ... 0x37d2: 136 + return true; 137 + default: 138 + return false; 139 + } 140 + } 141 + 142 + return false; 143 + } 144 + 116 145 static int vfio_pci_enable(struct vfio_pci_device *vdev) 117 146 { 118 147 struct pci_dev *pdev = vdev->pdev; ··· 165 136 pr_debug("%s: Couldn't store %s saved state\n", 166 137 __func__, dev_name(&pdev->dev)); 167 138 139 + if (likely(!nointxmask)) { 140 + if (vfio_pci_nointx(pdev)) { 141 + dev_info(&pdev->dev, "Masking broken INTx support\n"); 142 + vdev->nointx = true; 143 + pci_intx(pdev, 0); 144 + } else 145 + vdev->pci_2_3 = pci_intx_mask_supported(pdev); 146 + } 147 + 148 + pci_read_config_word(pdev, PCI_COMMAND, &cmd); 149 + if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) { 150 + cmd &= ~PCI_COMMAND_INTX_DISABLE; 151 + pci_write_config_word(pdev, PCI_COMMAND, cmd); 152 + } 153 + 168 154 ret = vfio_config_init(vdev); 169 155 if (ret) { 170 156 kfree(vdev->pci_saved_state); 171 157 vdev->pci_saved_state = NULL; 172 158 pci_disable_device(pdev); 173 159 return ret; 174 - } 175 - 176 - if (likely(!nointxmask)) 177 - vdev->pci_2_3 = pci_intx_mask_supported(pdev); 178 - 179 - pci_read_config_word(pdev, PCI_COMMAND, &cmd); 180 - if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) { 181 - cmd &= ~PCI_COMMAND_INTX_DISABLE; 182 - pci_write_config_word(pdev, PCI_COMMAND, cmd); 183 160 } 184 161 185 162 msix_pos = pdev->msix_cap; ··· 339 304 if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { 340 305 u8 pin; 341 306 pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); 342 - if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin) 307 + if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin) 343 308 return 1; 344 309 345 310 } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
+38 -8
drivers/vfio/pci/vfio_pci_config.c
··· 408 408 { 409 409 struct pci_dev *pdev = vdev->pdev; 410 410 u32 *rbar = vdev->rbar; 411 + u16 cmd; 411 412 int i; 412 413 413 414 if (pdev->is_virtfn) ··· 421 420 pci_user_write_config_dword(pdev, i, *rbar); 422 421 423 422 pci_user_write_config_dword(pdev, PCI_ROM_ADDRESS, *rbar); 423 + 424 + if (vdev->nointx) { 425 + pci_user_read_config_word(pdev, PCI_COMMAND, &cmd); 426 + cmd |= PCI_COMMAND_INTX_DISABLE; 427 + pci_user_write_config_word(pdev, PCI_COMMAND, cmd); 428 + } 424 429 } 425 430 426 431 static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar) ··· 522 515 return count; 523 516 } 524 517 518 + /* Test whether BARs match the value we think they should contain */ 519 + static bool vfio_need_bar_restore(struct vfio_pci_device *vdev) 520 + { 521 + int i = 0, pos = PCI_BASE_ADDRESS_0, ret; 522 + u32 bar; 523 + 524 + for (; pos <= PCI_BASE_ADDRESS_5; i++, pos += 4) { 525 + if (vdev->rbar[i]) { 526 + ret = pci_user_read_config_dword(vdev->pdev, pos, &bar); 527 + if (ret || vdev->rbar[i] != bar) 528 + return true; 529 + } 530 + } 531 + 532 + return false; 533 + } 534 + 525 535 static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, 526 536 int count, struct perm_bits *perm, 527 537 int offset, __le32 val) ··· 577 553 * SR-IOV devices will trigger this, but we catch them later 578 554 */ 579 555 if ((new_mem && virt_mem && !phys_mem) || 580 - (new_io && virt_io && !phys_io)) 556 + (new_io && virt_io && !phys_io) || 557 + vfio_need_bar_restore(vdev)) 581 558 vfio_bar_restore(vdev); 582 559 } 583 560 ··· 1149 1124 return pcibios_err_to_errno(ret); 1150 1125 1151 1126 if (PCI_X_CMD_VERSION(word)) { 1152 - /* Test for extended capabilities */ 1153 - pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); 1154 - vdev->extended_caps = (dword != 0); 1127 + if (pdev->cfg_size > PCI_CFG_SPACE_SIZE) { 1128 + /* Test for extended capabilities */ 1129 + pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, 1130 + &dword); 1131 + vdev->extended_caps = (dword != 0); 1132 + } 1155 1133 return PCI_CAP_PCIX_SIZEOF_V2; 1156 1134 } else 1157 1135 return PCI_CAP_PCIX_SIZEOF_V0; ··· 1166 1138 1167 1139 return byte; 1168 1140 case PCI_CAP_ID_EXP: 1169 - /* Test for extended capabilities */ 1170 - pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); 1171 - vdev->extended_caps = (dword != 0); 1141 + if (pdev->cfg_size > PCI_CFG_SPACE_SIZE) { 1142 + /* Test for extended capabilities */ 1143 + pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); 1144 + vdev->extended_caps = (dword != 0); 1145 + } 1172 1146 1173 1147 /* length based on version */ 1174 1148 if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1) ··· 1575 1545 *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); 1576 1546 } 1577 1547 1578 - if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX)) 1548 + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) 1579 1549 vconfig[PCI_INTERRUPT_PIN] = 0; 1580 1550 1581 1551 ret = vfio_cap_init(vdev);
+1
drivers/vfio/pci/vfio_pci_private.h
··· 83 83 bool bardirty; 84 84 bool has_vga; 85 85 bool needs_reset; 86 + bool nointx; 86 87 struct pci_saved_state *pci_saved_state; 87 88 int refcnt; 88 89 struct eventfd_ctx *err_trigger;
-2
drivers/vfio/vfio_iommu_spapr_tce.c
··· 331 331 static void tce_iommu_release(void *iommu_data) 332 332 { 333 333 struct tce_container *container = iommu_data; 334 - struct iommu_table_group *table_group; 335 334 struct tce_iommu_group *tcegrp; 336 335 long i; 337 336 338 337 while (tce_groups_attached(container)) { 339 338 tcegrp = list_first_entry(&container->group_list, 340 339 struct tce_iommu_group, next); 341 - table_group = iommu_group_get_iommudata(tcegrp->grp); 342 340 tce_iommu_detach_group(iommu_data, tcegrp->grp); 343 341 } 344 342