Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio/pci: Use kernel VPD access functions

The PCI VPD capability operates on a set of window registers in PCI
config space. Writing to the address register triggers either a read
or write, depending on the setting of the PCI_VPD_ADDR_F bit within
the address register. The data register provides either the source
for writes or the target for reads.

This model is easily broken by concurrent access, so the kernel has
adopted a set of access functions that serialize access to these
registers. Additionally, commits like 932c435caba8 ("PCI: Add
dev_flags bit to access VPD through function 0") and 7aa6ca4d39ed
("PCI: Add VPD function 0 quirk for Intel Ethernet devices") indicate
that VPD registers can be shared between functions on multifunction
devices, creating dependencies between otherwise independent devices.

Fortunately it's quite easy to emulate the VPD registers, simply
storing copies of the address and data registers in memory and
triggering a VPD read or write on writes to the address register.
This allows vfio users to avoid seeing spurious register changes from
accesses on other devices and enables the use of shared quirks in the
host kernel. We can theoretically still race with access through
sysfs, but the window of opportunity is much smaller.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Mark Rustad <mark.d.rustad@intel.com>

+69 -1
+69 -1
drivers/vfio/pci/vfio_pci_config.c
··· 671 671 return 0; 672 672 } 673 673 674 + static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos, 675 + int count, struct perm_bits *perm, 676 + int offset, __le32 val) 677 + { 678 + struct pci_dev *pdev = vdev->pdev; 679 + __le16 *paddr = (__le16 *)(vdev->vconfig + pos - offset + PCI_VPD_ADDR); 680 + __le32 *pdata = (__le32 *)(vdev->vconfig + pos - offset + PCI_VPD_DATA); 681 + u16 addr; 682 + u32 data; 683 + 684 + /* 685 + * Write through to emulation. If the write includes the upper byte 686 + * of PCI_VPD_ADDR, then the PCI_VPD_ADDR_F bit is written and we 687 + * have work to do. 688 + */ 689 + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); 690 + if (count < 0 || offset > PCI_VPD_ADDR + 1 || 691 + offset + count <= PCI_VPD_ADDR + 1) 692 + return count; 693 + 694 + addr = le16_to_cpu(*paddr); 695 + 696 + if (addr & PCI_VPD_ADDR_F) { 697 + data = le32_to_cpu(*pdata); 698 + if (pci_write_vpd(pdev, addr & ~PCI_VPD_ADDR_F, 4, &data) != 4) 699 + return count; 700 + } else { 701 + if (pci_read_vpd(pdev, addr, 4, &data) != 4) 702 + return count; 703 + *pdata = cpu_to_le32(data); 704 + } 705 + 706 + /* 707 + * Toggle PCI_VPD_ADDR_F in the emulated PCI_VPD_ADDR register to 708 + * signal completion. If an error occurs above, we assume that not 709 + * toggling this bit will induce a driver timeout. 710 + */ 711 + addr ^= PCI_VPD_ADDR_F; 712 + *paddr = cpu_to_le16(addr); 713 + 714 + return count; 715 + } 716 + 717 + /* Permissions for Vital Product Data capability */ 718 + static int __init init_pci_cap_vpd_perm(struct perm_bits *perm) 719 + { 720 + if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_VPD])) 721 + return -ENOMEM; 722 + 723 + perm->writefn = vfio_vpd_config_write; 724 + 725 + /* 726 + * We always virtualize the next field so we can remove 727 + * capabilities from the chain if we want to. 
728 + */ 729 + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); 730 + 731 + /* 732 + * Both the address and data registers are virtualized to 733 + * enable access through the pci_vpd_read/write functions 734 + */ 735 + p_setw(perm, PCI_VPD_ADDR, (u16)ALL_VIRT, (u16)ALL_WRITE); 736 + p_setd(perm, PCI_VPD_DATA, ALL_VIRT, ALL_WRITE); 737 + 738 + return 0; 739 + } 740 + 674 741 /* Permissions for PCI-X capability */ 675 742 static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) 676 743 { ··· 857 790 free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]); 858 791 859 792 free_perm_bits(&cap_perms[PCI_CAP_ID_PM]); 793 + free_perm_bits(&cap_perms[PCI_CAP_ID_VPD]); 860 794 free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]); 861 795 free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]); 862 796 free_perm_bits(&cap_perms[PCI_CAP_ID_AF]); ··· 875 807 876 808 /* Capabilities */ 877 809 ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); 878 - cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write; 810 + ret |= init_pci_cap_vpd_perm(&cap_perms[PCI_CAP_ID_VPD]); 879 811 ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); 880 812 cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write; 881 813 ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]);