Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfio-v4.16-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Mask INTx from user if pdev->irq is zero (Alexey Kardashevskiy)

- Capability helper cleanup (Alex Williamson)

- Allow mmaps overlapping the MSI-X vector table, with a region capability
exposing this feature (Alexey Kardashevskiy)

- mdev static cleanups (Xiongwei Song)

* tag 'vfio-v4.16-rc1' of git://github.com/awilliam/linux-vfio:
vfio: mdev: make a couple of functions and structure vfio_mdev_driver static
vfio-pci: Allow mapping MSIX BAR
vfio: Simplify capability helper
vfio-pci: Mask INTx if a device is not capable of enabling it

+46 -117
+10 -5
drivers/gpu/drm/i915/gvt/kvmgt.c
··· 1012 1012 if (!sparse) 1013 1013 return -ENOMEM; 1014 1014 1015 + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; 1016 + sparse->header.version = 1; 1015 1017 sparse->nr_areas = nr_areas; 1016 1018 cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; 1017 1019 sparse->areas[0].offset = ··· 1035 1033 break; 1036 1034 default: 1037 1035 { 1038 - struct vfio_region_info_cap_type cap_type; 1036 + struct vfio_region_info_cap_type cap_type = { 1037 + .header.id = VFIO_REGION_INFO_CAP_TYPE, 1038 + .header.version = 1 }; 1039 1039 1040 1040 if (info.index >= VFIO_PCI_NUM_REGIONS + 1041 1041 vgpu->vdev.num_regions) ··· 1054 1050 cap_type.subtype = vgpu->vdev.region[i].subtype; 1055 1051 1056 1052 ret = vfio_info_add_capability(&caps, 1057 - VFIO_REGION_INFO_CAP_TYPE, 1058 - &cap_type); 1053 + &cap_type.header, 1054 + sizeof(cap_type)); 1059 1055 if (ret) 1060 1056 return ret; 1061 1057 } ··· 1065 1061 switch (cap_type_id) { 1066 1062 case VFIO_REGION_INFO_CAP_SPARSE_MMAP: 1067 1063 ret = vfio_info_add_capability(&caps, 1068 - VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1069 - sparse); 1064 + &sparse->header, sizeof(*sparse) + 1065 + (sparse->nr_areas * 1066 + sizeof(*sparse->areas))); 1070 1067 kfree(sparse); 1071 1068 if (ret) 1072 1069 return ret;
+3 -3
drivers/vfio/mdev/vfio_mdev.c
··· 111 111 .mmap = vfio_mdev_mmap, 112 112 }; 113 113 114 - int vfio_mdev_probe(struct device *dev) 114 + static int vfio_mdev_probe(struct device *dev) 115 115 { 116 116 struct mdev_device *mdev = to_mdev_device(dev); 117 117 118 118 return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev); 119 119 } 120 120 121 - void vfio_mdev_remove(struct device *dev) 121 + static void vfio_mdev_remove(struct device *dev) 122 122 { 123 123 vfio_del_group_dev(dev); 124 124 } 125 125 126 - struct mdev_driver vfio_mdev_driver = { 126 + static struct mdev_driver vfio_mdev_driver = { 127 127 .name = "vfio_mdev", 128 128 .probe = vfio_mdev_probe, 129 129 .remove = vfio_mdev_remove,
+16 -59
drivers/vfio/pci/vfio_pci.c
··· 207 207 } 208 208 } 209 209 210 + if (!pdev->irq) 211 + return true; 212 + 210 213 return false; 211 214 } 212 215 ··· 565 562 return walk.ret; 566 563 } 567 564 568 - static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, 569 - struct vfio_info_cap *caps) 565 + static int msix_mmappable_cap(struct vfio_pci_device *vdev, 566 + struct vfio_info_cap *caps) 570 567 { 571 - struct vfio_region_info_cap_sparse_mmap *sparse; 572 - size_t end, size; 573 - int nr_areas = 2, i = 0, ret; 568 + struct vfio_info_cap_header header = { 569 + .id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE, 570 + .version = 1 571 + }; 574 572 575 - end = pci_resource_len(vdev->pdev, vdev->msix_bar); 576 - 577 - /* If MSI-X table is aligned to the start or end, only one area */ 578 - if (((vdev->msix_offset & PAGE_MASK) == 0) || 579 - (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end)) 580 - nr_areas = 1; 581 - 582 - size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas)); 583 - 584 - sparse = kzalloc(size, GFP_KERNEL); 585 - if (!sparse) 586 - return -ENOMEM; 587 - 588 - sparse->nr_areas = nr_areas; 589 - 590 - if (vdev->msix_offset & PAGE_MASK) { 591 - sparse->areas[i].offset = 0; 592 - sparse->areas[i].size = vdev->msix_offset & PAGE_MASK; 593 - i++; 594 - } 595 - 596 - if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) { 597 - sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset + 598 - vdev->msix_size); 599 - sparse->areas[i].size = end - sparse->areas[i].offset; 600 - i++; 601 - } 602 - 603 - ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP, 604 - sparse); 605 - kfree(sparse); 606 - 607 - return ret; 573 + return vfio_info_add_capability(caps, &header, sizeof(header)); 608 574 } 609 575 610 576 int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, ··· 664 692 if (vdev->bar_mmap_supported[info.index]) { 665 693 info.flags |= VFIO_REGION_INFO_FLAG_MMAP; 666 694 if (info.index == vdev->msix_bar) { 667 - ret = msix_sparse_mmap_cap(vdev, 
&caps); 695 + ret = msix_mmappable_cap(vdev, &caps); 668 696 if (ret) 669 697 return ret; 670 698 } ··· 713 741 break; 714 742 default: 715 743 { 716 - struct vfio_region_info_cap_type cap_type; 744 + struct vfio_region_info_cap_type cap_type = { 745 + .header.id = VFIO_REGION_INFO_CAP_TYPE, 746 + .header.version = 1 }; 717 747 718 748 if (info.index >= 719 749 VFIO_PCI_NUM_REGIONS + vdev->num_regions) ··· 730 756 cap_type.type = vdev->region[i].type; 731 757 cap_type.subtype = vdev->region[i].subtype; 732 758 733 - ret = vfio_info_add_capability(&caps, 734 - VFIO_REGION_INFO_CAP_TYPE, 735 - &cap_type); 759 + ret = vfio_info_add_capability(&caps, &cap_type.header, 760 + sizeof(cap_type)); 736 761 if (ret) 737 762 return ret; 738 763 ··· 1094 1121 1095 1122 if (req_start + req_len > phys_len) 1096 1123 return -EINVAL; 1097 - 1098 - if (index == vdev->msix_bar) { 1099 - /* 1100 - * Disallow mmaps overlapping the MSI-X table; users don't 1101 - * get to touch this directly. We could find somewhere 1102 - * else to map the overlap, but page granularity is only 1103 - * a recommendation, not a requirement, so the user needs 1104 - * to know which bits are real. Requiring them to mmap 1105 - * around the table makes that clear. 1106 - */ 1107 - 1108 - /* If neither entirely above nor below, then it overlaps */ 1109 - if (!(req_start >= vdev->msix_offset + vdev->msix_size || 1110 - req_start + req_len <= vdev->msix_offset)) 1111 - return -EINVAL; 1112 - } 1113 1124 1114 1125 /* 1115 1126 * Even though we don't make use of the barmap for the mmap,
+5 -49
drivers/vfio/vfio.c
··· 1857 1857 } 1858 1858 EXPORT_SYMBOL(vfio_info_cap_shift); 1859 1859 1860 - static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type) 1860 + int vfio_info_add_capability(struct vfio_info_cap *caps, 1861 + struct vfio_info_cap_header *cap, size_t size) 1861 1862 { 1862 1863 struct vfio_info_cap_header *header; 1863 - struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type; 1864 - size_t size; 1865 1864 1866 - size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas); 1867 - header = vfio_info_cap_add(caps, size, 1868 - VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1); 1865 + header = vfio_info_cap_add(caps, size, cap->id, cap->version); 1869 1866 if (IS_ERR(header)) 1870 1867 return PTR_ERR(header); 1871 1868 1872 - sparse_cap = container_of(header, 1873 - struct vfio_region_info_cap_sparse_mmap, header); 1874 - sparse_cap->nr_areas = sparse->nr_areas; 1875 - memcpy(sparse_cap->areas, sparse->areas, 1876 - sparse->nr_areas * sizeof(*sparse->areas)); 1869 + memcpy(header + 1, cap + 1, size - sizeof(*header)); 1870 + 1877 1871 return 0; 1878 - } 1879 - 1880 - static int region_type_cap(struct vfio_info_cap *caps, void *cap_type) 1881 - { 1882 - struct vfio_info_cap_header *header; 1883 - struct vfio_region_info_cap_type *type_cap, *cap = cap_type; 1884 - 1885 - header = vfio_info_cap_add(caps, sizeof(*cap), 1886 - VFIO_REGION_INFO_CAP_TYPE, 1); 1887 - if (IS_ERR(header)) 1888 - return PTR_ERR(header); 1889 - 1890 - type_cap = container_of(header, struct vfio_region_info_cap_type, 1891 - header); 1892 - type_cap->type = cap->type; 1893 - type_cap->subtype = cap->subtype; 1894 - return 0; 1895 - } 1896 - 1897 - int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id, 1898 - void *cap_type) 1899 - { 1900 - int ret = -EINVAL; 1901 - 1902 - if (!cap_type) 1903 - return 0; 1904 - 1905 - switch (cap_type_id) { 1906 - case VFIO_REGION_INFO_CAP_SPARSE_MMAP: 1907 - ret = sparse_mmap_cap(caps, cap_type); 1908 - break; 1909 - 1910 - 
case VFIO_REGION_INFO_CAP_TYPE: 1911 - ret = region_type_cap(caps, cap_type); 1912 - break; 1913 - } 1914 - 1915 - return ret; 1916 1872 } 1917 1873 EXPORT_SYMBOL(vfio_info_add_capability); 1918 1874
+2 -1
include/linux/vfio.h
··· 145 145 extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); 146 146 147 147 extern int vfio_info_add_capability(struct vfio_info_cap *caps, 148 - int cap_type_id, void *cap_type); 148 + struct vfio_info_cap_header *cap, 149 + size_t size); 149 150 150 151 extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, 151 152 int num_irqs, int max_irq_type,
+10
include/uapi/linux/vfio.h
··· 301 301 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) 302 302 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) 303 303 304 + /* 305 + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped 306 + * which allows direct access to non-MSIX registers which happened to be within 307 + * the same system page. 308 + * 309 + * Even though the userspace gets direct access to the MSIX data, the existing 310 + * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration. 311 + */ 312 + #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 313 + 304 314 /** 305 315 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, 306 316 * struct vfio_irq_info)