Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio: simplify iommu group allocation for mediated devices

Reuse the logic in vfio_noiommu_group_alloc to allocate a fake
single-device iommu group for mediated devices by factoring out a common
function, and replacing the noiommu boolean field in struct vfio_group
with an enum to distinguish the three different kinds of groups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-8-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

Authored by Christoph Hellwig and committed by Alex Williamson
c68ea0d0 c04ac340

+76 -72
+1 -1
drivers/s390/crypto/vfio_ap_ops.c
··· 351 351 list_add(&matrix_mdev->node, &matrix_dev->mdev_list); 352 352 mutex_unlock(&matrix_dev->lock); 353 353 354 - ret = vfio_register_group_dev(&matrix_mdev->vdev); 354 + ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); 355 355 if (ret) 356 356 goto err_list; 357 357 dev_set_drvdata(&mdev->dev, matrix_mdev);
+4 -41
drivers/vfio/mdev/mdev_driver.c
··· 13 13 14 14 #include "mdev_private.h" 15 15 16 - static int mdev_attach_iommu(struct mdev_device *mdev) 17 - { 18 - int ret; 19 - struct iommu_group *group; 20 - 21 - group = iommu_group_alloc(); 22 - if (IS_ERR(group)) 23 - return PTR_ERR(group); 24 - 25 - ret = iommu_group_add_device(group, &mdev->dev); 26 - if (!ret) 27 - dev_info(&mdev->dev, "MDEV: group_id = %d\n", 28 - iommu_group_id(group)); 29 - 30 - iommu_group_put(group); 31 - return ret; 32 - } 33 - 34 - static void mdev_detach_iommu(struct mdev_device *mdev) 35 - { 36 - iommu_group_remove_device(&mdev->dev); 37 - dev_info(&mdev->dev, "MDEV: detaching iommu\n"); 38 - } 39 - 40 16 static int mdev_probe(struct device *dev) 41 17 { 42 18 struct mdev_driver *drv = 43 19 container_of(dev->driver, struct mdev_driver, driver); 44 - struct mdev_device *mdev = to_mdev_device(dev); 45 - int ret; 46 20 47 - ret = mdev_attach_iommu(mdev); 48 - if (ret) 49 - return ret; 50 - 51 - if (drv->probe) { 52 - ret = drv->probe(mdev); 53 - if (ret) 54 - mdev_detach_iommu(mdev); 55 - } 56 - 57 - return ret; 21 + if (!drv->probe) 22 + return 0; 23 + return drv->probe(to_mdev_device(dev)); 58 24 } 59 25 60 26 static void mdev_remove(struct device *dev) 61 27 { 62 28 struct mdev_driver *drv = 63 29 container_of(dev->driver, struct mdev_driver, driver); 64 - struct mdev_device *mdev = to_mdev_device(dev); 65 30 66 31 if (drv->remove) 67 - drv->remove(mdev); 68 - 69 - mdev_detach_iommu(mdev); 32 + drv->remove(to_mdev_device(dev)); 70 33 } 71 34 72 35 static int mdev_match(struct device *dev, struct device_driver *drv)
+1 -1
drivers/vfio/mdev/vfio_mdev.c
··· 119 119 return -ENOMEM; 120 120 121 121 vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); 122 - ret = vfio_register_group_dev(vdev); 122 + ret = vfio_register_emulated_iommu_dev(vdev); 123 123 if (ret) 124 124 goto out_uninit; 125 125
+66 -26
drivers/vfio/vfio.c
··· 67 67 struct list_head unbound_next; 68 68 }; 69 69 70 + enum vfio_group_type { 71 + /* 72 + * Physical device with IOMMU backing. 73 + */ 74 + VFIO_IOMMU, 75 + 76 + /* 77 + * Virtual device without IOMMU backing. The VFIO core fakes up an 78 + * iommu_group as the iommu_group sysfs interface is part of the 79 + * userspace ABI. The user of these devices must not be able to 80 + * directly trigger unmediated DMA. 81 + */ 82 + VFIO_EMULATED_IOMMU, 83 + 84 + /* 85 + * Physical device without IOMMU backing. The VFIO core fakes up an 86 + * iommu_group as the iommu_group sysfs interface is part of the 87 + * userspace ABI. Users can trigger unmediated DMA by the device, 88 + * usage is highly dangerous, requires an explicit opt-in and will 89 + * taint the kernel. 90 + */ 91 + VFIO_NO_IOMMU, 92 + }; 93 + 70 94 struct vfio_group { 71 95 struct kref kref; 72 96 int minor; ··· 107 83 struct mutex unbound_lock; 108 84 atomic_t opened; 109 85 wait_queue_head_t container_q; 110 - bool noiommu; 86 + enum vfio_group_type type; 111 87 unsigned int dev_counter; 112 88 struct kvm *kvm; 113 89 struct blocking_notifier_head notifier; ··· 360 336 * Group objects - create, release, get, put, search 361 337 */ 362 338 static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 363 - bool noiommu) 339 + enum vfio_group_type type) 364 340 { 365 341 struct vfio_group *group, *tmp; 366 342 struct device *dev; ··· 379 355 atomic_set(&group->opened, 0); 380 356 init_waitqueue_head(&group->container_q); 381 357 group->iommu_group = iommu_group; 382 - group->noiommu = noiommu; 358 + group->type = type; 383 359 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 384 360 385 361 group->nb.notifier_call = vfio_iommu_group_notifier; ··· 415 391 } 416 392 417 393 dev = device_create(vfio.class, NULL, 418 - MKDEV(MAJOR(vfio.group_devt), minor), 419 - group, "%s%d", group->noiommu ? 
"noiommu-" : "", 394 + MKDEV(MAJOR(vfio.group_devt), minor), group, "%s%d", 395 + group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 420 396 iommu_group_id(iommu_group)); 421 397 if (IS_ERR(dev)) { 422 398 vfio_free_group_minor(minor); ··· 802 778 } 803 779 EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); 804 780 805 - #ifdef CONFIG_VFIO_NOIOMMU 806 - static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev) 781 + static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, 782 + enum vfio_group_type type) 807 783 { 808 784 struct iommu_group *iommu_group; 809 785 struct vfio_group *group; ··· 818 794 if (ret) 819 795 goto out_put_group; 820 796 821 - group = vfio_create_group(iommu_group, true); 797 + group = vfio_create_group(iommu_group, type); 822 798 if (IS_ERR(group)) { 823 799 ret = PTR_ERR(group); 824 800 goto out_remove_device; ··· 832 808 iommu_group_put(iommu_group); 833 809 return ERR_PTR(ret); 834 810 } 835 - #endif 836 811 837 812 static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) 838 813 { ··· 847 824 * bus. Taint the kernel because we're about to give a DMA 848 825 * capable device to a user without IOMMU protection. 849 826 */ 850 - group = vfio_noiommu_group_alloc(dev); 827 + group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); 851 828 if (!IS_ERR(group)) { 852 829 add_taint(TAINT_USER, LOCKDEP_STILL_OK); 853 830 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); ··· 864 841 goto out_put; 865 842 866 843 /* a newly created vfio_group keeps the reference. 
*/ 867 - group = vfio_create_group(iommu_group, false); 844 + group = vfio_create_group(iommu_group, VFIO_IOMMU); 868 845 if (IS_ERR(group)) 869 846 goto out_put; 870 847 return group; ··· 874 851 return group; 875 852 } 876 853 877 - int vfio_register_group_dev(struct vfio_device *device) 854 + static int __vfio_register_dev(struct vfio_device *device, 855 + struct vfio_group *group) 878 856 { 879 857 struct vfio_device *existing_device; 880 - struct vfio_group *group; 858 + 859 + if (IS_ERR(group)) 860 + return PTR_ERR(group); 881 861 882 862 /* 883 863 * If the driver doesn't specify a set then the device is added to a ··· 889 863 if (!device->dev_set) 890 864 vfio_assign_device_set(device, device); 891 865 892 - group = vfio_group_find_or_alloc(device->dev); 893 - if (IS_ERR(group)) 894 - return PTR_ERR(group); 895 - 896 866 existing_device = vfio_group_get_device(group, device->dev); 897 867 if (existing_device) { 898 868 dev_WARN(device->dev, "Device already exists on group %d\n", 899 869 iommu_group_id(group->iommu_group)); 900 870 vfio_device_put(existing_device); 901 - if (group->noiommu) 871 + if (group->type == VFIO_NO_IOMMU || 872 + group->type == VFIO_EMULATED_IOMMU) 902 873 iommu_group_remove_device(device->dev); 903 874 vfio_group_put(group); 904 875 return -EBUSY; ··· 914 891 915 892 return 0; 916 893 } 894 + 895 + int vfio_register_group_dev(struct vfio_device *device) 896 + { 897 + return __vfio_register_dev(device, 898 + vfio_group_find_or_alloc(device->dev)); 899 + } 917 900 EXPORT_SYMBOL_GPL(vfio_register_group_dev); 901 + 902 + /* 903 + * Register a virtual device without IOMMU backing. The user of this 904 + * device must not be able to directly trigger unmediated DMA. 
905 + */ 906 + int vfio_register_emulated_iommu_dev(struct vfio_device *device) 907 + { 908 + return __vfio_register_dev(device, 909 + vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); 910 + } 911 + EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 918 912 919 913 /** 920 914 * Get a reference to the vfio_device for a device. Even if the ··· 1059 1019 if (list_empty(&group->device_list)) 1060 1020 wait_event(group->container_q, !group->container); 1061 1021 1062 - if (group->noiommu) 1022 + if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) 1063 1023 iommu_group_remove_device(device->dev); 1064 1024 1065 1025 /* Matches the get in vfio_register_group_dev() */ ··· 1408 1368 if (atomic_read(&group->container_users)) 1409 1369 return -EINVAL; 1410 1370 1411 - if (group->noiommu && !capable(CAP_SYS_RAWIO)) 1371 + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 1412 1372 return -EPERM; 1413 1373 1414 1374 f = fdget(container_fd); ··· 1428 1388 1429 1389 /* Real groups and fake groups cannot mix */ 1430 1390 if (!list_empty(&container->group_list) && 1431 - container->noiommu != group->noiommu) { 1391 + container->noiommu != (group->type == VFIO_NO_IOMMU)) { 1432 1392 ret = -EPERM; 1433 1393 goto unlock_out; 1434 1394 } ··· 1442 1402 } 1443 1403 1444 1404 group->container = container; 1445 - container->noiommu = group->noiommu; 1405 + container->noiommu = (group->type == VFIO_NO_IOMMU); 1446 1406 list_add(&group->container_next, &container->group_list); 1447 1407 1448 1408 /* Get a reference on the container and mark a user within the group */ ··· 1466 1426 if (!atomic_inc_not_zero(&group->container_users)) 1467 1427 return -EINVAL; 1468 1428 1469 - if (group->noiommu) { 1429 + if (group->type == VFIO_NO_IOMMU) { 1470 1430 atomic_dec(&group->container_users); 1471 1431 return -EPERM; 1472 1432 } ··· 1491 1451 !group->container->iommu_driver || !vfio_group_viable(group)) 1492 1452 return -EINVAL; 1493 1453 1494 - if 
(group->noiommu && !capable(CAP_SYS_RAWIO)) 1454 + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 1495 1455 return -EPERM; 1496 1456 1497 1457 device = vfio_device_get_from_name(group, buf); ··· 1538 1498 1539 1499 fd_install(fdno, filep); 1540 1500 1541 - if (group->noiommu) 1501 + if (group->type == VFIO_NO_IOMMU) 1542 1502 dev_warn(device->dev, "vfio-noiommu device opened by user " 1543 1503 "(%s:%d)\n", current->comm, task_pid_nr(current)); 1544 1504 return fdno; ··· 1634 1594 if (!group) 1635 1595 return -ENODEV; 1636 1596 1637 - if (group->noiommu && !capable(CAP_SYS_RAWIO)) { 1597 + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 1638 1598 vfio_group_put(group); 1639 1599 return -EPERM; 1640 1600 }
+1
include/linux/vfio.h
··· 75 75 const struct vfio_device_ops *ops); 76 76 void vfio_uninit_group_dev(struct vfio_device *device); 77 77 int vfio_register_group_dev(struct vfio_device *device); 78 + int vfio_register_emulated_iommu_dev(struct vfio_device *device); 78 79 void vfio_unregister_group_dev(struct vfio_device *device); 79 80 extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); 80 81 extern void vfio_device_put(struct vfio_device *device);
+1 -1
samples/vfio-mdev/mbochs.c
··· 553 553 mbochs_create_config_space(mdev_state); 554 554 mbochs_reset(mdev_state); 555 555 556 - ret = vfio_register_group_dev(&mdev_state->vdev); 556 + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); 557 557 if (ret) 558 558 goto err_mem; 559 559 dev_set_drvdata(&mdev->dev, mdev_state);
+1 -1
samples/vfio-mdev/mdpy.c
··· 258 258 259 259 mdpy_count++; 260 260 261 - ret = vfio_register_group_dev(&mdev_state->vdev); 261 + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); 262 262 if (ret) 263 263 goto err_mem; 264 264 dev_set_drvdata(&mdev->dev, mdev_state);
+1 -1
samples/vfio-mdev/mtty.c
··· 741 741 742 742 mtty_create_config_space(mdev_state); 743 743 744 - ret = vfio_register_group_dev(&mdev_state->vdev); 744 + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); 745 745 if (ret) 746 746 goto err_vconfig; 747 747 dev_set_drvdata(&mdev->dev, mdev_state);