Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:
"Some polishing and small fixes for iommufd:

- Remove IOMMU_CAP_INTR_REMAP, instead rely on the interrupt
subsystem

- Use GFP_KERNEL_ACCOUNT inside the iommu_domains

- Support VFIO_NOIOMMU mode with iommufd

- Various typos

- A list corruption bug if HWPTs are used for attach"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
iommufd: Do not add the same hwpt to the ioas->hwpt_list twice
iommufd: Make sure to zero vfio_iommu_type1_info before copying to user
vfio: Support VFIO_NOIOMMU with iommufd
iommufd: Add three missing structures in ucmd_buffer
selftests: iommu: Fix test_cmd_destroy_access() call in user_copy
iommu: Remove IOMMU_CAP_INTR_REMAP
irq/s390: Add arch_is_isolated_msi() for s390
iommu/x86: Replace IOMMU_CAP_INTR_REMAP with IRQ_DOMAIN_FLAG_ISOLATED_MSI
genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI
genirq/irqdomain: Remove unused irq_domain_check_msi_remap() code
iommufd: Convert to msi_device_has_isolated_msi()
vfio/type1: Convert to iommu_group_has_isolated_msi()
iommu: Add iommu_group_has_isolated_msi()
genirq/msi: Add msi_device_has_isolated_msi()

+243 -134
+17
arch/s390/include/asm/msi.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_S390_MSI_H 3 + #define _ASM_S390_MSI_H 4 + #include <asm-generic/msi.h> 5 + 6 + /* 7 + * Work around S390 not using irq_domain at all so we can't set 8 + * IRQ_DOMAIN_FLAG_ISOLATED_MSI. See the link below for an explanation of how it works: 9 + * 10 + * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/ 11 + * 12 + * Note this is less isolated than the ARM/x86 versions as userspace can trigger 13 + * MSI belonging to kernel devices within the same gisa. 14 + */ 15 + #define arch_is_isolated_msi() true 16 + 17 + #endif
+2 -2
drivers/infiniband/hw/usnic/usnic_uiom.c
··· 277 277 usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", 278 278 va_start, &pa_start, size, flags); 279 279 err = iommu_map(pd->domain, va_start, pa_start, 280 - size, flags, GFP_KERNEL); 280 + size, flags, GFP_ATOMIC); 281 281 if (err) { 282 282 usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", 283 283 va_start, &pa_start, size, err); ··· 294 294 usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", 295 295 va_start, &pa_start, size, flags); 296 296 err = iommu_map(pd->domain, va_start, pa_start, 297 - size, flags, GFP_KERNEL); 297 + size, flags, GFP_ATOMIC); 298 298 if (err) { 299 299 usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", 300 300 va_start, &pa_start, size, err);
+2 -3
drivers/iommu/amd/iommu.c
··· 2278 2278 switch (cap) { 2279 2279 case IOMMU_CAP_CACHE_COHERENCY: 2280 2280 return true; 2281 - case IOMMU_CAP_INTR_REMAP: 2282 - return (irq_remapping_enabled == 1); 2283 2281 case IOMMU_CAP_NOEXEC: 2284 2282 return false; 2285 2283 case IOMMU_CAP_PRE_BOOT_PROTECTION: ··· 3680 3682 } 3681 3683 3682 3684 irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); 3683 - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; 3685 + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | 3686 + IRQ_DOMAIN_FLAG_ISOLATED_MSI; 3684 3687 3685 3688 if (amd_iommu_np_cache) 3686 3689 iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
-2
drivers/iommu/intel/iommu.c
··· 4478 4478 switch (cap) { 4479 4479 case IOMMU_CAP_CACHE_COHERENCY: 4480 4480 return true; 4481 - case IOMMU_CAP_INTR_REMAP: 4482 - return irq_remapping_enabled == 1; 4483 4481 case IOMMU_CAP_PRE_BOOT_PROTECTION: 4484 4482 return dmar_platform_optin(); 4485 4483 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+2 -1
drivers/iommu/intel/irq_remapping.c
··· 573 573 } 574 574 575 575 irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); 576 - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; 576 + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | 577 + IRQ_DOMAIN_FLAG_ISOLATED_MSI; 577 578 578 579 if (cap_caching_mode(iommu->cap)) 579 580 iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
+24
drivers/iommu/iommu.c
··· 30 30 #include <linux/cc_platform.h> 31 31 #include <trace/events/iommu.h> 32 32 #include <linux/sched/mm.h> 33 + #include <linux/msi.h> 33 34 34 35 #include "dma-iommu.h" 35 36 ··· 1904 1903 return ops->capable(dev, cap); 1905 1904 } 1906 1905 EXPORT_SYMBOL_GPL(device_iommu_capable); 1906 + 1907 + /** 1908 + * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1909 + * for a group 1910 + * @group: Group to query 1911 + * 1912 + * IOMMU groups should not have differing values of 1913 + * msi_device_has_isolated_msi() for devices in a group. However nothing 1914 + * directly prevents this, so ensure mistakes don't result in isolation failures 1915 + * by checking that all the devices are the same. 1916 + */ 1917 + bool iommu_group_has_isolated_msi(struct iommu_group *group) 1918 + { 1919 + struct group_device *group_dev; 1920 + bool ret = true; 1921 + 1922 + mutex_lock(&group->mutex); 1923 + list_for_each_entry(group_dev, &group->devices, list) 1924 + ret &= msi_device_has_isolated_msi(group_dev->dev); 1925 + mutex_unlock(&group->mutex); 1926 + return ret; 1927 + } 1928 + EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1907 1929 1908 1930 /** 1909 1931 * iommu_set_fault_handler() - set a fault handler for an iommu domain
+1 -1
drivers/iommu/iommufd/Kconfig
··· 23 23 removed. 24 24 25 25 IOMMUFD VFIO container emulation is known to lack certain features 26 - of the native VFIO container, such as no-IOMMU support, peer-to-peer 26 + of the native VFIO container, such as peer-to-peer 27 27 DMA mapping, PPC IOMMU support, as well as other potentially 28 28 undiscovered gaps. This option is currently intended for the 29 29 purpose of testing IOMMUFD with unmodified userspace supporting VFIO
+1 -7
drivers/iommu/iommufd/device.c
··· 4 4 #include <linux/iommufd.h> 5 5 #include <linux/slab.h> 6 6 #include <linux/iommu.h> 7 - #include <linux/irqdomain.h> 8 7 9 8 #include "io_pagetable.h" 10 9 #include "iommufd_private.h" ··· 168 169 * operation from the device (eg a simple DMA) cannot trigger an 169 170 * interrupt outside this iommufd context. 170 171 */ 171 - if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) && 172 - !irq_domain_check_msi_remap()) { 172 + if (!iommu_group_has_isolated_msi(idev->group)) { 173 173 if (!allow_unsafe_interrupts) 174 174 return -EPERM; 175 175 ··· 344 346 rc = iommufd_device_do_attach(idev, hwpt); 345 347 if (rc) 346 348 goto out_put_pt_obj; 347 - 348 - mutex_lock(&hwpt->ioas->mutex); 349 - list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); 350 - mutex_unlock(&hwpt->ioas->mutex); 351 349 break; 352 350 } 353 351 case IOMMUFD_OBJ_IOAS: {
+2
drivers/iommu/iommufd/iommufd_private.h
··· 18 18 struct xarray objects; 19 19 20 20 u8 account_mode; 21 + /* Compatibility with VFIO no iommu */ 22 + u8 no_iommu_mode; 21 23 struct iommufd_ioas *vfio_ioas; 22 24 }; 23 25
+3
drivers/iommu/iommufd/main.c
··· 252 252 struct iommu_destroy destroy; 253 253 struct iommu_ioas_alloc alloc; 254 254 struct iommu_ioas_allow_iovas allow_iovas; 255 + struct iommu_ioas_copy ioas_copy; 255 256 struct iommu_ioas_iova_ranges iova_ranges; 256 257 struct iommu_ioas_map map; 257 258 struct iommu_ioas_unmap unmap; 259 + struct iommu_option option; 260 + struct iommu_vfio_ioas vfio_ioas; 258 261 #ifdef CONFIG_IOMMUFD_TEST 259 262 struct iommu_test_cmd test; 260 263 #endif
+87 -20
drivers/iommu/iommufd/vfio_compat.c
··· 26 26 } 27 27 28 28 /** 29 - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use 29 + * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists 30 30 * @ictx: Context to operate on 31 - * @out_ioas_id: The ioas_id the caller should use 31 + * @out_ioas_id: The IOAS ID of the compatibility IOAS 32 + * 33 + * Return the ID of the current compatibility IOAS. The ID can be passed into 34 + * other functions that take an ioas_id. 35 + */ 36 + int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) 37 + { 38 + struct iommufd_ioas *ioas; 39 + 40 + ioas = get_compat_ioas(ictx); 41 + if (IS_ERR(ioas)) 42 + return PTR_ERR(ioas); 43 + *out_ioas_id = ioas->obj.id; 44 + iommufd_put_object(&ioas->obj); 45 + return 0; 46 + } 47 + EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); 48 + 49 + /** 50 + * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached 51 + * @ictx: Context to operate on 52 + * 53 + * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types. 54 + */ 55 + int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) 56 + { 57 + int ret; 58 + 59 + xa_lock(&ictx->objects); 60 + if (!ictx->vfio_ioas) { 61 + ictx->no_iommu_mode = 1; 62 + ret = 0; 63 + } else { 64 + ret = -EINVAL; 65 + } 66 + xa_unlock(&ictx->objects); 67 + return ret; 68 + } 69 + EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO); 70 + 71 + /** 72 + * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created 73 + * @ictx: Context to operate on 32 74 * 33 75 * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate 34 76 * on since they do not have an IOAS ID input in their ABI. Only attaching a 35 - * group should cause a default creation of the internal ioas, this returns the 36 - * existing ioas if it has already been assigned somehow. 
77 + * group should cause a default creation of the internal ioas, this does nothing 78 + * if an existing ioas has already been assigned somehow. 37 79 */ 38 - int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) 80 + int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) 39 81 { 40 82 struct iommufd_ioas *ioas = NULL; 41 - struct iommufd_ioas *out_ioas; 83 + int ret; 42 84 43 85 ioas = iommufd_ioas_alloc(ictx); 44 86 if (IS_ERR(ioas)) 45 87 return PTR_ERR(ioas); 46 88 47 89 xa_lock(&ictx->objects); 48 - if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) 49 - out_ioas = ictx->vfio_ioas; 50 - else { 51 - out_ioas = ioas; 52 - ictx->vfio_ioas = ioas; 90 + /* 91 + * VFIO won't allow attaching a container to both iommu and no iommu 92 + * operation 93 + */ 94 + if (ictx->no_iommu_mode) { 95 + ret = -EINVAL; 96 + goto out_abort; 53 97 } 98 + 99 + if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { 100 + ret = 0; 101 + iommufd_put_object(&ictx->vfio_ioas->obj); 102 + goto out_abort; 103 + } 104 + ictx->vfio_ioas = ioas; 54 105 xa_unlock(&ictx->objects); 55 106 56 - *out_ioas_id = out_ioas->obj.id; 57 - if (out_ioas != ioas) { 58 - iommufd_put_object(&out_ioas->obj); 59 - iommufd_object_abort(ictx, &ioas->obj); 60 - return 0; 61 - } 62 107 /* 63 108 * An automatically created compat IOAS is treated as a userspace 64 109 * created object. 
Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, ··· 112 67 */ 113 68 iommufd_object_finalize(ictx, &ioas->obj); 114 69 return 0; 70 + 71 + out_abort: 72 + xa_unlock(&ictx->objects); 73 + iommufd_object_abort(ictx, &ioas->obj); 74 + return ret; 115 75 } 116 - EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); 76 + EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO); 117 77 118 78 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) 119 79 { ··· 285 235 case VFIO_UNMAP_ALL: 286 236 return 1; 287 237 238 + case VFIO_NOIOMMU_IOMMU: 239 + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); 240 + 288 241 case VFIO_DMA_CC_IOMMU: 289 242 return iommufd_vfio_cc_iommu(ictx); 290 243 ··· 314 261 315 262 static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) 316 263 { 264 + bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode); 317 265 struct iommufd_ioas *ioas = NULL; 318 266 int rc = 0; 319 267 320 - if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) 268 + /* 269 + * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all 270 + * other ioctls. We let them keep working but they mostly fail since no 271 + * IOAS should exist. 272 + */ 273 + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU && 274 + no_iommu_mode) { 275 + if (!capable(CAP_SYS_RAWIO)) 276 + return -EPERM; 277 + return 0; 278 + } 279 + 280 + if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) || 281 + no_iommu_mode) 321 282 return -EINVAL; 322 283 323 284 /* VFIO fails the set_iommu if there is no group */ ··· 448 381 }; 449 382 size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); 450 383 struct vfio_info_cap_header __user *last_cap = NULL; 451 - struct vfio_iommu_type1_info info; 384 + struct vfio_iommu_type1_info info = {}; 452 385 struct iommufd_ioas *ioas; 453 386 size_t total_cap_size; 454 387 int rc;
-2
drivers/iommu/s390-iommu.c
··· 34 34 switch (cap) { 35 35 case IOMMU_CAP_CACHE_COHERENCY: 36 36 return true; 37 - case IOMMU_CAP_INTR_REMAP: 38 - return true; 39 37 default: 40 38 return false; 41 39 }
+2 -2
drivers/irqchip/irq-gic-v3-its.c
··· 4692 4692 } 4693 4693 4694 4694 /* the pre-ITS breaks isolation, so disable MSI remapping */ 4695 - its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP; 4695 + its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_ISOLATED_MSI; 4696 4696 return true; 4697 4697 } 4698 4698 return false; ··· 5075 5075 its->cmd_write = its->cmd_base; 5076 5076 its->fwnode_handle = handle; 5077 5077 its->get_msi_base = its_irq_get_msi_base; 5078 - its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP; 5078 + its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI; 5079 5079 5080 5080 its_enable_quirks(its); 5081 5081
+1 -1
drivers/vfio/Kconfig
··· 32 32 tristate 33 33 depends on SPAPR_TCE_IOMMU 34 34 default VFIO 35 + endif 35 36 36 37 config VFIO_NOIOMMU 37 38 bool "VFIO No-IOMMU support" ··· 47 46 this mode since there is no IOMMU to provide DMA translation. 48 47 49 48 If you don't know what to do here, say N. 50 - endif 51 49 52 50 config VFIO_VIRQFD 53 51 bool
-7
drivers/vfio/container.c
··· 29 29 struct mutex iommu_drivers_lock; 30 30 } vfio; 31 31 32 - #ifdef CONFIG_VFIO_NOIOMMU 33 - bool vfio_noiommu __read_mostly; 34 - module_param_named(enable_unsafe_noiommu_mode, 35 - vfio_noiommu, bool, S_IRUGO | S_IWUSR); 36 - MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); 37 - #endif 38 - 39 32 static void *vfio_noiommu_open(unsigned long arg) 40 33 { 41 34 if (arg != VFIO_NOIOMMU_IOMMU)
+5 -2
drivers/vfio/group.c
··· 133 133 134 134 iommufd = iommufd_ctx_from_file(f.file); 135 135 if (!IS_ERR(iommufd)) { 136 - u32 ioas_id; 136 + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && 137 + group->type == VFIO_NO_IOMMU) 138 + ret = iommufd_vfio_compat_set_no_iommu(iommufd); 139 + else 140 + ret = iommufd_vfio_compat_ioas_create(iommufd); 137 141 138 - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); 139 142 if (ret) { 140 143 iommufd_ctx_put(group->iommufd); 141 144 goto out_unlock;
+18 -1
drivers/vfio/iommufd.c
··· 18 18 19 19 lockdep_assert_held(&vdev->dev_set->lock); 20 20 21 + if (vfio_device_is_noiommu(vdev)) { 22 + if (!capable(CAP_SYS_RAWIO)) 23 + return -EPERM; 24 + 25 + /* 26 + * Require no compat ioas to be assigned to proceed. The basic 27 + * statement is that the user cannot have done something that 28 + * implies they expected translation to exist 29 + */ 30 + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) 31 + return -EPERM; 32 + return 0; 33 + } 34 + 21 35 /* 22 36 * If the driver doesn't provide this op then it means the device does 23 37 * not do DMA at all. So nothing to do. ··· 43 29 if (ret) 44 30 return ret; 45 31 46 - ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); 32 + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); 47 33 if (ret) 48 34 goto err_unbind; 49 35 ret = vdev->ops->attach_ioas(vdev, &ioas_id); ··· 65 51 void vfio_iommufd_unbind(struct vfio_device *vdev) 66 52 { 67 53 lockdep_assert_held(&vdev->dev_set->lock); 54 + 55 + if (vfio_device_is_noiommu(vdev)) 56 + return; 68 57 69 58 if (vdev->ops->unbind_iommufd) 70 59 vdev->ops->unbind_iommufd(vdev);
+7 -1
drivers/vfio/vfio.h
··· 10 10 #include <linux/device.h> 11 11 #include <linux/cdev.h> 12 12 #include <linux/module.h> 13 + #include <linux/vfio.h> 13 14 14 15 struct iommufd_ctx; 15 16 struct iommu_group; 16 - struct vfio_device; 17 17 struct vfio_container; 18 18 19 19 void vfio_device_put_registration(struct vfio_device *device); ··· 87 87 bool vfio_device_has_container(struct vfio_device *device); 88 88 int __init vfio_group_init(void); 89 89 void vfio_group_cleanup(void); 90 + 91 + static inline bool vfio_device_is_noiommu(struct vfio_device *vdev) 92 + { 93 + return IS_ENABLED(CONFIG_VFIO_NOIOMMU) && 94 + vdev->group->type == VFIO_NO_IOMMU; 95 + } 90 96 91 97 #if IS_ENABLED(CONFIG_VFIO_CONTAINER) 92 98 /* events for the backend driver notify callback */
+3 -13
drivers/vfio/vfio_iommu_type1.c
··· 37 37 #include <linux/vfio.h> 38 38 #include <linux/workqueue.h> 39 39 #include <linux/notifier.h> 40 - #include <linux/irqdomain.h> 41 40 #include "vfio.h" 42 41 43 42 #define DRIVER_VERSION "0.2" ··· 2169 2170 list_splice_tail(iova_copy, iova); 2170 2171 } 2171 2172 2172 - /* Redundantly walks non-present capabilities to simplify caller */ 2173 - static int vfio_iommu_device_capable(struct device *dev, void *data) 2174 - { 2175 - return device_iommu_capable(dev, (enum iommu_cap)data); 2176 - } 2177 - 2178 2173 static int vfio_iommu_domain_alloc(struct device *dev, void *data) 2179 2174 { 2180 2175 struct iommu_domain **domain = data; ··· 2183 2190 struct vfio_iommu *iommu = iommu_data; 2184 2191 struct vfio_iommu_group *group; 2185 2192 struct vfio_domain *domain, *d; 2186 - bool resv_msi, msi_remap; 2193 + bool resv_msi; 2187 2194 phys_addr_t resv_msi_base = 0; 2188 2195 struct iommu_domain_geometry *geo; 2189 2196 LIST_HEAD(iova_copy); ··· 2281 2288 INIT_LIST_HEAD(&domain->group_list); 2282 2289 list_add(&group->next, &domain->group_list); 2283 2290 2284 - msi_remap = irq_domain_check_msi_remap() || 2285 - iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, 2286 - vfio_iommu_device_capable); 2287 - 2288 - if (!allow_unsafe_interrupts && !msi_remap) { 2291 + if (!allow_unsafe_interrupts && 2292 + !iommu_group_has_isolated_msi(iommu_group)) { 2289 2293 pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", 2290 2294 __func__); 2291 2295 ret = -EPERM;
+7
drivers/vfio/vfio_main.c
··· 45 45 struct ida device_ida; 46 46 } vfio; 47 47 48 + #ifdef CONFIG_VFIO_NOIOMMU 49 + bool vfio_noiommu __read_mostly; 50 + module_param_named(enable_unsafe_noiommu_mode, 51 + vfio_noiommu, bool, S_IRUGO | S_IWUSR); 52 + MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); 53 + #endif 54 + 48 55 static DEFINE_XARRAY(vfio_device_set_xa); 49 56 50 57 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
+1 -1
include/linux/iommu.h
··· 120 120 121 121 enum iommu_cap { 122 122 IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ 123 - IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ 124 123 IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ 125 124 IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for 126 125 DMA protection and we should too */ ··· 458 459 extern int bus_iommu_probe(struct bus_type *bus); 459 460 extern bool iommu_present(struct bus_type *bus); 460 461 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); 462 + extern bool iommu_group_has_isolated_msi(struct iommu_group *group); 461 463 extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); 462 464 extern struct iommu_group *iommu_group_get_by_id(int id); 463 465 extern void iommu_domain_free(struct iommu_domain *domain);
+9 -3
include/linux/iommufd.h
··· 57 57 unsigned long iova, unsigned long length); 58 58 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, 59 59 void *data, size_t len, unsigned int flags); 60 - int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); 60 + int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); 61 + int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx); 62 + int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx); 61 63 #else /* !CONFIG_IOMMUFD */ 62 64 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) 63 65 { ··· 91 89 return -EOPNOTSUPP; 92 90 } 93 91 94 - static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, 95 - u32 *out_ioas_id) 92 + static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) 93 + { 94 + return -EOPNOTSUPP; 95 + } 96 + 97 + static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) 96 98 { 97 99 return -EOPNOTSUPP; 98 100 }
+4 -25
include/linux/irqdomain.h
··· 194 194 /* Irq domain implements MSIs */ 195 195 IRQ_DOMAIN_FLAG_MSI = (1 << 4), 196 196 197 - /* Irq domain implements MSI remapping */ 198 - IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), 197 + /* 198 + * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() 199 + */ 200 + IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), 199 201 200 202 /* Irq domain doesn't translate anything */ 201 203 IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), ··· 280 278 void *host_data); 281 279 extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, 282 280 enum irq_domain_bus_token bus_token); 283 - extern bool irq_domain_check_msi_remap(void); 284 281 extern void irq_set_default_host(struct irq_domain *host); 285 282 extern struct irq_domain *irq_get_default_host(void); 286 283 extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, ··· 562 561 return domain->flags & IRQ_DOMAIN_FLAG_MSI; 563 562 } 564 563 565 - static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) 566 - { 567 - return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP; 568 - } 569 - 570 - extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); 571 - 572 564 static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) 573 565 { 574 566 return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; ··· 607 613 return false; 608 614 } 609 615 610 - static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) 611 - { 612 - return false; 613 - } 614 - 615 - static inline bool 616 - irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) 617 - { 618 - return false; 619 - } 620 - 621 616 static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) 622 617 { 623 618 return false; ··· 625 642 struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) 626 643 { 627 644 return NULL; 628 - } 629 - static inline bool irq_domain_check_msi_remap(void) 630 - { 631 - return false; 632 645 } 633 646 #endif /* !CONFIG_IRQ_DOMAIN */ 634 647
+17
include/linux/msi.h
··· 48 48 } __attribute__ ((packed)) arch_msi_msg_data_t; 49 49 #endif 50 50 51 + #ifndef arch_is_isolated_msi 52 + #define arch_is_isolated_msi() false 53 + #endif 54 + 51 55 /** 52 56 * msi_msg - Representation of a MSI message 53 57 * @address_lo: Low 32 bits of msi message address ··· 653 649 void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, 654 650 unsigned int nvec); 655 651 void *platform_msi_get_host_data(struct irq_domain *domain); 652 + 653 + bool msi_device_has_isolated_msi(struct device *dev); 654 + #else /* CONFIG_GENERIC_MSI_IRQ */ 655 + static inline bool msi_device_has_isolated_msi(struct device *dev) 656 + { 657 + /* 658 + * Arguably if the platform does not enable MSI support then it has 659 + * "isolated MSI", as an interrupt controller that cannot receive MSIs 660 + * is inherently isolated by our definition. The default definition for 661 + * arch_is_isolated_msi() is conservative and returns false anyhow. 662 + */ 663 + return arch_is_isolated_msi(); 664 + } 656 665 #endif /* CONFIG_GENERIC_MSI_IRQ */ 657 666 658 667 /* PCI specific interfaces */
-39
kernel/irq/irqdomain.c
··· 470 470 EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); 471 471 472 472 /** 473 - * irq_domain_check_msi_remap - Check whether all MSI irq domains implement 474 - * IRQ remapping 475 - * 476 - * Return: false if any MSI irq domain does not support IRQ remapping, 477 - * true otherwise (including if there is no MSI irq domain) 478 - */ 479 - bool irq_domain_check_msi_remap(void) 480 - { 481 - struct irq_domain *h; 482 - bool ret = true; 483 - 484 - mutex_lock(&irq_domain_mutex); 485 - list_for_each_entry(h, &irq_domain_list, link) { 486 - if (irq_domain_is_msi(h) && 487 - !irq_domain_hierarchical_is_msi_remap(h)) { 488 - ret = false; 489 - break; 490 - } 491 - } 492 - mutex_unlock(&irq_domain_mutex); 493 - return ret; 494 - } 495 - EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap); 496 - 497 - /** 498 473 * irq_set_default_host() - Set a "default" irq domain 499 474 * @domain: default domain pointer 500 475 * ··· 1864 1889 /* Hierarchy irq_domains must implement callback alloc() */ 1865 1890 if (domain->ops->alloc) 1866 1891 domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; 1867 - } 1868 - 1869 - /** 1870 - * irq_domain_hierarchical_is_msi_remap - Check if the domain or any 1871 - * parent has MSI remapping support 1872 - * @domain: domain pointer 1873 - */ 1874 - bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) 1875 - { 1876 - for (; domain; domain = domain->parent) { 1877 - if (irq_domain_is_msi_remap(domain)) 1878 - return true; 1879 - } 1880 - return false; 1881 1892 } 1882 1893 #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ 1883 1894 /**
+27
kernel/irq/msi.c
··· 1627 1627 { 1628 1628 return (struct msi_domain_info *)domain->host_data; 1629 1629 } 1630 + 1631 + /** 1632 + * msi_device_has_isolated_msi - True if the device has isolated MSI 1633 + * @dev: The device to check 1634 + * 1635 + * Isolated MSI means that HW modeled by an irq_domain on the path from the 1636 + * initiating device to the CPU will validate that the MSI message specifies an 1637 + * interrupt number that the device is authorized to trigger. This must block 1638 + * devices from triggering interrupts they are not authorized to trigger. 1639 + * Currently authorization means the MSI vector is one assigned to the device. 1640 + * 1641 + * This is interesting for securing VFIO use cases where a rogue MSI (eg created 1642 + * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to 1643 + * impact outside its security domain, eg userspace triggering interrupts on 1644 + * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM 1645 + * triggering interrupts on another VM. 1646 + */ 1647 + bool msi_device_has_isolated_msi(struct device *dev) 1648 + { 1649 + struct irq_domain *domain = dev_get_msi_domain(dev); 1650 + 1651 + for (; domain; domain = domain->parent) 1652 + if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) 1653 + return true; 1654 + return arch_is_isolated_msi(); 1655 + } 1656 + EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
+1 -1
tools/testing/selftests/iommu/iommufd.c
··· 1259 1259 1260 1260 test_cmd_destroy_access_pages( 1261 1261 access_cmd.id, access_cmd.access_pages.out_access_pages_id); 1262 - test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id); 1262 + test_cmd_destroy_access(access_cmd.id); 1263 1263 1264 1264 test_ioctl_destroy(ioas_id); 1265 1265 }