Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:
"On top of the vfio updates is built some new iommufd functionality:

- IOMMU_HWPT_ALLOC allows userspace to directly create the low level
IO Page table objects and affiliate them with IOAS objects that
hold the translation mapping. This is the basic functionality for
the normal IOMMU_DOMAIN_PAGING domains.

- VFIO_DEVICE_ATTACH_IOMMUFD_PT can be used to replace the current
translation. This is wired up through all the layers down to the
driver so the driver has the ability to implement a hitless
replacement. This is necessary to fully support guest behaviors
when emulating HW (eg guest atomic change of translation)

- IOMMU_GET_HW_INFO returns information about the IOMMU driver HW
that owns a VFIO device. This includes support for the Intel iommu,
and patches have been posted for all the other server IOMMUs.

Along the way are a number of internal items:

- New iommufd kernel APIs: iommufd_ctx_has_group(),
iommufd_device_to_ictx(), iommufd_device_to_id(),
iommufd_access_detach(), iommufd_ctx_from_fd(),
iommufd_device_replace()

- iommufd now internally tracks iommu_groups as it needs some
per-group data

- Reorganize how the internal hwpt allocation flows to have more
robust locking

- Improve the access interfaces to support detach and replace of an
IOAS from an access

- New selftests and a rework of how the selftests creates a mock
iommu driver to be more like a real iommu driver"

Link: https://lore.kernel.org/lkml/ZO%2FTe6LU1ENf58ZW@nvidia.com/

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (34 commits)
iommufd/selftest: Don't leak the platform device memory when unloading the module
iommu/vt-d: Implement hw_info for iommu capability query
iommufd/selftest: Add coverage for IOMMU_GET_HW_INFO ioctl
iommufd: Add IOMMU_GET_HW_INFO
iommu: Add new iommu op to get iommu hardware information
iommu: Move dev_iommu_ops() to private header
iommufd: Remove iommufd_ref_to_users()
iommufd/selftest: Make the mock iommu driver into a real driver
vfio: Support IO page table replacement
iommufd/selftest: Add IOMMU_TEST_OP_ACCESS_REPLACE_IOAS coverage
iommufd: Add iommufd_access_replace() API
iommufd: Use iommufd_access_change_ioas in iommufd_access_destroy_object
iommufd: Add iommufd_access_change_ioas(_id) helpers
iommufd: Allow passing in iopt_access_list_id to iopt_remove_access()
vfio: Do not allow !ops->dma_unmap in vfio_pin/unpin_pages()
iommufd/selftest: Add a selftest for IOMMU_HWPT_ALLOC
iommufd/selftest: Return the real idev id from selftest mock_domain
iommufd: Add IOMMU_HWPT_ALLOC
iommufd/selftest: Test iommufd_device_replace()
iommufd: Make destroy_rwsem use a lock class per object type
...

+1503 -372
+19
drivers/iommu/intel/iommu.c
··· 22 22 #include <linux/spinlock.h> 23 23 #include <linux/syscore_ops.h> 24 24 #include <linux/tboot.h> 25 + #include <uapi/linux/iommufd.h> 25 26 26 27 #include "iommu.h" 27 28 #include "../dma-iommu.h" ··· 4733 4732 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 4734 4733 } 4735 4734 4735 + static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) 4736 + { 4737 + struct device_domain_info *info = dev_iommu_priv_get(dev); 4738 + struct intel_iommu *iommu = info->iommu; 4739 + struct iommu_hw_info_vtd *vtd; 4740 + 4741 + vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); 4742 + if (!vtd) 4743 + return ERR_PTR(-ENOMEM); 4744 + 4745 + vtd->cap_reg = iommu->cap; 4746 + vtd->ecap_reg = iommu->ecap; 4747 + *length = sizeof(*vtd); 4748 + *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; 4749 + return vtd; 4750 + } 4751 + 4736 4752 const struct iommu_ops intel_iommu_ops = { 4737 4753 .capable = intel_iommu_capable, 4754 + .hw_info = intel_iommu_hw_info, 4738 4755 .domain_alloc = intel_iommu_domain_alloc, 4739 4756 .probe_device = intel_iommu_probe_device, 4740 4757 .probe_finalize = intel_iommu_probe_finalize,
+30
drivers/iommu/iommu-priv.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 3 + */ 4 + #ifndef __LINUX_IOMMU_PRIV_H 5 + #define __LINUX_IOMMU_PRIV_H 6 + 7 + #include <linux/iommu.h> 8 + 9 + static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) 10 + { 11 + /* 12 + * Assume that valid ops must be installed if iommu_probe_device() 13 + * has succeeded. The device ops are essentially for internal use 14 + * within the IOMMU subsystem itself, so we should be able to trust 15 + * ourselves not to misuse the helper. 16 + */ 17 + return dev->iommu->iommu_dev->ops; 18 + } 19 + 20 + int iommu_group_replace_domain(struct iommu_group *group, 21 + struct iommu_domain *new_domain); 22 + 23 + int iommu_device_register_bus(struct iommu_device *iommu, 24 + const struct iommu_ops *ops, struct bus_type *bus, 25 + struct notifier_block *nb); 26 + void iommu_device_unregister_bus(struct iommu_device *iommu, 27 + struct bus_type *bus, 28 + struct notifier_block *nb); 29 + 30 + #endif /* __LINUX_IOMMU_PRIV_H */
+80 -1
drivers/iommu/iommu.c
··· 34 34 #include <linux/msi.h> 35 35 36 36 #include "dma-iommu.h" 37 + #include "iommu-priv.h" 37 38 38 39 #include "iommu-sva.h" 40 + #include "iommu-priv.h" 39 41 40 42 static struct kset *iommu_group_kset; 41 43 static DEFINE_IDA(iommu_group_ida); ··· 288 286 spin_unlock(&iommu_device_lock); 289 287 } 290 288 EXPORT_SYMBOL_GPL(iommu_device_unregister); 289 + 290 + #if IS_ENABLED(CONFIG_IOMMUFD_TEST) 291 + void iommu_device_unregister_bus(struct iommu_device *iommu, 292 + struct bus_type *bus, 293 + struct notifier_block *nb) 294 + { 295 + bus_unregister_notifier(bus, nb); 296 + iommu_device_unregister(iommu); 297 + } 298 + EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 299 + 300 + /* 301 + * Register an iommu driver against a single bus. This is only used by iommufd 302 + * selftest to create a mock iommu driver. The caller must provide 303 + * some memory to hold a notifier_block. 304 + */ 305 + int iommu_device_register_bus(struct iommu_device *iommu, 306 + const struct iommu_ops *ops, struct bus_type *bus, 307 + struct notifier_block *nb) 308 + { 309 + int err; 310 + 311 + iommu->ops = ops; 312 + nb->notifier_call = iommu_bus_notifier; 313 + err = bus_register_notifier(bus, nb); 314 + if (err) 315 + return err; 316 + 317 + spin_lock(&iommu_device_lock); 318 + list_add_tail(&iommu->list, &iommu_device_list); 319 + spin_unlock(&iommu_device_lock); 320 + 321 + bus->iommu_ops = ops; 322 + err = bus_iommu_probe(bus); 323 + if (err) { 324 + iommu_device_unregister_bus(iommu, bus, nb); 325 + return err; 326 + } 327 + return 0; 328 + } 329 + EXPORT_SYMBOL_GPL(iommu_device_register_bus); 330 + #endif 291 331 292 332 static struct dev_iommu *dev_iommu_get(struct device *dev) 293 333 { ··· 2158 2114 } 2159 2115 EXPORT_SYMBOL_GPL(iommu_attach_group); 2160 2116 2117 + /** 2118 + * iommu_group_replace_domain - replace the domain that a group is attached to 2119 + * @new_domain: new IOMMU domain to replace with 2120 + * @group: IOMMU group that will be attached to the 
new domain 2121 + * 2122 + * This API allows the group to switch domains without being forced to go to 2123 + * the blocking domain in-between. 2124 + * 2125 + * If the currently attached domain is a core domain (e.g. a default_domain), 2126 + * it will act just like the iommu_attach_group(). 2127 + */ 2128 + int iommu_group_replace_domain(struct iommu_group *group, 2129 + struct iommu_domain *new_domain) 2130 + { 2131 + int ret; 2132 + 2133 + if (!new_domain) 2134 + return -EINVAL; 2135 + 2136 + mutex_lock(&group->mutex); 2137 + ret = __iommu_group_set_domain(group, new_domain); 2138 + mutex_unlock(&group->mutex); 2139 + return ret; 2140 + } 2141 + EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, IOMMUFD_INTERNAL); 2142 + 2161 2143 static int __iommu_device_set_domain(struct iommu_group *group, 2162 2144 struct device *dev, 2163 2145 struct iommu_domain *new_domain, ··· 2712 2642 } 2713 2643 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2714 2644 2645 + /** 2646 + * iommu_get_resv_regions - get reserved regions 2647 + * @dev: device for which to get reserved regions 2648 + * @list: reserved region list for device 2649 + * 2650 + * This returns a list of reserved IOVA regions specific to this device. 2651 + * A domain user should not map IOVA in these ranges. 2652 + */ 2715 2653 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2716 2654 { 2717 2655 const struct iommu_ops *ops = dev_iommu_ops(dev); ··· 2727 2649 if (ops->get_resv_regions) 2728 2650 ops->get_resv_regions(dev, list); 2729 2651 } 2652 + EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2730 2653 2731 2654 /** 2732 - * iommu_put_resv_regions - release resered regions 2655 + * iommu_put_resv_regions - release reserved regions 2733 2656 * @dev: device for which to free reserved regions 2734 2657 * @list: reserved region list for device 2735 2658 *
+568 -195
drivers/iommu/iommufd/device.c
··· 4 4 #include <linux/iommufd.h> 5 5 #include <linux/slab.h> 6 6 #include <linux/iommu.h> 7 + #include <uapi/linux/iommufd.h> 8 + #include "../iommu-priv.h" 7 9 8 10 #include "io_pagetable.h" 9 11 #include "iommufd_private.h" ··· 17 15 "Allow IOMMUFD to bind to devices even if the platform cannot isolate " 18 16 "the MSI interrupt window. Enabling this is a security weakness."); 19 17 18 + static void iommufd_group_release(struct kref *kref) 19 + { 20 + struct iommufd_group *igroup = 21 + container_of(kref, struct iommufd_group, ref); 22 + 23 + WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); 24 + 25 + xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, 26 + NULL, GFP_KERNEL); 27 + iommu_group_put(igroup->group); 28 + mutex_destroy(&igroup->lock); 29 + kfree(igroup); 30 + } 31 + 32 + static void iommufd_put_group(struct iommufd_group *group) 33 + { 34 + kref_put(&group->ref, iommufd_group_release); 35 + } 36 + 37 + static bool iommufd_group_try_get(struct iommufd_group *igroup, 38 + struct iommu_group *group) 39 + { 40 + if (!igroup) 41 + return false; 42 + /* 43 + * group ID's cannot be re-used until the group is put back which does 44 + * not happen if we could get an igroup pointer under the xa_lock. 45 + */ 46 + if (WARN_ON(igroup->group != group)) 47 + return false; 48 + return kref_get_unless_zero(&igroup->ref); 49 + } 50 + 51 + /* 52 + * iommufd needs to store some more data for each iommu_group, we keep a 53 + * parallel xarray indexed by iommu_group id to hold this instead of putting it 54 + * in the core structure. To keep things simple the iommufd_group memory is 55 + * unique within the iommufd_ctx. This makes it easy to check there are no 56 + * memory leaks. 
57 + */ 58 + static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, 59 + struct device *dev) 60 + { 61 + struct iommufd_group *new_igroup; 62 + struct iommufd_group *cur_igroup; 63 + struct iommufd_group *igroup; 64 + struct iommu_group *group; 65 + unsigned int id; 66 + 67 + group = iommu_group_get(dev); 68 + if (!group) 69 + return ERR_PTR(-ENODEV); 70 + 71 + id = iommu_group_id(group); 72 + 73 + xa_lock(&ictx->groups); 74 + igroup = xa_load(&ictx->groups, id); 75 + if (iommufd_group_try_get(igroup, group)) { 76 + xa_unlock(&ictx->groups); 77 + iommu_group_put(group); 78 + return igroup; 79 + } 80 + xa_unlock(&ictx->groups); 81 + 82 + new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); 83 + if (!new_igroup) { 84 + iommu_group_put(group); 85 + return ERR_PTR(-ENOMEM); 86 + } 87 + 88 + kref_init(&new_igroup->ref); 89 + mutex_init(&new_igroup->lock); 90 + INIT_LIST_HEAD(&new_igroup->device_list); 91 + new_igroup->sw_msi_start = PHYS_ADDR_MAX; 92 + /* group reference moves into new_igroup */ 93 + new_igroup->group = group; 94 + 95 + /* 96 + * The ictx is not additionally refcounted here becase all objects using 97 + * an igroup must put it before their destroy completes. 98 + */ 99 + new_igroup->ictx = ictx; 100 + 101 + /* 102 + * We dropped the lock so igroup is invalid. NULL is a safe and likely 103 + * value to assume for the xa_cmpxchg algorithm. 
104 + */ 105 + cur_igroup = NULL; 106 + xa_lock(&ictx->groups); 107 + while (true) { 108 + igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, 109 + GFP_KERNEL); 110 + if (xa_is_err(igroup)) { 111 + xa_unlock(&ictx->groups); 112 + iommufd_put_group(new_igroup); 113 + return ERR_PTR(xa_err(igroup)); 114 + } 115 + 116 + /* new_group was successfully installed */ 117 + if (cur_igroup == igroup) { 118 + xa_unlock(&ictx->groups); 119 + return new_igroup; 120 + } 121 + 122 + /* Check again if the current group is any good */ 123 + if (iommufd_group_try_get(igroup, group)) { 124 + xa_unlock(&ictx->groups); 125 + iommufd_put_group(new_igroup); 126 + return igroup; 127 + } 128 + cur_igroup = igroup; 129 + } 130 + } 131 + 20 132 void iommufd_device_destroy(struct iommufd_object *obj) 21 133 { 22 134 struct iommufd_device *idev = 23 135 container_of(obj, struct iommufd_device, obj); 24 136 25 137 iommu_device_release_dma_owner(idev->dev); 26 - iommu_group_put(idev->group); 138 + iommufd_put_group(idev->igroup); 27 139 if (!iommufd_selftest_is_mock_dev(idev->dev)) 28 140 iommufd_ctx_put(idev->ictx); 29 141 } ··· 162 46 struct device *dev, u32 *id) 163 47 { 164 48 struct iommufd_device *idev; 165 - struct iommu_group *group; 49 + struct iommufd_group *igroup; 166 50 int rc; 167 51 168 52 /* ··· 172 56 if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) 173 57 return ERR_PTR(-EINVAL); 174 58 175 - group = iommu_group_get(dev); 176 - if (!group) 177 - return ERR_PTR(-ENODEV); 59 + igroup = iommufd_get_group(ictx, dev); 60 + if (IS_ERR(igroup)) 61 + return ERR_CAST(igroup); 62 + 63 + /* 64 + * For historical compat with VFIO the insecure interrupt path is 65 + * allowed if the module parameter is set. Secure/Isolated means that a 66 + * MemWr operation from the device (eg a simple DMA) cannot trigger an 67 + * interrupt outside this iommufd context. 
68 + */ 69 + if (!iommufd_selftest_is_mock_dev(dev) && 70 + !iommu_group_has_isolated_msi(igroup->group)) { 71 + if (!allow_unsafe_interrupts) { 72 + rc = -EPERM; 73 + goto out_group_put; 74 + } 75 + 76 + dev_warn( 77 + dev, 78 + "MSI interrupts are not secure, they cannot be isolated by the platform. " 79 + "Check that platform features like interrupt remapping are enabled. " 80 + "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); 81 + } 178 82 179 83 rc = iommu_device_claim_dma_owner(dev, ictx); 180 84 if (rc) ··· 213 77 device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); 214 78 /* The calling driver is a user until iommufd_device_unbind() */ 215 79 refcount_inc(&idev->obj.users); 216 - /* group refcount moves into iommufd_device */ 217 - idev->group = group; 80 + /* igroup refcount moves into iommufd_device */ 81 + idev->igroup = igroup; 218 82 219 83 /* 220 84 * If the caller fails after this success it must call ··· 229 93 out_release_owner: 230 94 iommu_device_release_dma_owner(dev); 231 95 out_group_put: 232 - iommu_group_put(group); 96 + iommufd_put_group(igroup); 233 97 return ERR_PTR(rc); 234 98 } 235 99 EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); ··· 254 118 xa_lock(&ictx->objects); 255 119 xa_for_each(&ictx->objects, index, obj) { 256 120 if (obj->type == IOMMUFD_OBJ_DEVICE && 257 - container_of(obj, struct iommufd_device, obj)->group == group) { 121 + container_of(obj, struct iommufd_device, obj) 122 + ->igroup->group == group) { 258 123 xa_unlock(&ictx->objects); 259 124 return true; 260 125 } ··· 292 155 } 293 156 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); 294 157 295 - static int iommufd_device_setup_msi(struct iommufd_device *idev, 296 - struct iommufd_hw_pagetable *hwpt, 297 - phys_addr_t sw_msi_start) 158 + static int iommufd_group_setup_msi(struct iommufd_group *igroup, 159 + struct iommufd_hw_pagetable *hwpt) 298 160 { 161 + phys_addr_t sw_msi_start = igroup->sw_msi_start; 299 162 int rc; 300 163 
301 164 /* ··· 322 185 */ 323 186 hwpt->msi_cookie = true; 324 187 } 325 - 326 - /* 327 - * For historical compat with VFIO the insecure interrupt path is 328 - * allowed if the module parameter is set. Insecure means that a MemWr 329 - * operation from the device (eg a simple DMA) cannot trigger an 330 - * interrupt outside this iommufd context. 331 - */ 332 - if (!iommufd_selftest_is_mock_dev(idev->dev) && 333 - !iommu_group_has_isolated_msi(idev->group)) { 334 - if (!allow_unsafe_interrupts) 335 - return -EPERM; 336 - 337 - dev_warn( 338 - idev->dev, 339 - "MSI interrupts are not secure, they cannot be isolated by the platform. " 340 - "Check that platform features like interrupt remapping are enabled. " 341 - "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); 342 - } 343 188 return 0; 344 - } 345 - 346 - static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, 347 - struct iommu_group *group) 348 - { 349 - struct iommufd_device *cur_dev; 350 - 351 - lockdep_assert_held(&hwpt->devices_lock); 352 - 353 - list_for_each_entry(cur_dev, &hwpt->devices, devices_item) 354 - if (cur_dev->group == group) 355 - return true; 356 - return false; 357 189 } 358 190 359 191 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, 360 192 struct iommufd_device *idev) 361 193 { 362 - phys_addr_t sw_msi_start = PHYS_ADDR_MAX; 363 194 int rc; 364 195 365 - lockdep_assert_held(&hwpt->devices_lock); 196 + mutex_lock(&idev->igroup->lock); 366 197 367 - if (WARN_ON(idev->hwpt)) 368 - return -EINVAL; 369 - 370 - /* 371 - * Try to upgrade the domain we have, it is an iommu driver bug to 372 - * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail 373 - * enforce_cache_coherency when there are no devices attached to the 374 - * domain. 
375 - */ 376 - if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { 377 - if (hwpt->domain->ops->enforce_cache_coherency) 378 - hwpt->enforce_cache_coherency = 379 - hwpt->domain->ops->enforce_cache_coherency( 380 - hwpt->domain); 381 - if (!hwpt->enforce_cache_coherency) { 382 - WARN_ON(list_empty(&hwpt->devices)); 383 - return -EINVAL; 384 - } 198 + if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { 199 + rc = -EINVAL; 200 + goto err_unlock; 385 201 } 386 202 387 - rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, 388 - idev->group, &sw_msi_start); 389 - if (rc) 390 - return rc; 203 + /* Try to upgrade the domain we have */ 204 + if (idev->enforce_cache_coherency) { 205 + rc = iommufd_hw_pagetable_enforce_cc(hwpt); 206 + if (rc) 207 + goto err_unlock; 208 + } 391 209 392 - rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start); 210 + rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev, 211 + &idev->igroup->sw_msi_start); 393 212 if (rc) 394 - goto err_unresv; 213 + goto err_unlock; 395 214 396 215 /* 397 - * FIXME: Hack around missing a device-centric iommu api, only attach to 398 - * the group once for the first device that is in the group. 216 + * Only attach to the group once for the first device that is in the 217 + * group. All the other devices will follow this attachment. The user 218 + * should attach every device individually to the hwpt as the per-device 219 + * reserved regions are only updated during individual device 220 + * attachment. 
399 221 */ 400 - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { 401 - rc = iommu_attach_group(hwpt->domain, idev->group); 222 + if (list_empty(&idev->igroup->device_list)) { 223 + rc = iommufd_group_setup_msi(idev->igroup, hwpt); 402 224 if (rc) 403 225 goto err_unresv; 226 + 227 + rc = iommu_attach_group(hwpt->domain, idev->igroup->group); 228 + if (rc) 229 + goto err_unresv; 230 + idev->igroup->hwpt = hwpt; 404 231 } 232 + refcount_inc(&hwpt->obj.users); 233 + list_add_tail(&idev->group_item, &idev->igroup->device_list); 234 + mutex_unlock(&idev->igroup->lock); 405 235 return 0; 406 236 err_unresv: 407 237 iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); 238 + err_unlock: 239 + mutex_unlock(&idev->igroup->lock); 408 240 return rc; 409 241 } 410 242 411 - void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, 412 - struct iommufd_device *idev) 243 + struct iommufd_hw_pagetable * 244 + iommufd_hw_pagetable_detach(struct iommufd_device *idev) 413 245 { 414 - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) 415 - iommu_detach_group(hwpt->domain, idev->group); 246 + struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; 247 + 248 + mutex_lock(&idev->igroup->lock); 249 + list_del(&idev->group_item); 250 + if (list_empty(&idev->igroup->device_list)) { 251 + iommu_detach_group(hwpt->domain, idev->igroup->group); 252 + idev->igroup->hwpt = NULL; 253 + } 416 254 iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); 255 + mutex_unlock(&idev->igroup->lock); 256 + 257 + /* Caller must destroy hwpt */ 258 + return hwpt; 417 259 } 418 260 419 - static int iommufd_device_do_attach(struct iommufd_device *idev, 420 - struct iommufd_hw_pagetable *hwpt) 261 + static struct iommufd_hw_pagetable * 262 + iommufd_device_do_attach(struct iommufd_device *idev, 263 + struct iommufd_hw_pagetable *hwpt) 421 264 { 422 265 int rc; 423 266 424 - mutex_lock(&hwpt->devices_lock); 425 267 rc = iommufd_hw_pagetable_attach(hwpt, idev); 426 268 if (rc) 427 - 
goto out_unlock; 428 - 429 - idev->hwpt = hwpt; 430 - refcount_inc(&hwpt->obj.users); 431 - list_add(&idev->devices_item, &hwpt->devices); 432 - out_unlock: 433 - mutex_unlock(&hwpt->devices_lock); 434 - return rc; 269 + return ERR_PTR(rc); 270 + return NULL; 435 271 } 272 + 273 + static struct iommufd_hw_pagetable * 274 + iommufd_device_do_replace(struct iommufd_device *idev, 275 + struct iommufd_hw_pagetable *hwpt) 276 + { 277 + struct iommufd_group *igroup = idev->igroup; 278 + struct iommufd_hw_pagetable *old_hwpt; 279 + unsigned int num_devices = 0; 280 + struct iommufd_device *cur; 281 + int rc; 282 + 283 + mutex_lock(&idev->igroup->lock); 284 + 285 + if (igroup->hwpt == NULL) { 286 + rc = -EINVAL; 287 + goto err_unlock; 288 + } 289 + 290 + if (hwpt == igroup->hwpt) { 291 + mutex_unlock(&idev->igroup->lock); 292 + return NULL; 293 + } 294 + 295 + /* Try to upgrade the domain we have */ 296 + list_for_each_entry(cur, &igroup->device_list, group_item) { 297 + num_devices++; 298 + if (cur->enforce_cache_coherency) { 299 + rc = iommufd_hw_pagetable_enforce_cc(hwpt); 300 + if (rc) 301 + goto err_unlock; 302 + } 303 + } 304 + 305 + old_hwpt = igroup->hwpt; 306 + if (hwpt->ioas != old_hwpt->ioas) { 307 + list_for_each_entry(cur, &igroup->device_list, group_item) { 308 + rc = iopt_table_enforce_dev_resv_regions( 309 + &hwpt->ioas->iopt, cur->dev, NULL); 310 + if (rc) 311 + goto err_unresv; 312 + } 313 + } 314 + 315 + rc = iommufd_group_setup_msi(idev->igroup, hwpt); 316 + if (rc) 317 + goto err_unresv; 318 + 319 + rc = iommu_group_replace_domain(igroup->group, hwpt->domain); 320 + if (rc) 321 + goto err_unresv; 322 + 323 + if (hwpt->ioas != old_hwpt->ioas) { 324 + list_for_each_entry(cur, &igroup->device_list, group_item) 325 + iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, 326 + cur->dev); 327 + } 328 + 329 + igroup->hwpt = hwpt; 330 + 331 + /* 332 + * Move the refcounts held by the device_list to the new hwpt. 
Retain a 333 + * refcount for this thread as the caller will free it. 334 + */ 335 + refcount_add(num_devices, &hwpt->obj.users); 336 + if (num_devices > 1) 337 + WARN_ON(refcount_sub_and_test(num_devices - 1, 338 + &old_hwpt->obj.users)); 339 + mutex_unlock(&idev->igroup->lock); 340 + 341 + /* Caller must destroy old_hwpt */ 342 + return old_hwpt; 343 + err_unresv: 344 + list_for_each_entry(cur, &igroup->device_list, group_item) 345 + iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); 346 + err_unlock: 347 + mutex_unlock(&idev->igroup->lock); 348 + return ERR_PTR(rc); 349 + } 350 + 351 + typedef struct iommufd_hw_pagetable *(*attach_fn)( 352 + struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); 436 353 437 354 /* 438 355 * When automatically managing the domains we search for a compatible domain in 439 356 * the iopt and if one is found use it, otherwise create a new domain. 440 357 * Automatic domain selection will never pick a manually created domain. 441 358 */ 442 - static int iommufd_device_auto_get_domain(struct iommufd_device *idev, 443 - struct iommufd_ioas *ioas) 359 + static struct iommufd_hw_pagetable * 360 + iommufd_device_auto_get_domain(struct iommufd_device *idev, 361 + struct iommufd_ioas *ioas, u32 *pt_id, 362 + attach_fn do_attach) 444 363 { 364 + /* 365 + * iommufd_hw_pagetable_attach() is called by 366 + * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as 367 + * iommufd_device_do_attach(). So if we are in this mode then we prefer 368 + * to use the immediate_attach path as it supports drivers that can't 369 + * directly allocate a domain. 
370 + */ 371 + bool immediate_attach = do_attach == iommufd_device_do_attach; 372 + struct iommufd_hw_pagetable *destroy_hwpt; 445 373 struct iommufd_hw_pagetable *hwpt; 446 - int rc; 447 374 448 375 /* 449 376 * There is no differentiation when domains are allocated, so any domain ··· 521 320 522 321 if (!iommufd_lock_obj(&hwpt->obj)) 523 322 continue; 524 - rc = iommufd_device_do_attach(idev, hwpt); 323 + destroy_hwpt = (*do_attach)(idev, hwpt); 324 + if (IS_ERR(destroy_hwpt)) { 325 + iommufd_put_object(&hwpt->obj); 326 + /* 327 + * -EINVAL means the domain is incompatible with the 328 + * device. Other error codes should propagate to 329 + * userspace as failure. Success means the domain is 330 + * attached. 331 + */ 332 + if (PTR_ERR(destroy_hwpt) == -EINVAL) 333 + continue; 334 + goto out_unlock; 335 + } 336 + *pt_id = hwpt->obj.id; 525 337 iommufd_put_object(&hwpt->obj); 526 - 527 - /* 528 - * -EINVAL means the domain is incompatible with the device. 529 - * Other error codes should propagate to userspace as failure. 530 - * Success means the domain is attached. 
531 - */ 532 - if (rc == -EINVAL) 533 - continue; 534 338 goto out_unlock; 535 339 } 536 340 537 - hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true); 341 + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, 342 + immediate_attach); 538 343 if (IS_ERR(hwpt)) { 539 - rc = PTR_ERR(hwpt); 344 + destroy_hwpt = ERR_CAST(hwpt); 540 345 goto out_unlock; 541 346 } 542 - hwpt->auto_domain = true; 543 347 544 - mutex_unlock(&ioas->mutex); 348 + if (!immediate_attach) { 349 + destroy_hwpt = (*do_attach)(idev, hwpt); 350 + if (IS_ERR(destroy_hwpt)) 351 + goto out_abort; 352 + } else { 353 + destroy_hwpt = NULL; 354 + } 355 + 356 + hwpt->auto_domain = true; 357 + *pt_id = hwpt->obj.id; 358 + 545 359 iommufd_object_finalize(idev->ictx, &hwpt->obj); 546 - return 0; 360 + mutex_unlock(&ioas->mutex); 361 + return destroy_hwpt; 362 + 363 + out_abort: 364 + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); 547 365 out_unlock: 548 366 mutex_unlock(&ioas->mutex); 549 - return rc; 367 + return destroy_hwpt; 368 + } 369 + 370 + static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, 371 + attach_fn do_attach) 372 + { 373 + struct iommufd_hw_pagetable *destroy_hwpt; 374 + struct iommufd_object *pt_obj; 375 + 376 + pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); 377 + if (IS_ERR(pt_obj)) 378 + return PTR_ERR(pt_obj); 379 + 380 + switch (pt_obj->type) { 381 + case IOMMUFD_OBJ_HW_PAGETABLE: { 382 + struct iommufd_hw_pagetable *hwpt = 383 + container_of(pt_obj, struct iommufd_hw_pagetable, obj); 384 + 385 + destroy_hwpt = (*do_attach)(idev, hwpt); 386 + if (IS_ERR(destroy_hwpt)) 387 + goto out_put_pt_obj; 388 + break; 389 + } 390 + case IOMMUFD_OBJ_IOAS: { 391 + struct iommufd_ioas *ioas = 392 + container_of(pt_obj, struct iommufd_ioas, obj); 393 + 394 + destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, 395 + do_attach); 396 + if (IS_ERR(destroy_hwpt)) 397 + goto out_put_pt_obj; 398 + break; 399 + } 400 + 
default: 401 + destroy_hwpt = ERR_PTR(-EINVAL); 402 + goto out_put_pt_obj; 403 + } 404 + iommufd_put_object(pt_obj); 405 + 406 + /* This destruction has to be after we unlock everything */ 407 + if (destroy_hwpt) 408 + iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); 409 + return 0; 410 + 411 + out_put_pt_obj: 412 + iommufd_put_object(pt_obj); 413 + return PTR_ERR(destroy_hwpt); 550 414 } 551 415 552 416 /** 553 - * iommufd_device_attach - Connect a device from an iommu_domain 417 + * iommufd_device_attach - Connect a device to an iommu_domain 554 418 * @idev: device to attach 555 419 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE 556 420 * Output the IOMMUFD_OBJ_HW_PAGETABLE ID ··· 628 362 */ 629 363 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) 630 364 { 631 - struct iommufd_object *pt_obj; 632 365 int rc; 633 366 634 - pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); 635 - if (IS_ERR(pt_obj)) 636 - return PTR_ERR(pt_obj); 367 + rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); 368 + if (rc) 369 + return rc; 637 370 638 - switch (pt_obj->type) { 639 - case IOMMUFD_OBJ_HW_PAGETABLE: { 640 - struct iommufd_hw_pagetable *hwpt = 641 - container_of(pt_obj, struct iommufd_hw_pagetable, obj); 642 - 643 - rc = iommufd_device_do_attach(idev, hwpt); 644 - if (rc) 645 - goto out_put_pt_obj; 646 - break; 647 - } 648 - case IOMMUFD_OBJ_IOAS: { 649 - struct iommufd_ioas *ioas = 650 - container_of(pt_obj, struct iommufd_ioas, obj); 651 - 652 - rc = iommufd_device_auto_get_domain(idev, ioas); 653 - if (rc) 654 - goto out_put_pt_obj; 655 - break; 656 - } 657 - default: 658 - rc = -EINVAL; 659 - goto out_put_pt_obj; 660 - } 661 - 371 + /* 372 + * Pairs with iommufd_device_detach() - catches caller bugs attempting 373 + * to destroy a device with an attachment. 
374 + */ 662 375 refcount_inc(&idev->obj.users); 663 - *pt_id = idev->hwpt->obj.id; 664 - rc = 0; 665 - 666 - out_put_pt_obj: 667 - iommufd_put_object(pt_obj); 668 - return rc; 376 + return 0; 669 377 } 670 378 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); 379 + 380 + /** 381 + * iommufd_device_replace - Change the device's iommu_domain 382 + * @idev: device to change 383 + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE 384 + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID 385 + * 386 + * This is the same as:: 387 + * 388 + * iommufd_device_detach(); 389 + * iommufd_device_attach(); 390 + * 391 + * If it fails then no change is made to the attachment. The iommu driver may 392 + * implement this so there is no disruption in translation. This can only be 393 + * called if iommufd_device_attach() has already succeeded. 394 + */ 395 + int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) 396 + { 397 + return iommufd_device_change_pt(idev, pt_id, 398 + &iommufd_device_do_replace); 399 + } 400 + EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); 671 401 672 402 /** 673 403 * iommufd_device_detach - Disconnect a device to an iommu_domain ··· 674 412 */ 675 413 void iommufd_device_detach(struct iommufd_device *idev) 676 414 { 677 - struct iommufd_hw_pagetable *hwpt = idev->hwpt; 415 + struct iommufd_hw_pagetable *hwpt; 678 416 679 - mutex_lock(&hwpt->devices_lock); 680 - list_del(&idev->devices_item); 681 - idev->hwpt = NULL; 682 - iommufd_hw_pagetable_detach(hwpt, idev); 683 - mutex_unlock(&hwpt->devices_lock); 684 - 685 - if (hwpt->auto_domain) 686 - iommufd_object_deref_user(idev->ictx, &hwpt->obj); 687 - else 688 - refcount_dec(&hwpt->obj.users); 689 - 417 + hwpt = iommufd_hw_pagetable_detach(idev); 418 + iommufd_hw_pagetable_put(idev->ictx, hwpt); 690 419 refcount_dec(&idev->obj.users); 691 420 } 692 421 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); 422 + 423 + /* 424 + * On success, it will refcount_inc() at a valid 
new_ioas and refcount_dec() at 425 + * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should 426 + * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. 427 + */ 428 + static int iommufd_access_change_ioas(struct iommufd_access *access, 429 + struct iommufd_ioas *new_ioas) 430 + { 431 + u32 iopt_access_list_id = access->iopt_access_list_id; 432 + struct iommufd_ioas *cur_ioas = access->ioas; 433 + int rc; 434 + 435 + lockdep_assert_held(&access->ioas_lock); 436 + 437 + /* We are racing with a concurrent detach, bail */ 438 + if (cur_ioas != access->ioas_unpin) 439 + return -EBUSY; 440 + 441 + if (cur_ioas == new_ioas) 442 + return 0; 443 + 444 + /* 445 + * Set ioas to NULL to block any further iommufd_access_pin_pages(). 446 + * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 447 + */ 448 + access->ioas = NULL; 449 + 450 + if (new_ioas) { 451 + rc = iopt_add_access(&new_ioas->iopt, access); 452 + if (rc) { 453 + access->ioas = cur_ioas; 454 + return rc; 455 + } 456 + refcount_inc(&new_ioas->obj.users); 457 + } 458 + 459 + if (cur_ioas) { 460 + if (access->ops->unmap) { 461 + mutex_unlock(&access->ioas_lock); 462 + access->ops->unmap(access->data, 0, ULONG_MAX); 463 + mutex_lock(&access->ioas_lock); 464 + } 465 + iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); 466 + refcount_dec(&cur_ioas->obj.users); 467 + } 468 + 469 + access->ioas = new_ioas; 470 + access->ioas_unpin = new_ioas; 471 + 472 + return 0; 473 + } 474 + 475 + static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) 476 + { 477 + struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); 478 + int rc; 479 + 480 + if (IS_ERR(ioas)) 481 + return PTR_ERR(ioas); 482 + rc = iommufd_access_change_ioas(access, ioas); 483 + iommufd_put_object(&ioas->obj); 484 + return rc; 485 + } 693 486 694 487 void iommufd_access_destroy_object(struct iommufd_object *obj) 695 488 { 696 489 struct iommufd_access 
*access = 697 490 container_of(obj, struct iommufd_access, obj); 698 491 699 - if (access->ioas) { 700 - iopt_remove_access(&access->ioas->iopt, access); 701 - refcount_dec(&access->ioas->obj.users); 702 - access->ioas = NULL; 703 - } 492 + mutex_lock(&access->ioas_lock); 493 + if (access->ioas) 494 + WARN_ON(iommufd_access_change_ioas(access, NULL)); 495 + mutex_unlock(&access->ioas_lock); 704 496 iommufd_ctx_put(access->ictx); 705 497 } 706 498 ··· 818 502 819 503 void iommufd_access_detach(struct iommufd_access *access) 820 504 { 821 - struct iommufd_ioas *cur_ioas = access->ioas; 822 - 823 505 mutex_lock(&access->ioas_lock); 824 - if (WARN_ON(!access->ioas)) 825 - goto out; 826 - /* 827 - * Set ioas to NULL to block any further iommufd_access_pin_pages(). 828 - * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 829 - */ 830 - access->ioas = NULL; 831 - 832 - if (access->ops->unmap) { 506 + if (WARN_ON(!access->ioas)) { 833 507 mutex_unlock(&access->ioas_lock); 834 - access->ops->unmap(access->data, 0, ULONG_MAX); 835 - mutex_lock(&access->ioas_lock); 508 + return; 836 509 } 837 - iopt_remove_access(&cur_ioas->iopt, access); 838 - refcount_dec(&cur_ioas->obj.users); 839 - out: 840 - access->ioas_unpin = NULL; 510 + WARN_ON(iommufd_access_change_ioas(access, NULL)); 841 511 mutex_unlock(&access->ioas_lock); 842 512 } 843 513 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); 844 514 845 515 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) 846 516 { 847 - struct iommufd_ioas *new_ioas; 848 - int rc = 0; 517 + int rc; 849 518 850 519 mutex_lock(&access->ioas_lock); 851 - if (WARN_ON(access->ioas || access->ioas_unpin)) { 520 + if (WARN_ON(access->ioas)) { 852 521 mutex_unlock(&access->ioas_lock); 853 522 return -EINVAL; 854 523 } 855 524 856 - new_ioas = iommufd_get_ioas(access->ictx, ioas_id); 857 - if (IS_ERR(new_ioas)) { 858 - mutex_unlock(&access->ioas_lock); 859 - return PTR_ERR(new_ioas); 860 - } 861 - 862 - rc = 
iopt_add_access(&new_ioas->iopt, access); 863 - if (rc) { 864 - mutex_unlock(&access->ioas_lock); 865 - iommufd_put_object(&new_ioas->obj); 866 - return rc; 867 - } 868 - iommufd_ref_to_users(&new_ioas->obj); 869 - 870 - access->ioas = new_ioas; 871 - access->ioas_unpin = new_ioas; 525 + rc = iommufd_access_change_ioas_id(access, ioas_id); 872 526 mutex_unlock(&access->ioas_lock); 873 - return 0; 527 + return rc; 874 528 } 875 529 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); 530 + 531 + int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) 532 + { 533 + int rc; 534 + 535 + mutex_lock(&access->ioas_lock); 536 + if (!access->ioas) { 537 + mutex_unlock(&access->ioas_lock); 538 + return -ENOENT; 539 + } 540 + rc = iommufd_access_change_ioas_id(access, ioas_id); 541 + mutex_unlock(&access->ioas_lock); 542 + return rc; 543 + } 544 + EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); 876 545 877 546 /** 878 547 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it ··· 1120 819 return rc; 1121 820 } 1122 821 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); 822 + 823 + int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) 824 + { 825 + struct iommu_hw_info *cmd = ucmd->cmd; 826 + void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); 827 + const struct iommu_ops *ops; 828 + struct iommufd_device *idev; 829 + unsigned int data_len; 830 + unsigned int copy_len; 831 + void *data; 832 + int rc; 833 + 834 + if (cmd->flags || cmd->__reserved) 835 + return -EOPNOTSUPP; 836 + 837 + idev = iommufd_get_device(ucmd, cmd->dev_id); 838 + if (IS_ERR(idev)) 839 + return PTR_ERR(idev); 840 + 841 + ops = dev_iommu_ops(idev->dev); 842 + if (ops->hw_info) { 843 + data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type); 844 + if (IS_ERR(data)) { 845 + rc = PTR_ERR(data); 846 + goto out_put; 847 + } 848 + 849 + /* 850 + * drivers that have hw_info callback should have a unique 851 + * iommu_hw_info_type. 
852 + */ 853 + if (WARN_ON_ONCE(cmd->out_data_type == 854 + IOMMU_HW_INFO_TYPE_NONE)) { 855 + rc = -ENODEV; 856 + goto out_free; 857 + } 858 + } else { 859 + cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE; 860 + data_len = 0; 861 + data = NULL; 862 + } 863 + 864 + copy_len = min(cmd->data_len, data_len); 865 + if (copy_to_user(user_ptr, data, copy_len)) { 866 + rc = -EFAULT; 867 + goto out_free; 868 + } 869 + 870 + /* 871 + * Zero the trailing bytes if the user buffer is bigger than the 872 + * data size kernel actually has. 873 + */ 874 + if (copy_len < cmd->data_len) { 875 + if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) { 876 + rc = -EFAULT; 877 + goto out_free; 878 + } 879 + } 880 + 881 + /* 882 + * We return the length the kernel supports so userspace may know what 883 + * the kernel capability is. It could be larger than the input buffer. 884 + */ 885 + cmd->data_len = data_len; 886 + 887 + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 888 + out_free: 889 + kfree(data); 890 + out_put: 891 + iommufd_put_object(&idev->obj); 892 + return rc; 893 + }
+93 -19
drivers/iommu/iommufd/hw_pagetable.c
··· 3 3 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 4 4 */ 5 5 #include <linux/iommu.h> 6 + #include <uapi/linux/iommufd.h> 6 7 7 8 #include "iommufd_private.h" 8 9 ··· 11 10 { 12 11 struct iommufd_hw_pagetable *hwpt = 13 12 container_of(obj, struct iommufd_hw_pagetable, obj); 14 - 15 - WARN_ON(!list_empty(&hwpt->devices)); 16 13 17 14 if (!list_empty(&hwpt->hwpt_item)) { 18 15 mutex_lock(&hwpt->ioas->mutex); ··· 24 25 iommu_domain_free(hwpt->domain); 25 26 26 27 refcount_dec(&hwpt->ioas->obj.users); 27 - mutex_destroy(&hwpt->devices_lock); 28 + } 29 + 30 + void iommufd_hw_pagetable_abort(struct iommufd_object *obj) 31 + { 32 + struct iommufd_hw_pagetable *hwpt = 33 + container_of(obj, struct iommufd_hw_pagetable, obj); 34 + 35 + /* The ioas->mutex must be held until finalize is called. */ 36 + lockdep_assert_held(&hwpt->ioas->mutex); 37 + 38 + if (!list_empty(&hwpt->hwpt_item)) { 39 + list_del_init(&hwpt->hwpt_item); 40 + iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); 41 + } 42 + iommufd_hw_pagetable_destroy(obj); 43 + } 44 + 45 + int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) 46 + { 47 + if (hwpt->enforce_cache_coherency) 48 + return 0; 49 + 50 + if (hwpt->domain->ops->enforce_cache_coherency) 51 + hwpt->enforce_cache_coherency = 52 + hwpt->domain->ops->enforce_cache_coherency( 53 + hwpt->domain); 54 + if (!hwpt->enforce_cache_coherency) 55 + return -EINVAL; 56 + return 0; 28 57 } 29 58 30 59 /** ··· 65 38 * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT 66 39 * will be linked to the given ioas and upon return the underlying iommu_domain 67 40 * is fully popoulated. 41 + * 42 + * The caller must hold the ioas->mutex until after 43 + * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on 44 + * the returned hwpt. 
68 45 */ 69 46 struct iommufd_hw_pagetable * 70 47 iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, ··· 83 52 if (IS_ERR(hwpt)) 84 53 return hwpt; 85 54 86 - INIT_LIST_HEAD(&hwpt->devices); 87 55 INIT_LIST_HEAD(&hwpt->hwpt_item); 88 - mutex_init(&hwpt->devices_lock); 89 56 /* Pairs with iommufd_hw_pagetable_destroy() */ 90 57 refcount_inc(&ioas->obj.users); 91 58 hwpt->ioas = ioas; ··· 94 65 goto out_abort; 95 66 } 96 67 97 - mutex_lock(&hwpt->devices_lock); 68 + /* 69 + * Set the coherency mode before we do iopt_table_add_domain() as some 70 + * iommus have a per-PTE bit that controls it and need to decide before 71 + * doing any maps. It is an iommu driver bug to report 72 + * IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on 73 + * a new domain. 74 + */ 75 + if (idev->enforce_cache_coherency) { 76 + rc = iommufd_hw_pagetable_enforce_cc(hwpt); 77 + if (WARN_ON(rc)) 78 + goto out_abort; 79 + } 98 80 99 81 /* 100 82 * immediate_attach exists only to accommodate iommu drivers that cannot ··· 116 76 if (immediate_attach) { 117 77 rc = iommufd_hw_pagetable_attach(hwpt, idev); 118 78 if (rc) 119 - goto out_unlock; 79 + goto out_abort; 120 80 } 121 81 122 82 rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); 123 83 if (rc) 124 84 goto out_detach; 125 85 list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); 126 - 127 - if (immediate_attach) { 128 - /* See iommufd_device_do_attach() */ 129 - refcount_inc(&hwpt->obj.users); 130 - idev->hwpt = hwpt; 131 - list_add(&idev->devices_item, &hwpt->devices); 132 - } 133 - 134 - mutex_unlock(&hwpt->devices_lock); 135 86 return hwpt; 136 87 137 88 out_detach: 138 89 if (immediate_attach) 139 - iommufd_hw_pagetable_detach(hwpt, idev); 140 - out_unlock: 141 - mutex_unlock(&hwpt->devices_lock); 90 + iommufd_hw_pagetable_detach(idev); 142 91 out_abort: 143 92 iommufd_object_abort_and_destroy(ictx, &hwpt->obj); 144 93 return ERR_PTR(rc); 94 + } 95 + 96 + int 
iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) 97 + { 98 + struct iommu_hwpt_alloc *cmd = ucmd->cmd; 99 + struct iommufd_hw_pagetable *hwpt; 100 + struct iommufd_device *idev; 101 + struct iommufd_ioas *ioas; 102 + int rc; 103 + 104 + if (cmd->flags || cmd->__reserved) 105 + return -EOPNOTSUPP; 106 + 107 + idev = iommufd_get_device(ucmd, cmd->dev_id); 108 + if (IS_ERR(idev)) 109 + return PTR_ERR(idev); 110 + 111 + ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id); 112 + if (IS_ERR(ioas)) { 113 + rc = PTR_ERR(ioas); 114 + goto out_put_idev; 115 + } 116 + 117 + mutex_lock(&ioas->mutex); 118 + hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false); 119 + if (IS_ERR(hwpt)) { 120 + rc = PTR_ERR(hwpt); 121 + goto out_unlock; 122 + } 123 + 124 + cmd->out_hwpt_id = hwpt->obj.id; 125 + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 126 + if (rc) 127 + goto out_hwpt; 128 + iommufd_object_finalize(ucmd->ictx, &hwpt->obj); 129 + goto out_unlock; 130 + 131 + out_hwpt: 132 + iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj); 133 + out_unlock: 134 + mutex_unlock(&ioas->mutex); 135 + iommufd_put_object(&ioas->obj); 136 + out_put_idev: 137 + iommufd_put_object(&idev->obj); 138 + return rc; 145 139 }
+18 -20
drivers/iommu/iommufd/io_pagetable.c
··· 1158 1158 } 1159 1159 1160 1160 void iopt_remove_access(struct io_pagetable *iopt, 1161 - struct iommufd_access *access) 1161 + struct iommufd_access *access, 1162 + u32 iopt_access_list_id) 1162 1163 { 1163 1164 down_write(&iopt->domains_rwsem); 1164 1165 down_write(&iopt->iova_rwsem); 1165 - WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != 1166 - access); 1166 + WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); 1167 1167 WARN_ON(iopt_calculate_iova_alignment(iopt)); 1168 1168 up_write(&iopt->iova_rwsem); 1169 1169 up_write(&iopt->domains_rwsem); 1170 1170 } 1171 1171 1172 - /* Narrow the valid_iova_itree to include reserved ranges from a group. */ 1173 - int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, 1174 - struct device *device, 1175 - struct iommu_group *group, 1176 - phys_addr_t *sw_msi_start) 1172 + /* Narrow the valid_iova_itree to include reserved ranges from a device. */ 1173 + int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, 1174 + struct device *dev, 1175 + phys_addr_t *sw_msi_start) 1177 1176 { 1178 1177 struct iommu_resv_region *resv; 1179 - struct iommu_resv_region *tmp; 1180 - LIST_HEAD(group_resv_regions); 1178 + LIST_HEAD(resv_regions); 1181 1179 unsigned int num_hw_msi = 0; 1182 1180 unsigned int num_sw_msi = 0; 1183 1181 int rc; 1184 1182 1185 - down_write(&iopt->iova_rwsem); 1186 - rc = iommu_get_group_resv_regions(group, &group_resv_regions); 1187 - if (rc) 1188 - goto out_unlock; 1183 + if (iommufd_should_fail()) 1184 + return -EINVAL; 1189 1185 1190 - list_for_each_entry(resv, &group_resv_regions, list) { 1186 + down_write(&iopt->iova_rwsem); 1187 + /* FIXME: drivers allocate memory but there is no failure propogated */ 1188 + iommu_get_resv_regions(dev, &resv_regions); 1189 + 1190 + list_for_each_entry(resv, &resv_regions, list) { 1191 1191 if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) 1192 1192 continue; 1193 1193 ··· 1199 1199 } 1200 1200 1201 1201 rc = 
iopt_reserve_iova(iopt, resv->start, 1202 - resv->length - 1 + resv->start, device); 1202 + resv->length - 1 + resv->start, dev); 1203 1203 if (rc) 1204 1204 goto out_reserved; 1205 1205 } ··· 1214 1214 goto out_free_resv; 1215 1215 1216 1216 out_reserved: 1217 - __iopt_remove_reserved_iova(iopt, device); 1217 + __iopt_remove_reserved_iova(iopt, dev); 1218 1218 out_free_resv: 1219 - list_for_each_entry_safe(resv, tmp, &group_resv_regions, list) 1220 - kfree(resv); 1221 - out_unlock: 1219 + iommu_put_resv_regions(dev, &resv_regions); 1222 1220 up_write(&iopt->iova_rwsem); 1223 1221 return rc; 1224 1222 }
+46 -38
drivers/iommu/iommufd/iommufd_private.h
··· 17 17 struct iommufd_ctx { 18 18 struct file *file; 19 19 struct xarray objects; 20 + struct xarray groups; 20 21 21 22 u8 account_mode; 22 23 /* Compatibility with VFIO no iommu */ ··· 76 75 struct iommu_domain *domain); 77 76 void iopt_table_remove_domain(struct io_pagetable *iopt, 78 77 struct iommu_domain *domain); 79 - int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, 80 - struct device *device, 81 - struct iommu_group *group, 82 - phys_addr_t *sw_msi_start); 78 + int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, 79 + struct device *dev, 80 + phys_addr_t *sw_msi_start); 83 81 int iopt_set_allow_iova(struct io_pagetable *iopt, 84 82 struct rb_root_cached *allowed_iova); 85 83 int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, ··· 119 119 #ifdef CONFIG_IOMMUFD_TEST 120 120 IOMMUFD_OBJ_SELFTEST, 121 121 #endif 122 + IOMMUFD_OBJ_MAX, 122 123 }; 123 124 124 125 /* Base struct for all objects with a userspace ID handle. */ ··· 149 148 up_read(&obj->destroy_rwsem); 150 149 } 151 150 152 - /** 153 - * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount 154 - * protection 155 - * @obj - Object to release 156 - * 157 - * Objects have two refcount protections (destroy_rwsem and the refcount_t 158 - * users). Holding either of these will prevent the object from being destroyed. 159 - * 160 - * Depending on the use case, one protection or the other is appropriate. In 161 - * most cases references are being protected by the destroy_rwsem. This allows 162 - * orderly destruction of the object because iommufd_object_destroy_user() will 163 - * wait for it to become unlocked. However, as a rwsem, it cannot be held across 164 - * a system call return. So cases that have longer term needs must switch 165 - * to the weaker users refcount_t. 166 - * 167 - * With users protection iommufd_object_destroy_user() will return false, 168 - * refusing to destroy the object, causing -EBUSY to userspace. 
169 - */ 170 - static inline void iommufd_ref_to_users(struct iommufd_object *obj) 171 - { 172 - up_read(&obj->destroy_rwsem); 173 - /* iommufd_lock_obj() obtains users as well */ 174 - } 175 151 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); 176 152 void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, 177 153 struct iommufd_object *obj); ··· 238 260 bool msi_cookie : 1; 239 261 /* Head at iommufd_ioas::hwpt_list */ 240 262 struct list_head hwpt_item; 241 - struct mutex devices_lock; 242 - struct list_head devices; 243 263 }; 244 264 245 265 struct iommufd_hw_pagetable * 246 266 iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, 247 267 struct iommufd_device *idev, bool immediate_attach); 268 + int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt); 248 269 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, 249 270 struct iommufd_device *idev); 250 - void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, 251 - struct iommufd_device *idev); 271 + struct iommufd_hw_pagetable * 272 + iommufd_hw_pagetable_detach(struct iommufd_device *idev); 252 273 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); 274 + void iommufd_hw_pagetable_abort(struct iommufd_object *obj); 275 + int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); 276 + 277 + static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, 278 + struct iommufd_hw_pagetable *hwpt) 279 + { 280 + lockdep_assert_not_held(&hwpt->ioas->mutex); 281 + if (hwpt->auto_domain) 282 + iommufd_object_deref_user(ictx, &hwpt->obj); 283 + else 284 + refcount_dec(&hwpt->obj.users); 285 + } 286 + 287 + struct iommufd_group { 288 + struct kref ref; 289 + struct mutex lock; 290 + struct iommufd_ctx *ictx; 291 + struct iommu_group *group; 292 + struct iommufd_hw_pagetable *hwpt; 293 + struct list_head device_list; 294 + phys_addr_t sw_msi_start; 295 + }; 253 296 254 297 /* 255 298 * A iommufd_device 
object represents the binding relationship between a ··· 280 281 struct iommufd_device { 281 282 struct iommufd_object obj; 282 283 struct iommufd_ctx *ictx; 283 - struct iommufd_hw_pagetable *hwpt; 284 - /* Head at iommufd_hw_pagetable::devices */ 285 - struct list_head devices_item; 284 + struct iommufd_group *igroup; 285 + struct list_head group_item; 286 286 /* always the physical device */ 287 287 struct device *dev; 288 - struct iommu_group *group; 289 288 bool enforce_cache_coherency; 290 289 }; 291 290 291 + static inline struct iommufd_device * 292 + iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) 293 + { 294 + return container_of(iommufd_get_object(ucmd->ictx, id, 295 + IOMMUFD_OBJ_DEVICE), 296 + struct iommufd_device, obj); 297 + } 298 + 292 299 void iommufd_device_destroy(struct iommufd_object *obj); 300 + int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); 293 301 294 302 struct iommufd_access { 295 303 struct iommufd_object obj; ··· 312 306 313 307 int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); 314 308 void iopt_remove_access(struct io_pagetable *iopt, 315 - struct iommufd_access *access); 309 + struct iommufd_access *access, 310 + u32 iopt_access_list_id); 316 311 void iommufd_access_destroy_object(struct iommufd_object *obj); 317 312 318 313 #ifdef CONFIG_IOMMUFD_TEST ··· 323 316 void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, 324 317 unsigned int ioas_id, u64 *iova, u32 *flags); 325 318 bool iommufd_should_fail(void); 326 - void __init iommufd_test_init(void); 319 + int __init iommufd_test_init(void); 327 320 void iommufd_test_exit(void); 328 321 bool iommufd_selftest_is_mock_dev(struct device *dev); 329 322 #else ··· 336 329 { 337 330 return false; 338 331 } 339 - static inline void __init iommufd_test_init(void) 332 + static inline int __init iommufd_test_init(void) 340 333 { 334 + return 0; 341 335 } 342 336 static inline void iommufd_test_exit(void) 343 337 {
+19
drivers/iommu/iommufd/iommufd_test.h
··· 17 17 IOMMU_TEST_OP_ACCESS_PAGES, 18 18 IOMMU_TEST_OP_ACCESS_RW, 19 19 IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, 20 + IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, 21 + IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, 20 22 }; 21 23 22 24 enum { ··· 53 51 struct { 54 52 __u32 out_stdev_id; 55 53 __u32 out_hwpt_id; 54 + /* out_idev_id is the standard iommufd_bind object */ 55 + __u32 out_idev_id; 56 56 } mock_domain; 57 + struct { 58 + __u32 pt_id; 59 + } mock_domain_replace; 57 60 struct { 58 61 __aligned_u64 iova; 59 62 __aligned_u64 length; ··· 92 85 struct { 93 86 __u32 limit; 94 87 } memory_limit; 88 + struct { 89 + __u32 ioas_id; 90 + } access_replace_ioas; 95 91 }; 96 92 __u32 last; 97 93 }; 98 94 #define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) 95 + 96 + /* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */ 97 + #define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef 98 + #define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef 99 + 100 + struct iommu_test_hw_info { 101 + __u32 flags; 102 + __u32 test_reg; 103 + }; 99 104 100 105 #endif
+32 -3
drivers/iommu/iommufd/main.c
··· 24 24 25 25 struct iommufd_object_ops { 26 26 void (*destroy)(struct iommufd_object *obj); 27 + void (*abort)(struct iommufd_object *obj); 27 28 }; 28 29 static const struct iommufd_object_ops iommufd_object_ops[]; 29 30 static struct miscdevice vfio_misc_dev; ··· 33 32 size_t size, 34 33 enum iommufd_object_type type) 35 34 { 35 + static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; 36 36 struct iommufd_object *obj; 37 37 int rc; 38 38 ··· 41 39 if (!obj) 42 40 return ERR_PTR(-ENOMEM); 43 41 obj->type = type; 44 - init_rwsem(&obj->destroy_rwsem); 42 + /* 43 + * In most cases the destroy_rwsem is obtained with try so it doesn't 44 + * interact with lockdep, however on destroy we have to sleep. This 45 + * means if we have to destroy an object while holding a get on another 46 + * object it triggers lockdep. Using one locking class per object type 47 + * is a simple and reasonable way to avoid this. 48 + */ 49 + __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", 50 + &obj_keys[type]); 45 51 refcount_set(&obj->users, 1); 46 52 47 53 /* ··· 105 95 void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, 106 96 struct iommufd_object *obj) 107 97 { 108 - iommufd_object_ops[obj->type].destroy(obj); 98 + if (iommufd_object_ops[obj->type].abort) 99 + iommufd_object_ops[obj->type].abort(obj); 100 + else 101 + iommufd_object_ops[obj->type].destroy(obj); 109 102 iommufd_object_abort(ictx, obj); 110 103 } 111 104 ··· 236 223 } 237 224 238 225 xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); 226 + xa_init(&ictx->groups); 239 227 ictx->file = filp; 240 228 filp->private_data = ictx; 241 229 return 0; ··· 272 258 if (WARN_ON(!destroyed)) 273 259 break; 274 260 } 261 + WARN_ON(!xa_empty(&ictx->groups)); 275 262 kfree(ictx); 276 263 return 0; 277 264 } ··· 305 290 306 291 union ucmd_buffer { 307 292 struct iommu_destroy destroy; 293 + struct iommu_hw_info info; 294 + struct iommu_hwpt_alloc hwpt; 308 295 struct iommu_ioas_alloc alloc; 
309 296 struct iommu_ioas_allow_iovas allow_iovas; 310 297 struct iommu_ioas_copy ioas_copy; ··· 338 321 } 339 322 static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { 340 323 IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), 324 + IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, 325 + __reserved), 326 + IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, 327 + __reserved), 341 328 IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, 342 329 struct iommu_ioas_alloc, out_ioas_id), 343 330 IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, ··· 484 463 }, 485 464 [IOMMUFD_OBJ_HW_PAGETABLE] = { 486 465 .destroy = iommufd_hw_pagetable_destroy, 466 + .abort = iommufd_hw_pagetable_abort, 487 467 }, 488 468 #ifdef CONFIG_IOMMUFD_TEST 489 469 [IOMMUFD_OBJ_SELFTEST] = { ··· 523 501 if (ret) 524 502 goto err_misc; 525 503 } 526 - iommufd_test_init(); 504 + ret = iommufd_test_init(); 505 + if (ret) 506 + goto err_vfio_misc; 527 507 return 0; 508 + 509 + err_vfio_misc: 510 + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) 511 + misc_deregister(&vfio_misc_dev); 528 512 err_misc: 529 513 misc_deregister(&iommu_misc_dev); 530 514 return ret; ··· 551 523 MODULE_ALIAS_MISCDEV(VFIO_MINOR); 552 524 MODULE_ALIAS("devname:vfio/vfio"); 553 525 #endif 526 + MODULE_IMPORT_NS(IOMMUFD_INTERNAL); 554 527 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); 555 528 MODULE_LICENSE("GPL");
+157 -56
drivers/iommu/iommufd/selftest.c
··· 9 9 #include <linux/file.h> 10 10 #include <linux/anon_inodes.h> 11 11 #include <linux/fault-inject.h> 12 + #include <linux/platform_device.h> 12 13 #include <uapi/linux/iommufd.h> 13 14 15 + #include "../iommu-priv.h" 14 16 #include "io_pagetable.h" 15 17 #include "iommufd_private.h" 16 18 #include "iommufd_test.h" 17 19 18 20 static DECLARE_FAULT_ATTR(fail_iommufd); 19 21 static struct dentry *dbgfs_root; 22 + static struct platform_device *selftest_iommu_dev; 20 23 21 24 size_t iommufd_test_memory_limit = 65536; 22 25 ··· 131 128 .ops = &mock_blocking_ops, 132 129 }; 133 130 131 + static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) 132 + { 133 + struct iommu_test_hw_info *info; 134 + 135 + info = kzalloc(sizeof(*info), GFP_KERNEL); 136 + if (!info) 137 + return ERR_PTR(-ENOMEM); 138 + 139 + info->test_reg = IOMMU_HW_INFO_SELFTEST_REGVAL; 140 + *length = sizeof(*info); 141 + *type = IOMMU_HW_INFO_TYPE_SELFTEST; 142 + 143 + return info; 144 + } 145 + 134 146 static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) 135 147 { 136 148 struct mock_iommu_domain *mock; ··· 153 135 if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) 154 136 return &mock_blocking_domain; 155 137 156 - if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)) 138 + if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED) 157 139 return NULL; 158 140 159 141 mock = kzalloc(sizeof(*mock), GFP_KERNEL); ··· 294 276 */ 295 277 } 296 278 279 + static struct iommu_device mock_iommu_device = { 280 + }; 281 + 282 + static struct iommu_device *mock_probe_device(struct device *dev) 283 + { 284 + return &mock_iommu_device; 285 + } 286 + 297 287 static const struct iommu_ops mock_ops = { 298 288 .owner = THIS_MODULE, 299 289 .pgsize_bitmap = MOCK_IO_PAGE_SIZE, 290 + .hw_info = mock_domain_hw_info, 300 291 .domain_alloc = mock_domain_alloc, 301 292 .capable = mock_domain_capable, 302 293 .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, 294 + .device_group = 
generic_device_group, 295 + .probe_device = mock_probe_device, 303 296 .default_domain_ops = 304 297 &(struct iommu_domain_ops){ 305 298 .free = mock_domain_free, ··· 319 290 .unmap_pages = mock_domain_unmap_pages, 320 291 .iova_to_phys = mock_domain_iova_to_phys, 321 292 }, 322 - }; 323 - 324 - static struct iommu_device mock_iommu_device = { 325 - .ops = &mock_ops, 326 293 }; 327 294 328 295 static inline struct iommufd_hw_pagetable * ··· 341 316 return hwpt; 342 317 } 343 318 344 - static struct bus_type iommufd_mock_bus_type = { 345 - .name = "iommufd_mock", 346 - .iommu_ops = &mock_ops, 319 + struct mock_bus_type { 320 + struct bus_type bus; 321 + struct notifier_block nb; 347 322 }; 323 + 324 + static struct mock_bus_type iommufd_mock_bus_type = { 325 + .bus = { 326 + .name = "iommufd_mock", 327 + }, 328 + }; 329 + 330 + static atomic_t mock_dev_num; 348 331 349 332 static void mock_dev_release(struct device *dev) 350 333 { 351 334 struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); 352 335 336 + atomic_dec(&mock_dev_num); 353 337 kfree(mdev); 354 338 } 355 339 356 340 static struct mock_dev *mock_dev_create(void) 357 341 { 358 - struct iommu_group *iommu_group; 359 - struct dev_iommu *dev_iommu; 360 342 struct mock_dev *mdev; 361 343 int rc; 362 344 ··· 373 341 374 342 device_initialize(&mdev->dev); 375 343 mdev->dev.release = mock_dev_release; 376 - mdev->dev.bus = &iommufd_mock_bus_type; 377 - 378 - iommu_group = iommu_group_alloc(); 379 - if (IS_ERR(iommu_group)) { 380 - rc = PTR_ERR(iommu_group); 381 - goto err_put; 382 - } 344 + mdev->dev.bus = &iommufd_mock_bus_type.bus; 383 345 384 346 rc = dev_set_name(&mdev->dev, "iommufd_mock%u", 385 - iommu_group_id(iommu_group)); 347 + atomic_inc_return(&mock_dev_num)); 386 348 if (rc) 387 - goto err_group; 388 - 389 - /* 390 - * The iommu core has no way to associate a single device with an iommu 391 - * driver (heck currently it can't even support two iommu_drivers 392 - * registering). 
Hack it together with an open coded dev_iommu_get(). 393 - * Notice that the normal notifier triggered iommu release process also 394 - * does not work here because this bus is not in iommu_buses. 395 - */ 396 - mdev->dev.iommu = kzalloc(sizeof(*dev_iommu), GFP_KERNEL); 397 - if (!mdev->dev.iommu) { 398 - rc = -ENOMEM; 399 - goto err_group; 400 - } 401 - mutex_init(&mdev->dev.iommu->lock); 402 - mdev->dev.iommu->iommu_dev = &mock_iommu_device; 349 + goto err_put; 403 350 404 351 rc = device_add(&mdev->dev); 405 352 if (rc) 406 - goto err_dev_iommu; 407 - 408 - rc = iommu_group_add_device(iommu_group, &mdev->dev); 409 - if (rc) 410 - goto err_del; 411 - iommu_group_put(iommu_group); 353 + goto err_put; 412 354 return mdev; 413 355 414 - err_del: 415 - device_del(&mdev->dev); 416 - err_dev_iommu: 417 - kfree(mdev->dev.iommu); 418 - mdev->dev.iommu = NULL; 419 - err_group: 420 - iommu_group_put(iommu_group); 421 356 err_put: 422 357 put_device(&mdev->dev); 423 358 return ERR_PTR(rc); ··· 392 393 393 394 static void mock_dev_destroy(struct mock_dev *mdev) 394 395 { 395 - iommu_group_remove_device(&mdev->dev); 396 - device_del(&mdev->dev); 397 - kfree(mdev->dev.iommu); 398 - mdev->dev.iommu = NULL; 399 - put_device(&mdev->dev); 396 + device_unregister(&mdev->dev); 400 397 } 401 398 402 399 bool iommufd_selftest_is_mock_dev(struct device *dev) ··· 438 443 /* Userspace must destroy the device_id to destroy the object */ 439 444 cmd->mock_domain.out_hwpt_id = pt_id; 440 445 cmd->mock_domain.out_stdev_id = sobj->obj.id; 446 + cmd->mock_domain.out_idev_id = idev_id; 447 + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 448 + if (rc) 449 + goto out_detach; 441 450 iommufd_object_finalize(ucmd->ictx, &sobj->obj); 442 - return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 451 + return 0; 443 452 453 + out_detach: 454 + iommufd_device_detach(idev); 444 455 out_unbind: 445 456 iommufd_device_unbind(idev); 446 457 out_mdev: 447 458 mock_dev_destroy(sobj->idev.mock_dev); 448 459 
out_sobj: 449 460 iommufd_object_abort(ucmd->ictx, &sobj->obj); 461 + return rc; 462 + } 463 + 464 + /* Replace the mock domain with a manually allocated hw_pagetable */ 465 + static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, 466 + unsigned int device_id, u32 pt_id, 467 + struct iommu_test_cmd *cmd) 468 + { 469 + struct iommufd_object *dev_obj; 470 + struct selftest_obj *sobj; 471 + int rc; 472 + 473 + /* 474 + * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure 475 + * it doesn't race with detach, which is not allowed. 476 + */ 477 + dev_obj = 478 + iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); 479 + if (IS_ERR(dev_obj)) 480 + return PTR_ERR(dev_obj); 481 + 482 + sobj = container_of(dev_obj, struct selftest_obj, obj); 483 + if (sobj->type != TYPE_IDEV) { 484 + rc = -EINVAL; 485 + goto out_dev_obj; 486 + } 487 + 488 + rc = iommufd_device_replace(sobj->idev.idev, &pt_id); 489 + if (rc) 490 + goto out_dev_obj; 491 + 492 + cmd->mock_domain_replace.pt_id = pt_id; 493 + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 494 + 495 + out_dev_obj: 496 + iommufd_put_object(dev_obj); 450 497 return rc; 451 498 } 452 499 ··· 785 748 return rc; 786 749 } 787 750 751 + static int iommufd_test_access_replace_ioas(struct iommufd_ucmd *ucmd, 752 + unsigned int access_id, 753 + unsigned int ioas_id) 754 + { 755 + struct selftest_access *staccess; 756 + int rc; 757 + 758 + staccess = iommufd_access_get(access_id); 759 + if (IS_ERR(staccess)) 760 + return PTR_ERR(staccess); 761 + 762 + rc = iommufd_access_replace(staccess->access, ioas_id); 763 + fput(staccess->file); 764 + return rc; 765 + } 766 + 788 767 /* Check that the pages in a page array match the pages in the user VA */ 789 768 static int iommufd_test_check_pages(void __user *uptr, struct page **pages, 790 769 size_t npages) ··· 1001 948 cmd->add_reserved.length); 1002 949 case IOMMU_TEST_OP_MOCK_DOMAIN: 1003 950 return iommufd_test_mock_domain(ucmd, cmd); 951 + case 
IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE: 952 + return iommufd_test_mock_domain_replace( 953 + ucmd, cmd->id, cmd->mock_domain_replace.pt_id, cmd); 1004 954 case IOMMU_TEST_OP_MD_CHECK_MAP: 1005 955 return iommufd_test_md_check_pa( 1006 956 ucmd, cmd->id, cmd->check_map.iova, ··· 1016 960 case IOMMU_TEST_OP_CREATE_ACCESS: 1017 961 return iommufd_test_create_access(ucmd, cmd->id, 1018 962 cmd->create_access.flags); 963 + case IOMMU_TEST_OP_ACCESS_REPLACE_IOAS: 964 + return iommufd_test_access_replace_ioas( 965 + ucmd, cmd->id, cmd->access_replace_ioas.ioas_id); 1019 966 case IOMMU_TEST_OP_ACCESS_PAGES: 1020 967 return iommufd_test_access_pages( 1021 968 ucmd, cmd->id, cmd->access_pages.iova, ··· 1051 992 return should_fail(&fail_iommufd, 1); 1052 993 } 1053 994 1054 - void __init iommufd_test_init(void) 995 + int __init iommufd_test_init(void) 1055 996 { 997 + struct platform_device_info pdevinfo = { 998 + .name = "iommufd_selftest_iommu", 999 + }; 1000 + int rc; 1001 + 1056 1002 dbgfs_root = 1057 1003 fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd); 1058 - WARN_ON(bus_register(&iommufd_mock_bus_type)); 1004 + 1005 + selftest_iommu_dev = platform_device_register_full(&pdevinfo); 1006 + if (IS_ERR(selftest_iommu_dev)) { 1007 + rc = PTR_ERR(selftest_iommu_dev); 1008 + goto err_dbgfs; 1009 + } 1010 + 1011 + rc = bus_register(&iommufd_mock_bus_type.bus); 1012 + if (rc) 1013 + goto err_platform; 1014 + 1015 + rc = iommu_device_sysfs_add(&mock_iommu_device, 1016 + &selftest_iommu_dev->dev, NULL, "%s", 1017 + dev_name(&selftest_iommu_dev->dev)); 1018 + if (rc) 1019 + goto err_bus; 1020 + 1021 + rc = iommu_device_register_bus(&mock_iommu_device, &mock_ops, 1022 + &iommufd_mock_bus_type.bus, 1023 + &iommufd_mock_bus_type.nb); 1024 + if (rc) 1025 + goto err_sysfs; 1026 + return 0; 1027 + 1028 + err_sysfs: 1029 + iommu_device_sysfs_remove(&mock_iommu_device); 1030 + err_bus: 1031 + bus_unregister(&iommufd_mock_bus_type.bus); 1032 + err_platform: 1033 + 
platform_device_unregister(selftest_iommu_dev); 1034 + err_dbgfs: 1035 + debugfs_remove_recursive(dbgfs_root); 1036 + return rc; 1059 1037 } 1060 1038 1061 1039 void iommufd_test_exit(void) 1062 1040 { 1041 + iommu_device_sysfs_remove(&mock_iommu_device); 1042 + iommu_device_unregister_bus(&mock_iommu_device, 1043 + &iommufd_mock_bus_type.bus, 1044 + &iommufd_mock_bus_type.nb); 1045 + bus_unregister(&iommufd_mock_bus_type.bus); 1046 + platform_device_unregister(selftest_iommu_dev); 1063 1047 debugfs_remove_recursive(dbgfs_root); 1064 - bus_unregister(&iommufd_mock_bus_type); 1065 1048 }
+6 -5
drivers/vfio/iommufd.c
··· 146 146 return -EINVAL; 147 147 148 148 if (vdev->iommufd_attached) 149 - return -EBUSY; 150 - 151 - rc = iommufd_device_attach(vdev->iommufd_device, pt_id); 149 + rc = iommufd_device_replace(vdev->iommufd_device, pt_id); 150 + else 151 + rc = iommufd_device_attach(vdev->iommufd_device, pt_id); 152 152 if (rc) 153 153 return rc; 154 154 vdev->iommufd_attached = true; ··· 223 223 lockdep_assert_held(&vdev->dev_set->lock); 224 224 225 225 if (vdev->iommufd_attached) 226 - return -EBUSY; 227 - rc = iommufd_access_attach(vdev->iommufd_access, *pt_id); 226 + rc = iommufd_access_replace(vdev->iommufd_access, *pt_id); 227 + else 228 + rc = iommufd_access_attach(vdev->iommufd_access, *pt_id); 228 229 if (rc) 229 230 return rc; 230 231 vdev->iommufd_attached = true;
+4
drivers/vfio/vfio_main.c
··· 1536 1536 /* group->container cannot change while a vfio device is open */ 1537 1537 if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) 1538 1538 return -EINVAL; 1539 + if (!device->ops->dma_unmap) 1540 + return -EINVAL; 1539 1541 if (vfio_device_has_container(device)) 1540 1542 return vfio_device_container_pin_pages(device, iova, 1541 1543 npage, prot, pages); ··· 1574 1572 void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 1575 1573 { 1576 1574 if (WARN_ON(!vfio_assert_device_open(device))) 1575 + return; 1576 + if (WARN_ON(!device->ops->dma_unmap)) 1577 1577 return; 1578 1578 1579 1579 if (vfio_device_has_container(device)) {
+5 -11
include/linux/iommu.h
··· 228 228 /** 229 229 * struct iommu_ops - iommu ops and capabilities 230 230 * @capable: check capability 231 + * @hw_info: report iommu hardware information. The data buffer returned by this 232 + * op is allocated in the iommu driver and freed by the caller after 233 + * use. The information type is one of enum iommu_hw_info_type defined 234 + * in include/uapi/linux/iommufd.h. 231 235 * @domain_alloc: allocate iommu domain 232 236 * @probe_device: Add device to iommu driver handling 233 237 * @release_device: Remove device from iommu driver handling ··· 261 257 */ 262 258 struct iommu_ops { 263 259 bool (*capable)(struct device *dev, enum iommu_cap); 260 + void *(*hw_info)(struct device *dev, u32 *length, u32 *type); 264 261 265 262 /* Domain allocation and freeing by the iommu driver */ 266 263 struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); ··· 453 448 .start = ULONG_MAX, 454 449 .freelist = LIST_HEAD_INIT(gather->freelist), 455 450 }; 456 - } 457 - 458 - static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) 459 - { 460 - /* 461 - * Assume that valid ops must be installed if iommu_probe_device() 462 - * has succeeded. The device ops are essentially for internal use 463 - * within the IOMMU subsystem itself, so we should be able to trust 464 - * ourselves not to misuse the helper. 465 - */ 466 - return dev->iommu->iommu_dev->ops; 467 451 } 468 452 469 453 extern int bus_iommu_probe(const struct bus_type *bus);
+2
include/linux/iommufd.h
··· 23 23 void iommufd_device_unbind(struct iommufd_device *idev); 24 24 25 25 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); 26 + int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); 26 27 void iommufd_device_detach(struct iommufd_device *idev); 27 28 28 29 struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); ··· 49 48 const struct iommufd_access_ops *ops, void *data, u32 *id); 50 49 void iommufd_access_destroy(struct iommufd_access *access); 51 50 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); 51 + int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id); 52 52 void iommufd_access_detach(struct iommufd_access *access); 53 53 54 54 void iommufd_ctx_get(struct iommufd_ctx *ictx);
+97
include/uapi/linux/iommufd.h
··· 45 45 IOMMUFD_CMD_IOAS_UNMAP, 46 46 IOMMUFD_CMD_OPTION, 47 47 IOMMUFD_CMD_VFIO_IOAS, 48 + IOMMUFD_CMD_HWPT_ALLOC, 49 + IOMMUFD_CMD_GET_HW_INFO, 48 50 }; 49 51 50 52 /** ··· 346 344 __u16 __reserved; 347 345 }; 348 346 #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) 347 + 348 + /** 349 + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) 350 + * @size: sizeof(struct iommu_hwpt_alloc) 351 + * @flags: Must be 0 352 + * @dev_id: The device to allocate this HWPT for 353 + * @pt_id: The IOAS to connect this HWPT to 354 + * @out_hwpt_id: The ID of the new HWPT 355 + * @__reserved: Must be 0 356 + * 357 + * Explicitly allocate a hardware page table object. This is the same object 358 + * type that is returned by iommufd_device_attach() and represents the 359 + * underlying iommu driver's iommu_domain kernel object. 360 + * 361 + * A HWPT will be created with the IOVA mappings from the given IOAS. 362 + */ 363 + struct iommu_hwpt_alloc { 364 + __u32 size; 365 + __u32 flags; 366 + __u32 dev_id; 367 + __u32 pt_id; 368 + __u32 out_hwpt_id; 369 + __u32 __reserved; 370 + }; 371 + #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) 372 + 373 + /** 374 + * struct iommu_hw_info_vtd - Intel VT-d hardware information 375 + * 376 + * @flags: Must be 0 377 + * @__reserved: Must be 0 378 + * 379 + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec 380 + * section 11.4.2 Capability Register. 381 + * @ecap_reg: Value of Intel VT-d capability register defined in VT-d spec 382 + * section 11.4.3 Extended Capability Register. 383 + * 384 + * User needs to understand the Intel VT-d specification to decode the 385 + * register value. 
386 + */ 387 + struct iommu_hw_info_vtd { 388 + __u32 flags; 389 + __u32 __reserved; 390 + __aligned_u64 cap_reg; 391 + __aligned_u64 ecap_reg; 392 + }; 393 + 394 + /** 395 + * enum iommu_hw_info_type - IOMMU Hardware Info Types 396 + * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware 397 + * info 398 + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type 399 + */ 400 + enum iommu_hw_info_type { 401 + IOMMU_HW_INFO_TYPE_NONE, 402 + IOMMU_HW_INFO_TYPE_INTEL_VTD, 403 + }; 404 + 405 + /** 406 + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) 407 + * @size: sizeof(struct iommu_hw_info) 408 + * @flags: Must be 0 409 + * @dev_id: The device bound to the iommufd 410 + * @data_len: Input the length of a user buffer in bytes. Output the length of 411 + * data that kernel supports 412 + * @data_uptr: User pointer to a user-space buffer used by the kernel to fill 413 + * the iommu type specific hardware information data 414 + * @out_data_type: Output the iommu hardware info type as defined in the enum 415 + * iommu_hw_info_type. 416 + * @__reserved: Must be 0 417 + * 418 + * Query an iommu type specific hardware information data from an iommu behind 419 + * a given device that has been bound to iommufd. This hardware info data will 420 + * be used to sync capabilities between the virtual iommu and the physical 421 + * iommu, e.g. a nested translation setup needs to check the hardware info, so 422 + * a guest stage-1 page table can be compatible with the physical iommu. 423 + * 424 + * To capture an iommu type specific hardware information data, @data_uptr and 425 + * its length @data_len must be provided. Trailing bytes will be zeroed if the 426 + * user buffer is larger than the data that kernel has. Otherwise, kernel only 427 + * fills the buffer using the given length in @data_len. 
If the ioctl succeeds, 428 + * @data_len will be updated to the length that kernel actually supports, 429 + * @out_data_type will be filled to decode the data filled in the buffer 430 + * pointed by @data_uptr. Input @data_len == zero is allowed. 431 + */ 432 + struct iommu_hw_info { 433 + __u32 size; 434 + __u32 flags; 435 + __u32 dev_id; 436 + __u32 data_len; 437 + __aligned_u64 data_uptr; 438 + __u32 out_data_type; 439 + __u32 __reserved; 440 + }; 441 + #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) 349 442 #endif
+6
include/uapi/linux/vfio.h
··· 940 940 * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only 941 941 * allowed on cdev fds. 942 942 * 943 + * If a vfio device is currently attached to a valid hw_pagetable, without doing 944 + * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl 945 + * passing in another hw_pagetable (hwpt) id is allowed. This action, also known 946 + * as a hw_pagetable replacement, will replace the device's currently attached 947 + * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. 948 + * 943 949 * Return: 0 on success, -errno on failure. 944 950 */ 945 951 struct vfio_device_attach_iommufd_pt {
+118 -12
tools/testing/selftests/iommu/iommufd.c
··· 9 9 10 10 #include "iommufd_utils.h" 11 11 12 - static void *buffer; 13 - 14 - static unsigned long PAGE_SIZE; 15 12 static unsigned long HUGEPAGE_SIZE; 16 13 17 14 #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) ··· 113 116 } 114 117 115 118 TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); 119 + TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO); 116 120 TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); 117 121 TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); 118 122 TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); ··· 186 188 uint32_t ioas_id; 187 189 uint32_t stdev_id; 188 190 uint32_t hwpt_id; 191 + uint32_t device_id; 189 192 uint64_t base_iova; 190 193 }; 191 194 ··· 213 214 214 215 for (i = 0; i != variant->mock_domains; i++) { 215 216 test_cmd_mock_domain(self->ioas_id, &self->stdev_id, 216 - &self->hwpt_id); 217 + &self->hwpt_id, &self->device_id); 217 218 self->base_iova = MOCK_APERTURE_START; 218 219 } 219 220 } ··· 264 265 { 265 266 /* Create a device attached directly to a hwpt */ 266 267 if (self->stdev_id) { 267 - test_cmd_mock_domain(self->hwpt_id, NULL, NULL); 268 + test_cmd_mock_domain(self->hwpt_id, NULL, NULL, NULL); 268 269 } else { 269 270 test_err_mock_domain(ENOENT, self->hwpt_id, NULL, NULL); 270 271 } ··· 289 290 for (i = 0; i != 10; i++) { 290 291 test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, 291 292 self->base_iova + i * PAGE_SIZE); 293 + } 294 + } 295 + 296 + TEST_F(iommufd_ioas, get_hw_info) 297 + { 298 + struct iommu_test_hw_info buffer_exact; 299 + struct iommu_test_hw_info_buffer_larger { 300 + struct iommu_test_hw_info info; 301 + uint64_t trailing_bytes; 302 + } buffer_larger; 303 + struct iommu_test_hw_info_buffer_smaller { 304 + __u32 flags; 305 + } buffer_smaller; 306 + 307 + if (self->device_id) { 308 + /* Provide a zero-size user_buffer */ 309 + test_cmd_get_hw_info(self->device_id, NULL, 0); 310 + /* Provide a user_buffer with exact size */ 311 + test_cmd_get_hw_info(self->device_id, &buffer_exact, 
sizeof(buffer_exact)); 312 + /* 313 + * Provide a user_buffer with size larger than the exact size to check if 314 + * kernel zero the trailing bytes. 315 + */ 316 + test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); 317 + /* 318 + * Provide a user_buffer with size smaller than the exact size to check if 319 + * the fields within the size range still gets updated. 320 + */ 321 + test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); 322 + } else { 323 + test_err_get_hw_info(ENOENT, self->device_id, 324 + &buffer_exact, sizeof(buffer_exact)); 325 + test_err_get_hw_info(ENOENT, self->device_id, 326 + &buffer_larger, sizeof(buffer_larger)); 292 327 } 293 328 } 294 329 ··· 717 684 _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), 718 685 &access_cmd)); 719 686 test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, 720 - &mock_hwpt_id); 687 + &mock_hwpt_id, NULL); 721 688 check_map_cmd.id = mock_hwpt_id; 722 689 ASSERT_EQ(0, ioctl(self->fd, 723 690 _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), ··· 872 839 * If a domain already existed then everything was pinned within 873 840 * the fork, so this copies from one domain to another. 
874 841 */ 875 - test_cmd_mock_domain(self->ioas_id, NULL, NULL); 842 + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); 876 843 check_access_rw(_metadata, self->fd, access_id, 877 844 MOCK_APERTURE_START, 0); 878 845 ··· 921 888 ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); 922 889 923 890 /* Read pages from the remote process */ 924 - test_cmd_mock_domain(self->ioas_id, NULL, NULL); 891 + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); 925 892 check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); 926 893 927 894 ASSERT_EQ(0, close(pipefds[1])); ··· 1068 1035 uint32_t ioas_id; 1069 1036 uint32_t hwpt_id; 1070 1037 uint32_t hwpt_ids[2]; 1038 + uint32_t stdev_ids[2]; 1039 + uint32_t idev_ids[2]; 1071 1040 int mmap_flags; 1072 1041 size_t mmap_buf_size; 1073 1042 }; ··· 1091 1056 ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); 1092 1057 1093 1058 for (i = 0; i != variant->mock_domains; i++) 1094 - test_cmd_mock_domain(self->ioas_id, NULL, &self->hwpt_ids[i]); 1059 + test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], 1060 + &self->hwpt_ids[i], &self->idev_ids[i]); 1095 1061 self->hwpt_id = self->hwpt_ids[0]; 1096 1062 1097 1063 self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; ··· 1286 1250 /* Add and destroy a domain while the area exists */ 1287 1251 old_id = self->hwpt_ids[1]; 1288 1252 test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, 1289 - &self->hwpt_ids[1]); 1253 + &self->hwpt_ids[1], NULL); 1290 1254 1291 1255 check_mock_iova(buf + start, iova, length); 1292 1256 check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, ··· 1319 1283 .dst_iova = MOCK_APERTURE_START, 1320 1284 .length = BUFFER_SIZE, 1321 1285 }; 1322 - unsigned int ioas_id; 1286 + struct iommu_ioas_unmap unmap_cmd = { 1287 + .size = sizeof(unmap_cmd), 1288 + .ioas_id = self->ioas_id, 1289 + .iova = MOCK_APERTURE_START, 1290 + .length = BUFFER_SIZE, 1291 + }; 1292 + unsigned int new_ioas_id, ioas_id; 1323 1293 1324 1294 /* Pin the pages in an IOAS with no 
domains then copy to an IOAS with domains */ 1325 1295 test_ioctl_ioas_alloc(&ioas_id); ··· 1343 1301 ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); 1344 1302 check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); 1345 1303 1304 + /* Now replace the ioas with a new one */ 1305 + test_ioctl_ioas_alloc(&new_ioas_id); 1306 + test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, 1307 + &copy_cmd.src_iova); 1308 + test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); 1309 + 1310 + /* Destroy the old ioas and cleanup copied mapping */ 1311 + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_UNMAP, &unmap_cmd)); 1312 + test_ioctl_destroy(ioas_id); 1313 + 1314 + /* Then run the same test again with the new ioas */ 1315 + access_cmd.access_pages.iova = copy_cmd.src_iova; 1316 + ASSERT_EQ(0, 1317 + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), 1318 + &access_cmd)); 1319 + copy_cmd.src_ioas_id = new_ioas_id; 1320 + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); 1321 + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); 1322 + 1346 1323 test_cmd_destroy_access_pages( 1347 1324 access_cmd.id, access_cmd.access_pages.out_access_pages_id); 1348 1325 test_cmd_destroy_access(access_cmd.id); 1349 1326 1327 + test_ioctl_destroy(new_ioas_id); 1328 + } 1329 + 1330 + TEST_F(iommufd_mock_domain, replace) 1331 + { 1332 + uint32_t ioas_id; 1333 + 1334 + test_ioctl_ioas_alloc(&ioas_id); 1335 + 1336 + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); 1337 + 1338 + /* 1339 + * Replacing the IOAS causes the prior HWPT to be deallocated, thus we 1340 + * should get enoent when we try to use it. 
1341 + */ 1342 + if (variant->mock_domains == 1) 1343 + test_err_mock_domain_replace(ENOENT, self->stdev_ids[0], 1344 + self->hwpt_ids[0]); 1345 + 1346 + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); 1347 + if (variant->mock_domains >= 2) { 1348 + test_cmd_mock_domain_replace(self->stdev_ids[0], 1349 + self->hwpt_ids[1]); 1350 + test_cmd_mock_domain_replace(self->stdev_ids[0], 1351 + self->hwpt_ids[1]); 1352 + test_cmd_mock_domain_replace(self->stdev_ids[0], 1353 + self->hwpt_ids[0]); 1354 + } 1355 + 1356 + test_cmd_mock_domain_replace(self->stdev_ids[0], self->ioas_id); 1350 1357 test_ioctl_destroy(ioas_id); 1358 + } 1359 + 1360 + TEST_F(iommufd_mock_domain, alloc_hwpt) 1361 + { 1362 + int i; 1363 + 1364 + for (i = 0; i != variant->mock_domains; i++) { 1365 + uint32_t stddev_id; 1366 + uint32_t hwpt_id; 1367 + 1368 + test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id); 1369 + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); 1370 + test_ioctl_destroy(stddev_id); 1371 + test_ioctl_destroy(hwpt_id); 1372 + } 1351 1373 } 1352 1374 1353 1375 /* VFIO compatibility IOCTLs */ ··· 1535 1429 1536 1430 /* Create what VFIO would consider a group */ 1537 1431 test_ioctl_ioas_alloc(&self->ioas_id); 1538 - test_cmd_mock_domain(self->ioas_id, NULL, NULL); 1432 + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); 1539 1433 1540 1434 /* Attach it to the vfio compat */ 1541 1435 vfio_ioas_cmd.ioas_id = self->ioas_id;
+64 -7
tools/testing/selftests/iommu/iommufd_fail_nth.c
··· 41 41 42 42 static __attribute__((constructor)) void setup_buffer(void) 43 43 { 44 + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); 45 + 44 46 BUFFER_SIZE = 2*1024*1024; 45 47 46 48 buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, ··· 315 313 316 314 fail_nth_enable(); 317 315 318 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) 316 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) 319 317 return -1; 320 318 321 319 if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, ··· 326 324 if (_test_ioctl_destroy(self->fd, stdev_id)) 327 325 return -1; 328 326 329 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) 327 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) 330 328 return -1; 331 329 return 0; 332 330 } ··· 350 348 if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) 351 349 return -1; 352 350 353 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) 351 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) 354 352 return -1; 355 353 356 354 fail_nth_enable(); 357 355 358 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) 356 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, 357 + NULL)) 359 358 return -1; 360 359 361 360 if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, ··· 370 367 if (_test_ioctl_destroy(self->fd, stdev_id2)) 371 368 return -1; 372 369 373 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) 370 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) 374 371 return -1; 375 - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) 372 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, 373 + NULL)) 376 374 return -1; 377 375 return 0; 378 376 } ··· 530 526 if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) 531 527 return -1; 532 528 533 - if (_test_cmd_mock_domain(self->fd, ioas_id, 
&stdev_id, &hwpt_id)) 529 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) 534 530 return -1; 535 531 536 532 if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, ··· 569 565 self->access_id = 0; 570 566 571 567 if (_test_ioctl_destroy(self->fd, stdev_id)) 568 + return -1; 569 + return 0; 570 + } 571 + 572 + /* device.c */ 573 + TEST_FAIL_NTH(basic_fail_nth, device) 574 + { 575 + struct iommu_test_hw_info info; 576 + uint32_t ioas_id; 577 + uint32_t ioas_id2; 578 + uint32_t stdev_id; 579 + uint32_t idev_id; 580 + uint32_t hwpt_id; 581 + __u64 iova; 582 + 583 + self->fd = open("/dev/iommu", O_RDWR); 584 + if (self->fd == -1) 585 + return -1; 586 + 587 + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) 588 + return -1; 589 + 590 + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id2)) 591 + return -1; 592 + 593 + iova = MOCK_APERTURE_START; 594 + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, PAGE_SIZE, &iova, 595 + IOMMU_IOAS_MAP_FIXED_IOVA | 596 + IOMMU_IOAS_MAP_WRITEABLE | 597 + IOMMU_IOAS_MAP_READABLE)) 598 + return -1; 599 + if (_test_ioctl_ioas_map(self->fd, ioas_id2, buffer, PAGE_SIZE, &iova, 600 + IOMMU_IOAS_MAP_FIXED_IOVA | 601 + IOMMU_IOAS_MAP_WRITEABLE | 602 + IOMMU_IOAS_MAP_READABLE)) 603 + return -1; 604 + 605 + fail_nth_enable(); 606 + 607 + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL, 608 + &idev_id)) 609 + return -1; 610 + 611 + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info))) 612 + return -1; 613 + 614 + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id)) 615 + return -1; 616 + 617 + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) 618 + return -1; 619 + 620 + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) 572 621 return -1; 573 622 return 0; 574 623 }
+139 -5
tools/testing/selftests/iommu/iommufd_utils.h
··· 19 19 static void *buffer; 20 20 static unsigned long BUFFER_SIZE; 21 21 22 + static unsigned long PAGE_SIZE; 23 + 24 + #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) 25 + #define offsetofend(TYPE, MEMBER) \ 26 + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) 27 + 22 28 /* 23 29 * Have the kernel check the refcount on pages. I don't know why a freshly 24 30 * mmap'd anon non-compound page starts out with a ref of 3 ··· 45 39 }) 46 40 47 41 static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, 48 - __u32 *hwpt_id) 42 + __u32 *hwpt_id, __u32 *idev_id) 49 43 { 50 44 struct iommu_test_cmd cmd = { 51 45 .size = sizeof(cmd), ··· 63 57 assert(cmd.id != 0); 64 58 if (hwpt_id) 65 59 *hwpt_id = cmd.mock_domain.out_hwpt_id; 60 + if (idev_id) 61 + *idev_id = cmd.mock_domain.out_idev_id; 66 62 return 0; 67 63 } 68 - #define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id) \ 69 - ASSERT_EQ(0, \ 70 - _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, hwpt_id)) 64 + #define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id, idev_id) \ 65 + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, \ 66 + hwpt_id, idev_id)) 71 67 #define test_err_mock_domain(_errno, ioas_id, stdev_id, hwpt_id) \ 72 68 EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ 73 - stdev_id, hwpt_id)) 69 + stdev_id, hwpt_id, NULL)) 70 + 71 + static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, 72 + __u32 *hwpt_id) 73 + { 74 + struct iommu_test_cmd cmd = { 75 + .size = sizeof(cmd), 76 + .op = IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, 77 + .id = stdev_id, 78 + .mock_domain_replace = { 79 + .pt_id = pt_id, 80 + }, 81 + }; 82 + int ret; 83 + 84 + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); 85 + if (ret) 86 + return ret; 87 + if (hwpt_id) 88 + *hwpt_id = cmd.mock_domain_replace.pt_id; 89 + return 0; 90 + } 91 + 92 + #define test_cmd_mock_domain_replace(stdev_id, pt_id) \ 93 + ASSERT_EQ(0, _test_cmd_mock_domain_replace(self->fd, 
stdev_id, pt_id, \ 94 + NULL)) 95 + #define test_err_mock_domain_replace(_errno, stdev_id, pt_id) \ 96 + EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ 97 + pt_id, NULL)) 98 + 99 + static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, 100 + __u32 *hwpt_id) 101 + { 102 + struct iommu_hwpt_alloc cmd = { 103 + .size = sizeof(cmd), 104 + .dev_id = device_id, 105 + .pt_id = pt_id, 106 + }; 107 + int ret; 108 + 109 + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &cmd); 110 + if (ret) 111 + return ret; 112 + if (hwpt_id) 113 + *hwpt_id = cmd.out_hwpt_id; 114 + return 0; 115 + } 116 + 117 + #define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \ 118 + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id)) 119 + 120 + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, 121 + unsigned int ioas_id) 122 + { 123 + struct iommu_test_cmd cmd = { 124 + .size = sizeof(cmd), 125 + .op = IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, 126 + .id = access_id, 127 + .access_replace_ioas = { .ioas_id = ioas_id }, 128 + }; 129 + int ret; 130 + 131 + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); 132 + if (ret) 133 + return ret; 134 + return 0; 135 + } 136 + #define test_cmd_access_replace_ioas(access_id, ioas_id) \ 137 + ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id)) 74 138 75 139 static int _test_cmd_create_access(int fd, unsigned int ioas_id, 76 140 __u32 *access_id, unsigned int flags) ··· 352 276 }) 353 277 354 278 #endif 279 + 280 + /* @data can be NULL */ 281 + static int _test_cmd_get_hw_info(int fd, __u32 device_id, 282 + void *data, size_t data_len) 283 + { 284 + struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; 285 + struct iommu_hw_info cmd = { 286 + .size = sizeof(cmd), 287 + .dev_id = device_id, 288 + .data_len = data_len, 289 + .data_uptr = (uint64_t)data, 290 + }; 291 + int ret; 292 + 293 + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); 294 + if (ret) 295 + return ret; 296 + 297 + 
assert(cmd.out_data_type == IOMMU_HW_INFO_TYPE_SELFTEST); 298 + 299 + /* 300 + * The struct iommu_test_hw_info should be the one defined 301 + * by the current kernel. 302 + */ 303 + assert(cmd.data_len == sizeof(struct iommu_test_hw_info)); 304 + 305 + /* 306 + * Trailing bytes should be 0 if user buffer is larger than 307 + * the data that kernel reports. 308 + */ 309 + if (data_len > cmd.data_len) { 310 + char *ptr = (char *)(data + cmd.data_len); 311 + int idx = 0; 312 + 313 + while (idx < data_len - cmd.data_len) { 314 + assert(!*(ptr + idx)); 315 + idx++; 316 + } 317 + } 318 + 319 + if (info) { 320 + if (data_len >= offsetofend(struct iommu_test_hw_info, test_reg)) 321 + assert(info->test_reg == IOMMU_HW_INFO_SELFTEST_REGVAL); 322 + if (data_len >= offsetofend(struct iommu_test_hw_info, flags)) 323 + assert(!info->flags); 324 + } 325 + 326 + return 0; 327 + } 328 + 329 + #define test_cmd_get_hw_info(device_id, data, data_len) \ 330 + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ 331 + data, data_len)) 332 + 333 + #define test_err_get_hw_info(_errno, device_id, data, data_len) \ 334 + EXPECT_ERRNO(_errno, \ 335 + _test_cmd_get_hw_info(self->fd, device_id, \ 336 + data, data_len))