Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio/noiommu: Don't use iommu_present() to track fake groups

Using iommu_present() to determine whether an IOMMU group is real or
fake has some problems. First, apparently Power systems don't
register an IOMMU on the device bus, so the groups and containers get
marked as noiommu and then won't bind to their actual IOMMU driver.
Second, I expect we'll run into the same issue as we try to support
vGPUs through vfio, since they're likely to follow a similar pattern:
creating an IOMMU group on a virtual device and then providing a vfio
IOMMU backend tailored to the sort of isolation they provide, which
won't necessarily be fully compatible with the IOMMU API.

The solution here is to use the existing iommudata interface to IOMMU
groups, which allows us to easily identify the fake groups we've
created for noiommu purposes. The iommudata value we set is purely
arbitrary, since we only ever compare the pointer, so we use the
address of the noiommu switch itself (&noiommu).

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <sshukla@mvista.com>
Fixes: 03a76b60f8ba ("vfio: Include No-IOMMU mode")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

+10 -14
+10 -14
drivers/vfio/vfio.c
··· 123 123 /* 124 124 * With noiommu enabled, an IOMMU group will be created for a device 125 125 * that doesn't already have one and doesn't have an iommu_ops on their 126 - * bus. We use iommu_present() again in the main code to detect these 127 - * fake groups. 126 + * bus. We set iommudata simply to be able to identify these groups 127 + * as special use and for reclamation later. 128 128 */ 129 129 if (group || !noiommu || iommu_present(dev->bus)) 130 130 return group; ··· 134 134 return NULL; 135 135 136 136 iommu_group_set_name(group, "vfio-noiommu"); 137 + iommu_group_set_iommudata(group, &noiommu, NULL); 137 138 ret = iommu_group_add_device(group, dev); 138 139 iommu_group_put(group); 139 140 if (ret) ··· 159 158 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev) 160 159 { 161 160 #ifdef CONFIG_VFIO_NOIOMMU 162 - if (!iommu_present(dev->bus)) 161 + if (iommu_group_get_iommudata(group) == &noiommu) 163 162 iommu_group_remove_device(dev); 164 163 #endif 165 164 ··· 191 190 return -ENOTTY; 192 191 } 193 192 194 - static int vfio_iommu_present(struct device *dev, void *unused) 195 - { 196 - return iommu_present(dev->bus) ? 1 : 0; 197 - } 198 - 199 193 static int vfio_noiommu_attach_group(void *iommu_data, 200 194 struct iommu_group *iommu_group) 201 195 { 202 - return iommu_group_for_each_dev(iommu_group, NULL, 203 - vfio_iommu_present) ? -EINVAL : 0; 196 + return iommu_group_get_iommudata(iommu_group) == &noiommu ? 
0 : -EINVAL; 204 197 } 205 198 206 199 static void vfio_noiommu_detach_group(void *iommu_data, ··· 318 323 /** 319 324 * Group objects - create, release, get, put, search 320 325 */ 321 - static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 322 - bool iommu_present) 326 + static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) 323 327 { 324 328 struct vfio_group *group, *tmp; 325 329 struct device *dev; ··· 336 342 atomic_set(&group->container_users, 0); 337 343 atomic_set(&group->opened, 0); 338 344 group->iommu_group = iommu_group; 339 - group->noiommu = !iommu_present; 345 + #ifdef CONFIG_VFIO_NOIOMMU 346 + group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu); 347 + #endif 340 348 341 349 group->nb.notifier_call = vfio_iommu_group_notifier; 342 350 ··· 763 767 764 768 group = vfio_group_get_from_iommu(iommu_group); 765 769 if (!group) { 766 - group = vfio_create_group(iommu_group, iommu_present(dev->bus)); 770 + group = vfio_create_group(iommu_group); 767 771 if (IS_ERR(group)) { 768 772 iommu_group_put(iommu_group); 769 773 return PTR_ERR(group);