Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management

The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.

This checks and sets the default DMA mode during driver binding, and
cleans up during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).

For cases where the devices are assigned for userspace control through the
userspace driver framework (i.e. VFIO), the drivers (for example, vfio_pci,
vfio_platform, etc.) may set a new flag (driver_managed_dma) to skip this
default setting on the assumption that the drivers know what they are
doing with the device DMA.

Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure()
is currently a problem. As things stand, the IOMMU driver ignored the
initial iommu_probe_device() call when the device was added, since at
that point it had no fwspec yet. In this situation,
{of,acpi}_iommu_configure() retrigger iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, until
that gets fixed, iommu_device_use_default_domain() goes at the end, and
arch_teardown_dma_ops() is called if it fails.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>

authored by

Lu Baolu and committed by
Joerg Roedel
512881ea 4a6d9dd5

+108 -2
+18
drivers/amba/bus.c
··· 22 22 #include <linux/of_irq.h> 23 23 #include <linux/of_device.h> 24 24 #include <linux/acpi.h> 25 + #include <linux/iommu.h> 26 + #include <linux/dma-map-ops.h> 25 27 26 28 #define to_amba_driver(d) container_of(d, struct amba_driver, drv) 27 29 ··· 279 277 280 278 static int amba_dma_configure(struct device *dev) 281 279 { 280 + struct amba_driver *drv = to_amba_driver(dev->driver); 282 281 enum dev_dma_attr attr; 283 282 int ret = 0; 284 283 ··· 290 287 ret = acpi_dma_configure(dev, attr); 291 288 } 292 289 290 + if (!ret && !drv->driver_managed_dma) { 291 + ret = iommu_device_use_default_domain(dev); 292 + if (ret) 293 + arch_teardown_dma_ops(dev); 294 + } 295 + 293 296 return ret; 297 + } 298 + 299 + static void amba_dma_cleanup(struct device *dev) 300 + { 301 + struct amba_driver *drv = to_amba_driver(dev->driver); 302 + 303 + if (!drv->driver_managed_dma) 304 + iommu_device_unuse_default_domain(dev); 294 305 } 295 306 296 307 #ifdef CONFIG_PM ··· 376 359 .remove = amba_remove, 377 360 .shutdown = amba_shutdown, 378 361 .dma_configure = amba_dma_configure, 362 + .dma_cleanup = amba_dma_cleanup, 379 363 .pm = &amba_pm, 380 364 }; 381 365 EXPORT_SYMBOL_GPL(amba_bustype);
+18
drivers/base/platform.c
··· 30 30 #include <linux/property.h> 31 31 #include <linux/kmemleak.h> 32 32 #include <linux/types.h> 33 + #include <linux/iommu.h> 34 + #include <linux/dma-map-ops.h> 33 35 34 36 #include "base.h" 35 37 #include "power/power.h" ··· 1458 1456 1459 1457 static int platform_dma_configure(struct device *dev) 1460 1458 { 1459 + struct platform_driver *drv = to_platform_driver(dev->driver); 1461 1460 enum dev_dma_attr attr; 1462 1461 int ret = 0; 1463 1462 ··· 1469 1466 ret = acpi_dma_configure(dev, attr); 1470 1467 } 1471 1468 1469 + if (!ret && !drv->driver_managed_dma) { 1470 + ret = iommu_device_use_default_domain(dev); 1471 + if (ret) 1472 + arch_teardown_dma_ops(dev); 1473 + } 1474 + 1472 1475 return ret; 1476 + } 1477 + 1478 + static void platform_dma_cleanup(struct device *dev) 1479 + { 1480 + struct platform_driver *drv = to_platform_driver(dev->driver); 1481 + 1482 + if (!drv->driver_managed_dma) 1483 + iommu_device_unuse_default_domain(dev); 1473 1484 } 1474 1485 1475 1486 static const struct dev_pm_ops platform_dev_pm_ops = { ··· 1500 1483 .remove = platform_remove, 1501 1484 .shutdown = platform_shutdown, 1502 1485 .dma_configure = platform_dma_configure, 1486 + .dma_cleanup = platform_dma_cleanup, 1503 1487 .pm = &platform_dev_pm_ops, 1504 1488 }; 1505 1489 EXPORT_SYMBOL_GPL(platform_bus_type);
+22 -2
drivers/bus/fsl-mc/fsl-mc-bus.c
··· 21 21 #include <linux/dma-mapping.h> 22 22 #include <linux/acpi.h> 23 23 #include <linux/iommu.h> 24 + #include <linux/dma-map-ops.h> 24 25 25 26 #include "fsl-mc-private.h" 26 27 ··· 141 140 { 142 141 struct device *dma_dev = dev; 143 142 struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); 143 + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); 144 144 u32 input_id = mc_dev->icid; 145 + int ret; 145 146 146 147 while (dev_is_fsl_mc(dma_dev)) 147 148 dma_dev = dma_dev->parent; 148 149 149 150 if (dev_of_node(dma_dev)) 150 - return of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); 151 + ret = of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); 152 + else 153 + ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); 151 154 152 - return acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); 155 + if (!ret && !mc_drv->driver_managed_dma) { 156 + ret = iommu_device_use_default_domain(dev); 157 + if (ret) 158 + arch_teardown_dma_ops(dev); 159 + } 160 + 161 + return ret; 162 + } 163 + 164 + static void fsl_mc_dma_cleanup(struct device *dev) 165 + { 166 + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); 167 + 168 + if (!mc_drv->driver_managed_dma) 169 + iommu_device_unuse_default_domain(dev); 153 170 } 154 171 155 172 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, ··· 331 312 .match = fsl_mc_bus_match, 332 313 .uevent = fsl_mc_bus_uevent, 333 314 .dma_configure = fsl_mc_dma_configure, 315 + .dma_cleanup = fsl_mc_dma_cleanup, 334 316 .dev_groups = fsl_mc_dev_groups, 335 317 .bus_groups = fsl_mc_bus_groups, 336 318 };
+18
drivers/pci/pci-driver.c
··· 20 20 #include <linux/of_device.h> 21 21 #include <linux/acpi.h> 22 22 #include <linux/dma-map-ops.h> 23 + #include <linux/iommu.h> 23 24 #include "pci.h" 24 25 #include "pcie/portdrv.h" 25 26 ··· 1602 1601 */ 1603 1602 static int pci_dma_configure(struct device *dev) 1604 1603 { 1604 + struct pci_driver *driver = to_pci_driver(dev->driver); 1605 1605 struct device *bridge; 1606 1606 int ret = 0; 1607 1607 ··· 1618 1616 } 1619 1617 1620 1618 pci_put_host_bridge_device(bridge); 1619 + 1620 + if (!ret && !driver->driver_managed_dma) { 1621 + ret = iommu_device_use_default_domain(dev); 1622 + if (ret) 1623 + arch_teardown_dma_ops(dev); 1624 + } 1625 + 1621 1626 return ret; 1627 + } 1628 + 1629 + static void pci_dma_cleanup(struct device *dev) 1630 + { 1631 + struct pci_driver *driver = to_pci_driver(dev->driver); 1632 + 1633 + if (!driver->driver_managed_dma) 1634 + iommu_device_unuse_default_domain(dev); 1622 1635 } 1623 1636 1624 1637 struct bus_type pci_bus_type = { ··· 1649 1632 .pm = PCI_PM_OPS_PTR, 1650 1633 .num_vf = pci_bus_num_vf, 1651 1634 .dma_configure = pci_dma_configure, 1635 + .dma_cleanup = pci_dma_cleanup, 1652 1636 }; 1653 1637 EXPORT_SYMBOL(pci_bus_type); 1654 1638
+8
include/linux/amba/bus.h
··· 79 79 void (*remove)(struct amba_device *); 80 80 void (*shutdown)(struct amba_device *); 81 81 const struct amba_id *id_table; 82 + /* 83 + * For most device drivers, no need to care about this flag as long as 84 + * all DMAs are handled through the kernel DMA API. For some special 85 + * ones, for example VFIO drivers, they know how to manage the DMA 86 + * themselves and set this flag so that the IOMMU layer will allow them 87 + * to setup and manage their own I/O address space. 88 + */ 89 + bool driver_managed_dma; 82 90 }; 83 91 84 92 /*
+8
include/linux/fsl/mc.h
··· 32 32 * @shutdown: Function called at shutdown time to quiesce the device 33 33 * @suspend: Function called when a device is stopped 34 34 * @resume: Function called when a device is resumed 35 + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. 36 + * For most device drivers, no need to care about this flag 37 + * as long as all DMAs are handled through the kernel DMA API. 38 + * For some special ones, for example VFIO drivers, they know 39 + * how to manage the DMA themselves and set this flag so that 40 + * the IOMMU layer will allow them to setup and manage their 41 + * own I/O address space. 35 42 * 36 43 * Generic DPAA device driver object for device drivers that are registered 37 44 * with a DPRC bus. This structure is to be embedded in each device-specific ··· 52 45 void (*shutdown)(struct fsl_mc_device *dev); 53 46 int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); 54 47 int (*resume)(struct fsl_mc_device *dev); 48 + bool driver_managed_dma; 55 49 }; 56 50 57 51 #define to_fsl_mc_driver(_drv) \
+8
include/linux/pci.h
··· 895 895 * created once it is bound to the driver. 896 896 * @driver: Driver model structure. 897 897 * @dynids: List of dynamically added device IDs. 898 + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. 899 + * For most device drivers, no need to care about this flag 900 + * as long as all DMAs are handled through the kernel DMA API. 901 + * For some special ones, for example VFIO drivers, they know 902 + * how to manage the DMA themselves and set this flag so that 903 + * the IOMMU layer will allow them to setup and manage their 904 + * own I/O address space. 898 905 */ 899 906 struct pci_driver { 900 907 struct list_head node; ··· 920 913 const struct attribute_group **dev_groups; 921 914 struct device_driver driver; 922 915 struct pci_dynids dynids; 916 + bool driver_managed_dma; 923 917 }; 924 918 925 919 static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
+8
include/linux/platform_device.h
··· 210 210 struct device_driver driver; 211 211 const struct platform_device_id *id_table; 212 212 bool prevent_deferred_probe; 213 + /* 214 + * For most device drivers, no need to care about this flag as long as 215 + * all DMAs are handled through the kernel DMA API. For some special 216 + * ones, for example VFIO drivers, they know how to manage the DMA 217 + * themselves and set this flag so that the IOMMU layer will allow them 218 + * to setup and manage their own I/O address space. 219 + */ 220 + bool driver_managed_dma; 213 221 }; 214 222 215 223 #define to_platform_driver(drv) (container_of((drv), struct platform_driver, \