Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'dma-mapping-6.12-2024-09-19' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

- support DMA zones for arm64 systems where memory starts at > 4GB
(Baruch Siach, Catalin Marinas)

- support direct calls into dma-iommu and thus obsolete dma_map_ops for
many common configurations (Leon Romanovsky)

- add DMA-API tracing (Sean Anderson)

- remove the not very useful return value from various dma_set_* APIs
(Christoph Hellwig)

- misc cleanups and minor optimizations (Chen Y, Yosry Ahmed, Christoph
Hellwig)

* tag 'dma-mapping-6.12-2024-09-19' of git://git.infradead.org/users/hch/dma-mapping:
dma-mapping: reflow dma_supported
dma-mapping: reliably inform about DMA support for IOMMU
dma-mapping: add tracing for dma-mapping API calls
dma-mapping: use IOMMU DMA calls for common alloc/free page calls
dma-direct: optimize page freeing when it is not addressable
dma-mapping: clearly mark DMA ops as an architecture feature
vdpa_sim: don't select DMA_OPS
arm64: mm: keep low RAM dma zone
dma-mapping: don't return errors from dma_set_max_seg_size
dma-mapping: don't return errors from dma_set_seg_boundary
dma-mapping: don't return errors from dma_set_min_align_mask
scsi: check that busses support the DMA API before setting dma parameters
arm64: mm: fix DMA zone when dma-ranges is missing
dma-mapping: direct calls for dma-iommu
dma-mapping: call ->unmap_page and ->unmap_sg unconditionally
arm64: support DMA zone above 4GB
dma-mapping: replace zone_dma_bits by zone_dma_limit
dma-mapping: use bit masking to check VM_DMA_COHERENT
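
The "don't return errors from dma_set_*" items above mean the segment-size, segment-boundary and min-align-mask setters are now void; a missing dev->dma_parms triggers a WARN_ON_ONCE() rather than an -EIO the caller had to propagate. A minimal standalone model of the old versus new calling convention (plain userspace C with hypothetical device structs, not kernel code):

	#include <stdio.h>

	struct device_dma_parameters { unsigned int max_segment_size; };
	struct device { struct device_dma_parameters *dma_parms; };

	/* old contract: fail with -EIO when the bus never set up dma_parms */
	static int old_dma_set_max_seg_size(struct device *dev, unsigned int size)
	{
		if (!dev->dma_parms)
			return -5;	/* -EIO */
		dev->dma_parms->max_segment_size = size;
		return 0;
	}

	/* new contract: void return, just warn once on a missing dma_parms */
	static void new_dma_set_max_seg_size(struct device *dev, unsigned int size)
	{
		if (!dev->dma_parms) {
			fprintf(stderr, "WARN_ON_ONCE: no dma_parms\n");
			return;
		}
		dev->dma_parms->max_segment_size = size;
	}

	int main(void)
	{
		struct device_dma_parameters parms = { 0 };
		struct device dev = { .dma_parms = &parms };

		if (old_dma_set_max_seg_size(&dev, 65536))	/* old style */
			return 1;
		new_dma_set_max_seg_size(&dev, 65536);		/* new style */
		printf("max_segment_size = %u\n", parms.max_segment_size);
		return 0;
	}

This is why the driver hunks below collapse the "ret = dma_set_max_seg_size(...); if (ret) ..." pattern into a single call.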

+782 -226
+1
MAINTAINERS
···
 F:	drivers/iommu/dma-iommu.c
 F:	drivers/iommu/dma-iommu.h
 F:	drivers/iommu/iova.c
+F:	include/linux/iommu-dma.h
 F:	include/linux/iova.h
 
 IOMMU SUBSYSTEM
+9
arch/Kconfig
···
 	def_bool y
 endif
 
+#
+# Selected by architectures that need custom DMA operations for e.g. legacy
+# IOMMUs not handled by dma-iommu. Drivers must never select this symbol.
+#
+config ARCH_HAS_DMA_OPS
+	depends on HAS_DMA
+	select DMA_OPS_HELPERS
+	bool
+
 menu "General architecture-dependent options"
 
 config ARCH_HAS_SUBPAGE_FAULTS
+1 -1
arch/alpha/Kconfig
···
 	default y
 	select ARCH_32BIT_USTAT_F_TINODE
 	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DMA_OPS if PCI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select ARCH_NO_PREEMPT
 	select ARCH_NO_SG_CHAIN
 	select ARCH_USE_CMPXCHG_LOCKREF
-	select DMA_OPS if PCI
 	select FORCE_PCI
 	select PCI_DOMAINS if PCI
 	select PCI_SYSCALL if PCI
+1 -1
arch/arm/Kconfig
···
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DMA_ALLOC if MMU
+	select ARCH_HAS_DMA_OPS
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
···
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select DMA_DECLARE_COHERENT
 	select DMA_GLOBAL_POOL if !MMU
-	select DMA_OPS
 	select DMA_NONCOHERENT_MMAP if MMU
 	select EDAC_SUPPORT
 	select EDAC_ATOMIC_SCRUB
+1
arch/arm64/Kconfig
···
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DMA_OPS if XEN
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_FAST_MULTIPLIER
+18 -21
arch/arm64/mm/init.c
···
 						low_size, high);
 }
 
-/*
- * Return the maximum physical address for a zone accessible by the given bits
- * limit. If DRAM starts above 32-bit, expand the zone to the maximum
- * available memory, otherwise cap it at 32-bit.
- */
-static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
+static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit)
 {
-	phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
-	phys_addr_t phys_start = memblock_start_of_DRAM();
+	/**
+	 * Information we get from firmware (e.g. DT dma-ranges) describe DMA
+	 * bus constraints. Devices using DMA might have their own limitations.
+	 * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM
+	 * DMA zone on platforms that have RAM there.
+	 */
+	if (memblock_start_of_DRAM() < U32_MAX)
+		zone_limit = min(zone_limit, U32_MAX);
 
-	if (phys_start > U32_MAX)
-		zone_mask = PHYS_ADDR_MAX;
-	else if (phys_start > zone_mask)
-		zone_mask = U32_MAX;
-
-	return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
+	return min(zone_limit, memblock_end_of_DRAM() - 1) + 1;
 }
 
 static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
-	unsigned int __maybe_unused acpi_zone_dma_bits;
-	unsigned int __maybe_unused dt_zone_dma_bits;
-	phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
+	phys_addr_t __maybe_unused acpi_zone_dma_limit;
+	phys_addr_t __maybe_unused dt_zone_dma_limit;
+	phys_addr_t __maybe_unused dma32_phys_limit =
+		max_zone_phys(DMA_BIT_MASK(32));
 
 #ifdef CONFIG_ZONE_DMA
-	acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
-	dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
-	zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
-	arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
+	acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address();
+	dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL);
+	zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit);
+	arm64_dma_phys_limit = max_zone_phys(zone_dma_limit);
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
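
To make the arm64 change above concrete, here is a small userspace model of the new max_zone_phys() calculation (illustrative addresses only, not kernel code). With RAM that starts above 4 GB, ZONE_DMA now follows the firmware-reported DMA limit instead of expanding to all of memory; with low RAM present, the traditional 32-bit cap is kept.

	#include <stdio.h>
	#include <stdint.h>

	#define U32_MAX	0xffffffffULL

	static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

	/* mirrors the new max_zone_phys(): cap at 32-bit only if RAM exists there */
	static uint64_t max_zone_phys(uint64_t zone_limit, uint64_t dram_start,
				      uint64_t dram_end)
	{
		if (dram_start < U32_MAX)
			zone_limit = min_u64(zone_limit, U32_MAX);
		return min_u64(zone_limit, dram_end - 1) + 1;
	}

	int main(void)
	{
		/* RAM at 32 GB..96 GB with a 36-bit firmware DMA limit:
		 * ZONE_DMA ends at 64 GB rather than spanning all of RAM. */
		printf("%#llx\n", (unsigned long long)
		       max_zone_phys(0xfffffffffULL, 0x800000000ULL, 0x1800000000ULL));

		/* RAM starting at 0: the low 32-bit ZONE_DMA is preserved. */
		printf("%#llx\n", (unsigned long long)
		       max_zone_phys(0xfffffffffULL, 0, 0x200000000ULL));
		return 0;
	}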
+1 -1
arch/mips/Kconfig
···
 	select ARCH_HAS_CPU_FINALIZE_INIT
 	select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000
 	select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
+	select ARCH_HAS_DMA_OPS if MACH_JAZZ
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE if !EVA
···
 	select ARC_PROMLIB
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
-	select DMA_OPS
 	select FW_ARC
 	select FW_ARC32
 	select ARCH_MAY_HAVE_PC_FDC
+1 -1
arch/parisc/Kconfig
···
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_CPU_CACHE_ALIASING
 	select ARCH_HAS_DMA_ALLOC if PA11
+	select ARCH_HAS_DMA_OPS
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
···
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select HAVE_RELIABLE_STACKTRACE
-	select DMA_OPS
 	select RTC_CLASS
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
+1 -1
arch/powerpc/Kconfig
···
 	select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES
+	select ARCH_HAS_DMA_OPS if PPC64
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV
···
 	select CPUMASK_OFFSTACK if NR_CPUS >= 8192
 	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 	select DMA_OPS_BYPASS if PPC64
-	select DMA_OPS if PPC64
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
+4 -1
arch/powerpc/mm/mem.c
···
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * By using 31-bit unconditionally, we can exploit zone_dma_limit to inform the
  * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
  * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
  * ZONE_DMA.
···
 {
 	unsigned long long total_ram = memblock_phys_mem_size();
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
+	int zone_dma_bits;
 
 #ifdef CONFIG_HIGHMEM
 	unsigned long v = __fix_to_virt(FIX_KMAP_END);
···
 		zone_dma_bits = 30;
 	else
 		zone_dma_bits = 31;
+
+	zone_dma_limit = DMA_BIT_MASK(zone_dma_bits);
 
 #ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
+1 -1
arch/s390/Kconfig
···
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_OPS if PCI
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select ARCH_HAS_FORTIFY_SOURCE
···
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
 	select DCACHE_WORD_ACCESS if !KMSAN
-	select DMA_OPS if PCI
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
 	select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC
+1 -1
arch/s390/mm/init.c
···
 
 	vmem_map_init();
 	sparse_init();
-	zone_dma_bits = 31;
+	zone_dma_limit = DMA_BIT_MASK(31);
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+1 -1
arch/sparc/Kconfig
···
 	bool
 	default y
 	select ARCH_HAS_CPU_CACHE_ALIASING
+	select ARCH_HAS_DMA_OPS
 	select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI
 	select ARCH_MIGHT_HAVE_PC_SERIO
-	select DMA_OPS
 	select OF
 	select OF_PROMTREE
 	select HAVE_ASM_MODVERSIONS
+1 -1
arch/x86/Kconfig
···
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
 	select ARCH_HAS_EARLY_DEBUG if KGDB
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
···
 
 config GART_IOMMU
 	bool "Old AMD GART IOMMU support"
-	select DMA_OPS
 	select IOMMU_HELPER
 	select SWIOTLB
 	depends on X86_64 && PCI && AMD_NB
+1 -3
drivers/accel/qaic/qaic_drv.c
···
 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (ret)
 		return ret;
-	ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
-	if (ret)
-		return ret;
+	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
 
 	qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]);
 	if (IS_ERR(qdev->bar_0))
+1 -3
drivers/dma/idma64.c
···
 
 	idma64->dma.dev = chip->sysdev;
 
-	ret = dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
-	if (ret)
-		return ret;
+	dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
 
 	ret = dma_async_device_register(&idma64->dma);
 	if (ret)
+1 -4
drivers/dma/pl330.c
···
 	 * This is the limit for transfers with a buswidth of 1, larger
 	 * buswidths will have larger limits.
 	 */
-	ret = dma_set_max_seg_size(&adev->dev, 1900800);
-	if (ret)
-		dev_err(&adev->dev, "unable to set the seg size\n");
-
+	dma_set_max_seg_size(&adev->dev, 1900800);
 
 	init_pl330_debugfs(pl330);
 	dev_info(&adev->dev,
+1 -5
drivers/dma/qcom/bam_dma.c
···
 
 	/* set max dma segment size */
 	bdev->common.dev = bdev->dev;
-	ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE);
-	if (ret) {
-		dev_err(bdev->dev, "cannot set maximum segment size\n");
-		goto err_bam_channel_exit;
-	}
+	dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE);
 
 	platform_set_drvdata(pdev, bdev);
 
+1 -3
drivers/dma/sh/rcar-dmac.c
···
 
 	dmac->dev = &pdev->dev;
 	platform_set_drvdata(pdev, dmac);
-	ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
-	if (ret)
-		return ret;
+	dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
 
 	ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
 	if (ret)
+1 -5
drivers/dma/ste_dma40.c
···
 	if (ret)
 		goto destroy_cache;
 
-	ret = dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE);
-	if (ret) {
-		d40_err(dev, "Failed to set dma max seg size\n");
-		goto destroy_cache;
-	}
+	dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE);
 
 	d40_hw_init(base);
 
+1 -5
drivers/gpu/drm/mediatek/mtk_drm_drv.c
···
 	 * Configure the DMA segment size to make sure we get contiguous IOVA
 	 * when importing PRIME buffers.
 	 */
-	ret = dma_set_max_seg_size(dma_dev, UINT_MAX);
-	if (ret) {
-		dev_err(dma_dev, "Failed to set DMA segment size\n");
-		goto err_component_unbind;
-	}
+	dma_set_max_seg_size(dma_dev, UINT_MAX);
 
 	ret = drm_vblank_init(drm, MAX_CRTC);
 	if (ret < 0)
+1 -1
drivers/iommu/Kconfig
···
 # IOMMU-agnostic DMA-mapping layer
 config IOMMU_DMA
 	def_bool ARM64 || X86 || S390
-	select DMA_OPS
+	select DMA_OPS_HELPERS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select IRQ_MSI_IOMMU
+36 -68
drivers/iommu/dma-iommu.c
··· 17 17 #include <linux/gfp.h> 18 18 #include <linux/huge_mm.h> 19 19 #include <linux/iommu.h> 20 + #include <linux/iommu-dma.h> 20 21 #include <linux/iova.h> 21 22 #include <linux/irq.h> 22 23 #include <linux/list_sort.h> ··· 1038 1037 return NULL; 1039 1038 } 1040 1039 1041 - static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, 1042 - size_t size, enum dma_data_direction dir, gfp_t gfp, 1043 - unsigned long attrs) 1040 + struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, 1041 + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) 1044 1042 { 1045 1043 struct dma_sgt_handle *sh; 1046 1044 ··· 1055 1055 return &sh->sgt; 1056 1056 } 1057 1057 1058 - static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 1058 + void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 1059 1059 struct sg_table *sgt, enum dma_data_direction dir) 1060 1060 { 1061 1061 struct dma_sgt_handle *sh = sgt_handle(sgt); ··· 1066 1066 kfree(sh); 1067 1067 } 1068 1068 1069 - static void iommu_dma_sync_single_for_cpu(struct device *dev, 1070 - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 1069 + void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 1070 + size_t size, enum dma_data_direction dir) 1071 1071 { 1072 1072 phys_addr_t phys; 1073 1073 ··· 1081 1081 swiotlb_sync_single_for_cpu(dev, phys, size, dir); 1082 1082 } 1083 1083 1084 - static void iommu_dma_sync_single_for_device(struct device *dev, 1085 - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 1084 + void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, 1085 + size_t size, enum dma_data_direction dir) 1086 1086 { 1087 1087 phys_addr_t phys; 1088 1088 ··· 1096 1096 arch_sync_dma_for_device(phys, size, dir); 1097 1097 } 1098 1098 1099 - static void iommu_dma_sync_sg_for_cpu(struct device *dev, 1100 - struct scatterlist *sgl, int nelems, 1101 - enum dma_data_direction dir) 1099 + void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, 1100 + int nelems, enum dma_data_direction dir) 1102 1101 { 1103 1102 struct scatterlist *sg; 1104 1103 int i; ··· 1111 1112 arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); 1112 1113 } 1113 1114 1114 - static void iommu_dma_sync_sg_for_device(struct device *dev, 1115 - struct scatterlist *sgl, int nelems, 1116 - enum dma_data_direction dir) 1115 + void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, 1116 + int nelems, enum dma_data_direction dir) 1117 1117 { 1118 1118 struct scatterlist *sg; 1119 1119 int i; ··· 1127 1129 arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); 1128 1130 } 1129 1131 1130 - static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 1131 - unsigned long offset, size_t size, enum dma_data_direction dir, 1132 - unsigned long attrs) 1132 + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 1133 + unsigned long offset, size_t size, enum dma_data_direction dir, 1134 + unsigned long attrs) 1133 1135 { 1134 1136 phys_addr_t phys = page_to_phys(page) + offset; 1135 1137 bool coherent = dev_is_dma_coherent(dev); ··· 1187 1189 return iova; 1188 1190 } 1189 1191 1190 - static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, 1192 + void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, 1191 1193 size_t size, enum dma_data_direction dir, unsigned long attrs) 1192 1194 { 1193 1195 struct iommu_domain *domain = 
iommu_get_dma_domain(dev); ··· 1340 1342 * impedance-matching, to be able to hand off a suitably-aligned list, 1341 1343 * but still preserve the original offsets and sizes for the caller. 1342 1344 */ 1343 - static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, 1344 - int nents, enum dma_data_direction dir, unsigned long attrs) 1345 + int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1346 + enum dma_data_direction dir, unsigned long attrs) 1345 1347 { 1346 1348 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1347 1349 struct iommu_dma_cookie *cookie = domain->iova_cookie; ··· 1460 1462 return ret; 1461 1463 } 1462 1464 1463 - static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 1464 - int nents, enum dma_data_direction dir, unsigned long attrs) 1465 + void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 1466 + enum dma_data_direction dir, unsigned long attrs) 1465 1467 { 1466 1468 dma_addr_t end = 0, start; 1467 1469 struct scatterlist *tmp; ··· 1510 1512 __iommu_dma_unmap(dev, start, end - start); 1511 1513 } 1512 1514 1513 - static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, 1515 + dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, 1514 1516 size_t size, enum dma_data_direction dir, unsigned long attrs) 1515 1517 { 1516 1518 return __iommu_dma_map(dev, phys, size, ··· 1518 1520 dma_get_mask(dev)); 1519 1521 } 1520 1522 1521 - static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, 1523 + void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, 1522 1524 size_t size, enum dma_data_direction dir, unsigned long attrs) 1523 1525 { 1524 1526 __iommu_dma_unmap(dev, handle, size); ··· 1555 1557 dma_free_contiguous(dev, page, alloc_size); 1556 1558 } 1557 1559 1558 - static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, 1560 + void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, 1559 1561 dma_addr_t handle, unsigned long attrs) 1560 1562 { 1561 1563 __iommu_dma_unmap(dev, handle, size); ··· 1599 1601 return NULL; 1600 1602 } 1601 1603 1602 - static void *iommu_dma_alloc(struct device *dev, size_t size, 1603 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1604 + void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 1605 + gfp_t gfp, unsigned long attrs) 1604 1606 { 1605 1607 bool coherent = dev_is_dma_coherent(dev); 1606 1608 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); ··· 1633 1635 return cpu_addr; 1634 1636 } 1635 1637 1636 - static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 1638 + int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 1637 1639 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1638 1640 unsigned long attrs) 1639 1641 { ··· 1664 1666 vma->vm_page_prot); 1665 1667 } 1666 1668 1667 - static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 1669 + int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 1668 1670 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1669 1671 unsigned long attrs) 1670 1672 { ··· 1691 1693 return ret; 1692 1694 } 1693 1695 1694 - static unsigned long iommu_dma_get_merge_boundary(struct device *dev) 1696 + unsigned long iommu_dma_get_merge_boundary(struct device *dev) 1695 1697 { 1696 1698 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1697 1699 1698 1700 return (1UL << __ffs(domain->pgsize_bitmap)) - 1; 1699 1701 
} 1700 1702 1701 - static size_t iommu_dma_opt_mapping_size(void) 1703 + size_t iommu_dma_opt_mapping_size(void) 1702 1704 { 1703 1705 return iova_rcache_range(); 1704 1706 } 1705 1707 1706 - static size_t iommu_dma_max_mapping_size(struct device *dev) 1708 + size_t iommu_dma_max_mapping_size(struct device *dev) 1707 1709 { 1708 1710 if (dev_is_untrusted(dev)) 1709 1711 return swiotlb_max_mapping_size(dev); 1710 1712 1711 1713 return SIZE_MAX; 1712 1714 } 1713 - 1714 - static const struct dma_map_ops iommu_dma_ops = { 1715 - .flags = DMA_F_PCI_P2PDMA_SUPPORTED | 1716 - DMA_F_CAN_SKIP_SYNC, 1717 - .alloc = iommu_dma_alloc, 1718 - .free = iommu_dma_free, 1719 - .alloc_pages_op = dma_common_alloc_pages, 1720 - .free_pages = dma_common_free_pages, 1721 - .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, 1722 - .free_noncontiguous = iommu_dma_free_noncontiguous, 1723 - .mmap = iommu_dma_mmap, 1724 - .get_sgtable = iommu_dma_get_sgtable, 1725 - .map_page = iommu_dma_map_page, 1726 - .unmap_page = iommu_dma_unmap_page, 1727 - .map_sg = iommu_dma_map_sg, 1728 - .unmap_sg = iommu_dma_unmap_sg, 1729 - .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, 1730 - .sync_single_for_device = iommu_dma_sync_single_for_device, 1731 - .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, 1732 - .sync_sg_for_device = iommu_dma_sync_sg_for_device, 1733 - .map_resource = iommu_dma_map_resource, 1734 - .unmap_resource = iommu_dma_unmap_resource, 1735 - .get_merge_boundary = iommu_dma_get_merge_boundary, 1736 - .opt_mapping_size = iommu_dma_opt_mapping_size, 1737 - .max_mapping_size = iommu_dma_max_mapping_size, 1738 - }; 1739 1715 1740 1716 void iommu_setup_dma_ops(struct device *dev) 1741 1717 { ··· 1718 1746 if (dev_is_pci(dev)) 1719 1747 dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; 1720 1748 1721 - if (iommu_is_dma_domain(domain)) { 1722 - if (iommu_dma_init_domain(domain, dev)) 1723 - goto out_err; 1724 - dev->dma_ops = &iommu_dma_ops; 1725 - } else if (dev->dma_ops == &iommu_dma_ops) { 1726 - /* Clean up if we've switched *from* a DMA domain */ 1727 - dev->dma_ops = NULL; 1728 - } 1749 + dev->dma_iommu = iommu_is_dma_domain(domain); 1750 + if (dev->dma_iommu && iommu_dma_init_domain(domain, dev)) 1751 + goto out_err; 1729 1752 1730 1753 return; 1731 1754 out_err: 1732 - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", 1733 - dev_name(dev)); 1755 + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", 1756 + dev_name(dev)); 1757 + dev->dma_iommu = false; 1734 1758 } 1735 1759 1736 1760 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
-1
drivers/iommu/intel/Kconfig
···
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && X86
-	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select IOMMUFD_DRIVER if IOMMUFD
+2 -2
drivers/macintosh/macio_asic.c
···
 	dma_set_max_seg_size(&dev->ofdev.dev, 65536);
 	dma_set_seg_boundary(&dev->ofdev.dev, 0xffffffff);
 
-#if defined(CONFIG_PCI) && defined(CONFIG_DMA_OPS)
+#if defined(CONFIG_PCI) && defined(CONFIG_ARCH_HAS_DMA_OPS)
 	/* Set the DMA ops to the ones from the PCI device, this could be
 	 * fishy if we didn't know that on PowerMac it's always direct ops
 	 * or iommu ops that will work fine
···
 	 */
 	dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
 	dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
-#endif /* CONFIG_PCI && CONFIG_DMA_OPS */
+#endif /* CONFIG_PCI && CONFIG_ARCH_HAS_DMA_OPS */
 
 #ifdef DEBUG
 	printk("preparing mdev @%p, ofdev @%p, dev @%p, kobj @%p\n",
+1 -2
drivers/media/common/videobuf2/videobuf2-dma-contig.c
···
 		return -ENODEV;
 	}
 	if (dma_get_max_seg_size(dev) < size)
-		return dma_set_max_seg_size(dev, size);
-
+		dma_set_max_seg_size(dev, size);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(vb2_dma_contig_set_max_seg_size);
+6 -1
drivers/media/pci/intel/ipu6/Kconfig
···
 	depends on VIDEO_DEV
 	depends on X86 && X86_64 && HAS_DMA
 	depends on IPU_BRIDGE || !IPU_BRIDGE
+	#
+	# This driver incorrectly tries to override the dma_ops. It should
+	# never have done that, but for now keep it working on architectures
+	# that use dma ops
+	#
+	depends on ARCH_HAS_DMA_OPS
 	select AUXILIARY_BUS
-	select DMA_OPS
 	select IOMMU_IOVA
 	select VIDEO_V4L2_SUBDEV_API
 	select MEDIA_CONTROLLER
+1 -3
drivers/media/pci/intel/ipu6/ipu6.c
···
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to set DMA mask\n");
 
-	ret = dma_set_max_seg_size(dev, UINT_MAX);
-	if (ret)
-		return dev_err_probe(dev, ret, "Failed to set max_seg_size\n");
+	dma_set_max_seg_size(dev, UINT_MAX);
 
 	ret = ipu6_pci_config_setup(pdev, isp->hw_ver);
 	if (ret)
+2 -1
drivers/mmc/host/mmci_stm32_sdmmc.c
···
 		host->mmc->max_seg_size = host->mmc->max_req_size;
 	}
 
-	return dma_set_max_seg_size(dev, host->mmc->max_seg_size);
+	dma_set_max_seg_size(dev, host->mmc->max_seg_size);
+	return 0;
 }
 
 static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
+1 -5
drivers/net/ethernet/microsoft/mana/gdma_main.c
···
 	if (err)
 		goto release_region;
 
-	err = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to set dma device segment size\n");
-		goto release_region;
-	}
+	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
 
 	err = -ENOMEM;
 	gc = vzalloc(sizeof(*gc));
+1 -6
drivers/scsi/lpfc/lpfc_init.c
···
 	if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE)
 		sli4_params->sge_supp_len = LPFC_MAX_SGE_SIZE;
 
-	rc = dma_set_max_seg_size(&phba->pcidev->dev, sli4_params->sge_supp_len);
-	if (unlikely(rc)) {
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"6400 Can't set dma maximum segment size\n");
-		return rc;
-	}
+	dma_set_max_seg_size(&phba->pcidev->dev, sli4_params->sge_supp_len);
 
 	/*
 	 * Check whether the adapter supports an embedded copy of the
+9 -2
drivers/scsi/scsi_lib.c
···
 	if (shost->no_highmem)
 		lim->features |= BLK_FEAT_BOUNCE_HIGH;
 
-	dma_set_seg_boundary(dev, shost->dma_boundary);
-	dma_set_max_seg_size(dev, shost->max_segment_size);
+	/*
+	 * Propagate the DMA formation properties to the dma-mapping layer as
+	 * a courtesy service to the LLDDs. This needs to check that the buses
+	 * actually support the DMA API first, though.
+	 */
+	if (dev->dma_parms) {
+		dma_set_seg_boundary(dev, shost->dma_boundary);
+		dma_set_max_seg_size(dev, shost->max_segment_size);
+	}
 }
 EXPORT_SYMBOL_GPL(scsi_init_limits);
 
+7 -3
drivers/vdpa/Kconfig
···
 
 config VDPA_SIM
 	tristate "vDPA device simulator core"
-	depends on RUNTIME_TESTING_MENU && HAS_DMA
-	select DMA_OPS
+	depends on RUNTIME_TESTING_MENU
 	select VHOST_RING
 	select IOMMU_IOVA
 	help
···
 config VDPA_USER
 	tristate "VDUSE (vDPA Device in Userspace) support"
 	depends on EVENTFD && MMU && HAS_DMA
-	select DMA_OPS
+	#
+	# This driver incorrectly tries to override the dma_ops. It should
+	# never have done that, but for now keep it working on architectures
+	# that use dma ops
+	#
+	depends on ARCH_HAS_DMA_OPS
 	select VHOST_IOTLB
 	select IOMMU_IOVA
 	help
+2 -2
drivers/xen/Kconfig
···
 
 config SWIOTLB_XEN
 	def_bool y
+	depends on ARCH_HAS_DMA_OPS
 	depends on XEN_PV || ARM || ARM64
-	select DMA_OPS
 	select SWIOTLB
 
 config XEN_PCI_STUB
···
 
 config XEN_GRANT_DMA_OPS
 	bool
-	select DMA_OPS
 
 config XEN_VIRTIO
 	bool "Xen virtio support"
+	depends on ARCH_HAS_DMA_OPS
 	depends on VIRTIO
 	select XEN_GRANT_DMA_OPS
 	select XEN_GRANT_DMA_IOMMU if OF
+6 -1
include/linux/device.h
···
  *		for dma allocations. This flag is managed by the dma ops
  *		instance from ->dma_supported.
  * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
+ * @dma_iommu: Device is using default IOMMU implementation for DMA and
+ *		doesn't rely on dma_ops structure.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
···
 	struct dev_pin_info	*pins;
 #endif
 	struct dev_msi_info	msi;
-#ifdef CONFIG_DMA_OPS
+#ifdef CONFIG_ARCH_HAS_DMA_OPS
 	const struct dma_map_ops *dma_ops;
 #endif
 	u64		*dma_mask;	/* dma mask (if dma'able device) */
···
 #endif
 #ifdef CONFIG_DMA_NEED_SYNC
 	bool			dma_skip_sync:1;
+#endif
+#ifdef CONFIG_IOMMU_DMA
+	bool			dma_iommu:1;
 #endif
 };
 
+1 -1
include/linux/dma-direct.h
···
 #include <linux/mem_encrypt.h>
 #include <linux/swiotlb.h>
 
-extern unsigned int zone_dma_bits;
+extern u64 zone_dma_limit;
 
 /*
  * Record the mapping of CPU physical to DMA addresses for a given region.
+3 -16
include/linux/dma-map-ops.h
···
 struct cma;
 struct iommu_ops;
 
-/*
- * Values for struct dma_map_ops.flags:
- *
- * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can
- * handle PCI P2PDMA pages in the map_sg/unmap_sg operation.
- * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is
- * coherent and it's not an SWIOTLB buffer.
- */
-#define DMA_F_PCI_P2PDMA_SUPPORTED	(1 << 0)
-#define DMA_F_CAN_SKIP_SYNC		(1 << 1)
-
 struct dma_map_ops {
-	unsigned int flags;
-
 	void *(*alloc)(struct device *dev, size_t size,
 			dma_addr_t *dma_handle, gfp_t gfp,
 			unsigned long attrs);
···
 	unsigned long (*get_merge_boundary)(struct device *dev);
 };
 
-#ifdef CONFIG_DMA_OPS
+#ifdef CONFIG_ARCH_HAS_DMA_OPS
 #include <asm/dma-mapping.h>
 
 static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
···
 {
 	dev->dma_ops = dma_ops;
 }
-#else /* CONFIG_DMA_OPS */
+#else /* CONFIG_ARCH_HAS_DMA_OPS */
 static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
 {
 	return NULL;
···
 		const struct dma_map_ops *dma_ops)
 {
 }
-#endif /* CONFIG_DMA_OPS */
+#endif /* CONFIG_ARCH_HAS_DMA_OPS */
 
 #ifdef CONFIG_DMA_CMA
 extern struct cma *dma_contiguous_default_area;
+10 -15
include/linux/dma-mapping.h
···
 	return SZ_64K;
 }
 
-static inline int dma_set_max_seg_size(struct device *dev, unsigned int size)
+static inline void dma_set_max_seg_size(struct device *dev, unsigned int size)
 {
-	if (dev->dma_parms) {
-		dev->dma_parms->max_segment_size = size;
-		return 0;
-	}
-	return -EIO;
+	if (WARN_ON_ONCE(!dev->dma_parms))
+		return;
+	dev->dma_parms->max_segment_size = size;
 }
 
 static inline unsigned long dma_get_seg_boundary(struct device *dev)
···
 	return (dma_get_seg_boundary(dev) >> page_shift) + 1;
 }
 
-static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
+static inline void dma_set_seg_boundary(struct device *dev, unsigned long mask)
 {
-	if (dev->dma_parms) {
-		dev->dma_parms->segment_boundary_mask = mask;
-		return 0;
-	}
-	return -EIO;
+	if (WARN_ON_ONCE(!dev->dma_parms))
+		return;
+	dev->dma_parms->segment_boundary_mask = mask;
 }
 
 static inline unsigned int dma_get_min_align_mask(struct device *dev)
···
 	return 0;
 }
 
-static inline int dma_set_min_align_mask(struct device *dev,
+static inline void dma_set_min_align_mask(struct device *dev,
 		unsigned int min_align_mask)
 {
 	if (WARN_ON_ONCE(!dev->dma_parms))
-		return -EIO;
+		return;
 	dev->dma_parms->min_align_mask = min_align_mask;
-	return 0;
 }
 
 #ifndef dma_get_cache_alignment
+155
include/linux/iommu-dma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved 4 + * 5 + * DMA operations that map physical memory through IOMMU. 6 + */ 7 + #ifndef _LINUX_IOMMU_DMA_H 8 + #define _LINUX_IOMMU_DMA_H 9 + 10 + #include <linux/dma-direction.h> 11 + 12 + #ifdef CONFIG_IOMMU_DMA 13 + static inline bool use_dma_iommu(struct device *dev) 14 + { 15 + return dev->dma_iommu; 16 + } 17 + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 18 + unsigned long offset, size_t size, enum dma_data_direction dir, 19 + unsigned long attrs); 20 + void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, 21 + size_t size, enum dma_data_direction dir, unsigned long attrs); 22 + int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 23 + enum dma_data_direction dir, unsigned long attrs); 24 + void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 25 + enum dma_data_direction dir, unsigned long attrs); 26 + void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 27 + gfp_t gfp, unsigned long attrs); 28 + int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 29 + void *cpu_addr, dma_addr_t dma_addr, size_t size, 30 + unsigned long attrs); 31 + int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 32 + void *cpu_addr, dma_addr_t dma_addr, size_t size, 33 + unsigned long attrs); 34 + unsigned long iommu_dma_get_merge_boundary(struct device *dev); 35 + size_t iommu_dma_opt_mapping_size(void); 36 + size_t iommu_dma_max_mapping_size(struct device *dev); 37 + void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, 38 + dma_addr_t handle, unsigned long attrs); 39 + dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, 40 + size_t size, enum dma_data_direction dir, unsigned long attrs); 41 + void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, 42 + size_t size, enum dma_data_direction dir, unsigned long attrs); 43 + struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, 44 + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); 45 + void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 46 + struct sg_table *sgt, enum dma_data_direction dir); 47 + void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 48 + size_t size, enum dma_data_direction dir); 49 + void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, 50 + size_t size, enum dma_data_direction dir); 51 + void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, 52 + int nelems, enum dma_data_direction dir); 53 + void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, 54 + int nelems, enum dma_data_direction dir); 55 + #else 56 + static inline bool use_dma_iommu(struct device *dev) 57 + { 58 + return false; 59 + } 60 + static inline dma_addr_t iommu_dma_map_page(struct device *dev, 61 + struct page *page, unsigned long offset, size_t size, 62 + enum dma_data_direction dir, unsigned long attrs) 63 + { 64 + return DMA_MAPPING_ERROR; 65 + } 66 + static inline void iommu_dma_unmap_page(struct device *dev, 67 + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, 68 + unsigned long attrs) 69 + { 70 + } 71 + static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, 72 + int nents, enum dma_data_direction dir, unsigned long attrs) 73 + { 74 + return -EINVAL; 75 + } 76 + static 
inline void iommu_dma_unmap_sg(struct device *dev, 77 + struct scatterlist *sg, int nents, enum dma_data_direction dir, 78 + unsigned long attrs) 79 + { 80 + } 81 + static inline void *iommu_dma_alloc(struct device *dev, size_t size, 82 + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 83 + { 84 + return NULL; 85 + } 86 + static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 87 + void *cpu_addr, dma_addr_t dma_addr, size_t size, 88 + unsigned long attrs) 89 + { 90 + return -EINVAL; 91 + } 92 + static inline int iommu_dma_get_sgtable(struct device *dev, 93 + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, 94 + size_t size, unsigned long attrs) 95 + { 96 + return -EINVAL; 97 + } 98 + static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) 99 + { 100 + return 0; 101 + } 102 + static inline size_t iommu_dma_opt_mapping_size(void) 103 + { 104 + return 0; 105 + } 106 + static inline size_t iommu_dma_max_mapping_size(struct device *dev) 107 + { 108 + return 0; 109 + } 110 + static inline void iommu_dma_free(struct device *dev, size_t size, 111 + void *cpu_addr, dma_addr_t handle, unsigned long attrs) 112 + { 113 + } 114 + static inline dma_addr_t iommu_dma_map_resource(struct device *dev, 115 + phys_addr_t phys, size_t size, enum dma_data_direction dir, 116 + unsigned long attrs) 117 + { 118 + return DMA_MAPPING_ERROR; 119 + } 120 + static inline void iommu_dma_unmap_resource(struct device *dev, 121 + dma_addr_t handle, size_t size, enum dma_data_direction dir, 122 + unsigned long attrs) 123 + { 124 + } 125 + static inline struct sg_table * 126 + iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, 127 + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) 128 + { 129 + return NULL; 130 + } 131 + static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 132 + struct sg_table *sgt, enum dma_data_direction dir) 133 + { 134 + } 135 + static inline void iommu_dma_sync_single_for_cpu(struct device *dev, 136 + dma_addr_t dma_handle, size_t size, 137 + enum dma_data_direction dir) 138 + { 139 + } 140 + static inline void iommu_dma_sync_single_for_device(struct device *dev, 141 + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 142 + { 143 + } 144 + static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, 145 + struct scatterlist *sgl, int nelems, 146 + enum dma_data_direction dir) 147 + { 148 + } 149 + static inline void iommu_dma_sync_sg_for_device(struct device *dev, 150 + struct scatterlist *sgl, int nelems, 151 + enum dma_data_direction dir) 152 + { 153 + } 154 + #endif /* CONFIG_IOMMU_DMA */ 155 + #endif /* _LINUX_IOMMU_DMA_H */
+341
include/trace/events/dma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #undef TRACE_SYSTEM 3 + #define TRACE_SYSTEM dma 4 + 5 + #if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) 6 + #define _TRACE_DMA_H 7 + 8 + #include <linux/tracepoint.h> 9 + #include <linux/dma-direction.h> 10 + #include <linux/dma-mapping.h> 11 + #include <trace/events/mmflags.h> 12 + 13 + TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); 14 + TRACE_DEFINE_ENUM(DMA_TO_DEVICE); 15 + TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); 16 + TRACE_DEFINE_ENUM(DMA_NONE); 17 + 18 + #define decode_dma_data_direction(dir) \ 19 + __print_symbolic(dir, \ 20 + { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ 21 + { DMA_TO_DEVICE, "TO_DEVICE" }, \ 22 + { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ 23 + { DMA_NONE, "NONE" }) 24 + 25 + #define decode_dma_attrs(attrs) \ 26 + __print_flags(attrs, "|", \ 27 + { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ 28 + { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ 29 + { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ 30 + { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ 31 + { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ 32 + { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ 33 + { DMA_ATTR_NO_WARN, "NO_WARN" }, \ 34 + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) 35 + 36 + DECLARE_EVENT_CLASS(dma_map, 37 + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, 38 + size_t size, enum dma_data_direction dir, unsigned long attrs), 39 + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), 40 + 41 + TP_STRUCT__entry( 42 + __string(device, dev_name(dev)) 43 + __field(u64, phys_addr) 44 + __field(u64, dma_addr) 45 + __field(size_t, size) 46 + __field(enum dma_data_direction, dir) 47 + __field(unsigned long, attrs) 48 + ), 49 + 50 + TP_fast_assign( 51 + __assign_str(device); 52 + __entry->phys_addr = phys_addr; 53 + __entry->dma_addr = dma_addr; 54 + __entry->size = size; 55 + __entry->dir = dir; 56 + __entry->attrs = attrs; 57 + ), 58 + 59 + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", 60 + __get_str(device), 61 + decode_dma_data_direction(__entry->dir), 62 + __entry->dma_addr, 63 + __entry->size, 64 + __entry->phys_addr, 65 + decode_dma_attrs(__entry->attrs)) 66 + ); 67 + 68 + DEFINE_EVENT(dma_map, dma_map_page, 69 + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, 70 + size_t size, enum dma_data_direction dir, unsigned long attrs), 71 + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); 72 + 73 + DEFINE_EVENT(dma_map, dma_map_resource, 74 + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, 75 + size_t size, enum dma_data_direction dir, unsigned long attrs), 76 + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); 77 + 78 + DECLARE_EVENT_CLASS(dma_unmap, 79 + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, 80 + enum dma_data_direction dir, unsigned long attrs), 81 + TP_ARGS(dev, addr, size, dir, attrs), 82 + 83 + TP_STRUCT__entry( 84 + __string(device, dev_name(dev)) 85 + __field(u64, addr) 86 + __field(size_t, size) 87 + __field(enum dma_data_direction, dir) 88 + __field(unsigned long, attrs) 89 + ), 90 + 91 + TP_fast_assign( 92 + __assign_str(device); 93 + __entry->addr = addr; 94 + __entry->size = size; 95 + __entry->dir = dir; 96 + __entry->attrs = attrs; 97 + ), 98 + 99 + TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", 100 + __get_str(device), 101 + decode_dma_data_direction(__entry->dir), 102 + __entry->addr, 103 + __entry->size, 104 + decode_dma_attrs(__entry->attrs)) 105 + ); 106 + 107 + DEFINE_EVENT(dma_unmap, 
dma_unmap_page, 108 + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, 109 + enum dma_data_direction dir, unsigned long attrs), 110 + TP_ARGS(dev, addr, size, dir, attrs)); 111 + 112 + DEFINE_EVENT(dma_unmap, dma_unmap_resource, 113 + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, 114 + enum dma_data_direction dir, unsigned long attrs), 115 + TP_ARGS(dev, addr, size, dir, attrs)); 116 + 117 + TRACE_EVENT(dma_alloc, 118 + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, 119 + size_t size, gfp_t flags, unsigned long attrs), 120 + TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), 121 + 122 + TP_STRUCT__entry( 123 + __string(device, dev_name(dev)) 124 + __field(u64, phys_addr) 125 + __field(u64, dma_addr) 126 + __field(size_t, size) 127 + __field(gfp_t, flags) 128 + __field(unsigned long, attrs) 129 + ), 130 + 131 + TP_fast_assign( 132 + __assign_str(device); 133 + __entry->phys_addr = virt_to_phys(virt_addr); 134 + __entry->dma_addr = dma_addr; 135 + __entry->size = size; 136 + __entry->flags = flags; 137 + __entry->attrs = attrs; 138 + ), 139 + 140 + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", 141 + __get_str(device), 142 + __entry->dma_addr, 143 + __entry->size, 144 + __entry->phys_addr, 145 + show_gfp_flags(__entry->flags), 146 + decode_dma_attrs(__entry->attrs)) 147 + ); 148 + 149 + TRACE_EVENT(dma_free, 150 + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, 151 + size_t size, unsigned long attrs), 152 + TP_ARGS(dev, virt_addr, dma_addr, size, attrs), 153 + 154 + TP_STRUCT__entry( 155 + __string(device, dev_name(dev)) 156 + __field(u64, phys_addr) 157 + __field(u64, dma_addr) 158 + __field(size_t, size) 159 + __field(unsigned long, attrs) 160 + ), 161 + 162 + TP_fast_assign( 163 + __assign_str(device); 164 + __entry->phys_addr = virt_to_phys(virt_addr); 165 + __entry->dma_addr = dma_addr; 166 + __entry->size = size; 167 + __entry->attrs = attrs; 168 + ), 169 + 170 + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", 171 + __get_str(device), 172 + __entry->dma_addr, 173 + __entry->size, 174 + __entry->phys_addr, 175 + decode_dma_attrs(__entry->attrs)) 176 + ); 177 + 178 + TRACE_EVENT(dma_map_sg, 179 + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, 180 + int ents, enum dma_data_direction dir, unsigned long attrs), 181 + TP_ARGS(dev, sg, nents, ents, dir, attrs), 182 + 183 + TP_STRUCT__entry( 184 + __string(device, dev_name(dev)) 185 + __dynamic_array(u64, phys_addrs, nents) 186 + __dynamic_array(u64, dma_addrs, ents) 187 + __dynamic_array(unsigned int, lengths, ents) 188 + __field(enum dma_data_direction, dir) 189 + __field(unsigned long, attrs) 190 + ), 191 + 192 + TP_fast_assign( 193 + int i; 194 + 195 + __assign_str(device); 196 + for (i = 0; i < nents; i++) 197 + ((u64 *)__get_dynamic_array(phys_addrs))[i] = 198 + sg_phys(sg + i); 199 + for (i = 0; i < ents; i++) { 200 + ((u64 *)__get_dynamic_array(dma_addrs))[i] = 201 + sg_dma_address(sg + i); 202 + ((unsigned int *)__get_dynamic_array(lengths))[i] = 203 + sg_dma_len(sg + i); 204 + } 205 + __entry->dir = dir; 206 + __entry->attrs = attrs; 207 + ), 208 + 209 + TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", 210 + __get_str(device), 211 + decode_dma_data_direction(__entry->dir), 212 + __print_array(__get_dynamic_array(dma_addrs), 213 + __get_dynamic_array_len(dma_addrs) / 214 + sizeof(u64), sizeof(u64)), 215 + __print_array(__get_dynamic_array(lengths), 216 + __get_dynamic_array_len(lengths) / 217 + 
sizeof(unsigned int), sizeof(unsigned int)), 218 + __print_array(__get_dynamic_array(phys_addrs), 219 + __get_dynamic_array_len(phys_addrs) / 220 + sizeof(u64), sizeof(u64)), 221 + decode_dma_attrs(__entry->attrs)) 222 + ); 223 + 224 + TRACE_EVENT(dma_unmap_sg, 225 + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, 226 + enum dma_data_direction dir, unsigned long attrs), 227 + TP_ARGS(dev, sg, nents, dir, attrs), 228 + 229 + TP_STRUCT__entry( 230 + __string(device, dev_name(dev)) 231 + __dynamic_array(u64, addrs, nents) 232 + __field(enum dma_data_direction, dir) 233 + __field(unsigned long, attrs) 234 + ), 235 + 236 + TP_fast_assign( 237 + int i; 238 + 239 + __assign_str(device); 240 + for (i = 0; i < nents; i++) 241 + ((u64 *)__get_dynamic_array(addrs))[i] = 242 + sg_phys(sg + i); 243 + __entry->dir = dir; 244 + __entry->attrs = attrs; 245 + ), 246 + 247 + TP_printk("%s dir=%s phys_addrs=%s attrs=%s", 248 + __get_str(device), 249 + decode_dma_data_direction(__entry->dir), 250 + __print_array(__get_dynamic_array(addrs), 251 + __get_dynamic_array_len(addrs) / 252 + sizeof(u64), sizeof(u64)), 253 + decode_dma_attrs(__entry->attrs)) 254 + ); 255 + 256 + DECLARE_EVENT_CLASS(dma_sync_single, 257 + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, 258 + enum dma_data_direction dir), 259 + TP_ARGS(dev, dma_addr, size, dir), 260 + 261 + TP_STRUCT__entry( 262 + __string(device, dev_name(dev)) 263 + __field(u64, dma_addr) 264 + __field(size_t, size) 265 + __field(enum dma_data_direction, dir) 266 + ), 267 + 268 + TP_fast_assign( 269 + __assign_str(device); 270 + __entry->dma_addr = dma_addr; 271 + __entry->size = size; 272 + __entry->dir = dir; 273 + ), 274 + 275 + TP_printk("%s dir=%s dma_addr=%llx size=%zu", 276 + __get_str(device), 277 + decode_dma_data_direction(__entry->dir), 278 + __entry->dma_addr, 279 + __entry->size) 280 + ); 281 + 282 + DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, 283 + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, 284 + enum dma_data_direction dir), 285 + TP_ARGS(dev, dma_addr, size, dir)); 286 + 287 + DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, 288 + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, 289 + enum dma_data_direction dir), 290 + TP_ARGS(dev, dma_addr, size, dir)); 291 + 292 + DECLARE_EVENT_CLASS(dma_sync_sg, 293 + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, 294 + enum dma_data_direction dir), 295 + TP_ARGS(dev, sg, nents, dir), 296 + 297 + TP_STRUCT__entry( 298 + __string(device, dev_name(dev)) 299 + __dynamic_array(u64, dma_addrs, nents) 300 + __dynamic_array(unsigned int, lengths, nents) 301 + __field(enum dma_data_direction, dir) 302 + ), 303 + 304 + TP_fast_assign( 305 + int i; 306 + 307 + __assign_str(device); 308 + for (i = 0; i < nents; i++) { 309 + ((u64 *)__get_dynamic_array(dma_addrs))[i] = 310 + sg_dma_address(sg + i); 311 + ((unsigned int *)__get_dynamic_array(lengths))[i] = 312 + sg_dma_len(sg + i); 313 + } 314 + __entry->dir = dir; 315 + ), 316 + 317 + TP_printk("%s dir=%s dma_addrs=%s sizes=%s", 318 + __get_str(device), 319 + decode_dma_data_direction(__entry->dir), 320 + __print_array(__get_dynamic_array(dma_addrs), 321 + __get_dynamic_array_len(dma_addrs) / 322 + sizeof(u64), sizeof(u64)), 323 + __print_array(__get_dynamic_array(lengths), 324 + __get_dynamic_array_len(lengths) / 325 + sizeof(unsigned int), sizeof(unsigned int))) 326 + ); 327 + 328 + DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, 329 + TP_PROTO(struct device *dev, struct 
scatterlist *sg, int nents, 330 + enum dma_data_direction dir), 331 + TP_ARGS(dev, sg, nents, dir)); 332 + 333 + DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, 334 + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, 335 + enum dma_data_direction dir), 336 + TP_ARGS(dev, sg, nents, dir)); 337 + 338 + #endif /* _TRACE_DMA_H */ 339 + 340 + /* This part must be outside protection */ 341 + #include <trace/define_trace.h>
+3 -4
kernel/dma/Kconfig
···
 	depends on !NO_DMA
 	default y
 
-config DMA_OPS
-	depends on HAS_DMA
+config DMA_OPS_HELPERS
 	bool
 
 #
···
 
 config DMA_NEED_SYNC
 	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
-		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \
-		 SWIOTLB
+		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || \
+		 ARCH_HAS_DMA_OPS || SWIOTLB
 
 config DMA_RESTRICTED_POOL
 	bool "DMA Restricted Pool"
+2 -2
kernel/dma/Makefile
···
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o
-obj-$(CONFIG_DMA_OPS) += ops_helpers.o
-obj-$(CONFIG_DMA_OPS) += dummy.o
+obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o
+obj-$(CONFIG_ARCH_HAS_DMA_OPS) += dummy.o
 obj-$(CONFIG_DMA_CMA) += contiguous.o
 obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
 obj-$(CONFIG_DMA_API_DEBUG) += debug.o
+4 -4
kernel/dma/direct.c
···
  * it for entirely different regions. In that case the arch code needs to
  * override the variable below for dma-direct to work properly.
  */
-unsigned int zone_dma_bits __ro_after_init = 24;
+u64 zone_dma_limit __ro_after_init = DMA_BIT_MASK(24);
 
 static inline dma_addr_t phys_to_dma_direct(struct device *dev,
 		phys_addr_t phys)
···
 	 * zones.
 	 */
 	*phys_limit = dma_to_phys(dev, dma_limit);
-	if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
+	if (*phys_limit <= zone_dma_limit)
 		return GFP_DMA;
 	if (*phys_limit <= DMA_BIT_MASK(32))
 		return GFP_DMA32;
···
 	if (!page)
 		page = alloc_pages_node(node, gfp, get_order(size));
 	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-		dma_free_contiguous(dev, page, size);
+		__free_pages(page, get_order(size));
 		page = NULL;
 
 		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
···
 	 * part of the check.
 	 */
 	if (IS_ENABLED(CONFIG_ZONE_DMA))
-		min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
+		min_mask = min_t(u64, min_mask, zone_dma_limit);
 	return mask >= phys_to_dma_unencrypted(dev, min_mask);
 }
 
+21
kernel/dma/dummy.c
···
 {
 	return DMA_MAPPING_ERROR;
 }
+static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	/*
+	 * Dummy ops doesn't support map_page, so unmap_page should never be
+	 * called.
+	 */
+	WARN_ON_ONCE(true);
+}
 
 static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 		int nelems, enum dma_data_direction dir,
 		unsigned long attrs)
 {
 	return -EINVAL;
+}
+
+static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl,
+		int nelems, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	/*
+	 * Dummy ops doesn't support map_sg, so unmap_sg should never be called.
+	 */
+	WARN_ON_ONCE(true);
 }
 
 static int dma_dummy_supported(struct device *hwdev, u64 mask)
···
 const struct dma_map_ops dma_dummy_ops = {
 	.mmap = dma_dummy_mmap,
 	.map_page = dma_dummy_map_page,
+	.unmap_page = dma_dummy_unmap_page,
 	.map_sg = dma_dummy_map_sg,
+	.unmap_sg = dma_dummy_unmap_sg,
 	.dma_supported = dma_dummy_supported,
 };
+98 -17
kernel/dma/mapping.c
··· 10 10 #include <linux/dma-map-ops.h> 11 11 #include <linux/export.h> 12 12 #include <linux/gfp.h> 13 + #include <linux/iommu-dma.h> 13 14 #include <linux/kmsan.h> 14 15 #include <linux/of_device.h> 15 16 #include <linux/slab.h> 16 17 #include <linux/vmalloc.h> 17 18 #include "debug.h" 18 19 #include "direct.h" 20 + 21 + #define CREATE_TRACE_POINTS 22 + #include <trace/events/dma.h> 19 23 20 24 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ 21 25 defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ ··· 120 116 static bool dma_go_direct(struct device *dev, dma_addr_t mask, 121 117 const struct dma_map_ops *ops) 122 118 { 119 + if (use_dma_iommu(dev)) 120 + return false; 121 + 123 122 if (likely(!ops)) 124 123 return true; 124 + 125 125 #ifdef CONFIG_DMA_OPS_BYPASS 126 126 if (dev->dma_ops_bypass) 127 127 return min_not_zero(mask, dev->bus_dma_limit) >= ··· 167 159 if (dma_map_direct(dev, ops) || 168 160 arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) 169 161 addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); 162 + else if (use_dma_iommu(dev)) 163 + addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); 170 164 else 171 165 addr = ops->map_page(dev, page, offset, size, dir, attrs); 172 166 kmsan_handle_dma(page, offset, size, dir); 167 + trace_dma_map_page(dev, page_to_phys(page) + offset, addr, size, dir, 168 + attrs); 173 169 debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); 174 170 175 171 return addr; ··· 189 177 if (dma_map_direct(dev, ops) || 190 178 arch_dma_unmap_page_direct(dev, addr + size)) 191 179 dma_direct_unmap_page(dev, addr, size, dir, attrs); 192 - else if (ops->unmap_page) 180 + else if (use_dma_iommu(dev)) 181 + iommu_dma_unmap_page(dev, addr, size, dir, attrs); 182 + else 193 183 ops->unmap_page(dev, addr, size, dir, attrs); 184 + trace_dma_unmap_page(dev, addr, size, dir, attrs); 194 185 debug_dma_unmap_page(dev, addr, size, dir); 195 186 } 196 187 EXPORT_SYMBOL(dma_unmap_page_attrs); ··· 212 197 if (dma_map_direct(dev, ops) || 213 198 arch_dma_map_sg_direct(dev, sg, nents)) 214 199 ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); 200 + else if (use_dma_iommu(dev)) 201 + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); 215 202 else 216 203 ents = ops->map_sg(dev, sg, nents, dir, attrs); 217 204 218 205 if (ents > 0) { 219 206 kmsan_handle_dma_sg(sg, nents, dir); 207 + trace_dma_map_sg(dev, sg, nents, ents, dir, attrs); 220 208 debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); 221 209 } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && 222 210 ents != -EIO && ents != -EREMOTEIO)) { ··· 305 287 const struct dma_map_ops *ops = get_dma_ops(dev); 306 288 307 289 BUG_ON(!valid_dma_direction(dir)); 290 + trace_dma_unmap_sg(dev, sg, nents, dir, attrs); 308 291 debug_dma_unmap_sg(dev, sg, nents, dir); 309 292 if (dma_map_direct(dev, ops) || 310 293 arch_dma_unmap_sg_direct(dev, sg, nents)) 311 294 dma_direct_unmap_sg(dev, sg, nents, dir, attrs); 295 + else if (use_dma_iommu(dev)) 296 + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); 312 297 else if (ops->unmap_sg) 313 298 ops->unmap_sg(dev, sg, nents, dir, attrs); 314 299 } ··· 330 309 331 310 if (dma_map_direct(dev, ops)) 332 311 addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); 312 + else if (use_dma_iommu(dev)) 313 + addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs); 333 314 else if (ops->map_resource) 334 315 addr = ops->map_resource(dev, phys_addr, size, dir, attrs); 335 316 317 + trace_dma_map_resource(dev, 
phys_addr, addr, size, dir, attrs); 336 318 debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); 337 319 return addr; 338 320 } ··· 347 323 const struct dma_map_ops *ops = get_dma_ops(dev); 348 324 349 325 BUG_ON(!valid_dma_direction(dir)); 350 - if (!dma_map_direct(dev, ops) && ops->unmap_resource) 326 + if (dma_map_direct(dev, ops)) 327 + ; /* nothing to do: uncached and no swiotlb */ 328 + else if (use_dma_iommu(dev)) 329 + iommu_dma_unmap_resource(dev, addr, size, dir, attrs); 330 + else if (ops->unmap_resource) 351 331 ops->unmap_resource(dev, addr, size, dir, attrs); 332 + trace_dma_unmap_resource(dev, addr, size, dir, attrs); 352 333 debug_dma_unmap_resource(dev, addr, size, dir); 353 334 } 354 335 EXPORT_SYMBOL(dma_unmap_resource); ··· 367 338 BUG_ON(!valid_dma_direction(dir)); 368 339 if (dma_map_direct(dev, ops)) 369 340 dma_direct_sync_single_for_cpu(dev, addr, size, dir); 341 + else if (use_dma_iommu(dev)) 342 + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); 370 343 else if (ops->sync_single_for_cpu) 371 344 ops->sync_single_for_cpu(dev, addr, size, dir); 345 + trace_dma_sync_single_for_cpu(dev, addr, size, dir); 372 346 debug_dma_sync_single_for_cpu(dev, addr, size, dir); 373 347 } 374 348 EXPORT_SYMBOL(__dma_sync_single_for_cpu); ··· 384 352 BUG_ON(!valid_dma_direction(dir)); 385 353 if (dma_map_direct(dev, ops)) 386 354 dma_direct_sync_single_for_device(dev, addr, size, dir); 355 + else if (use_dma_iommu(dev)) 356 + iommu_dma_sync_single_for_device(dev, addr, size, dir); 387 357 else if (ops->sync_single_for_device) 388 358 ops->sync_single_for_device(dev, addr, size, dir); 359 + trace_dma_sync_single_for_device(dev, addr, size, dir); 389 360 debug_dma_sync_single_for_device(dev, addr, size, dir); 390 361 } 391 362 EXPORT_SYMBOL(__dma_sync_single_for_device); ··· 401 366 BUG_ON(!valid_dma_direction(dir)); 402 367 if (dma_map_direct(dev, ops)) 403 368 dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); 369 + else if (use_dma_iommu(dev)) 370 + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); 404 371 else if (ops->sync_sg_for_cpu) 405 372 ops->sync_sg_for_cpu(dev, sg, nelems, dir); 373 + trace_dma_sync_sg_for_cpu(dev, sg, nelems, dir); 406 374 debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); 407 375 } 408 376 EXPORT_SYMBOL(__dma_sync_sg_for_cpu); ··· 418 380 BUG_ON(!valid_dma_direction(dir)); 419 381 if (dma_map_direct(dev, ops)) 420 382 dma_direct_sync_sg_for_device(dev, sg, nelems, dir); 383 + else if (use_dma_iommu(dev)) 384 + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); 421 385 else if (ops->sync_sg_for_device) 422 386 ops->sync_sg_for_device(dev, sg, nelems, dir); 387 + trace_dma_sync_sg_for_device(dev, sg, nelems, dir); 423 388 debug_dma_sync_sg_for_device(dev, sg, nelems, dir); 424 389 } 425 390 EXPORT_SYMBOL(__dma_sync_sg_for_device); ··· 446 405 { 447 406 const struct dma_map_ops *ops = get_dma_ops(dev); 448 407 449 - if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC)) 408 + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) 450 409 /* 451 410 * dma_skip_sync will be reset to %false on first SWIOTLB buffer 452 411 * mapping, if any. 
During the device initialization, it's ··· 487 446 if (dma_alloc_direct(dev, ops)) 488 447 return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, 489 448 size, attrs); 449 + if (use_dma_iommu(dev)) 450 + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, 451 + size, attrs); 490 452 if (!ops->get_sgtable) 491 453 return -ENXIO; 492 454 return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); ··· 526 482 527 483 if (dma_alloc_direct(dev, ops)) 528 484 return dma_direct_can_mmap(dev); 485 + if (use_dma_iommu(dev)) 486 + return true; 529 487 return ops->mmap != NULL; 530 488 } 531 489 EXPORT_SYMBOL_GPL(dma_can_mmap); ··· 554 508 if (dma_alloc_direct(dev, ops)) 555 509 return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, 556 510 attrs); 511 + if (use_dma_iommu(dev)) 512 + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, 513 + attrs); 557 514 if (!ops->mmap) 558 515 return -ENXIO; 559 516 return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); ··· 608 559 609 560 if (dma_alloc_direct(dev, ops)) 610 561 cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); 562 + else if (use_dma_iommu(dev)) 563 + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); 611 564 else if (ops->alloc) 612 565 cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); 613 566 else 614 567 return NULL; 615 568 569 + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, flag, attrs); 616 570 debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); 617 571 return cpu_addr; 618 572 } ··· 640 588 if (!cpu_addr) 641 589 return; 642 590 591 + trace_dma_free(dev, cpu_addr, dma_handle, size, attrs); 643 592 debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); 644 593 if (dma_alloc_direct(dev, ops)) 645 594 dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); 595 + else if (use_dma_iommu(dev)) 596 + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); 646 597 else if (ops->free) 647 598 ops->free(dev, size, cpu_addr, dma_handle, attrs); 648 599 } ··· 666 611 size = PAGE_ALIGN(size); 667 612 if (dma_alloc_direct(dev, ops)) 668 613 return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); 614 + if (use_dma_iommu(dev)) 615 + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); 669 616 if (!ops->alloc_pages_op) 670 617 return NULL; 671 618 return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); ··· 678 621 { 679 622 struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); 680 623 681 - if (page) 624 + if (page) { 625 + trace_dma_map_page(dev, page_to_phys(page), *dma_handle, size, 626 + dir, 0); 682 627 debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); 628 + } 683 629 return page; 684 630 } 685 631 EXPORT_SYMBOL_GPL(dma_alloc_pages); ··· 695 635 size = PAGE_ALIGN(size); 696 636 if (dma_alloc_direct(dev, ops)) 697 637 dma_direct_free_pages(dev, size, page, dma_handle, dir); 638 + else if (use_dma_iommu(dev)) 639 + dma_common_free_pages(dev, size, page, dma_handle, dir); 698 640 else if (ops->free_pages) 699 641 ops->free_pages(dev, size, page, dma_handle, dir); 700 642 } ··· 704 642 void dma_free_pages(struct device *dev, size_t size, struct page *page, 705 643 dma_addr_t dma_handle, enum dma_data_direction dir) 706 644 { 645 + trace_dma_unmap_page(dev, dma_handle, size, dir, 0); 707 646 debug_dma_unmap_page(dev, dma_handle, size, dir); 708 647 __dma_free_pages(dev, size, page, dma_handle, dir); 709 648 } ··· 760 697 761 698 if (ops && ops->alloc_noncontiguous) 762 699 sgt = ops->alloc_noncontiguous(dev, 
size, dir, gfp, attrs); 700 + else if (use_dma_iommu(dev)) 701 + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); 763 702 else 764 703 sgt = alloc_single_sgt(dev, size, dir, gfp); 765 704 766 705 if (sgt) { 767 706 sgt->nents = 1; 707 + trace_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); 768 708 debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); 769 709 } 770 710 return sgt; ··· 788 722 { 789 723 const struct dma_map_ops *ops = get_dma_ops(dev); 790 724 725 + trace_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir, 0); 791 726 debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); 792 727 if (ops && ops->free_noncontiguous) 793 728 ops->free_noncontiguous(dev, size, sgt, dir); 729 + else if (use_dma_iommu(dev)) 730 + iommu_dma_free_noncontiguous(dev, size, sgt, dir); 794 731 else 795 732 free_single_sgt(dev, size, sgt, dir); 796 733 } ··· 841 772 { 842 773 const struct dma_map_ops *ops = get_dma_ops(dev); 843 774 775 + if (use_dma_iommu(dev)) { 776 + if (WARN_ON(ops)) 777 + return false; 778 + return true; 779 + } 780 + 844 781 /* 845 - * ->dma_supported sets the bypass flag, so we must always call 846 - * into the method here unless the device is truly direct mapped. 782 + * ->dma_supported sets and clears the bypass flag, so ignore it here 783 + * and always call into the method if there is one. 847 784 */ 848 - if (!ops) 849 - return dma_direct_supported(dev, mask); 850 - if (!ops->dma_supported) 851 - return 1; 852 - return ops->dma_supported(dev, mask); 785 + if (ops) { 786 + if (!ops->dma_supported) 787 + return true; 788 + return ops->dma_supported(dev, mask); 789 + } 790 + 791 + return dma_direct_supported(dev, mask); 853 792 } 854 793 855 794 bool dma_pci_p2pdma_supported(struct device *dev) 856 795 { 857 796 const struct dma_map_ops *ops = get_dma_ops(dev); 858 - 859 - /* if ops is not set, dma direct will be used which supports P2PDMA */ 860 - if (!ops) 861 - return true; 862 797 863 798 /* 864 799 * Note: dma_ops_bypass is not checked here because P2PDMA should ··· 870 797 * if the specific device is bypassing them. 871 798 */ 872 799 873 - return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; 800 + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ 801 + return !ops; 874 802 } 875 803 EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); 876 804 ··· 939 865 940 866 if (dma_map_direct(dev, ops)) 941 867 size = dma_direct_max_mapping_size(dev); 868 + else if (use_dma_iommu(dev)) 869 + size = iommu_dma_max_mapping_size(dev); 942 870 else if (ops && ops->max_mapping_size) 943 871 size = ops->max_mapping_size(dev); 944 872 ··· 953 877 const struct dma_map_ops *ops = get_dma_ops(dev); 954 878 size_t size = SIZE_MAX; 955 879 956 - if (ops && ops->opt_mapping_size) 880 + if (use_dma_iommu(dev)) 881 + size = iommu_dma_opt_mapping_size(); 882 + else if (ops && ops->opt_mapping_size) 957 883 size = ops->opt_mapping_size(); 958 884 959 885 return min(dma_max_mapping_size(dev), size); ··· 965 887 unsigned long dma_get_merge_boundary(struct device *dev) 966 888 { 967 889 const struct dma_map_ops *ops = get_dma_ops(dev); 890 + 891 + if (use_dma_iommu(dev)) 892 + return iommu_dma_get_merge_boundary(dev); 968 893 969 894 if (!ops || !ops->get_merge_boundary) 970 895 return 0; /* can't merge */
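The common thread in the mapping.c hunks above is a three-way dispatch: dma-direct first, then a direct call into dma-iommu guarded by use_dma_iommu(), and only then the indirect dma_map_ops methods that remain for ARCH_HAS_DMA_OPS architectures. A condensed sketch of that order, using only calls that appear in the diff; it is not the exact kernel source, which additionally handles tracing, debug hooks and the arch_dma_*_direct shortcuts:

#include <linux/dma-map-ops.h>
#include <linux/iommu-dma.h>

/* dma_map_direct() and dma_direct_map_page() are internal to kernel/dma/ */
static dma_addr_t sketch_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_map_direct(dev, ops))		/* dma-direct, possibly via bypass */
		return dma_direct_map_page(dev, page, offset, size, dir, attrs);
	if (use_dma_iommu(dev))			/* dma-iommu without dma_map_ops */
		return iommu_dma_map_page(dev, page, offset, size, dir, attrs);
	return ops->map_page(dev, page, offset, size, dir, attrs);
}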
+11 -3
kernel/dma/ops_helpers.c
··· 4 4 * the allocated memory contains normal pages in the direct kernel mapping. 5 5 */ 6 6 #include <linux/dma-map-ops.h> 7 + #include <linux/iommu-dma.h> 7 8 8 9 static struct page *dma_common_vaddr_to_page(void *cpu_addr) 9 10 { ··· 71 70 if (!page) 72 71 return NULL; 73 72 74 - *dma_handle = ops->map_page(dev, page, 0, size, dir, 75 - DMA_ATTR_SKIP_CPU_SYNC); 73 + if (use_dma_iommu(dev)) 74 + *dma_handle = iommu_dma_map_page(dev, page, 0, size, dir, 75 + DMA_ATTR_SKIP_CPU_SYNC); 76 + else 77 + *dma_handle = ops->map_page(dev, page, 0, size, dir, 78 + DMA_ATTR_SKIP_CPU_SYNC); 76 79 if (*dma_handle == DMA_MAPPING_ERROR) { 77 80 dma_free_contiguous(dev, page, size); 78 81 return NULL; ··· 91 86 { 92 87 const struct dma_map_ops *ops = get_dma_ops(dev); 93 88 94 - if (ops->unmap_page) 89 + if (use_dma_iommu(dev)) 90 + iommu_dma_unmap_page(dev, dma_handle, size, dir, 91 + DMA_ATTR_SKIP_CPU_SYNC); 92 + else if (ops->unmap_page) 95 93 ops->unmap_page(dev, dma_handle, size, dir, 96 94 DMA_ATTR_SKIP_CPU_SYNC); 97 95 dma_free_contiguous(dev, page, size);
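ops_helpers.c now routes dma_common_alloc_pages()/dma_common_free_pages() through iommu_dma_map_page()/iommu_dma_unmap_page() when use_dma_iommu() is true; from a driver's point of view nothing changes. A minimal, hypothetical usage of the public dma_alloc_pages()/dma_free_pages() API these helpers back (device, size and direction are illustrative only):

#include <linux/dma-mapping.h>

static int sketch_alloc_buffer(struct device *dev)
{
	dma_addr_t dma;
	struct page *page;

	page = dma_alloc_pages(dev, PAGE_SIZE, &dma, DMA_TO_DEVICE, GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* ... hand "dma" to the device, fill page_address(page) ... */

	dma_free_pages(dev, PAGE_SIZE, page, dma, DMA_TO_DEVICE);
	return 0;
}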
+2 -2
kernel/dma/pool.c
··· 70 70 /* CMA can't cross zone boundaries, see cma_activate_area() */ 71 71 end = cma_get_base(cma) + size - 1; 72 72 if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) 73 - return end <= DMA_BIT_MASK(zone_dma_bits); 73 + return end <= zone_dma_limit; 74 74 if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) 75 - return end <= DMA_BIT_MASK(32); 75 + return end <= max(DMA_BIT_MASK(32), zone_dma_limit); 76 76 return true; 77 77 } 78 78
+4 -2
kernel/dma/remap.c
··· 10 10 { 11 11 struct vm_struct *area = find_vm_area(cpu_addr); 12 12 13 - if (!area || area->flags != VM_DMA_COHERENT) 13 + if (!area || !(area->flags & VM_DMA_COHERENT)) 14 14 return NULL; 15 + WARN(area->flags != VM_DMA_COHERENT, 16 + "unexpected flags in area: %p\n", cpu_addr); 15 17 return area->pages; 16 18 } 17 19 ··· 63 61 { 64 62 struct vm_struct *area = find_vm_area(cpu_addr); 65 63 66 - if (!area || area->flags != VM_DMA_COHERENT) { 64 + if (!area || !(area->flags & VM_DMA_COHERENT)) { 67 65 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); 68 66 return; 69 67 }
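The remap.c hunks switch the VM_DMA_COHERENT test from strict equality to a bit mask, warning instead of failing when an area carries additional flag bits. A minimal illustration of the new check, mirroring the lookup path in the diff (the helper name is made up for the example):

#include <linux/vmalloc.h>

static bool sketch_area_is_dma_coherent(const struct vm_struct *area)
{
	if (!area || !(area->flags & VM_DMA_COHERENT))
		return false;
	WARN_ON(area->flags != VM_DMA_COHERENT);	/* extra bits: warn only */
	return true;
}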
+3 -3
kernel/dma/swiotlb.c
··· 450 450 if (!remap) 451 451 io_tlb_default_mem.can_grow = true; 452 452 if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) 453 - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); 453 + io_tlb_default_mem.phys_limit = zone_dma_limit; 454 454 else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) 455 - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); 455 + io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit); 456 456 else 457 457 io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); 458 458 #endif ··· 629 629 } 630 630 631 631 gfp &= ~GFP_ZONEMASK; 632 - if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) 632 + if (phys_limit <= zone_dma_limit) 633 633 gfp |= __GFP_DMA; 634 634 else if (phys_limit <= DMA_BIT_MASK(32)) 635 635 gfp |= __GFP_DMA32;
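Both the atomic-pool and swiotlb hunks stop deriving the DMA zone boundary from a bit count (DMA_BIT_MASK(zone_dma_bits)) and compare against the zone_dma_limit address instead, so a DMA zone that ends above 4 GiB (arm64 with all RAM above 4 GiB) is classified correctly. A condensed sketch of the resulting zone selection, modelled on the swiotlb allocation path above; the helper name is an assumption:

#include <linux/dma-direct.h>	/* zone_dma_limit */
#include <linux/dma-mapping.h>	/* DMA_BIT_MASK() */
#include <linux/gfp.h>

static gfp_t sketch_gfp_for_phys_limit(u64 phys_limit)
{
	if (IS_ENABLED(CONFIG_ZONE_DMA) && phys_limit <= zone_dma_limit)
		return __GFP_DMA;
	if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit <= DMA_BIT_MASK(32))
		return __GFP_DMA32;
	return 0;	/* no zone restriction needed */
}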