Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'dma-mapping-4.20-1' of git://git.infradead.org/users/hch/dma-mapping

Pull more dma-mapping updates from Christoph Hellwig:

- various swiotlb cleanups

- do not dip into the swiotlb pool for dma coherent allocations

- add support for not cache coherent DMA to swiotlb

- switch ARM64 to use the generic swiotlb_dma_ops

* tag 'dma-mapping-4.20-1' of git://git.infradead.org/users/hch/dma-mapping:
arm64: use the generic swiotlb_dma_ops
swiotlb: add support for non-coherent DMA
swiotlb: don't dip into swiotlb pool for coherent allocations
swiotlb: refactor swiotlb_map_page
swiotlb: use swiotlb_map_page in swiotlb_map_sg_attrs
swiotlb: merge swiotlb_unmap_page and unmap_single
swiotlb: remove the overflow buffer
swiotlb: do not panic on mapping failures
swiotlb: mark is_swiotlb_buffer static
swiotlb: remove a pointless comment

+128 -494
+4
arch/arm64/Kconfig
··· 11 11 select ARCH_CLOCKSOURCE_DATA 12 12 select ARCH_HAS_DEBUG_VIRTUAL 13 13 select ARCH_HAS_DEVMEM_IS_ALLOWED 14 + select ARCH_HAS_DMA_COHERENT_TO_PFN 15 + select ARCH_HAS_DMA_MMAP_PGPROT 14 16 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI 15 17 select ARCH_HAS_ELF_RANDOMIZE 16 18 select ARCH_HAS_FAST_MULTIPLIER ··· 26 24 select ARCH_HAS_SG_CHAIN 27 25 select ARCH_HAS_STRICT_KERNEL_RWX 28 26 select ARCH_HAS_STRICT_MODULE_RWX 27 + select ARCH_HAS_SYNC_DMA_FOR_DEVICE 28 + select ARCH_HAS_SYNC_DMA_FOR_CPU 29 29 select ARCH_HAS_SYSCALL_WRAPPER 30 30 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 31 31 select ARCH_HAVE_NMI_SAFE_CMPXCHG
-1
arch/arm64/include/asm/device.h
··· 23 23 #ifdef CONFIG_XEN 24 24 const struct dma_map_ops *dev_dma_ops; 25 25 #endif 26 - bool dma_coherent; 27 26 }; 28 27 29 28 struct pdev_archdata {
+5 -2
arch/arm64/include/asm/dma-mapping.h
··· 44 44 #define arch_teardown_dma_ops arch_teardown_dma_ops 45 45 #endif 46 46 47 - /* do not use this function in a driver */ 47 + /* 48 + * Do not use this function in a driver, it is only provided for 49 + * arch/arm/mm/xen.c, which is used by arm64 as well. 50 + */ 48 51 static inline bool is_device_dma_coherent(struct device *dev) 49 52 { 50 - return dev->archdata.dma_coherent; 53 + return dev->dma_coherent; 51 54 } 52 55 53 56 #endif /* __KERNEL__ */
+53 -214
arch/arm64/mm/dma-mapping.c
··· 25 25 #include <linux/slab.h> 26 26 #include <linux/genalloc.h> 27 27 #include <linux/dma-direct.h> 28 + #include <linux/dma-noncoherent.h> 28 29 #include <linux/dma-contiguous.h> 29 30 #include <linux/vmalloc.h> 30 31 #include <linux/swiotlb.h> 31 32 #include <linux/pci.h> 32 33 33 34 #include <asm/cacheflush.h> 34 - 35 - static int swiotlb __ro_after_init; 36 - 37 - static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, 38 - bool coherent) 39 - { 40 - if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE)) 41 - return pgprot_writecombine(prot); 42 - return prot; 43 - } 44 35 45 36 static struct gen_pool *atomic_pool __ro_after_init; 46 37 ··· 82 91 return 1; 83 92 } 84 93 85 - static void *__dma_alloc(struct device *dev, size_t size, 86 - dma_addr_t *dma_handle, gfp_t flags, 87 - unsigned long attrs) 94 + void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 95 + gfp_t flags, unsigned long attrs) 88 96 { 89 97 struct page *page; 90 98 void *ptr, *coherent_ptr; 91 - bool coherent = is_device_dma_coherent(dev); 92 - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); 99 + pgprot_t prot = pgprot_writecombine(PAGE_KERNEL); 93 100 94 101 size = PAGE_ALIGN(size); 95 102 96 - if (!coherent && !gfpflags_allow_blocking(flags)) { 103 + if (!gfpflags_allow_blocking(flags)) { 97 104 struct page *page = NULL; 98 105 void *addr = __alloc_from_pool(size, &page, flags); 99 106 ··· 101 112 return addr; 102 113 } 103 114 104 - ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs); 115 + ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); 105 116 if (!ptr) 106 117 goto no_mem; 107 - 108 - /* no need for non-cacheable mapping if coherent */ 109 - if (coherent) 110 - return ptr; 111 118 112 119 /* remove any dirty cache lines on the kernel alias */ 113 120 __dma_flush_area(ptr, size); ··· 118 133 return coherent_ptr; 119 134 120 135 no_map: 121 - swiotlb_free(dev, size, ptr, *dma_handle, attrs); 136 + 
dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs); 122 137 no_mem: 123 138 return NULL; 124 139 } 125 140 126 - static void __dma_free(struct device *dev, size_t size, 127 - void *vaddr, dma_addr_t dma_handle, 128 - unsigned long attrs) 141 + void arch_dma_free(struct device *dev, size_t size, void *vaddr, 142 + dma_addr_t dma_handle, unsigned long attrs) 129 143 { 130 - void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); 144 + if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) { 145 + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); 131 146 132 - size = PAGE_ALIGN(size); 133 - 134 - if (!is_device_dma_coherent(dev)) { 135 - if (__free_from_pool(vaddr, size)) 136 - return; 137 147 vunmap(vaddr); 148 + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); 138 149 } 139 - swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs); 140 150 } 141 151 142 - static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, 143 - unsigned long offset, size_t size, 144 - enum dma_data_direction dir, 145 - unsigned long attrs) 152 + long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, 153 + dma_addr_t dma_addr) 146 154 { 147 - dma_addr_t dev_addr; 148 - 149 - dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); 150 - if (!is_device_dma_coherent(dev) && 151 - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 152 - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); 153 - 154 - return dev_addr; 155 + return __phys_to_pfn(dma_to_phys(dev, dma_addr)); 155 156 } 156 157 157 - 158 - static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, 159 - size_t size, enum dma_data_direction dir, 160 - unsigned long attrs) 158 + pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 159 + unsigned long attrs) 161 160 { 162 - if (!is_device_dma_coherent(dev) && 163 - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 164 - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); 165 - 
swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); 161 + if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE)) 162 + return pgprot_writecombine(prot); 163 + return prot; 166 164 } 167 165 168 - static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, 169 - int nelems, enum dma_data_direction dir, 170 - unsigned long attrs) 166 + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, 167 + size_t size, enum dma_data_direction dir) 171 168 { 172 - struct scatterlist *sg; 173 - int i, ret; 169 + __dma_map_area(phys_to_virt(paddr), size, dir); 170 + } 174 171 175 - ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); 176 - if (!is_device_dma_coherent(dev) && 177 - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 178 - for_each_sg(sgl, sg, ret, i) 179 - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), 180 - sg->length, dir); 172 + void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, 173 + size_t size, enum dma_data_direction dir) 174 + { 175 + __dma_unmap_area(phys_to_virt(paddr), size, dir); 176 + } 177 + 178 + static int __swiotlb_get_sgtable_page(struct sg_table *sgt, 179 + struct page *page, size_t size) 180 + { 181 + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 182 + 183 + if (!ret) 184 + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 181 185 182 186 return ret; 183 - } 184 - 185 - static void __swiotlb_unmap_sg_attrs(struct device *dev, 186 - struct scatterlist *sgl, int nelems, 187 - enum dma_data_direction dir, 188 - unsigned long attrs) 189 - { 190 - struct scatterlist *sg; 191 - int i; 192 - 193 - if (!is_device_dma_coherent(dev) && 194 - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 195 - for_each_sg(sgl, sg, nelems, i) 196 - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), 197 - sg->length, dir); 198 - swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); 199 - } 200 - 201 - static void __swiotlb_sync_single_for_cpu(struct device *dev, 202 - dma_addr_t dev_addr, size_t 
size, 203 - enum dma_data_direction dir) 204 - { 205 - if (!is_device_dma_coherent(dev)) 206 - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); 207 - swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); 208 - } 209 - 210 - static void __swiotlb_sync_single_for_device(struct device *dev, 211 - dma_addr_t dev_addr, size_t size, 212 - enum dma_data_direction dir) 213 - { 214 - swiotlb_sync_single_for_device(dev, dev_addr, size, dir); 215 - if (!is_device_dma_coherent(dev)) 216 - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); 217 - } 218 - 219 - static void __swiotlb_sync_sg_for_cpu(struct device *dev, 220 - struct scatterlist *sgl, int nelems, 221 - enum dma_data_direction dir) 222 - { 223 - struct scatterlist *sg; 224 - int i; 225 - 226 - if (!is_device_dma_coherent(dev)) 227 - for_each_sg(sgl, sg, nelems, i) 228 - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), 229 - sg->length, dir); 230 - swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); 231 - } 232 - 233 - static void __swiotlb_sync_sg_for_device(struct device *dev, 234 - struct scatterlist *sgl, int nelems, 235 - enum dma_data_direction dir) 236 - { 237 - struct scatterlist *sg; 238 - int i; 239 - 240 - swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); 241 - if (!is_device_dma_coherent(dev)) 242 - for_each_sg(sgl, sg, nelems, i) 243 - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), 244 - sg->length, dir); 245 187 } 246 188 247 189 static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, ··· 188 276 189 277 return ret; 190 278 } 191 - 192 - static int __swiotlb_mmap(struct device *dev, 193 - struct vm_area_struct *vma, 194 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 195 - unsigned long attrs) 196 - { 197 - int ret; 198 - unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; 199 - 200 - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, 201 - is_device_dma_coherent(dev)); 202 - 203 - if 
(dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 204 - return ret; 205 - 206 - return __swiotlb_mmap_pfn(vma, pfn, size); 207 - } 208 - 209 - static int __swiotlb_get_sgtable_page(struct sg_table *sgt, 210 - struct page *page, size_t size) 211 - { 212 - int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 213 - 214 - if (!ret) 215 - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 216 - 217 - return ret; 218 - } 219 - 220 - static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, 221 - void *cpu_addr, dma_addr_t handle, size_t size, 222 - unsigned long attrs) 223 - { 224 - struct page *page = phys_to_page(dma_to_phys(dev, handle)); 225 - 226 - return __swiotlb_get_sgtable_page(sgt, page, size); 227 - } 228 - 229 - static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) 230 - { 231 - if (swiotlb) 232 - return swiotlb_dma_supported(hwdev, mask); 233 - return 1; 234 - } 235 - 236 - static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) 237 - { 238 - if (swiotlb) 239 - return swiotlb_dma_mapping_error(hwdev, addr); 240 - return 0; 241 - } 242 - 243 - static const struct dma_map_ops arm64_swiotlb_dma_ops = { 244 - .alloc = __dma_alloc, 245 - .free = __dma_free, 246 - .mmap = __swiotlb_mmap, 247 - .get_sgtable = __swiotlb_get_sgtable, 248 - .map_page = __swiotlb_map_page, 249 - .unmap_page = __swiotlb_unmap_page, 250 - .map_sg = __swiotlb_map_sg_attrs, 251 - .unmap_sg = __swiotlb_unmap_sg_attrs, 252 - .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, 253 - .sync_single_for_device = __swiotlb_sync_single_for_device, 254 - .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, 255 - .sync_sg_for_device = __swiotlb_sync_sg_for_device, 256 - .dma_supported = __swiotlb_dma_supported, 257 - .mapping_error = __swiotlb_dma_mapping_error, 258 - }; 259 279 260 280 static int __init atomic_pool_init(void) 261 281 { ··· 344 500 345 501 static int __init arm64_dma_init(void) 346 502 { 347 - if (swiotlb_force == SWIOTLB_FORCE || 348 
- max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) 349 - swiotlb = 1; 350 - 351 503 WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), 352 504 TAINT_CPU_OUT_OF_SPEC, 353 505 "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", ··· 368 528 dma_addr_t *handle, gfp_t gfp, 369 529 unsigned long attrs) 370 530 { 371 - bool coherent = is_device_dma_coherent(dev); 531 + bool coherent = dev_is_dma_coherent(dev); 372 532 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); 373 533 size_t iosize = size; 374 534 void *addr; ··· 409 569 addr = NULL; 410 570 } 411 571 } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { 412 - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); 572 + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); 413 573 struct page *page; 414 574 415 575 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, ··· 436 596 size >> PAGE_SHIFT); 437 597 } 438 598 } else { 439 - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); 599 + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); 440 600 struct page **pages; 441 601 442 602 pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, ··· 498 658 struct vm_struct *area; 499 659 int ret; 500 660 501 - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, 502 - is_device_dma_coherent(dev)); 661 + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); 503 662 504 663 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 505 664 return ret; ··· 548 709 { 549 710 phys_addr_t phys; 550 711 551 - if (is_device_dma_coherent(dev)) 712 + if (dev_is_dma_coherent(dev)) 552 713 return; 553 714 554 715 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); 555 - __dma_unmap_area(phys_to_virt(phys), size, dir); 716 + arch_sync_dma_for_cpu(dev, phys, size, dir); 556 717 } 557 718 558 719 static void __iommu_sync_single_for_device(struct device *dev, ··· 561 722 { 562 723 phys_addr_t phys; 563 724 564 - if 
(is_device_dma_coherent(dev)) 725 + if (dev_is_dma_coherent(dev)) 565 726 return; 566 727 567 728 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); 568 - __dma_map_area(phys_to_virt(phys), size, dir); 729 + arch_sync_dma_for_device(dev, phys, size, dir); 569 730 } 570 731 571 732 static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, ··· 573 734 enum dma_data_direction dir, 574 735 unsigned long attrs) 575 736 { 576 - bool coherent = is_device_dma_coherent(dev); 737 + bool coherent = dev_is_dma_coherent(dev); 577 738 int prot = dma_info_to_prot(dir, coherent, attrs); 578 739 dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); 579 740 ··· 601 762 struct scatterlist *sg; 602 763 int i; 603 764 604 - if (is_device_dma_coherent(dev)) 765 + if (dev_is_dma_coherent(dev)) 605 766 return; 606 767 607 768 for_each_sg(sgl, sg, nelems, i) 608 - __dma_unmap_area(sg_virt(sg), sg->length, dir); 769 + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); 609 770 } 610 771 611 772 static void __iommu_sync_sg_for_device(struct device *dev, ··· 615 776 struct scatterlist *sg; 616 777 int i; 617 778 618 - if (is_device_dma_coherent(dev)) 779 + if (dev_is_dma_coherent(dev)) 619 780 return; 620 781 621 782 for_each_sg(sgl, sg, nelems, i) 622 - __dma_map_area(sg_virt(sg), sg->length, dir); 783 + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); 623 784 } 624 785 625 786 static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, 626 787 int nelems, enum dma_data_direction dir, 627 788 unsigned long attrs) 628 789 { 629 - bool coherent = is_device_dma_coherent(dev); 790 + bool coherent = dev_is_dma_coherent(dev); 630 791 631 792 if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 632 793 __iommu_sync_sg_for_device(dev, sgl, nelems, dir); ··· 718 879 const struct iommu_ops *iommu, bool coherent) 719 880 { 720 881 if (!dev->dma_ops) 721 - dev->dma_ops = &arm64_swiotlb_dma_ops; 882 + dev->dma_ops = 
&swiotlb_dma_ops; 722 883 723 - dev->archdata.dma_coherent = coherent; 884 + dev->dma_coherent = coherent; 724 885 __iommu_setup_dma_ops(dev, dma_base, size, iommu); 725 886 726 887 #ifdef CONFIG_XEN
+2 -2
arch/powerpc/kernel/dma-swiotlb.c
··· 11 11 * 12 12 */ 13 13 14 - #include <linux/dma-mapping.h> 14 + #include <linux/dma-direct.h> 15 15 #include <linux/memblock.h> 16 16 #include <linux/pfn.h> 17 17 #include <linux/of_platform.h> ··· 59 59 .sync_single_for_device = swiotlb_sync_single_for_device, 60 60 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 61 61 .sync_sg_for_device = swiotlb_sync_sg_for_device, 62 - .mapping_error = swiotlb_dma_mapping_error, 62 + .mapping_error = dma_direct_mapping_error, 63 63 .get_required_mask = swiotlb_powerpc_get_required, 64 64 }; 65 65
+2
include/linux/dma-direct.h
··· 5 5 #include <linux/dma-mapping.h> 6 6 #include <linux/mem_encrypt.h> 7 7 8 + #define DIRECT_MAPPING_ERROR 0 9 + 8 10 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA 9 11 #include <asm/dma-direct.h> 10 12 #else
-9
include/linux/swiotlb.h
··· 67 67 68 68 /* Accessory functions. */ 69 69 70 - void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, 71 - gfp_t flags, unsigned long attrs); 72 - void swiotlb_free(struct device *dev, size_t size, void *vaddr, 73 - dma_addr_t dma_addr, unsigned long attrs); 74 - 75 70 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, 76 71 unsigned long offset, size_t size, 77 72 enum dma_data_direction dir, ··· 102 107 int nelems, enum dma_data_direction dir); 103 108 104 109 extern int 105 - swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); 106 - 107 - extern int 108 110 swiotlb_dma_supported(struct device *hwdev, u64 mask); 109 111 110 112 #ifdef CONFIG_SWIOTLB ··· 113 121 #endif 114 122 115 123 extern void swiotlb_print_info(void); 116 - extern int is_swiotlb_buffer(phys_addr_t paddr); 117 124 extern void swiotlb_set_max_segment(unsigned int); 118 125 119 126 extern const struct dma_map_ops swiotlb_dma_ops;
-2
kernel/dma/direct.c
··· 14 14 #include <linux/pfn.h> 15 15 #include <linux/set_memory.h> 16 16 17 - #define DIRECT_MAPPING_ERROR 0 18 - 19 17 /* 20 18 * Most architectures use ZONE_DMA for the first 16 Megabytes, but 21 19 * some use it for entirely different regions:
+62 -264
kernel/dma/swiotlb.c
··· 21 21 22 22 #include <linux/cache.h> 23 23 #include <linux/dma-direct.h> 24 + #include <linux/dma-noncoherent.h> 24 25 #include <linux/mm.h> 25 26 #include <linux/export.h> 26 27 #include <linux/spinlock.h> ··· 74 73 static unsigned long io_tlb_nslabs; 75 74 76 75 /* 77 - * When the IOMMU overflows we return a fallback buffer. This sets the size. 78 - */ 79 - static unsigned long io_tlb_overflow = 32*1024; 80 - 81 - static phys_addr_t io_tlb_overflow_buffer; 82 - 83 - /* 84 76 * This is a free list describing the number of free entries available from 85 77 * each index 86 78 */ ··· 120 126 return 0; 121 127 } 122 128 early_param("swiotlb", setup_io_tlb_npages); 123 - /* make io_tlb_overflow tunable too? */ 124 129 125 130 unsigned long swiotlb_nr_tbl(void) 126 131 { ··· 187 194 bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); 188 195 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); 189 196 memset(vaddr, 0, bytes); 190 - 191 - vaddr = phys_to_virt(io_tlb_overflow_buffer); 192 - bytes = PAGE_ALIGN(io_tlb_overflow); 193 - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); 194 - memset(vaddr, 0, bytes); 195 197 } 196 198 197 199 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) 198 200 { 199 - void *v_overflow_buffer; 200 201 unsigned long i, bytes; 201 202 202 203 bytes = nslabs << IO_TLB_SHIFT; ··· 198 211 io_tlb_nslabs = nslabs; 199 212 io_tlb_start = __pa(tlb); 200 213 io_tlb_end = io_tlb_start + bytes; 201 - 202 - /* 203 - * Get the overflow emergency buffer 204 - */ 205 - v_overflow_buffer = memblock_virt_alloc_low_nopanic( 206 - PAGE_ALIGN(io_tlb_overflow), 207 - PAGE_SIZE); 208 - if (!v_overflow_buffer) 209 - return -ENOMEM; 210 - 211 - io_tlb_overflow_buffer = __pa(v_overflow_buffer); 212 214 213 215 /* 214 216 * Allocate and initialize the free list array. 
This array is used ··· 306 330 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) 307 331 { 308 332 unsigned long i, bytes; 309 - unsigned char *v_overflow_buffer; 310 333 311 334 bytes = nslabs << IO_TLB_SHIFT; 312 335 ··· 315 340 316 341 set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); 317 342 memset(tlb, 0, bytes); 318 - 319 - /* 320 - * Get the overflow emergency buffer 321 - */ 322 - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, 323 - get_order(io_tlb_overflow)); 324 - if (!v_overflow_buffer) 325 - goto cleanup2; 326 - 327 - set_memory_decrypted((unsigned long)v_overflow_buffer, 328 - io_tlb_overflow >> PAGE_SHIFT); 329 - memset(v_overflow_buffer, 0, io_tlb_overflow); 330 - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); 331 343 332 344 /* 333 345 * Allocate and initialize the free list array. This array is used ··· 352 390 sizeof(int))); 353 391 io_tlb_list = NULL; 354 392 cleanup3: 355 - free_pages((unsigned long)v_overflow_buffer, 356 - get_order(io_tlb_overflow)); 357 - io_tlb_overflow_buffer = 0; 358 - cleanup2: 359 393 io_tlb_end = 0; 360 394 io_tlb_start = 0; 361 395 io_tlb_nslabs = 0; ··· 365 407 return; 366 408 367 409 if (late_alloc) { 368 - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), 369 - get_order(io_tlb_overflow)); 370 410 free_pages((unsigned long)io_tlb_orig_addr, 371 411 get_order(io_tlb_nslabs * sizeof(phys_addr_t))); 372 412 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * ··· 372 416 free_pages((unsigned long)phys_to_virt(io_tlb_start), 373 417 get_order(io_tlb_nslabs << IO_TLB_SHIFT)); 374 418 } else { 375 - memblock_free_late(io_tlb_overflow_buffer, 376 - PAGE_ALIGN(io_tlb_overflow)); 377 419 memblock_free_late(__pa(io_tlb_orig_addr), 378 420 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); 379 421 memblock_free_late(__pa(io_tlb_list), ··· 383 429 max_segment = 0; 384 430 } 385 431 386 - int is_swiotlb_buffer(phys_addr_t paddr) 432 + static int 
is_swiotlb_buffer(phys_addr_t paddr) 387 433 { 388 434 return paddr >= io_tlb_start && paddr < io_tlb_end; 389 435 } ··· 545 591 } 546 592 547 593 /* 548 - * Allocates bounce buffer and returns its physical address. 549 - */ 550 - static phys_addr_t 551 - map_single(struct device *hwdev, phys_addr_t phys, size_t size, 552 - enum dma_data_direction dir, unsigned long attrs) 553 - { 554 - dma_addr_t start_dma_addr; 555 - 556 - if (swiotlb_force == SWIOTLB_NO_FORCE) { 557 - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", 558 - &phys); 559 - return SWIOTLB_MAP_ERROR; 560 - } 561 - 562 - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); 563 - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, 564 - dir, attrs); 565 - } 566 - 567 - /* 568 594 * tlb_addr is the physical address of the bounce buffer to unmap. 569 595 */ 570 596 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, ··· 623 689 } 624 690 } 625 691 626 - static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, 627 - size_t size) 692 + static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, 693 + size_t size, enum dma_data_direction dir, unsigned long attrs) 628 694 { 629 - u64 mask = DMA_BIT_MASK(32); 695 + dma_addr_t dma_addr; 630 696 631 - if (dev && dev->coherent_dma_mask) 632 - mask = dev->coherent_dma_mask; 633 - return addr + size - 1 <= mask; 634 - } 635 - 636 - static void * 637 - swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, 638 - unsigned long attrs) 639 - { 640 - phys_addr_t phys_addr; 641 - 642 - if (swiotlb_force == SWIOTLB_NO_FORCE) 643 - goto out_warn; 644 - 645 - phys_addr = swiotlb_tbl_map_single(dev, 646 - __phys_to_dma(dev, io_tlb_start), 647 - 0, size, DMA_FROM_DEVICE, attrs); 648 - if (phys_addr == SWIOTLB_MAP_ERROR) 649 - goto out_warn; 650 - 651 - *dma_handle = __phys_to_dma(dev, phys_addr); 652 - if (!dma_coherent_ok(dev, *dma_handle, size)) 653 - goto out_unmap; 654 - 
655 - memset(phys_to_virt(phys_addr), 0, size); 656 - return phys_to_virt(phys_addr); 657 - 658 - out_unmap: 659 - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", 660 - (unsigned long long)dev->coherent_dma_mask, 661 - (unsigned long long)*dma_handle); 662 - 663 - /* 664 - * DMA_TO_DEVICE to avoid memcpy in unmap_single. 665 - * DMA_ATTR_SKIP_CPU_SYNC is optional. 666 - */ 667 - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, 668 - DMA_ATTR_SKIP_CPU_SYNC); 669 - out_warn: 670 - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { 671 - dev_warn(dev, 672 - "swiotlb: coherent allocation failed, size=%zu\n", 673 - size); 674 - dump_stack(); 697 + if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { 698 + dev_warn_ratelimited(dev, 699 + "Cannot do DMA to address %pa\n", phys); 700 + return DIRECT_MAPPING_ERROR; 675 701 } 676 - return NULL; 677 - } 678 702 679 - static bool swiotlb_free_buffer(struct device *dev, size_t size, 680 - dma_addr_t dma_addr) 681 - { 682 - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); 703 + /* Oh well, have to allocate and map a bounce buffer. */ 704 + *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), 705 + *phys, size, dir, attrs); 706 + if (*phys == SWIOTLB_MAP_ERROR) 707 + return DIRECT_MAPPING_ERROR; 683 708 684 - WARN_ON_ONCE(irqs_disabled()); 709 + /* Ensure that the address returned is DMA'ble */ 710 + dma_addr = __phys_to_dma(dev, *phys); 711 + if (unlikely(!dma_capable(dev, dma_addr, size))) { 712 + swiotlb_tbl_unmap_single(dev, *phys, size, dir, 713 + attrs | DMA_ATTR_SKIP_CPU_SYNC); 714 + return DIRECT_MAPPING_ERROR; 715 + } 685 716 686 - if (!is_swiotlb_buffer(phys_addr)) 687 - return false; 688 - 689 - /* 690 - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. 691 - * DMA_ATTR_SKIP_CPU_SYNC is optional. 
692 - */ 693 - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, 694 - DMA_ATTR_SKIP_CPU_SYNC); 695 - return true; 696 - } 697 - 698 - static void 699 - swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, 700 - int do_panic) 701 - { 702 - if (swiotlb_force == SWIOTLB_NO_FORCE) 703 - return; 704 - 705 - /* 706 - * Ran out of IOMMU space for this operation. This is very bad. 707 - * Unfortunately the drivers cannot handle this operation properly. 708 - * unless they check for dma_mapping_error (most don't) 709 - * When the mapping is small enough return a static buffer to limit 710 - * the damage, or panic when the transfer is too big. 711 - */ 712 - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", 713 - size); 714 - 715 - if (size <= io_tlb_overflow || !do_panic) 716 - return; 717 - 718 - if (dir == DMA_BIDIRECTIONAL) 719 - panic("DMA: Random memory could be DMA accessed\n"); 720 - if (dir == DMA_FROM_DEVICE) 721 - panic("DMA: Random memory could be DMA written\n"); 722 - if (dir == DMA_TO_DEVICE) 723 - panic("DMA: Random memory could be DMA read\n"); 717 + return dma_addr; 724 718 } 725 719 726 720 /* ··· 663 801 enum dma_data_direction dir, 664 802 unsigned long attrs) 665 803 { 666 - phys_addr_t map, phys = page_to_phys(page) + offset; 804 + phys_addr_t phys = page_to_phys(page) + offset; 667 805 dma_addr_t dev_addr = phys_to_dma(dev, phys); 668 806 669 807 BUG_ON(dir == DMA_NONE); ··· 672 810 * we can safely return the device addr and not worry about bounce 673 811 * buffering it. 674 812 */ 675 - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) 676 - return dev_addr; 677 - 678 - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); 679 - 680 - /* Oh well, have to allocate and map a bounce buffer. 
*/ 681 - map = map_single(dev, phys, size, dir, attrs); 682 - if (map == SWIOTLB_MAP_ERROR) { 683 - swiotlb_full(dev, size, dir, 1); 684 - return __phys_to_dma(dev, io_tlb_overflow_buffer); 813 + if (!dma_capable(dev, dev_addr, size) || 814 + swiotlb_force == SWIOTLB_FORCE) { 815 + trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); 816 + dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs); 685 817 } 686 818 687 - dev_addr = __phys_to_dma(dev, map); 819 + if (!dev_is_dma_coherent(dev) && 820 + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 821 + arch_sync_dma_for_device(dev, phys, size, dir); 688 822 689 - /* Ensure that the address returned is DMA'ble */ 690 - if (dma_capable(dev, dev_addr, size)) 691 - return dev_addr; 692 - 693 - attrs |= DMA_ATTR_SKIP_CPU_SYNC; 694 - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); 695 - 696 - return __phys_to_dma(dev, io_tlb_overflow_buffer); 823 + return dev_addr; 697 824 } 698 825 699 826 /* ··· 693 842 * After this call, reads by the cpu to the buffer are guaranteed to see 694 843 * whatever the device wrote there. 695 844 */ 696 - static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, 697 - size_t size, enum dma_data_direction dir, 698 - unsigned long attrs) 845 + void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 846 + size_t size, enum dma_data_direction dir, 847 + unsigned long attrs) 699 848 { 700 849 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 701 850 702 851 BUG_ON(dir == DMA_NONE); 852 + 853 + if (!dev_is_dma_coherent(hwdev) && 854 + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 855 + arch_sync_dma_for_cpu(hwdev, paddr, size, dir); 703 856 704 857 if (is_swiotlb_buffer(paddr)) { 705 858 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); ··· 720 865 * make dma_mark_clean() take a physical address if necessary. 
721 866 */ 722 867 dma_mark_clean(phys_to_virt(paddr), size); 723 - } 724 - 725 - void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 726 - size_t size, enum dma_data_direction dir, 727 - unsigned long attrs) 728 - { 729 - unmap_single(hwdev, dev_addr, size, dir, attrs); 730 868 } 731 869 732 870 /* ··· 741 893 742 894 BUG_ON(dir == DMA_NONE); 743 895 744 - if (is_swiotlb_buffer(paddr)) { 896 + if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU) 897 + arch_sync_dma_for_cpu(hwdev, paddr, size, dir); 898 + 899 + if (is_swiotlb_buffer(paddr)) 745 900 swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); 746 - return; 747 - } 748 901 749 - if (dir != DMA_FROM_DEVICE) 750 - return; 902 + if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE) 903 + arch_sync_dma_for_device(hwdev, paddr, size, dir); 751 904 752 - dma_mark_clean(phys_to_virt(paddr), size); 905 + if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE) 906 + dma_mark_clean(phys_to_virt(paddr), size); 753 907 } 754 908 755 909 void ··· 775 925 * appropriate dma address and length. They are obtained via 776 926 * sg_dma_{address,length}(SG). 777 927 * 778 - * NOTE: An implementation may be able to use a smaller number of 779 - * DMA address/length pairs than there are SG table elements. 780 - * (for example via virtual mapping capabilities) 781 - * The routine returns the number of addr/length pairs actually 782 - * used, at most nents. 783 - * 784 928 * Device ownership issues as mentioned above for swiotlb_map_page are the 785 929 * same here. 
786 930 */ 787 931 int 788 - swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, 932 + swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, 789 933 enum dma_data_direction dir, unsigned long attrs) 790 934 { 791 935 struct scatterlist *sg; 792 936 int i; 793 937 794 - BUG_ON(dir == DMA_NONE); 795 - 796 938 for_each_sg(sgl, sg, nelems, i) { 797 - phys_addr_t paddr = sg_phys(sg); 798 - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); 799 - 800 - if (swiotlb_force == SWIOTLB_FORCE || 801 - !dma_capable(hwdev, dev_addr, sg->length)) { 802 - phys_addr_t map = map_single(hwdev, sg_phys(sg), 803 - sg->length, dir, attrs); 804 - if (map == SWIOTLB_MAP_ERROR) { 805 - /* Don't panic here, we expect map_sg users 806 - to do proper error handling. */ 807 - swiotlb_full(hwdev, sg->length, dir, 0); 808 - attrs |= DMA_ATTR_SKIP_CPU_SYNC; 809 - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, 810 - attrs); 811 - sg_dma_len(sgl) = 0; 812 - return 0; 813 - } 814 - sg->dma_address = __phys_to_dma(hwdev, map); 815 - } else 816 - sg->dma_address = dev_addr; 939 + sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset, 940 + sg->length, dir, attrs); 941 + if (sg->dma_address == DIRECT_MAPPING_ERROR) 942 + goto out_error; 817 943 sg_dma_len(sg) = sg->length; 818 944 } 945 + 819 946 return nelems; 947 + 948 + out_error: 949 + swiotlb_unmap_sg_attrs(dev, sgl, i, dir, 950 + attrs | DMA_ATTR_SKIP_CPU_SYNC); 951 + sg_dma_len(sgl) = 0; 952 + return 0; 820 953 } 821 954 822 955 /* ··· 817 984 BUG_ON(dir == DMA_NONE); 818 985 819 986 for_each_sg(sgl, sg, nelems, i) 820 - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, 987 + swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir, 821 988 attrs); 822 989 } 823 990 ··· 855 1022 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); 856 1023 } 857 1024 858 - int 859 - swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) 860 - { 861 - return (dma_addr == 
__phys_to_dma(hwdev, io_tlb_overflow_buffer)); 862 - } 863 - 864 1025 /* 865 1026 * Return whether the given device DMA address mask can be supported 866 1027 * properly. For example, if your device can only drive the low 24-bits ··· 867 1040 return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; 868 1041 } 869 1042 870 - void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 871 - gfp_t gfp, unsigned long attrs) 872 - { 873 - void *vaddr; 874 - 875 - /* temporary workaround: */ 876 - if (gfp & __GFP_NOWARN) 877 - attrs |= DMA_ATTR_NO_WARN; 878 - 879 - /* 880 - * Don't print a warning when the first allocation attempt fails. 881 - * swiotlb_alloc_coherent() will print a warning when the DMA memory 882 - * allocation ultimately failed. 883 - */ 884 - gfp |= __GFP_NOWARN; 885 - 886 - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); 887 - if (!vaddr) 888 - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); 889 - return vaddr; 890 - } 891 - 892 - void swiotlb_free(struct device *dev, size_t size, void *vaddr, 893 - dma_addr_t dma_addr, unsigned long attrs) 894 - { 895 - if (!swiotlb_free_buffer(dev, size, dma_addr)) 896 - dma_direct_free(dev, size, vaddr, dma_addr, attrs); 897 - } 898 - 899 1043 const struct dma_map_ops swiotlb_dma_ops = { 900 - .mapping_error = swiotlb_dma_mapping_error, 901 - .alloc = swiotlb_alloc, 902 - .free = swiotlb_free, 1044 + .mapping_error = dma_direct_mapping_error, 1045 + .alloc = dma_direct_alloc, 1046 + .free = dma_direct_free, 903 1047 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 904 1048 .sync_single_for_device = swiotlb_sync_single_for_device, 905 1049 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,