Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

- add dma-mapping and block layer helpers to take care of IOMMU merging
for mmc plus subsequent fixups (Yoshihiro Shimoda)

- rework handling of the pgprot bits for remapping (me)

- take care of the dma direct infrastructure for swiotlb-xen (me)

- improve the dma noncoherent remapping infrastructure (me)

- better defaults for ->mmap, ->get_sgtable and ->get_required_mask
(me)

- cleanup mmaping of coherent DMA allocations (me)

- various misc cleanups (Andy Shevchenko, me)

* tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits)
mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE
mmc: queue: Fix bigger segments usage
arm64: use asm-generic/dma-mapping.h
swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page
swiotlb-xen: simplify cache maintenance
swiotlb-xen: use the same foreign page check everywhere
swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable
xen: remove the exports for xen_{create,destroy}_contiguous_region
xen/arm: remove xen_dma_ops
xen/arm: simplify dma_cache_maint
xen/arm: use dev_is_dma_coherent
xen/arm: consolidate page-coherent.h
xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintenance
arm: remove wrappers for the generic dma remap helpers
dma-mapping: introduce a dma_common_find_pages helper
dma-mapping: always use VM_DMA_COHERENT for generic DMA remap
vmalloc: lift the arm flag for coherent mappings to common code
dma-mapping: provide a better default ->get_required_mask
dma-mapping: remove the dma_declare_coherent_memory export
remoteproc: don't allow modular build
...

+397 -674
+8 -11
Documentation/DMA-API.txt
··· 204 204 of the mapping functions like dma_map_single(), dma_map_page() and 205 205 others should not be larger than the returned value. 206 206 207 + :: 208 + 209 + unsigned long 210 + dma_get_merge_boundary(struct device *dev); 211 + 212 + Returns the DMA merge boundary. If the device cannot merge any the DMA address 213 + segments, the function returns 0. 214 + 207 215 Part Id - Streaming DMA mappings 208 216 -------------------------------- 209 217 ··· 602 594 For reasons of efficiency, most platforms choose to track the declared 603 595 region only at the granularity of a page. For smaller allocations, 604 596 you should use the dma_pool() API. 605 - 606 - :: 607 - 608 - void 609 - dma_release_declared_memory(struct device *dev) 610 - 611 - Remove the memory region previously declared from the system. This 612 - API performs *no* in-use checking for this region and will return 613 - unconditionally having removed all the required structures. It is the 614 - driver's job to ensure that no parts of this memory region are 615 - currently in use. 616 597 617 598 Part III - Debug drivers use of the DMA-API 618 599 -------------------------------------------
+1 -1
Documentation/x86/x86_64/boot-options.rst
··· 230 230 =========================================== 231 231 Multiple x86-64 PCI-DMA mapping implementations exist, for example: 232 232 233 - 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all 233 + 1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all 234 234 (e.g. because you have < 3 GB memory). 235 235 Kernel boot message: "PCI-DMA: Disabling IOMMU" 236 236
-3
arch/Kconfig
··· 793 793 This is relevant on all 32-bit architectures, and 64-bit architectures 794 794 as part of compat syscall handling. 795 795 796 - config ARCH_NO_COHERENT_DMA_MMAP 797 - bool 798 - 799 796 config ARCH_NO_PREEMPT 800 797 bool 801 798
+2
arch/alpha/kernel/pci_iommu.c
··· 955 955 .map_sg = alpha_pci_map_sg, 956 956 .unmap_sg = alpha_pci_unmap_sg, 957 957 .dma_supported = alpha_pci_supported, 958 + .mmap = dma_common_mmap, 959 + .get_sgtable = dma_common_get_sgtable, 958 960 }; 959 961 EXPORT_SYMBOL(alpha_pci_ops);
-6
arch/arc/mm/dma.c
··· 104 104 dev_info(dev, "use %scoherent DMA ops\n", 105 105 dev->dma_coherent ? "" : "non"); 106 106 } 107 - 108 - static int __init atomic_pool_init(void) 109 - { 110 - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); 111 - } 112 - postcore_initcall(atomic_pool_init);
+1 -1
arch/arm/Kconfig
··· 8 8 select ARCH_HAS_DEBUG_VIRTUAL if MMU 9 9 select ARCH_HAS_DEVMEM_IS_ALLOWED 10 10 select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB 11 - select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB 11 + select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE 12 12 select ARCH_HAS_ELF_RANDOMIZE 13 13 select ARCH_HAS_FORTIFY_SOURCE 14 14 select ARCH_HAS_KEEPINITRD
-3
arch/arm/include/asm/device.h
··· 15 15 #ifdef CONFIG_ARM_DMA_USE_IOMMU 16 16 struct dma_iommu_mapping *mapping; 17 17 #endif 18 - #ifdef CONFIG_XEN 19 - const struct dma_map_ops *dev_dma_ops; 20 - #endif 21 18 unsigned int dma_coherent:1; 22 19 unsigned int dma_ops_setup:1; 23 20 };
-6
arch/arm/include/asm/dma-mapping.h
··· 91 91 } 92 92 #endif 93 93 94 - /* do not use this function in a driver */ 95 - static inline bool is_device_dma_coherent(struct device *dev) 96 - { 97 - return dev->archdata.dma_coherent; 98 - } 99 - 100 94 /** 101 95 * arm_dma_alloc - allocate consistent memory for DMA 102 96 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
-1
arch/arm/include/asm/pgtable-nommu.h
··· 62 62 */ 63 63 #define pgprot_noncached(prot) (prot) 64 64 #define pgprot_writecombine(prot) (prot) 65 - #define pgprot_dmacoherent(prot) (prot) 66 65 #define pgprot_device(prot) (prot) 67 66 68 67
-93
arch/arm/include/asm/xen/page-coherent.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _ASM_ARM_XEN_PAGE_COHERENT_H 3 - #define _ASM_ARM_XEN_PAGE_COHERENT_H 4 - 5 - #include <linux/dma-mapping.h> 6 - #include <asm/page.h> 7 2 #include <xen/arm/page-coherent.h> 8 - 9 - static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) 10 - { 11 - if (dev && dev->archdata.dev_dma_ops) 12 - return dev->archdata.dev_dma_ops; 13 - return get_arch_dma_ops(NULL); 14 - } 15 - 16 - static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, 17 - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) 18 - { 19 - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); 20 - } 21 - 22 - static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, 23 - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) 24 - { 25 - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); 26 - } 27 - 28 - static inline void xen_dma_map_page(struct device *hwdev, struct page *page, 29 - dma_addr_t dev_addr, unsigned long offset, size_t size, 30 - enum dma_data_direction dir, unsigned long attrs) 31 - { 32 - unsigned long page_pfn = page_to_xen_pfn(page); 33 - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); 34 - unsigned long compound_pages = 35 - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; 36 - bool local = (page_pfn <= dev_pfn) && 37 - (dev_pfn - page_pfn < compound_pages); 38 - 39 - /* 40 - * Dom0 is mapped 1:1, while the Linux page can span across 41 - * multiple Xen pages, it's not possible for it to contain a 42 - * mix of local and foreign Xen pages. So if the first xen_pfn 43 - * == mfn the page is local otherwise it's a foreign page 44 - * grant-mapped in dom0. If the page is local we can safely 45 - * call the native dma_ops function, otherwise we call the xen 46 - * specific function. 
47 - */ 48 - if (local) 49 - xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); 50 - else 51 - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); 52 - } 53 - 54 - static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 55 - size_t size, enum dma_data_direction dir, unsigned long attrs) 56 - { 57 - unsigned long pfn = PFN_DOWN(handle); 58 - /* 59 - * Dom0 is mapped 1:1, while the Linux page can be spanned accross 60 - * multiple Xen page, it's not possible to have a mix of local and 61 - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a 62 - * foreign mfn will always return false. If the page is local we can 63 - * safely call the native dma_ops function, otherwise we call the xen 64 - * specific function. 65 - */ 66 - if (pfn_valid(pfn)) { 67 - if (xen_get_dma_ops(hwdev)->unmap_page) 68 - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); 69 - } else 70 - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); 71 - } 72 - 73 - static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, 74 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 75 - { 76 - unsigned long pfn = PFN_DOWN(handle); 77 - if (pfn_valid(pfn)) { 78 - if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) 79 - xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); 80 - } else 81 - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); 82 - } 83 - 84 - static inline void xen_dma_sync_single_for_device(struct device *hwdev, 85 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 86 - { 87 - unsigned long pfn = PFN_DOWN(handle); 88 - if (pfn_valid(pfn)) { 89 - if (xen_get_dma_ops(hwdev)->sync_single_for_device) 90 - xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); 91 - } else 92 - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); 93 - } 94 - 95 - #endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
+3 -2
arch/arm/mm/dma-mapping-nommu.c
··· 68 68 69 69 if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) 70 70 return ret; 71 - 72 - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 71 + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 72 + return ret; 73 + return -ENXIO; 73 74 } 74 75 75 76
+14 -70
arch/arm/mm/dma-mapping.c
··· 14 14 #include <linux/list.h> 15 15 #include <linux/init.h> 16 16 #include <linux/device.h> 17 + #include <linux/dma-direct.h> 17 18 #include <linux/dma-mapping.h> 18 19 #include <linux/dma-noncoherent.h> 19 20 #include <linux/dma-contiguous.h> ··· 36 35 #include <asm/mach/map.h> 37 36 #include <asm/system_info.h> 38 37 #include <asm/dma-contiguous.h> 38 + #include <xen/swiotlb-xen.h> 39 39 40 40 #include "dma.h" 41 41 #include "mm.h" ··· 194 192 .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, 195 193 .sync_sg_for_device = arm_dma_sync_sg_for_device, 196 194 .dma_supported = arm_dma_supported, 195 + .get_required_mask = dma_direct_get_required_mask, 197 196 }; 198 197 EXPORT_SYMBOL(arm_dma_ops); 199 198 ··· 215 212 .map_sg = arm_dma_map_sg, 216 213 .map_resource = dma_direct_map_resource, 217 214 .dma_supported = arm_dma_supported, 215 + .get_required_mask = dma_direct_get_required_mask, 218 216 }; 219 217 EXPORT_SYMBOL(arm_coherent_dma_ops); 220 218 ··· 339 335 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 340 336 pgprot_t prot, struct page **ret_page, 341 337 const void *caller, bool want_vaddr); 342 - 343 - static void * 344 - __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, 345 - const void *caller) 346 - { 347 - /* 348 - * DMA allocation can be mapped to user space, so lets 349 - * set VM_USERMAP flags too. 
350 - */ 351 - return dma_common_contiguous_remap(page, size, 352 - VM_ARM_DMA_CONSISTENT | VM_USERMAP, 353 - prot, caller); 354 - } 355 - 356 - static void __dma_free_remap(void *cpu_addr, size_t size) 357 - { 358 - dma_common_free_remap(cpu_addr, size, 359 - VM_ARM_DMA_CONSISTENT | VM_USERMAP); 360 - } 361 338 362 339 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K 363 340 static struct gen_pool *atomic_pool __ro_after_init; ··· 495 510 if (!want_vaddr) 496 511 goto out; 497 512 498 - ptr = __dma_alloc_remap(page, size, gfp, prot, caller); 513 + ptr = dma_common_contiguous_remap(page, size, prot, caller); 499 514 if (!ptr) { 500 515 __dma_free_buffer(page, size); 501 516 return NULL; ··· 562 577 goto out; 563 578 564 579 if (PageHighMem(page)) { 565 - ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); 580 + ptr = dma_common_contiguous_remap(page, size, prot, caller); 566 581 if (!ptr) { 567 582 dma_release_from_contiguous(dev, page, count); 568 583 return NULL; ··· 582 597 { 583 598 if (want_vaddr) { 584 599 if (PageHighMem(page)) 585 - __dma_free_remap(cpu_addr, size); 600 + dma_common_free_remap(cpu_addr, size); 586 601 else 587 602 __dma_remap(page, size, PAGE_KERNEL); 588 603 } ··· 674 689 static void remap_allocator_free(struct arm_dma_free_args *args) 675 690 { 676 691 if (args->want_vaddr) 677 - __dma_free_remap(args->cpu_addr, args->size); 692 + dma_common_free_remap(args->cpu_addr, args->size); 678 693 679 694 __dma_free_buffer(args->page, args->size); 680 695 } ··· 862 877 __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); 863 878 } 864 879 865 - /* 866 - * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems 867 - * that the intention is to allow exporting memory allocated via the 868 - * coherent DMA APIs through the dma_buf API, which only accepts a 869 - * scattertable. This presents a couple of problems: 870 - * 1. Not all memory allocated via the coherent DMA APIs is backed by 871 - * a struct page 872 - * 2. 
Passing coherent DMA memory into the streaming APIs is not allowed 873 - * as we will try to flush the memory through a different alias to that 874 - * actually being used (and the flushes are redundant.) 875 - */ 876 880 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 877 881 void *cpu_addr, dma_addr_t handle, size_t size, 878 882 unsigned long attrs) ··· 1106 1132 * 32-bit DMA. 1107 1133 * Use the generic dma-direct / swiotlb ops code in that case, as that 1108 1134 * handles bounce buffering for us. 1109 - * 1110 - * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the 1111 - * latter is also selected by the Xen code, but that code for now relies 1112 - * on non-NULL dev_dma_ops. To be cleaned up later. 1113 1135 */ 1114 1136 if (IS_ENABLED(CONFIG_ARM_LPAE)) 1115 1137 return NULL; ··· 1343 1373 } 1344 1374 1345 1375 /* 1346 - * Create a CPU mapping for a specified pages 1347 - */ 1348 - static void * 1349 - __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, 1350 - const void *caller) 1351 - { 1352 - return dma_common_pages_remap(pages, size, 1353 - VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); 1354 - } 1355 - 1356 - /* 1357 1376 * Create a mapping in device IO address space for specified pages 1358 1377 */ 1359 1378 static dma_addr_t ··· 1414 1455 1415 1456 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) 1416 1457 { 1417 - struct vm_struct *area; 1418 - 1419 1458 if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) 1420 1459 return __atomic_get_pages(cpu_addr); 1421 1460 1422 1461 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1423 1462 return cpu_addr; 1424 1463 1425 - area = find_vm_area(cpu_addr); 1426 - if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) 1427 - return area->pages; 1428 - return NULL; 1464 + return dma_common_find_pages(cpu_addr); 1429 1465 } 1430 1466 1431 1467 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, ··· 1493 1539 if (attrs & 
DMA_ATTR_NO_KERNEL_MAPPING) 1494 1540 return pages; 1495 1541 1496 - addr = __iommu_alloc_remap(pages, size, gfp, prot, 1542 + addr = dma_common_pages_remap(pages, size, prot, 1497 1543 __builtin_return_address(0)); 1498 1544 if (!addr) 1499 1545 goto err_mapping; ··· 1576 1622 return; 1577 1623 } 1578 1624 1579 - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) { 1580 - dma_common_free_remap(cpu_addr, size, 1581 - VM_ARM_DMA_CONSISTENT | VM_USERMAP); 1582 - } 1625 + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) 1626 + dma_common_free_remap(cpu_addr, size); 1583 1627 1584 1628 __iommu_remove_mapping(dev, handle, size); 1585 1629 __iommu_free_buffer(dev, pages, size, attrs); ··· 2315 2363 set_dma_ops(dev, dma_ops); 2316 2364 2317 2365 #ifdef CONFIG_XEN 2318 - if (xen_initial_domain()) { 2319 - dev->archdata.dev_dma_ops = dev->dma_ops; 2320 - dev->dma_ops = xen_dma_ops; 2321 - } 2366 + if (xen_initial_domain()) 2367 + dev->dma_ops = &xen_swiotlb_dma_ops; 2322 2368 #endif 2323 2369 dev->archdata.dma_ops_setup = true; 2324 2370 } ··· 2350 2400 dma_addr_t dma_addr) 2351 2401 { 2352 2402 return dma_to_pfn(dev, dma_addr); 2353 - } 2354 - 2355 - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 2356 - unsigned long attrs) 2357 - { 2358 - return __get_dma_pgprot(attrs, prot); 2359 2403 } 2360 2404 2361 2405 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-3
arch/arm/mm/mm.h
··· 70 70 #define VM_ARM_MTYPE(mt) ((mt) << 20) 71 71 #define VM_ARM_MTYPE_MASK (0x1f << 20) 72 72 73 - /* consistent regions used by dma_alloc_attrs() */ 74 - #define VM_ARM_DMA_CONSISTENT 0x20000000 75 - 76 73 77 74 struct static_vm { 78 75 struct vm_struct vm;
+37 -92
arch/arm/xen/mm.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 #include <linux/cpu.h> 3 - #include <linux/dma-mapping.h> 3 + #include <linux/dma-noncoherent.h> 4 4 #include <linux/gfp.h> 5 5 #include <linux/highmem.h> 6 6 #include <linux/export.h> ··· 35 35 return __get_free_pages(flags, order); 36 36 } 37 37 38 - enum dma_cache_op { 39 - DMA_UNMAP, 40 - DMA_MAP, 41 - }; 42 38 static bool hypercall_cflush = false; 43 39 44 - /* functions called by SWIOTLB */ 45 - 46 - static void dma_cache_maint(dma_addr_t handle, unsigned long offset, 47 - size_t size, enum dma_data_direction dir, enum dma_cache_op op) 40 + /* buffers in highmem or foreign pages cannot cross page boundaries */ 41 + static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) 48 42 { 49 43 struct gnttab_cache_flush cflush; 50 - unsigned long xen_pfn; 51 - size_t left = size; 52 44 53 - xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; 54 - offset %= XEN_PAGE_SIZE; 45 + cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK; 46 + cflush.offset = xen_offset_in_page(handle); 47 + cflush.op = op; 55 48 56 49 do { 57 - size_t len = left; 58 - 59 - /* buffers in highmem or foreign pages cannot cross page 60 - * boundaries */ 61 - if (len + offset > XEN_PAGE_SIZE) 62 - len = XEN_PAGE_SIZE - offset; 50 + if (size + cflush.offset > XEN_PAGE_SIZE) 51 + cflush.length = XEN_PAGE_SIZE - cflush.offset; 52 + else 53 + cflush.length = size; 63 54 64 - cflush.op = 0; 65 - cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT; 66 - cflush.offset = offset; 67 - cflush.length = len; 55 + HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); 68 56 69 - if (op == DMA_UNMAP && dir != DMA_TO_DEVICE) 70 - cflush.op = GNTTAB_CACHE_INVAL; 71 - if (op == DMA_MAP) { 72 - if (dir == DMA_FROM_DEVICE) 73 - cflush.op = GNTTAB_CACHE_INVAL; 74 - else 75 - cflush.op = GNTTAB_CACHE_CLEAN; 76 - } 77 - if (cflush.op) 78 - HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); 79 - 80 - offset = 0; 81 - xen_pfn++; 82 - left -= 
len; 83 - } while (left); 57 + cflush.offset = 0; 58 + cflush.a.dev_bus_addr += cflush.length; 59 + size -= cflush.length; 60 + } while (size); 84 61 } 85 62 86 - static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, 87 - size_t size, enum dma_data_direction dir) 63 + /* 64 + * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen 65 + * pages, it is not possible for it to contain a mix of local and foreign Xen 66 + * pages. Calling pfn_valid on a foreign mfn will always return false, so if 67 + * pfn_valid returns true the pages is local and we can use the native 68 + * dma-direct functions, otherwise we call the Xen specific version. 69 + */ 70 + void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, 71 + phys_addr_t paddr, size_t size, enum dma_data_direction dir) 88 72 { 89 - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP); 73 + if (pfn_valid(PFN_DOWN(handle))) 74 + arch_sync_dma_for_cpu(dev, paddr, size, dir); 75 + else if (dir != DMA_TO_DEVICE) 76 + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); 90 77 } 91 78 92 - static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, 93 - size_t size, enum dma_data_direction dir) 79 + void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, 80 + phys_addr_t paddr, size_t size, enum dma_data_direction dir) 94 81 { 95 - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP); 96 - } 97 - 98 - void __xen_dma_map_page(struct device *hwdev, struct page *page, 99 - dma_addr_t dev_addr, unsigned long offset, size_t size, 100 - enum dma_data_direction dir, unsigned long attrs) 101 - { 102 - if (is_device_dma_coherent(hwdev)) 103 - return; 104 - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) 105 - return; 106 - 107 - __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir); 108 - } 109 - 110 - void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 111 - size_t size, enum dma_data_direction 
dir, 112 - unsigned long attrs) 113 - 114 - { 115 - if (is_device_dma_coherent(hwdev)) 116 - return; 117 - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) 118 - return; 119 - 120 - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); 121 - } 122 - 123 - void __xen_dma_sync_single_for_cpu(struct device *hwdev, 124 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 125 - { 126 - if (is_device_dma_coherent(hwdev)) 127 - return; 128 - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); 129 - } 130 - 131 - void __xen_dma_sync_single_for_device(struct device *hwdev, 132 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 133 - { 134 - if (is_device_dma_coherent(hwdev)) 135 - return; 136 - __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); 82 + if (pfn_valid(PFN_DOWN(handle))) 83 + arch_sync_dma_for_device(dev, paddr, size, dir); 84 + else if (dir == DMA_FROM_DEVICE) 85 + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); 86 + else 87 + dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN); 137 88 } 138 89 139 90 bool xen_arch_need_swiotlb(struct device *dev, ··· 110 159 * memory and we are not able to flush the cache. 
111 160 */ 112 161 return (!hypercall_cflush && (xen_pfn != bfn) && 113 - !is_device_dma_coherent(dev)); 162 + !dev_is_dma_coherent(dev)); 114 163 } 115 164 116 165 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, ··· 124 173 *dma_handle = pstart; 125 174 return 0; 126 175 } 127 - EXPORT_SYMBOL_GPL(xen_create_contiguous_region); 128 176 129 177 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) 130 178 { 131 179 return; 132 180 } 133 - EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); 134 - 135 - const struct dma_map_ops *xen_dma_ops; 136 - EXPORT_SYMBOL(xen_dma_ops); 137 181 138 182 int __init xen_mm_init(void) 139 183 { ··· 136 190 if (!xen_initial_domain()) 137 191 return 0; 138 192 xen_swiotlb_init(1, false); 139 - xen_dma_ops = &xen_swiotlb_dma_ops; 140 193 141 194 cflush.op = 0; 142 195 cflush.a.dev_bus_addr = 0;
-1
arch/arm64/Kconfig
··· 13 13 select ARCH_HAS_DEBUG_VIRTUAL 14 14 select ARCH_HAS_DEVMEM_IS_ALLOWED 15 15 select ARCH_HAS_DMA_COHERENT_TO_PFN 16 - select ARCH_HAS_DMA_MMAP_PGPROT 17 16 select ARCH_HAS_DMA_PREP_COHERENT 18 17 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI 19 18 select ARCH_HAS_ELF_RANDOMIZE
+1
arch/arm64/include/asm/Kbuild
··· 4 4 generic-y += div64.h 5 5 generic-y += dma.h 6 6 generic-y += dma-contiguous.h 7 + generic-y += dma-mapping.h 7 8 generic-y += early_ioremap.h 8 9 generic-y += emergency-restart.h 9 10 generic-y += hw_irq.h
-28
arch/arm64/include/asm/dma-mapping.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright (C) 2012 ARM Ltd. 4 - */ 5 - #ifndef __ASM_DMA_MAPPING_H 6 - #define __ASM_DMA_MAPPING_H 7 - 8 - #include <linux/types.h> 9 - #include <linux/vmalloc.h> 10 - 11 - #include <xen/xen.h> 12 - #include <asm/xen/hypervisor.h> 13 - 14 - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) 15 - { 16 - return NULL; 17 - } 18 - 19 - /* 20 - * Do not use this function in a driver, it is only provided for 21 - * arch/arm/mm/xen.c, which is used by arm64 as well. 22 - */ 23 - static inline bool is_device_dma_coherent(struct device *dev) 24 - { 25 - return dev->dma_coherent; 26 - } 27 - 28 - #endif /* __ASM_DMA_MAPPING_H */
+12
arch/arm64/include/asm/pgtable.h
··· 437 437 __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) 438 438 #define pgprot_device(prot) \ 439 439 __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) 440 + /* 441 + * DMA allocations for non-coherent devices use what the Arm architecture calls 442 + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses 443 + * and merging of writes. This is different from "Device-nGnR[nE]" memory which 444 + * is intended for MMIO and thus forbids speculation, preserves access size, 445 + * requires strict alignment and can also force write responses to come from the 446 + * endpoint. 447 + */ 448 + #define pgprot_dmacoherent(prot) \ 449 + __pgprot_modify(prot, PTE_ATTRINDX_MASK, \ 450 + PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) 451 + 440 452 #define __HAVE_PHYS_MEM_ACCESS_PROT 441 453 struct file; 442 454 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-75
arch/arm64/include/asm/xen/page-coherent.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H 3 - #define _ASM_ARM64_XEN_PAGE_COHERENT_H 4 - 5 - #include <linux/dma-mapping.h> 6 - #include <asm/page.h> 7 2 #include <xen/arm/page-coherent.h> 8 - 9 - static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, 10 - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) 11 - { 12 - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); 13 - } 14 - 15 - static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, 16 - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) 17 - { 18 - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); 19 - } 20 - 21 - static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, 22 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 23 - { 24 - unsigned long pfn = PFN_DOWN(handle); 25 - 26 - if (pfn_valid(pfn)) 27 - dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); 28 - else 29 - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); 30 - } 31 - 32 - static inline void xen_dma_sync_single_for_device(struct device *hwdev, 33 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 34 - { 35 - unsigned long pfn = PFN_DOWN(handle); 36 - if (pfn_valid(pfn)) 37 - dma_direct_sync_single_for_device(hwdev, handle, size, dir); 38 - else 39 - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); 40 - } 41 - 42 - static inline void xen_dma_map_page(struct device *hwdev, struct page *page, 43 - dma_addr_t dev_addr, unsigned long offset, size_t size, 44 - enum dma_data_direction dir, unsigned long attrs) 45 - { 46 - unsigned long page_pfn = page_to_xen_pfn(page); 47 - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); 48 - unsigned long compound_pages = 49 - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; 50 - bool local = (page_pfn <= dev_pfn) && 51 - (dev_pfn - page_pfn < compound_pages); 52 - 53 - if (local) 54 - dma_direct_map_page(hwdev, page, offset, 
size, dir, attrs); 55 - else 56 - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); 57 - } 58 - 59 - static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 60 - size_t size, enum dma_data_direction dir, unsigned long attrs) 61 - { 62 - unsigned long pfn = PFN_DOWN(handle); 63 - /* 64 - * Dom0 is mapped 1:1, while the Linux page can be spanned accross 65 - * multiple Xen page, it's not possible to have a mix of local and 66 - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a 67 - * foreign mfn will always return false. If the page is local we can 68 - * safely call the native dma_ops function, otherwise we call the xen 69 - * specific function. 70 - */ 71 - if (pfn_valid(pfn)) 72 - dma_direct_unmap_page(hwdev, handle, size, dir, attrs); 73 - else 74 - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); 75 - } 76 - 77 - #endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
+3 -13
arch/arm64/mm/dma-mapping.c
··· 8 8 #include <linux/cache.h> 9 9 #include <linux/dma-noncoherent.h> 10 10 #include <linux/dma-iommu.h> 11 + #include <xen/xen.h> 12 + #include <xen/swiotlb-xen.h> 11 13 12 14 #include <asm/cacheflush.h> 13 - 14 - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 15 - unsigned long attrs) 16 - { 17 - return pgprot_writecombine(prot); 18 - } 19 15 20 16 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, 21 17 size_t size, enum dma_data_direction dir) ··· 29 33 { 30 34 __dma_flush_area(page_address(page), size); 31 35 } 32 - 33 - static int __init arm64_dma_init(void) 34 - { 35 - return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); 36 - } 37 - arch_initcall(arm64_dma_init); 38 36 39 37 #ifdef CONFIG_IOMMU_DMA 40 38 void arch_teardown_dma_ops(struct device *dev) ··· 54 64 55 65 #ifdef CONFIG_XEN 56 66 if (xen_initial_domain()) 57 - dev->dma_ops = xen_dma_ops; 67 + dev->dma_ops = &xen_swiotlb_dma_ops; 58 68 #endif 59 69 }
-1
arch/c6x/Kconfig
··· 20 20 select OF_EARLY_FLATTREE 21 21 select GENERIC_CLOCKEVENTS 22 22 select MODULES_USE_ELF_RELA 23 - select ARCH_NO_COHERENT_DMA_MMAP 24 23 select MMU_GATHER_NO_RANGE if MMU 25 24 26 25 config MMU
-6
arch/csky/mm/dma-mapping.c
··· 14 14 #include <linux/version.h> 15 15 #include <asm/cache.h> 16 16 17 - static int __init atomic_pool_init(void) 18 - { 19 - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); 20 - } 21 - postcore_initcall(atomic_pool_init); 22 - 23 17 void arch_dma_prep_coherent(struct page *page, size_t size) 24 18 { 25 19 if (PageHighMem(page)) {
+2
arch/ia64/hp/common/sba_iommu.c
··· 2069 2069 .map_sg = sba_map_sg_attrs, 2070 2070 .unmap_sg = sba_unmap_sg_attrs, 2071 2071 .dma_supported = sba_dma_supported, 2072 + .mmap = dma_common_mmap, 2073 + .get_sgtable = dma_common_get_sgtable, 2072 2074 }; 2073 2075 2074 2076 static int __init
+1 -1
arch/ia64/kernel/setup.c
··· 259 259 * This function checks if the reserved crashkernel is allowed on the specific 260 260 * IA64 machine flavour. Machines without an IO TLB use swiotlb and require 261 261 * some memory below 4 GB (i.e. in 32 bit area), see the implementation of 262 - * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that 262 + * kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that 263 263 * in kdump case. See the comment in sba_init() in sba_iommu.c. 264 264 * 265 265 * So, the only machvec that really supports loading the kdump kernel
-2
arch/m68k/Kconfig
··· 4 4 default y 5 5 select ARCH_32BIT_OFF_T 6 6 select ARCH_HAS_BINFMT_FLAT 7 - select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE 8 7 select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE 9 8 select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA 10 9 select ARCH_MIGHT_HAVE_PC_PARPORT if ISA 11 - select ARCH_NO_COHERENT_DMA_MMAP if !MMU 12 10 select ARCH_NO_PREEMPT if !COLDFIRE 13 11 select BINFMT_FLAT_ARGVP_ENVP_ON_STACK 14 12 select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
+3
arch/m68k/include/asm/pgtable_mm.h
··· 169 169 ? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \ 170 170 : (prot))) 171 171 172 + pgprot_t pgprot_dmacoherent(pgprot_t prot); 173 + #define pgprot_dmacoherent(prot) pgprot_dmacoherent(prot) 174 + 172 175 #endif /* CONFIG_COLDFIRE */ 173 176 #include <asm-generic/pgtable.h> 174 177 #endif /* !__ASSEMBLY__ */
+1 -2
arch/m68k/kernel/dma.c
··· 23 23 cache_push(page_to_phys(page), size); 24 24 } 25 25 26 - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 27 - unsigned long attrs) 26 + pgprot_t pgprot_dmacoherent(pgprot_t prot) 28 27 { 29 28 if (CPU_IS_040_OR_060) { 30 29 pgprot_val(prot) &= ~_PAGE_CACHE040;
-1
arch/microblaze/Kconfig
··· 9 9 select ARCH_HAS_SYNC_DMA_FOR_CPU 10 10 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 11 11 select ARCH_MIGHT_HAVE_PC_PARPORT 12 - select ARCH_NO_COHERENT_DMA_MMAP if !MMU 13 12 select ARCH_WANT_IPC_PARSE_VERSION 14 13 select BUILDTIME_EXTABLE_SORT 15 14 select TIMER_OF
+8 -1
arch/mips/Kconfig
··· 1119 1119 1120 1120 config DMA_NONCOHERENT 1121 1121 bool 1122 - select ARCH_HAS_DMA_MMAP_PGPROT 1122 + # 1123 + # MIPS allows mixing "slightly different" Cacheability and Coherency 1124 + # Attribute bits. It is believed that the uncached access through 1125 + # KSEG1 and the implementation specific "uncached accelerated" used 1126 + # by pgprot_writecombine can be mixed, and the latter sometimes provides 1127 + # significant advantages. 1128 + # 1129 + select ARCH_HAS_DMA_WRITE_COMBINE 1123 1130 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 1124 1131 select ARCH_HAS_UNCACHED_SEGMENT 1125 1132 select NEED_DMA_MAP_STATE
+2
arch/mips/jazz/jazzdma.c
··· 682 682 .sync_sg_for_device = jazz_dma_sync_sg_for_device, 683 683 .dma_supported = dma_direct_supported, 684 684 .cache_sync = arch_dma_cache_sync, 685 + .mmap = dma_common_mmap, 686 + .get_sgtable = dma_common_get_sgtable, 685 687 }; 686 688 EXPORT_SYMBOL(jazz_dma_ops);
-8
arch/mips/mm/dma-noncoherent.c
··· 65 65 return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr))); 66 66 } 67 67 68 - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 69 - unsigned long attrs) 70 - { 71 - if (attrs & DMA_ATTR_WRITE_COMBINE) 72 - return pgprot_writecombine(prot); 73 - return pgprot_noncached(prot); 74 - } 75 - 76 68 static inline void dma_sync_virt(void *addr, size_t size, 77 69 enum dma_data_direction dir) 78 70 {
-6
arch/nds32/kernel/dma.c
··· 80 80 { 81 81 cache_op(page_to_phys(page), size, cpu_dma_wbinval_range); 82 82 } 83 - 84 - static int __init atomic_pool_init(void) 85 - { 86 - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); 87 - } 88 - postcore_initcall(atomic_pool_init);
-1
arch/parisc/Kconfig
··· 52 52 select GENERIC_SCHED_CLOCK 53 53 select HAVE_UNSTABLE_SCHED_CLOCK if SMP 54 54 select GENERIC_CLOCKEVENTS 55 - select ARCH_NO_COHERENT_DMA_MMAP 56 55 select CPU_NO_EFFICIENT_FFS 57 56 select NEED_DMA_MAP_STATE 58 57 select NEED_SG_DMA_LENGTH
+2
arch/powerpc/kernel/dma-iommu.c
··· 208 208 .sync_single_for_device = dma_iommu_sync_for_device, 209 209 .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu, 210 210 .sync_sg_for_device = dma_iommu_sync_sg_for_device, 211 + .mmap = dma_common_mmap, 212 + .get_sgtable = dma_common_get_sgtable, 211 213 };
+4 -7
arch/powerpc/platforms/ps3/system-bus.c
··· 686 686 return mask >= DMA_BIT_MASK(32); 687 687 } 688 688 689 - static u64 ps3_dma_get_required_mask(struct device *_dev) 690 - { 691 - return DMA_BIT_MASK(32); 692 - } 693 - 694 689 static const struct dma_map_ops ps3_sb_dma_ops = { 695 690 .alloc = ps3_alloc_coherent, 696 691 .free = ps3_free_coherent, 697 692 .map_sg = ps3_sb_map_sg, 698 693 .unmap_sg = ps3_sb_unmap_sg, 699 694 .dma_supported = ps3_dma_supported, 700 - .get_required_mask = ps3_dma_get_required_mask, 701 695 .map_page = ps3_sb_map_page, 702 696 .unmap_page = ps3_unmap_page, 697 + .mmap = dma_common_mmap, 698 + .get_sgtable = dma_common_get_sgtable, 703 699 }; 704 700 705 701 static const struct dma_map_ops ps3_ioc0_dma_ops = { ··· 704 708 .map_sg = ps3_ioc0_map_sg, 705 709 .unmap_sg = ps3_ioc0_unmap_sg, 706 710 .dma_supported = ps3_dma_supported, 707 - .get_required_mask = ps3_dma_get_required_mask, 708 711 .map_page = ps3_ioc0_map_page, 709 712 .unmap_page = ps3_unmap_page, 713 + .mmap = dma_common_mmap, 714 + .get_sgtable = dma_common_get_sgtable, 710 715 }; 711 716 712 717 /**
+2
arch/powerpc/platforms/pseries/vio.c
··· 605 605 .unmap_page = vio_dma_iommu_unmap_page, 606 606 .dma_supported = dma_iommu_dma_supported, 607 607 .get_required_mask = dma_iommu_get_required_mask, 608 + .mmap = dma_common_mmap, 609 + .get_sgtable = dma_common_get_sgtable, 608 610 }; 609 611 610 612 /**
+2
arch/s390/pci/pci_dma.c
··· 668 668 .unmap_sg = s390_dma_unmap_sg, 669 669 .map_page = s390_dma_map_pages, 670 670 .unmap_page = s390_dma_unmap_pages, 671 + .mmap = dma_common_mmap, 672 + .get_sgtable = dma_common_get_sgtable, 671 673 /* dma_supported is unconditionally true without a callback */ 672 674 }; 673 675 EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-1
arch/sh/Kconfig
··· 5 5 select ARCH_HAS_PTE_SPECIAL 6 6 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 7 7 select ARCH_MIGHT_HAVE_PC_PARPORT 8 - select ARCH_NO_COHERENT_DMA_MMAP if !MMU 9 8 select HAVE_PATA_PLATFORM 10 9 select CLKDEV_LOOKUP 11 10 select DMA_DECLARE_COHERENT
-2
arch/unicore32/include/asm/pgtable.h
··· 198 198 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) 199 199 #define pgprot_writecombine(prot) \ 200 200 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) 201 - #define pgprot_dmacoherent(prot) \ 202 - __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) 203 201 204 202 #define pmd_none(pmd) (!pmd_val(pmd)) 205 203 #define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT)
-14
arch/x86/include/asm/xen/page-coherent.h
··· 21 21 free_pages((unsigned long) cpu_addr, get_order(size)); 22 22 } 23 23 24 - static inline void xen_dma_map_page(struct device *hwdev, struct page *page, 25 - dma_addr_t dev_addr, unsigned long offset, size_t size, 26 - enum dma_data_direction dir, unsigned long attrs) { } 27 - 28 - static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 29 - size_t size, enum dma_data_direction dir, 30 - unsigned long attrs) { } 31 - 32 - static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, 33 - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } 34 - 35 - static inline void xen_dma_sync_single_for_device(struct device *hwdev, 36 - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } 37 - 38 24 #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
+3
arch/x86/kernel/amd_gart_64.c
··· 677 677 .unmap_page = gart_unmap_page, 678 678 .alloc = gart_alloc_coherent, 679 679 .free = gart_free_coherent, 680 + .mmap = dma_common_mmap, 681 + .get_sgtable = dma_common_get_sgtable, 680 682 .dma_supported = dma_direct_supported, 683 + .get_required_mask = dma_direct_get_required_mask, 681 684 }; 682 685 683 686 static void gart_iommu_shutdown(void)
+2
arch/x86/kernel/pci-calgary_64.c
··· 468 468 .map_page = calgary_map_page, 469 469 .unmap_page = calgary_unmap_page, 470 470 .dma_supported = dma_direct_supported, 471 + .mmap = dma_common_mmap, 472 + .get_sgtable = dma_common_get_sgtable, 471 473 }; 472 474 473 475 static inline void __iomem * busno_to_bbar(unsigned char num)
-1
arch/x86/kernel/pci-swiotlb.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - /* Glue code to lib/swiotlb.c */ 3 2 4 3 #include <linux/pci.h> 5 4 #include <linux/cache.h>
+1 -1
arch/x86/kernel/setup.c
··· 486 486 ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); 487 487 if (ret) { 488 488 /* 489 - * two parts from lib/swiotlb.c: 489 + * two parts from kernel/dma/swiotlb.c: 490 490 * -swiotlb size: user-specified with swiotlb= or default. 491 491 * 492 492 * -swiotlb overflow buffer: now hardcoded to 32k. We round it
+1 -3
arch/x86/pci/sta2x11-fixup.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * arch/x86/pci/sta2x11-fixup.c 4 - * glue code for lib/swiotlb.c and DMA translation between STA2x11 5 - * AMBA memory mapping and the X86 memory mapping 3 + * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping 6 4 * 7 5 * ST Microelectronics ConneXt (STA2X11/STA2X10) 8 6 *
-2
arch/x86/xen/mmu_pv.c
··· 2625 2625 *dma_handle = virt_to_machine(vstart).maddr; 2626 2626 return success ? 0 : -ENOMEM; 2627 2627 } 2628 - EXPORT_SYMBOL_GPL(xen_create_contiguous_region); 2629 2628 2630 2629 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) 2631 2630 { ··· 2659 2660 2660 2661 spin_unlock_irqrestore(&xen_reservation_lock, flags); 2661 2662 } 2662 - EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); 2663 2663 2664 2664 static noinline void xen_flush_tlb_all(void) 2665 2665 {
-1
arch/xtensa/Kconfig
··· 5 5 select ARCH_HAS_BINFMT_FLAT if !MMU 6 6 select ARCH_HAS_SYNC_DMA_FOR_CPU 7 7 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 8 - select ARCH_NO_COHERENT_DMA_MMAP if !MMU 9 8 select ARCH_USE_QUEUED_RWLOCKS 10 9 select ARCH_USE_QUEUED_SPINLOCKS 11 10 select ARCH_WANT_FRAME_POINTERS
+2 -2
arch/xtensa/kernel/pci-dma.c
··· 167 167 if (PageHighMem(page)) { 168 168 void *p; 169 169 170 - p = dma_common_contiguous_remap(page, size, VM_MAP, 170 + p = dma_common_contiguous_remap(page, size, 171 171 pgprot_noncached(PAGE_KERNEL), 172 172 __builtin_return_address(0)); 173 173 if (!p) { ··· 192 192 page = virt_to_page(platform_vaddr_to_cached(vaddr)); 193 193 } else { 194 194 #ifdef CONFIG_MMU 195 - dma_common_free_remap(vaddr, size, VM_MAP); 195 + dma_common_free_remap(vaddr, size); 196 196 #endif 197 197 page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); 198 198 }
+23
block/blk-settings.c
··· 12 12 #include <linux/lcm.h> 13 13 #include <linux/jiffies.h> 14 14 #include <linux/gfp.h> 15 + #include <linux/dma-mapping.h> 15 16 16 17 #include "blk.h" 17 18 #include "blk-wbt.h" ··· 848 847 q->required_elevator_features = features; 849 848 } 850 849 EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features); 850 + 851 + /** 852 + * blk_queue_can_use_dma_map_merging - configure queue for merging segments. 853 + * @q: the request queue for the device 854 + * @dev: the device pointer for dma 855 + * 856 + * Tell the block layer about merging the segments by dma map of @q. 857 + */ 858 + bool blk_queue_can_use_dma_map_merging(struct request_queue *q, 859 + struct device *dev) 860 + { 861 + unsigned long boundary = dma_get_merge_boundary(dev); 862 + 863 + if (!boundary) 864 + return false; 865 + 866 + /* No need to update max_segment_size. see blk_queue_virt_boundary() */ 867 + blk_queue_virt_boundary(q, boundary); 868 + 869 + return true; 870 + } 871 + EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); 851 872 852 873 static int __init blk_settings_init(void) 853 874 {
+5 -6
drivers/gpu/drm/omapdrm/dss/dispc.c
··· 4609 4609 i734_buf.size = i734.ovli.width * i734.ovli.height * 4610 4610 color_mode_to_bpp(i734.ovli.fourcc) / 8; 4611 4611 4612 - i734_buf.vaddr = dma_alloc_writecombine(&dispc->pdev->dev, 4613 - i734_buf.size, &i734_buf.paddr, 4614 - GFP_KERNEL); 4612 + i734_buf.vaddr = dma_alloc_wc(&dispc->pdev->dev, i734_buf.size, 4613 + &i734_buf.paddr, GFP_KERNEL); 4615 4614 if (!i734_buf.vaddr) { 4616 - dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n", 4615 + dev_err(&dispc->pdev->dev, "%s: dma_alloc_wc failed\n", 4617 4616 __func__); 4618 4617 return -ENOMEM; 4619 4618 } ··· 4625 4626 if (!dispc->feat->has_gamma_i734_bug) 4626 4627 return; 4627 4628 4628 - dma_free_writecombine(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr, 4629 - i734_buf.paddr); 4629 + dma_free_wc(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr, 4630 + i734_buf.paddr); 4630 4631 } 4631 4632 4632 4633 static void dispc_errata_i734_wa(struct dispc_device *dispc)
+2
drivers/iommu/amd_iommu.c
··· 2754 2754 .map_sg = map_sg, 2755 2755 .unmap_sg = unmap_sg, 2756 2756 .dma_supported = amd_iommu_dma_supported, 2757 + .mmap = dma_common_mmap, 2758 + .get_sgtable = dma_common_get_sgtable, 2757 2759 }; 2758 2760 2759 2761 static int init_reserved_iova_ranges(void)
+14 -15
drivers/iommu/dma-iommu.c
··· 548 548 return pages; 549 549 } 550 550 551 - static struct page **__iommu_dma_get_pages(void *cpu_addr) 552 - { 553 - struct vm_struct *area = find_vm_area(cpu_addr); 554 - 555 - if (!area || !area->pages) 556 - return NULL; 557 - return area->pages; 558 - } 559 - 560 551 /** 561 552 * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space 562 553 * @dev: Device to allocate memory for. Must be a real device ··· 615 624 < size) 616 625 goto out_free_sg; 617 626 618 - vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, 627 + vaddr = dma_common_pages_remap(pages, size, prot, 619 628 __builtin_return_address(0)); 620 629 if (!vaddr) 621 630 goto out_unmap; ··· 936 945 * If it the address is remapped, then it's either non-coherent 937 946 * or highmem CMA, or an iommu_dma_alloc_remap() construction. 938 947 */ 939 - pages = __iommu_dma_get_pages(cpu_addr); 948 + pages = dma_common_find_pages(cpu_addr); 940 949 if (!pages) 941 950 page = vmalloc_to_page(cpu_addr); 942 - dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP); 951 + dma_common_free_remap(cpu_addr, alloc_size); 943 952 } else { 944 953 /* Lowmem means a coherent atomic or CMA allocation */ 945 954 page = virt_to_page(cpu_addr); ··· 977 986 pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); 978 987 979 988 cpu_addr = dma_common_contiguous_remap(page, alloc_size, 980 - VM_USERMAP, prot, __builtin_return_address(0)); 989 + prot, __builtin_return_address(0)); 981 990 if (!cpu_addr) 982 991 goto out_free_pages; 983 992 ··· 1043 1052 return -ENXIO; 1044 1053 1045 1054 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { 1046 - struct page **pages = __iommu_dma_get_pages(cpu_addr); 1055 + struct page **pages = dma_common_find_pages(cpu_addr); 1047 1056 1048 1057 if (pages) 1049 1058 return __iommu_dma_mmap(pages, size, vma); ··· 1065 1074 int ret; 1066 1075 1067 1076 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { 1068 - struct page **pages = 
__iommu_dma_get_pages(cpu_addr); 1077 + struct page **pages = dma_common_find_pages(cpu_addr); 1069 1078 1070 1079 if (pages) { 1071 1080 return sg_alloc_table_from_pages(sgt, pages, ··· 1084 1093 return ret; 1085 1094 } 1086 1095 1096 + static unsigned long iommu_dma_get_merge_boundary(struct device *dev) 1097 + { 1098 + struct iommu_domain *domain = iommu_get_dma_domain(dev); 1099 + 1100 + return (1UL << __ffs(domain->pgsize_bitmap)) - 1; 1101 + } 1102 + 1087 1103 static const struct dma_map_ops iommu_dma_ops = { 1088 1104 .alloc = iommu_dma_alloc, 1089 1105 .free = iommu_dma_free, ··· 1106 1108 .sync_sg_for_device = iommu_dma_sync_sg_for_device, 1107 1109 .map_resource = iommu_dma_map_resource, 1108 1110 .unmap_resource = iommu_dma_unmap_resource, 1111 + .get_merge_boundary = iommu_dma_get_merge_boundary, 1109 1112 }; 1110 1113 1111 1114 /*
+2
drivers/iommu/intel-iommu.c
··· 3785 3785 .map_resource = intel_map_resource, 3786 3786 .unmap_resource = intel_unmap_resource, 3787 3787 .dma_supported = dma_direct_supported, 3788 + .mmap = dma_common_mmap, 3789 + .get_sgtable = dma_common_get_sgtable, 3788 3790 }; 3789 3791 3790 3792 static void
+38 -3
drivers/mmc/core/queue.c
··· 21 21 #include "card.h" 22 22 #include "host.h" 23 23 24 + #define MMC_DMA_MAP_MERGE_SEGMENTS 512 25 + 24 26 static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) 25 27 { 26 28 /* Allow only 1 DCMD at a time */ ··· 195 193 blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); 196 194 } 197 195 196 + static unsigned int mmc_get_max_segments(struct mmc_host *host) 197 + { 198 + return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS : 199 + host->max_segs; 200 + } 201 + 198 202 /** 199 203 * mmc_init_request() - initialize the MMC-specific per-request data 200 204 * @q: the request queue ··· 214 206 struct mmc_card *card = mq->card; 215 207 struct mmc_host *host = card->host; 216 208 217 - mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); 209 + mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp); 218 210 if (!mq_rq->sg) 219 211 return -ENOMEM; 220 212 ··· 370 362 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); 371 363 blk_queue_max_hw_sectors(mq->queue, 372 364 min(host->max_blk_count, host->max_req_size / 512)); 373 - blk_queue_max_segments(mq->queue, host->max_segs); 365 + if (host->can_dma_map_merge) 366 + WARN(!blk_queue_can_use_dma_map_merging(mq->queue, 367 + mmc_dev(host)), 368 + "merging was advertised but not possible"); 369 + blk_queue_max_segments(mq->queue, mmc_get_max_segments(host)); 374 370 375 371 if (mmc_card_mmc(card)) 376 372 block_size = card->ext_csd.data_sector_size; 377 373 378 374 blk_queue_logical_block_size(mq->queue, block_size); 379 - blk_queue_max_segment_size(mq->queue, 375 + /* 376 + * After blk_queue_can_use_dma_map_merging() was called with succeed, 377 + * since it calls blk_queue_virt_boundary(), the mmc should not call 378 + * both blk_queue_max_segment_size(). 
379 + */ 380 + if (!host->can_dma_map_merge) 381 + blk_queue_max_segment_size(mq->queue, 380 382 round_down(host->max_seg_size, block_size)); 381 383 382 384 dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); ··· 397 379 mutex_init(&mq->complete_lock); 398 380 399 381 init_waitqueue_head(&mq->wait); 382 + } 383 + 384 + static inline bool mmc_merge_capable(struct mmc_host *host) 385 + { 386 + return host->caps2 & MMC_CAP2_MERGE_CAPABLE; 400 387 } 401 388 402 389 /* Set queue depth to get a reasonable value for q->nr_requests */ ··· 440 417 mq->tag_set.nr_hw_queues = 1; 441 418 mq->tag_set.cmd_size = sizeof(struct mmc_queue_req); 442 419 mq->tag_set.driver_data = mq; 420 + 421 + /* 422 + * Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops, 423 + * the host->can_dma_map_merge should be set before to get max_segs 424 + * from mmc_get_max_segments(). 425 + */ 426 + if (mmc_merge_capable(host) && 427 + host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS && 428 + dma_get_merge_boundary(mmc_dev(host))) 429 + host->can_dma_map_merge = 1; 430 + else 431 + host->can_dma_map_merge = 0; 443 432 444 433 ret = blk_mq_alloc_tag_set(&mq->tag_set); 445 434 if (ret)
+1 -1
drivers/mmc/host/renesas_sdhi_internal_dmac.c
··· 106 106 TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2, 107 107 .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | 108 108 MMC_CAP_CMD23, 109 - .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT, 109 + .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT | MMC_CAP2_MERGE_CAPABLE, 110 110 .bus_shift = 2, 111 111 .scc_offset = 0x1000, 112 112 .taps = rcar_gen3_scc_taps,
+1
drivers/parisc/ccio-dma.c
··· 1024 1024 .unmap_page = ccio_unmap_page, 1025 1025 .map_sg = ccio_map_sg, 1026 1026 .unmap_sg = ccio_unmap_sg, 1027 + .get_sgtable = dma_common_get_sgtable, 1027 1028 }; 1028 1029 1029 1030 #ifdef CONFIG_PROC_FS
+1
drivers/parisc/sba_iommu.c
··· 1084 1084 .unmap_page = sba_unmap_page, 1085 1085 .map_sg = sba_map_sg, 1086 1086 .unmap_sg = sba_unmap_sg, 1087 + .get_sgtable = dma_common_get_sgtable, 1087 1088 }; 1088 1089 1089 1090
+1 -1
drivers/remoteproc/Kconfig
··· 2 2 menu "Remoteproc drivers" 3 3 4 4 config REMOTEPROC 5 - tristate "Support for Remote Processor subsystem" 5 + bool "Support for Remote Processor subsystem" 6 6 depends on HAS_DMA 7 7 select CRC32 8 8 select FW_LOADER
+16 -68
drivers/xen/swiotlb-xen.c
··· 28 28 29 29 #include <linux/memblock.h> 30 30 #include <linux/dma-direct.h> 31 + #include <linux/dma-noncoherent.h> 31 32 #include <linux/export.h> 32 33 #include <xen/swiotlb-xen.h> 33 34 #include <xen/page.h> ··· 392 391 if (map == (phys_addr_t)DMA_MAPPING_ERROR) 393 392 return DMA_MAPPING_ERROR; 394 393 394 + phys = map; 395 395 dev_addr = xen_phys_to_bus(map); 396 396 397 397 /* ··· 404 402 return DMA_MAPPING_ERROR; 405 403 } 406 404 407 - page = pfn_to_page(map >> PAGE_SHIFT); 408 - offset = map & ~PAGE_MASK; 409 405 done: 410 - /* 411 - * we are not interested in the dma_addr returned by xen_dma_map_page, 412 - * only in the potential cache flushes executed by the function. 413 - */ 414 - xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs); 406 + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 407 + xen_dma_sync_for_device(dev, dev_addr, phys, size, dir); 415 408 return dev_addr; 416 409 } 417 410 ··· 418 421 * After this call, reads by the cpu to the buffer are guaranteed to see 419 422 * whatever the device wrote there. 420 423 */ 421 - static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, 422 - size_t size, enum dma_data_direction dir, 423 - unsigned long attrs) 424 + static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 425 + size_t size, enum dma_data_direction dir, unsigned long attrs) 424 426 { 425 427 phys_addr_t paddr = xen_bus_to_phys(dev_addr); 426 428 427 429 BUG_ON(dir == DMA_NONE); 428 430 429 - xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); 431 + if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 432 + xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir); 430 433 431 434 /* NOTE: We use dev_addr here, not paddr! 
*/ 432 435 if (is_xen_swiotlb_buffer(dev_addr)) 433 436 swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs); 434 - } 435 - 436 - static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 437 - size_t size, enum dma_data_direction dir, 438 - unsigned long attrs) 439 - { 440 - xen_unmap_single(hwdev, dev_addr, size, dir, attrs); 441 437 } 442 438 443 439 static void ··· 439 449 { 440 450 phys_addr_t paddr = xen_bus_to_phys(dma_addr); 441 451 442 - xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir); 452 + if (!dev_is_dma_coherent(dev)) 453 + xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir); 443 454 444 455 if (is_xen_swiotlb_buffer(dma_addr)) 445 456 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); ··· 455 464 if (is_xen_swiotlb_buffer(dma_addr)) 456 465 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); 457 466 458 - xen_dma_sync_single_for_device(dev, dma_addr, size, dir); 467 + if (!dev_is_dma_coherent(dev)) 468 + xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir); 459 469 } 460 470 461 471 /* ··· 473 481 BUG_ON(dir == DMA_NONE); 474 482 475 483 for_each_sg(sgl, sg, nelems, i) 476 - xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); 484 + xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), 485 + dir, attrs); 477 486 478 487 } 479 488 ··· 540 547 return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; 541 548 } 542 549 543 - /* 544 - * Create userspace mapping for the DMA-coherent memory. 545 - * This function should be called with the pages from the current domain only, 546 - * passing pages mapped from other domains would lead to memory corruption. 
547 - */ 548 - static int 549 - xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, 550 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 551 - unsigned long attrs) 552 - { 553 - #ifdef CONFIG_ARM 554 - if (xen_get_dma_ops(dev)->mmap) 555 - return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, 556 - dma_addr, size, attrs); 557 - #endif 558 - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 559 - } 560 - 561 - /* 562 - * This function should be called with the pages from the current domain only, 563 - * passing pages mapped from other domains would lead to memory corruption. 564 - */ 565 - static int 566 - xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, 567 - void *cpu_addr, dma_addr_t handle, size_t size, 568 - unsigned long attrs) 569 - { 570 - #ifdef CONFIG_ARM 571 - if (xen_get_dma_ops(dev)->get_sgtable) { 572 - #if 0 573 - /* 574 - * This check verifies that the page belongs to the current domain and 575 - * is not one mapped from another domain. 576 - * This check is for debug only, and should not go to production build 577 - */ 578 - unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); 579 - BUG_ON (!page_is_ram(bfn)); 580 - #endif 581 - return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, 582 - handle, size, attrs); 583 - } 584 - #endif 585 - return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); 586 - } 587 - 588 550 const struct dma_map_ops xen_swiotlb_dma_ops = { 589 551 .alloc = xen_swiotlb_alloc_coherent, 590 552 .free = xen_swiotlb_free_coherent, ··· 552 604 .map_page = xen_swiotlb_map_page, 553 605 .unmap_page = xen_swiotlb_unmap_page, 554 606 .dma_supported = xen_swiotlb_dma_supported, 555 - .mmap = xen_swiotlb_dma_mmap, 556 - .get_sgtable = xen_swiotlb_get_sgtable, 607 + .mmap = dma_common_mmap, 608 + .get_sgtable = dma_common_get_sgtable, 557 609 };
+2
include/linux/blkdev.h
··· 1110 1110 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); 1111 1111 extern void blk_queue_required_elevator_features(struct request_queue *q, 1112 1112 unsigned int features); 1113 + extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, 1114 + struct device *dev); 1113 1115 1114 1116 /* 1115 1117 * Number of physical segments as sent to the device.
+14 -20
include/linux/dma-mapping.h
··· 131 131 int (*dma_supported)(struct device *dev, u64 mask); 132 132 u64 (*get_required_mask)(struct device *dev); 133 133 size_t (*max_mapping_size)(struct device *dev); 134 + unsigned long (*get_merge_boundary)(struct device *dev); 134 135 }; 135 136 136 137 #define DMA_MAPPING_ERROR (~(dma_addr_t)0) ··· 458 457 int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 459 458 void *cpu_addr, dma_addr_t dma_addr, size_t size, 460 459 unsigned long attrs); 460 + bool dma_can_mmap(struct device *dev); 461 461 int dma_supported(struct device *dev, u64 mask); 462 462 int dma_set_mask(struct device *dev, u64 mask); 463 463 int dma_set_coherent_mask(struct device *dev, u64 mask); 464 464 u64 dma_get_required_mask(struct device *dev); 465 465 size_t dma_max_mapping_size(struct device *dev); 466 + unsigned long dma_get_merge_boundary(struct device *dev); 466 467 #else /* CONFIG_HAS_DMA */ 467 468 static inline dma_addr_t dma_map_page_attrs(struct device *dev, 468 469 struct page *page, size_t offset, size_t size, ··· 550 547 { 551 548 return -ENXIO; 552 549 } 550 + static inline bool dma_can_mmap(struct device *dev) 551 + { 552 + return false; 553 + } 553 554 static inline int dma_supported(struct device *dev, u64 mask) 554 555 { 555 556 return 0; ··· 571 564 return 0; 572 565 } 573 566 static inline size_t dma_max_mapping_size(struct device *dev) 567 + { 568 + return 0; 569 + } 570 + static inline unsigned long dma_get_merge_boundary(struct device *dev) 574 571 { 575 572 return 0; 576 573 } ··· 621 610 void *cpu_addr, dma_addr_t dma_addr, size_t size, 622 611 unsigned long attrs); 623 612 613 + struct page **dma_common_find_pages(void *cpu_addr); 624 614 void *dma_common_contiguous_remap(struct page *page, size_t size, 625 - unsigned long vm_flags, 626 615 pgprot_t prot, const void *caller); 627 616 628 617 void *dma_common_pages_remap(struct page **pages, size_t size, 629 - unsigned long vm_flags, pgprot_t prot, 630 - const void *caller); 631 - void 
dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); 618 + pgprot_t prot, const void *caller); 619 + void dma_common_free_remap(void *cpu_addr, size_t size); 632 620 633 - int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot); 634 621 bool dma_in_atomic_pool(void *start, size_t size); 635 622 void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); 636 623 bool dma_free_from_pool(void *start, size_t size); ··· 758 749 #ifdef CONFIG_DMA_DECLARE_COHERENT 759 750 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, 760 751 dma_addr_t device_addr, size_t size); 761 - void dma_release_declared_memory(struct device *dev); 762 752 #else 763 753 static inline int 764 754 dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, 765 755 dma_addr_t device_addr, size_t size) 766 756 { 767 757 return -ENOSYS; 768 - } 769 - 770 - static inline void 771 - dma_release_declared_memory(struct device *dev) 772 - { 773 758 } 774 759 #endif /* CONFIG_DMA_DECLARE_COHERENT */ 775 760 ··· 784 781 785 782 return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); 786 783 } 787 - #ifndef dma_alloc_writecombine 788 - #define dma_alloc_writecombine dma_alloc_wc 789 - #endif 790 784 791 785 static inline void dma_free_wc(struct device *dev, size_t size, 792 786 void *cpu_addr, dma_addr_t dma_addr) ··· 791 791 return dma_free_attrs(dev, size, cpu_addr, dma_addr, 792 792 DMA_ATTR_WRITE_COMBINE); 793 793 } 794 - #ifndef dma_free_writecombine 795 - #define dma_free_writecombine dma_free_wc 796 - #endif 797 794 798 795 static inline int dma_mmap_wc(struct device *dev, 799 796 struct vm_area_struct *vma, ··· 800 803 return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, 801 804 DMA_ATTR_WRITE_COMBINE); 802 805 } 803 - #ifndef dma_mmap_writecombine 804 - #define dma_mmap_writecombine dma_mmap_wc 805 - #endif 806 806 807 807 #ifdef CONFIG_NEED_DMA_MAP_STATE 808 808 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t 
ADDR_NAME
+11 -2
include/linux/dma-noncoherent.h
··· 3 3 #define _LINUX_DMA_NONCOHERENT_H 1 4 4 5 5 #include <linux/dma-mapping.h> 6 + #include <asm/pgtable.h> 6 7 7 8 #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H 8 9 #include <asm/dma-coherence.h> ··· 43 42 dma_addr_t dma_addr, unsigned long attrs); 44 43 long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, 45 44 dma_addr_t dma_addr); 46 - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, 47 - unsigned long attrs); 48 45 49 46 #ifdef CONFIG_MMU 47 + /* 48 + * Page protection so that devices that can't snoop CPU caches can use the 49 + * memory coherently. We default to pgprot_noncached which is usually used 50 + * for ioremap as a safe bet, but architectures can override this with less 51 + * strict semantics if possible. 52 + */ 53 + #ifndef pgprot_dmacoherent 54 + #define pgprot_dmacoherent(prot) pgprot_noncached(prot) 55 + #endif 56 + 50 57 pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); 51 58 #else 52 59 static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
+2
include/linux/mmc/host.h
··· 368 368 #define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */ 369 369 #define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */ 370 370 #define MMC_CAP2_AVOID_3_3V (1 << 25) /* Host must negotiate down from 3.3V */ 371 + #define MMC_CAP2_MERGE_CAPABLE (1 << 26) /* Host can merge a segment over the segment size */ 371 372 372 373 int fixed_drv_type; /* fixed driver type for non-removable media */ 373 374 ··· 398 397 unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ 399 398 unsigned int use_blk_mq:1; /* use blk-mq */ 400 399 unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ 400 + unsigned int can_dma_map_merge:1; /* merging can be used */ 401 401 402 402 int rescan_disable; /* disable card detection */ 403 403 int rescan_entered; /* used with nonremovable devices */
+2
include/linux/vmalloc.h
··· 18 18 #define VM_ALLOC 0x00000002 /* vmalloc() */ 19 19 #define VM_MAP 0x00000004 /* vmap()ed pages */ 20 20 #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ 21 + #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ 21 22 #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ 22 23 #define VM_NO_GUARD 0x00000040 /* don't add guard page */ 23 24 #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ ··· 27 26 * vfree_atomic(). 28 27 */ 29 28 #define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ 29 + 30 30 /* bits [20..32] reserved for arch specific ioremap internals */ 31 31 32 32 /*
-2
include/xen/arm/hypervisor.h
··· 19 19 return PARAVIRT_LAZY_NONE; 20 20 } 21 21 22 - extern const struct dma_map_ops *xen_dma_ops; 23 - 24 22 #ifdef CONFIG_XEN 25 23 void __init xen_early_init(void); 26 24 #else
+14 -10
include/xen/arm/page-coherent.h
··· 2 2 #ifndef _XEN_ARM_PAGE_COHERENT_H 3 3 #define _XEN_ARM_PAGE_COHERENT_H 4 4 5 - void __xen_dma_map_page(struct device *hwdev, struct page *page, 6 - dma_addr_t dev_addr, unsigned long offset, size_t size, 7 - enum dma_data_direction dir, unsigned long attrs); 8 - void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 9 - size_t size, enum dma_data_direction dir, 10 - unsigned long attrs); 11 - void __xen_dma_sync_single_for_cpu(struct device *hwdev, 12 - dma_addr_t handle, size_t size, enum dma_data_direction dir); 13 - void __xen_dma_sync_single_for_device(struct device *hwdev, 14 - dma_addr_t handle, size_t size, enum dma_data_direction dir); 5 + #include <linux/dma-mapping.h> 6 + #include <asm/page.h> 7 + 8 + static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, 9 + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) 10 + { 11 + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); 12 + } 13 + 14 + static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, 15 + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) 16 + { 17 + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); 18 + } 15 19 16 20 #endif /* _XEN_ARM_PAGE_COHERENT_H */
+5
include/xen/swiotlb-xen.h
··· 4 4 5 5 #include <linux/swiotlb.h> 6 6 7 + void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, 8 + phys_addr_t paddr, size_t size, enum dma_data_direction dir); 9 + void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, 10 + phys_addr_t paddr, size_t size, enum dma_data_direction dir); 11 + 7 12 extern int xen_swiotlb_init(int verbose, bool early); 8 13 extern const struct dma_map_ops xen_swiotlb_dma_ops; 9 14
+9 -3
kernel/dma/Kconfig
··· 20 20 config ARCH_HAS_DMA_SET_MASK 21 21 bool 22 22 23 + # 24 + # Select this option if the architecture needs special handling for 25 + # DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what 26 + # people think of when saying write combine, so very few platforms should 27 + # need to enable this. 28 + # 29 + config ARCH_HAS_DMA_WRITE_COMBINE 30 + bool 31 + 23 32 config DMA_DECLARE_COHERENT 24 33 bool 25 34 ··· 52 43 bool 53 44 54 45 config ARCH_HAS_DMA_COHERENT_TO_PFN 55 - bool 56 - 57 - config ARCH_HAS_DMA_MMAP_PGPROT 58 46 bool 60 48 config ARCH_HAS_FORCE_DMA_UNENCRYPTED
-13
kernel/dma/coherent.c
··· 122 122 dma_release_coherent_memory(mem); 123 123 return ret; 124 124 } 125 - EXPORT_SYMBOL(dma_declare_coherent_memory); 126 - 127 - void dma_release_declared_memory(struct device *dev) 128 - { 129 - struct dma_coherent_mem *mem = dev->dma_mem; 130 - 131 - if (!mem) 132 - return; 133 - dma_release_coherent_memory(mem); 134 - dev->dma_mem = NULL; 135 - } 136 - EXPORT_SYMBOL(dma_release_declared_memory); 137 125 138 126 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, 139 127 ssize_t size, dma_addr_t *dma_handle) ··· 276 288 277 289 return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); 278 290 } 279 - EXPORT_SYMBOL(dma_mmap_from_dev_coherent); 280 291 281 292 int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, 282 293 size_t size, int *ret)
+72 -33
kernel/dma/mapping.c
··· 136 136 return ret; 137 137 } 138 138 139 + /* 140 + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems 141 + * that the intention is to allow exporting memory allocated via the 142 + * coherent DMA APIs through the dma_buf API, which only accepts a 143 + * scattertable. This presents a couple of problems: 144 + * 1. Not all memory allocated via the coherent DMA APIs is backed by 145 + * a struct page 146 + * 2. Passing coherent DMA memory into the streaming APIs is not allowed 147 + * as we will try to flush the memory through a different alias to that 148 + * actually being used (and the flushes are redundant.) 149 + */ 139 150 int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, 140 151 void *cpu_addr, dma_addr_t dma_addr, size_t size, 141 152 unsigned long attrs) 142 153 { 143 154 const struct dma_map_ops *ops = get_dma_ops(dev); 144 155 145 - if (!dma_is_direct(ops) && ops->get_sgtable) 146 - return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, 147 - attrs); 148 - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, 149 - attrs); 156 + if (dma_is_direct(ops)) 157 + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, 158 + size, attrs); 159 + if (!ops->get_sgtable) 160 + return -ENXIO; 161 + return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); 150 162 } 151 163 EXPORT_SYMBOL(dma_get_sgtable_attrs); 152 164 ··· 173 161 (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && 174 162 (attrs & DMA_ATTR_NON_CONSISTENT))) 175 163 return prot; 176 - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT)) 177 - return arch_dma_mmap_pgprot(dev, prot, attrs); 178 - return pgprot_noncached(prot); 164 + #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE 165 + if (attrs & DMA_ATTR_WRITE_COMBINE) 166 + return pgprot_writecombine(prot); 167 + #endif 168 + return pgprot_dmacoherent(prot); 179 169 } 180 170 #endif /* CONFIG_MMU */ 181 171 ··· 188 174 void *cpu_addr, dma_addr_t dma_addr, size_t size, 189 175 unsigned long 
attrs) 190 176 { 191 - #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP 177 + #ifdef CONFIG_MMU 192 178 unsigned long user_count = vma_pages(vma); 193 179 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; 194 180 unsigned long off = vma->vm_pgoff; ··· 219 205 user_count << PAGE_SHIFT, vma->vm_page_prot); 220 206 #else 221 207 return -ENXIO; 222 - #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ 208 + #endif /* CONFIG_MMU */ 223 209 } 210 + 211 + /** 212 + * dma_can_mmap - check if a given device supports dma_mmap_* 213 + * @dev: device to check 214 + * 215 + * Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to 216 + * map DMA allocations to userspace. 217 + */ 218 + bool dma_can_mmap(struct device *dev) 219 + { 220 + const struct dma_map_ops *ops = get_dma_ops(dev); 221 + 222 + if (dma_is_direct(ops)) { 223 + return IS_ENABLED(CONFIG_MMU) && 224 + (dev_is_dma_coherent(dev) || 225 + IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)); 226 + } 227 + 228 + return ops->mmap != NULL; 229 + } 230 + EXPORT_SYMBOL_GPL(dma_can_mmap); 224 231 225 232 /** 226 233 * dma_mmap_attrs - map a coherent DMA allocation into user space ··· 262 227 { 263 228 const struct dma_map_ops *ops = get_dma_ops(dev); 264 229 265 - if (!dma_is_direct(ops) && ops->mmap) 266 - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 267 - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 230 + if (dma_is_direct(ops)) 231 + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, 232 + attrs); 233 + if (!ops->mmap) 234 + return -ENXIO; 235 + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 268 236 } 269 237 EXPORT_SYMBOL(dma_mmap_attrs); 270 - 271 - static u64 dma_default_get_required_mask(struct device *dev) 272 - { 273 - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); 274 - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); 275 - u64 mask; 276 - 277 - if (!high_totalram) { 278 - /* convert to mask just covering totalram */ 279 - 
low_totalram = (1 << (fls(low_totalram) - 1)); 280 - low_totalram += low_totalram - 1; 281 - mask = low_totalram; 282 - } else { 283 - high_totalram = (1 << (fls(high_totalram) - 1)); 284 - high_totalram += high_totalram - 1; 285 - mask = (((u64)high_totalram) << 32) + 0xffffffff; 286 - } 287 - return mask; 288 - } 289 238 290 239 u64 dma_get_required_mask(struct device *dev) 291 240 { ··· 279 260 return dma_direct_get_required_mask(dev); 280 261 if (ops->get_required_mask) 281 262 return ops->get_required_mask(dev); 282 - return dma_default_get_required_mask(dev); 263 + 264 + /* 265 + * We require every DMA ops implementation to at least support a 32-bit 266 + * DMA mask (and use bounce buffering if that isn't supported in 267 + * hardware). As the direct mapping code has its own routine to 268 + * actually report an optimal mask we default to 32-bit here as that 269 + * is the right thing for most IOMMUs, and at least not actively 270 + * harmful in general. 271 + */ 272 + return DMA_BIT_MASK(32); 283 273 } 284 274 EXPORT_SYMBOL_GPL(dma_get_required_mask); 285 275 ··· 433 405 return size; 434 406 } 435 407 EXPORT_SYMBOL_GPL(dma_max_mapping_size); 408 + 409 + unsigned long dma_get_merge_boundary(struct device *dev) 410 + { 411 + const struct dma_map_ops *ops = get_dma_ops(dev); 412 + 413 + if (!ops || !ops->get_merge_boundary) 414 + return 0; /* can't merge */ 415 + 416 + return ops->get_merge_boundary(dev); 417 + } 418 + EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
+34 -17
kernel/dma/remap.c
··· 11 11 #include <linux/slab.h> 12 12 #include <linux/vmalloc.h> 13 13 14 + struct page **dma_common_find_pages(void *cpu_addr) 15 + { 16 + struct vm_struct *area = find_vm_area(cpu_addr); 17 + 18 + if (!area || area->flags != VM_DMA_COHERENT) 19 + return NULL; 20 + return area->pages; 21 + } 22 + 14 23 static struct vm_struct *__dma_common_pages_remap(struct page **pages, 15 - size_t size, unsigned long vm_flags, pgprot_t prot, 16 - const void *caller) 24 + size_t size, pgprot_t prot, const void *caller) 17 25 { 18 26 struct vm_struct *area; 19 27 20 - area = get_vm_area_caller(size, vm_flags, caller); 28 + area = get_vm_area_caller(size, VM_DMA_COHERENT, caller); 21 29 if (!area) 22 30 return NULL; 23 31 ··· 42 34 * Cannot be used in non-sleeping contexts 43 35 */ 44 36 void *dma_common_pages_remap(struct page **pages, size_t size, 45 - unsigned long vm_flags, pgprot_t prot, 46 - const void *caller) 37 + pgprot_t prot, const void *caller) 47 38 { 48 39 struct vm_struct *area; 49 40 50 - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); 41 + area = __dma_common_pages_remap(pages, size, prot, caller); 51 42 if (!area) 52 43 return NULL; 53 44 ··· 60 53 * Cannot be used in non-sleeping contexts 61 54 */ 62 55 void *dma_common_contiguous_remap(struct page *page, size_t size, 63 - unsigned long vm_flags, 64 56 pgprot_t prot, const void *caller) 65 57 { 66 58 int i; ··· 73 67 for (i = 0; i < (size >> PAGE_SHIFT); i++) 74 68 pages[i] = nth_page(page, i); 75 69 76 - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); 70 + area = __dma_common_pages_remap(pages, size, prot, caller); 77 71 78 72 kfree(pages); 79 73 ··· 85 79 /* 86 80 * Unmaps a range previously mapped by dma_common_*_remap 87 81 */ 88 - void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) 82 + void dma_common_free_remap(void *cpu_addr, size_t size) 89 83 { 90 - struct vm_struct *area = find_vm_area(cpu_addr); 84 + struct page **pages = 
dma_common_find_pages(cpu_addr); 91 85 92 - if (!area || (area->flags & vm_flags) != vm_flags) { 86 + if (!pages) { 93 87 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); 94 88 return; 95 89 } ··· 111 105 } 112 106 early_param("coherent_pool", early_coherent_pool); 113 107 114 - int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) 108 + static gfp_t dma_atomic_pool_gfp(void) 109 + { 110 + if (IS_ENABLED(CONFIG_ZONE_DMA)) 111 + return GFP_DMA; 112 + if (IS_ENABLED(CONFIG_ZONE_DMA32)) 113 + return GFP_DMA32; 114 + return GFP_KERNEL; 115 + } 116 + 117 + static int __init dma_atomic_pool_init(void) 115 118 { 116 119 unsigned int pool_size_order = get_order(atomic_pool_size); 117 120 unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; ··· 132 117 page = dma_alloc_from_contiguous(NULL, nr_pages, 133 118 pool_size_order, false); 134 119 else 135 - page = alloc_pages(gfp, pool_size_order); 120 + page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order); 136 121 if (!page) 137 122 goto out; 138 123 ··· 142 127 if (!atomic_pool) 143 128 goto free_page; 144 129 145 - addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP, 146 - prot, __builtin_return_address(0)); 130 + addr = dma_common_contiguous_remap(page, atomic_pool_size, 131 + pgprot_dmacoherent(PAGE_KERNEL), 132 + __builtin_return_address(0)); 147 133 if (!addr) 148 134 goto destroy_genpool; 149 135 ··· 159 143 return 0; 160 144 161 145 remove_mapping: 162 - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); 146 + dma_common_free_remap(addr, atomic_pool_size); 163 147 destroy_genpool: 164 148 gen_pool_destroy(atomic_pool); 165 149 atomic_pool = NULL; ··· 171 155 atomic_pool_size / 1024); 172 156 return -ENOMEM; 173 157 } 158 + postcore_initcall(dma_atomic_pool_init); 174 159 175 160 bool dma_in_atomic_pool(void *start, size_t size) 176 161 { ··· 234 217 arch_dma_prep_coherent(page, size); 235 218 236 219 /* create a coherent mapping */ 237 - ret = 
dma_common_contiguous_remap(page, size, VM_USERMAP, 220 + ret = dma_common_contiguous_remap(page, size, 238 221 dma_pgprot(dev, PAGE_KERNEL, attrs), 239 222 __builtin_return_address(0)); 240 223 if (!ret) {
+4 -1
mm/vmalloc.c
··· 2993 2993 if (!area) 2994 2994 return -EINVAL; 2995 2995 2996 - if (!(area->flags & VM_USERMAP)) 2996 + if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) 2997 2997 return -EINVAL; 2998 2998 2999 2999 if (kaddr + size > area->addr + get_vm_area_size(area)) ··· 3495 3495 3496 3496 if (v->flags & VM_USERMAP) 3497 3497 seq_puts(m, " user"); 3498 + 3499 + if (v->flags & VM_DMA_COHERENT) 3500 + seq_puts(m, " dma-coherent"); 3498 3501 3499 3502 if (is_vmalloc_addr(v->pages)) 3500 3503 seq_puts(m, " vpages");
+6 -7
sound/core/pcm_native.c
··· 220 220 { 221 221 if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) 222 222 return false; 223 - /* architecture supports dma_mmap_coherent()? */ 224 - #if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA) 225 - if (!substream->ops->mmap && 226 - substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV) 227 - return false; 228 - #endif 229 - return true; 223 + 224 + if (substream->ops->mmap || 225 + substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV) 226 + return true; 227 + 228 + return dma_can_mmap(substream->dma_buffer.dev.dev); 230 229 } 231 230 232 231 static int constrain_mask_params(struct snd_pcm_substream *substream,