Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: dma [IOC] Enable per device io coherency

So far the IOC treatment was global on ARC, being turned on (or off)
for all devices in the system. With this patch, this can now be done
per device using the "dma-coherent" DT property; IOW with this patch
we can use both HW-coherent and regular DMA peripherals simultaneously.

The changes involved are numerous, so a summary is listed below:

1. common code calls ARC arch_setup_dma_ops() per device.

2. For coherent dma (IOC) it plugs in generic @dma_direct_ops which
doesn't need any arch-specific backend: no need for any explicit
cache flushes or MMU mappings to provide for uncached access

- dma_(map|sync)_single* return early as corresponding dma ops callbacks
are NULL in generic code.
So arch_sync_dma_*() -> dma_cache_*() need not handle the coherent
dma case, hence drop ARC __dma_cache_*_ioc() which were no-ops anyway

3. For noncoherent dma (non IOC) generic @dma_noncoherent_ops is used
which in turn calls ARC-specific routines

- arch_dma_alloc() no longer checks for @ioc_enable since this is
called only for !IOC case.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
[vgupta: rewrote changelog]

authored by

Eugeniy Paltsev and committed by
Vineet Gupta
2820a708 678c8110

+50 -40
+13
arch/arc/include/asm/dma-mapping.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // (C) 2018 Synopsys, Inc. (www.synopsys.com) 3 + 4 + #ifndef ASM_ARC_DMA_MAPPING_H 5 + #define ASM_ARC_DMA_MAPPING_H 6 + 7 + #include <asm-generic/dma-mapping.h> 8 + 9 + void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 10 + const struct iommu_ops *iommu, bool coherent); 11 + #define arch_setup_dma_ops arch_setup_dma_ops 12 + 13 + #endif
+8 -15
arch/arc/mm/cache.c
··· 65 65 66 66 n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", 67 67 perip_base, 68 - IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); 68 + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); 69 69 70 70 return buf; 71 71 } ··· 897 897 } 898 898 899 899 /* 900 - * DMA ops for systems with IOC 901 - * IOC hardware snoops all DMA traffic keeping the caches consistent with 902 - * memory - eliding need for any explicit cache maintenance of DMA buffers 903 - */ 904 - static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {} 905 - static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {} 906 - static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {} 907 - 908 - /* 909 900 * Exported DMA API 910 901 */ 911 902 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz) ··· 1255 1264 if (is_isa_arcv2() && ioc_enable) 1256 1265 arc_ioc_setup(); 1257 1266 1258 - if (is_isa_arcv2() && ioc_enable) { 1259 - __dma_cache_wback_inv = __dma_cache_wback_inv_ioc; 1260 - __dma_cache_inv = __dma_cache_inv_ioc; 1261 - __dma_cache_wback = __dma_cache_wback_ioc; 1262 - } else if (is_isa_arcv2() && l2_line_sz && slc_enable) { 1267 + if (is_isa_arcv2() && l2_line_sz && slc_enable) { 1263 1268 __dma_cache_wback_inv = __dma_cache_wback_inv_slc; 1264 1269 __dma_cache_inv = __dma_cache_inv_slc; 1265 1270 __dma_cache_wback = __dma_cache_wback_slc; ··· 1264 1277 __dma_cache_inv = __dma_cache_inv_l1; 1265 1278 __dma_cache_wback = __dma_cache_wback_l1; 1266 1279 } 1280 + /* 1281 + * In case of IOC (say IOC+SLC case), pointers above could still be set 1282 + * but end up not being relevant as the first function in chain is not 1283 + * called at all for @dma_direct_ops 1284 + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() 1285 + */ 1267 1286 } 1268 1287 1269 1288 void __ref arc_cache_init(void)
+29 -25
arch/arc/mm/dma.c
··· 6 6 * published by the Free Software Foundation. 7 7 */ 8 8 9 - /* 10 - * DMA Coherent API Notes 11 - * 12 - * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is 13 - * implemented by accessing it using a kernel virtual address, with 14 - * Cache bit off in the TLB entry. 15 - * 16 - * The default DMA address == Phy address which is 0x8000_0000 based. 17 - */ 18 - 19 9 #include <linux/dma-noncoherent.h> 20 10 #include <asm/cache.h> 21 11 #include <asm/cacheflush.h> 22 12 13 + /* 14 + * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) 15 + * - hardware IOC not available (or "dma-coherent" not set for device in DT) 16 + * - But still handle both coherent and non-coherent requests from caller 17 + * 18 + * For DMA coherent hardware (IOC) generic code suffices 19 + */ 23 20 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 24 21 gfp_t gfp, unsigned long attrs) 25 22 { ··· 30 33 if (!page) 31 34 return NULL; 32 35 33 - /* 34 - * IOC relies on all data (even coherent DMA data) being in cache 35 - * Thus allocate normal cached memory 36 - * 37 - * The gains with IOC are two pronged: 38 - * -For streaming data, elides need for cache maintenance, saving 39 - * cycles in flush code, and bus bandwidth as all the lines of a 40 - * buffer need to be flushed out to memory 41 - * -For coherent data, Read/Write to buffers terminate early in cache 42 - * (vs. 
always going to memory - thus are faster) 43 - */ 44 - if ((is_isa_arcv2() && ioc_enable) || 45 - (attrs & DMA_ATTR_NON_CONSISTENT)) 36 + if (attrs & DMA_ATTR_NON_CONSISTENT) 46 37 need_coh = 0; 47 38 48 39 /* ··· 80 95 struct page *page = virt_to_page(paddr); 81 96 int is_non_coh = 1; 82 97 83 - is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || 84 - (is_isa_arcv2() && ioc_enable); 98 + is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT); 85 99 86 100 if (PageHighMem(page) || !is_non_coh) 87 101 iounmap((void __force __iomem *)vaddr); ··· 167 183 168 184 default: 169 185 break; 186 + } 187 + } 188 + 189 + /* 190 + * Plug in coherent or noncoherent dma ops 191 + */ 192 + void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 193 + const struct iommu_ops *iommu, bool coherent) 194 + { 195 + /* 196 + * IOC hardware snoops all DMA traffic keeping the caches consistent 197 + * with memory - eliding need for any explicit cache maintenance of 198 + * DMA buffers - so we can use dma_direct cache ops. 199 + */ 200 + if (is_isa_arcv2() && ioc_enable && coherent) { 201 + set_dma_ops(dev, &dma_direct_ops); 202 + dev_info(dev, "use dma_direct_ops cache ops\n"); 203 + } else { 204 + set_dma_ops(dev, &dma_noncoherent_ops); 205 + dev_info(dev, "use dma_noncoherent_ops cache ops\n"); 170 206 } 171 207 }