Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

swiotlb-xen: fix DMA_ATTR_NO_KERNEL_MAPPING on arm

swiotlb-xen uses very different ways to allocate coherent memory on x86
vs arm. On the former it allocates memory from the page allocator, while
on the latter it reuses the dma-direct allocator that handles the
complexities of non-coherent DMA on arm platforms.

Unfortunately the complexities of trying to deal with the two cases in
the swiotlb-xen.c code lead to a bug in the handling of
DMA_ATTR_NO_KERNEL_MAPPING on arm. With the DMA_ATTR_NO_KERNEL_MAPPING
flag the coherent memory allocator does not actually allocate coherent
memory, but just a DMA handle for some memory that is DMA addressable
by the device, but which does not have to have a kernel mapping. Thus
dereferencing the return value will lead to kernel crashes and memory
corruption.

Fix this by using the dma-direct allocator directly for arm, which works
perfectly fine because on arm swiotlb-xen is only used when the domain is
1:1 mapped, and then simplifying the remaining code to only cater for the
x86 case with DMA coherent device.

Reported-by: Rahul Singh <Rahul.Singh@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Rahul Singh <rahul.singh@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Tested-by: Rahul Singh <rahul.singh@arm.com>

+40 -139
-2
arch/arm/include/asm/xen/page-coherent.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #include <xen/arm/page-coherent.h>
-14
arch/arm/xen/mm.c
··· 116 116 !dev_is_dma_coherent(dev)); 117 117 } 118 118 119 - int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, 120 - unsigned int address_bits, 121 - dma_addr_t *dma_handle) 122 - { 123 - /* the domain is 1:1 mapped to use swiotlb-xen */ 124 - *dma_handle = pstart; 125 - return 0; 126 - } 127 - 128 - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) 129 - { 130 - return; 131 - } 132 - 133 119 static int __init xen_mm_init(void) 134 120 { 135 121 struct gnttab_cache_flush cflush;
-2
arch/arm64/include/asm/xen/page-coherent.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #include <xen/arm/page-coherent.h>
-24
arch/x86/include/asm/xen/page-coherent.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _ASM_X86_XEN_PAGE_COHERENT_H 3 - #define _ASM_X86_XEN_PAGE_COHERENT_H 4 - 5 - #include <asm/page.h> 6 - #include <linux/dma-mapping.h> 7 - 8 - static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, 9 - dma_addr_t *dma_handle, gfp_t flags, 10 - unsigned long attrs) 11 - { 12 - void *vstart = (void*)__get_free_pages(flags, get_order(size)); 13 - *dma_handle = virt_to_phys(vstart); 14 - return vstart; 15 - } 16 - 17 - static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, 18 - void *cpu_addr, dma_addr_t dma_handle, 19 - unsigned long attrs) 20 - { 21 - free_pages((unsigned long) cpu_addr, get_order(size)); 22 - } 23 - 24 - #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
+6
arch/x86/include/asm/xen/swiotlb-xen.h
··· 8 8 static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } 9 9 #endif 10 10 11 + int xen_swiotlb_fixup(void *buf, unsigned long nslabs); 12 + int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, 13 + unsigned int address_bits, 14 + dma_addr_t *dma_handle); 15 + void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); 16 + 11 17 #endif /* _ASM_X86_SWIOTLB_XEN_H */
+1
arch/x86/xen/mmu_pv.c
··· 80 80 #include <xen/interface/version.h> 81 81 #include <xen/interface/memory.h> 82 82 #include <xen/hvc-console.h> 83 + #include <xen/swiotlb-xen.h> 83 84 84 85 #include "multicalls.h" 85 86 #include "mmu.h"
+33 -64
drivers/xen/swiotlb-xen.c
··· 36 36 #include <xen/hvc-console.h> 37 37 38 38 #include <asm/dma-mapping.h> 39 - #include <asm/xen/page-coherent.h> 40 39 41 40 #include <trace/events/swiotlb.h> 42 41 #define MAX_DMA_BITS 32 ··· 103 104 return 0; 104 105 } 105 106 107 + #ifdef CONFIG_X86 106 108 int xen_swiotlb_fixup(void *buf, unsigned long nslabs) 107 109 { 108 110 int rc; ··· 131 131 } 132 132 133 133 static void * 134 - xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 135 - dma_addr_t *dma_handle, gfp_t flags, 136 - unsigned long attrs) 134 + xen_swiotlb_alloc_coherent(struct device *dev, size_t size, 135 + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) 137 136 { 138 - void *ret; 137 + u64 dma_mask = dev->coherent_dma_mask; 139 138 int order = get_order(size); 140 - u64 dma_mask = DMA_BIT_MASK(32); 141 139 phys_addr_t phys; 142 - dma_addr_t dev_addr; 140 + void *ret; 143 141 144 - /* 145 - * Ignore region specifiers - the kernel's ideas of 146 - * pseudo-phys memory layout has nothing to do with the 147 - * machine physical layout. We can't allocate highmem 148 - * because we can't return a pointer to it. 149 - */ 150 - flags &= ~(__GFP_DMA | __GFP_HIGHMEM); 151 - 152 - /* Convert the size to actually allocated. */ 142 + /* Align the allocation to the Xen page size */ 153 143 size = 1UL << (order + XEN_PAGE_SHIFT); 154 144 155 - /* On ARM this function returns an ioremap'ped virtual address for 156 - * which virt_to_phys doesn't return the corresponding physical 157 - * address. In fact on ARM virt_to_phys only works for kernel direct 158 - * mapped RAM memory. Also see comment below. 
159 - */ 160 - ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs); 161 - 145 + ret = (void *)__get_free_pages(flags, get_order(size)); 162 146 if (!ret) 163 147 return ret; 148 + phys = virt_to_phys(ret); 164 149 165 - if (hwdev && hwdev->coherent_dma_mask) 166 - dma_mask = hwdev->coherent_dma_mask; 167 - 168 - /* At this point dma_handle is the dma address, next we are 169 - * going to set it to the machine address. 170 - * Do not use virt_to_phys(ret) because on ARM it doesn't correspond 171 - * to *dma_handle. */ 172 - phys = dma_to_phys(hwdev, *dma_handle); 173 - dev_addr = xen_phys_to_dma(hwdev, phys); 174 - if (((dev_addr + size - 1 <= dma_mask)) && 175 - !range_straddles_page_boundary(phys, size)) 176 - *dma_handle = dev_addr; 177 - else { 178 - if (xen_create_contiguous_region(phys, order, 179 - fls64(dma_mask), dma_handle) != 0) { 180 - xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs); 181 - return NULL; 182 - } 183 - *dma_handle = phys_to_dma(hwdev, *dma_handle); 150 + *dma_handle = xen_phys_to_dma(dev, phys); 151 + if (*dma_handle + size - 1 > dma_mask || 152 + range_straddles_page_boundary(phys, size)) { 153 + if (xen_create_contiguous_region(phys, order, fls64(dma_mask), 154 + dma_handle) != 0) 155 + goto out_free_pages; 184 156 SetPageXenRemapped(virt_to_page(ret)); 185 157 } 158 + 186 159 memset(ret, 0, size); 187 160 return ret; 161 + 162 + out_free_pages: 163 + free_pages((unsigned long)ret, get_order(size)); 164 + return NULL; 188 165 } 189 166 190 167 static void 191 - xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, 192 - dma_addr_t dev_addr, unsigned long attrs) 168 + xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, 169 + dma_addr_t dma_handle, unsigned long attrs) 193 170 { 171 + phys_addr_t phys = virt_to_phys(vaddr); 194 172 int order = get_order(size); 195 - phys_addr_t phys; 196 - u64 dma_mask = DMA_BIT_MASK(32); 197 - struct page *page; 198 - 199 - if 
(hwdev && hwdev->coherent_dma_mask) 200 - dma_mask = hwdev->coherent_dma_mask; 201 - 202 - /* do not use virt_to_phys because on ARM it doesn't return you the 203 - * physical address */ 204 - phys = xen_dma_to_phys(hwdev, dev_addr); 205 173 206 174 /* Convert the size to actually allocated. */ 207 175 size = 1UL << (order + XEN_PAGE_SHIFT); 208 176 209 - if (is_vmalloc_addr(vaddr)) 210 - page = vmalloc_to_page(vaddr); 211 - else 212 - page = virt_to_page(vaddr); 177 + if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || 178 + WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) 179 + return; 213 180 214 - if (!WARN_ON((dev_addr + size - 1 > dma_mask) || 215 - range_straddles_page_boundary(phys, size)) && 216 - TestClearPageXenRemapped(page)) 181 + if (TestClearPageXenRemapped(virt_to_page(vaddr))) 217 182 xen_destroy_contiguous_region(phys, order); 218 - 219 - xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys), 220 - attrs); 183 + free_pages((unsigned long)vaddr, get_order(size)); 221 184 } 185 + #endif /* CONFIG_X86 */ 222 186 223 187 /* 224 188 * Map a single buffer of the indicated size for DMA in streaming mode. The ··· 385 421 } 386 422 387 423 const struct dma_map_ops xen_swiotlb_dma_ops = { 424 + #ifdef CONFIG_X86 388 425 .alloc = xen_swiotlb_alloc_coherent, 389 426 .free = xen_swiotlb_free_coherent, 427 + #else 428 + .alloc = dma_direct_alloc, 429 + .free = dma_direct_free, 430 + #endif 390 431 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, 391 432 .sync_single_for_device = xen_swiotlb_sync_single_for_device, 392 433 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
-20
include/xen/arm/page-coherent.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _XEN_ARM_PAGE_COHERENT_H 3 - #define _XEN_ARM_PAGE_COHERENT_H 4 - 5 - #include <linux/dma-mapping.h> 6 - #include <asm/page.h> 7 - 8 - static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, 9 - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) 10 - { 11 - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); 12 - } 13 - 14 - static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, 15 - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) 16 - { 17 - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); 18 - } 19 - 20 - #endif /* _XEN_ARM_PAGE_COHERENT_H */
-6
include/xen/swiotlb-xen.h
··· 10 10 void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, 11 11 size_t size, enum dma_data_direction dir); 12 12 13 - #ifdef CONFIG_SWIOTLB_XEN 14 - int xen_swiotlb_fixup(void *buf, unsigned long nslabs); 15 - #else 16 - #define xen_swiotlb_fixup NULL 17 - #endif 18 - 19 13 extern const struct dma_map_ops xen_swiotlb_dma_ops; 20 14 21 15 #endif /* __LINUX_SWIOTLB_XEN_H */
-7
include/xen/xen-ops.h
··· 42 42 43 43 extern unsigned long *xen_contiguous_bitmap; 44 44 45 - #if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) 46 - int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, 47 - unsigned int address_bits, 48 - dma_addr_t *dma_handle); 49 - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); 50 - #endif 51 - 52 45 #if defined(CONFIG_XEN_PV) 53 46 int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, 54 47 xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,