Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'stable/for-linus-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb

Pull swiotlb updates from Konrad Rzeszutek Wilk:
"Christoph Hellwig has taken a cleaver and trimmed off the not-needed
code and nicely folded duplicate code in the generic framework.

This lays the groundwork for further work to add extra DMA-backend-like
functionality in the future. Along with that come some bug fixes that make
this a nicely working package"

* 'stable/for-linus-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb:
swiotlb: don't override user specified size in swiotlb_adjust_size
swiotlb: Fix the type of index
swiotlb: Make SWIOTLB_NO_FORCE perform no allocation
ARM: Qualify enabling of swiotlb_init()
swiotlb: remove swiotlb_nr_tbl
swiotlb: dynamically allocate io_tlb_default_mem
swiotlb: move global variables into a new io_tlb_mem structure
xen-swiotlb: remove the unused size argument from xen_swiotlb_fixup
xen-swiotlb: split xen_swiotlb_init
swiotlb: lift the double initialization protection from xen-swiotlb
xen-swiotlb: remove xen_io_tlb_start and xen_io_tlb_nslabs
xen-swiotlb: remove xen_set_nslabs
xen-swiotlb: use io_tlb_end in xen_swiotlb_dma_supported
xen-swiotlb: use is_swiotlb_buffer in is_xen_swiotlb_buffer
swiotlb: split swiotlb_tbl_sync_single
swiotlb: move orig addr and size validation into swiotlb_bounce
swiotlb: remove the alloc_size parameter to swiotlb_tbl_unmap_single
powerpc/svm: stop using io_tlb_start

+356 -478
+5 -1
arch/arm/mm/init.c
··· 301 301 void __init mem_init(void) 302 302 { 303 303 #ifdef CONFIG_ARM_LPAE 304 - swiotlb_init(1); 304 + if (swiotlb_force == SWIOTLB_FORCE || 305 + max_pfn > arm_dma_pfn_limit) 306 + swiotlb_init(1); 307 + else 308 + swiotlb_force = SWIOTLB_NO_FORCE; 305 309 #endif 306 310 307 311 set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
+1 -1
arch/arm/xen/mm.c
··· 152 152 struct gnttab_cache_flush cflush; 153 153 if (!xen_swiotlb_detect()) 154 154 return 0; 155 - xen_swiotlb_init(1, false); 155 + xen_swiotlb_init(); 156 156 157 157 cflush.op = 0; 158 158 cflush.a.dev_bus_addr = 0;
+3 -3
arch/powerpc/platforms/pseries/svm.c
··· 55 55 if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) 56 56 return; 57 57 58 - if (io_tlb_start) 59 - memblock_free_early(io_tlb_start, 60 - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 58 + 59 + memblock_free_early(__pa(vstart), 60 + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 61 61 panic("SVM: Cannot allocate SWIOTLB buffer"); 62 62 } 63 63
+2 -2
arch/x86/xen/pci-swiotlb-xen.c
··· 59 59 void __init pci_xen_swiotlb_init(void) 60 60 { 61 61 if (xen_swiotlb) { 62 - xen_swiotlb_init(1, true /* early */); 62 + xen_swiotlb_init_early(); 63 63 dma_ops = &xen_swiotlb_dma_ops; 64 64 65 65 #ifdef CONFIG_PCI ··· 76 76 if (xen_swiotlb) 77 77 return 0; 78 78 79 - rc = xen_swiotlb_init(1, false /* late */); 79 + rc = xen_swiotlb_init(); 80 80 if (rc) 81 81 return rc; 82 82
+1 -1
drivers/gpu/drm/i915/gem/i915_gem_internal.c
··· 42 42 43 43 max_order = MAX_ORDER; 44 44 #ifdef CONFIG_SWIOTLB 45 - if (swiotlb_nr_tbl()) { 45 + if (is_swiotlb_active()) { 46 46 unsigned int max_segment; 47 47 48 48 max_segment = swiotlb_max_segment();
+1 -1
drivers/gpu/drm/nouveau/nouveau_ttm.c
··· 321 321 } 322 322 323 323 #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86) 324 - need_swiotlb = !!swiotlb_nr_tbl(); 324 + need_swiotlb = is_swiotlb_active(); 325 325 #endif 326 326 327 327 ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
+9 -14
drivers/iommu/dma-iommu.c
··· 496 496 unsigned long attrs) 497 497 { 498 498 struct iommu_domain *domain = iommu_get_dma_domain(dev); 499 - struct iommu_dma_cookie *cookie = domain->iova_cookie; 500 - struct iova_domain *iovad = &cookie->iovad; 501 499 phys_addr_t phys; 502 500 503 501 phys = iommu_iova_to_phys(domain, dma_addr); ··· 505 507 __iommu_dma_unmap(dev, dma_addr, size); 506 508 507 509 if (unlikely(is_swiotlb_buffer(phys))) 508 - swiotlb_tbl_unmap_single(dev, phys, size, 509 - iova_align(iovad, size), dir, attrs); 510 + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 510 511 } 511 512 512 513 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, ··· 575 578 } 576 579 577 580 iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask); 578 - if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys)) 579 - swiotlb_tbl_unmap_single(dev, phys, org_size, 580 - aligned_size, dir, attrs); 581 - 581 + if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys)) 582 + swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs); 582 583 return iova; 583 584 } 584 585 ··· 782 787 arch_sync_dma_for_cpu(phys, size, dir); 783 788 784 789 if (is_swiotlb_buffer(phys)) 785 - swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); 790 + swiotlb_sync_single_for_cpu(dev, phys, size, dir); 786 791 } 787 792 788 793 static void iommu_dma_sync_single_for_device(struct device *dev, ··· 795 800 796 801 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 797 802 if (is_swiotlb_buffer(phys)) 798 - swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE); 803 + swiotlb_sync_single_for_device(dev, phys, size, dir); 799 804 800 805 if (!dev_is_dma_coherent(dev)) 801 806 arch_sync_dma_for_device(phys, size, dir); ··· 816 821 arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); 817 822 818 823 if (is_swiotlb_buffer(sg_phys(sg))) 819 - swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, 820 - dir, SYNC_FOR_CPU); 824 + swiotlb_sync_single_for_cpu(dev, 
sg_phys(sg), 825 + sg->length, dir); 821 826 } 822 827 } 823 828 ··· 833 838 834 839 for_each_sg(sgl, sg, nelems, i) { 835 840 if (is_swiotlb_buffer(sg_phys(sg))) 836 - swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, 837 - dir, SYNC_FOR_DEVICE); 841 + swiotlb_sync_single_for_device(dev, sg_phys(sg), 842 + sg->length, dir); 838 843 839 844 if (!dev_is_dma_coherent(dev)) 840 845 arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+1 -1
drivers/pci/xen-pcifront.c
··· 693 693 694 694 spin_unlock(&pcifront_dev_lock); 695 695 696 - if (!err && !swiotlb_nr_tbl()) { 696 + if (!err && !is_swiotlb_active()) { 697 697 err = pci_xen_swiotlb_init_late(); 698 698 if (err) 699 699 dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
+82 -100
drivers/xen/swiotlb-xen.c
··· 40 40 41 41 #include <trace/events/swiotlb.h> 42 42 #define MAX_DMA_BITS 32 43 - /* 44 - * Used to do a quick range check in swiotlb_tbl_unmap_single and 45 - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this 46 - * API. 47 - */ 48 43 49 - static char *xen_io_tlb_start, *xen_io_tlb_end; 50 - static unsigned long xen_io_tlb_nslabs; 51 44 /* 52 45 * Quick lookup value of the bus address of the IOTLB. 53 46 */ ··· 75 82 return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr)); 76 83 } 77 84 78 - static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address) 79 - { 80 - return xen_phys_to_dma(dev, virt_to_phys(address)); 81 - } 82 - 83 85 static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) 84 86 { 85 87 unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); ··· 99 111 * have the same virtual address as another address 100 112 * in our domain. Therefore _only_ check address within our domain. 101 113 */ 102 - if (pfn_valid(PFN_DOWN(paddr))) { 103 - return paddr >= virt_to_phys(xen_io_tlb_start) && 104 - paddr < virt_to_phys(xen_io_tlb_end); 105 - } 114 + if (pfn_valid(PFN_DOWN(paddr))) 115 + return is_swiotlb_buffer(paddr); 106 116 return 0; 107 117 } 108 118 109 - static int 110 - xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) 119 + static int xen_swiotlb_fixup(void *buf, unsigned long nslabs) 111 120 { 112 121 int i, rc; 113 122 int dma_bits; ··· 130 145 } while (i < nslabs); 131 146 return 0; 132 147 } 133 - static unsigned long xen_set_nslabs(unsigned long nr_tbl) 134 - { 135 - if (!nr_tbl) { 136 - xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); 137 - xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); 138 - } else 139 - xen_io_tlb_nslabs = nr_tbl; 140 - 141 - return xen_io_tlb_nslabs << IO_TLB_SHIFT; 142 - } 143 148 144 149 enum xen_swiotlb_err { 145 150 XEN_SWIOTLB_UNKNOWN = 0, ··· 152 177 } 153 178 return ""; 154 179 } 155 - int __ref xen_swiotlb_init(int verbose, 
bool early) 180 + 181 + #define DEFAULT_NSLABS ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE) 182 + 183 + int __ref xen_swiotlb_init(void) 156 184 { 157 - unsigned long bytes, order; 158 - int rc = -ENOMEM; 159 185 enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; 160 - unsigned int repeat = 3; 186 + unsigned long bytes = swiotlb_size_or_default(); 187 + unsigned long nslabs = bytes >> IO_TLB_SHIFT; 188 + unsigned int order, repeat = 3; 189 + int rc = -ENOMEM; 190 + char *start; 161 191 162 - xen_io_tlb_nslabs = swiotlb_nr_tbl(); 163 192 retry: 164 - bytes = xen_set_nslabs(xen_io_tlb_nslabs); 165 - order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT); 166 - 167 - /* 168 - * IO TLB memory already allocated. Just use it. 169 - */ 170 - if (io_tlb_start != 0) { 171 - xen_io_tlb_start = phys_to_virt(io_tlb_start); 172 - goto end; 173 - } 193 + m_ret = XEN_SWIOTLB_ENOMEM; 194 + order = get_order(bytes); 174 195 175 196 /* 176 197 * Get IO TLB memory from any location. 177 198 */ 178 - if (early) { 179 - xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes), 180 - PAGE_SIZE); 181 - if (!xen_io_tlb_start) 182 - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 183 - __func__, PAGE_ALIGN(bytes), PAGE_SIZE); 184 - } else { 185 199 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) 186 200 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 187 - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { 188 - xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order); 189 - if (xen_io_tlb_start) 190 - break; 191 - order--; 192 - } 193 - if (order != get_order(bytes)) { 194 - pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n", 195 - (PAGE_SIZE << order) >> 20); 196 - xen_io_tlb_nslabs = SLABS_PER_PAGE << order; 197 - bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; 198 - } 201 + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { 202 + start = (void *)xen_get_swiotlb_free_pages(order); 203 + if (start) 204 + break; 205 + order--; 199 206 } 200 - if 
(!xen_io_tlb_start) { 201 - m_ret = XEN_SWIOTLB_ENOMEM; 207 + if (!start) 202 208 goto error; 209 + if (order != get_order(bytes)) { 210 + pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n", 211 + (PAGE_SIZE << order) >> 20); 212 + nslabs = SLABS_PER_PAGE << order; 213 + bytes = nslabs << IO_TLB_SHIFT; 203 214 } 215 + 204 216 /* 205 217 * And replace that memory with pages under 4GB. 206 218 */ 207 - rc = xen_swiotlb_fixup(xen_io_tlb_start, 208 - bytes, 209 - xen_io_tlb_nslabs); 219 + rc = xen_swiotlb_fixup(start, nslabs); 210 220 if (rc) { 211 - if (early) 212 - memblock_free(__pa(xen_io_tlb_start), 213 - PAGE_ALIGN(bytes)); 214 - else { 215 - free_pages((unsigned long)xen_io_tlb_start, order); 216 - xen_io_tlb_start = NULL; 217 - } 221 + free_pages((unsigned long)start, order); 218 222 m_ret = XEN_SWIOTLB_EFIXUP; 219 223 goto error; 220 224 } 221 - if (early) { 222 - if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, 223 - verbose)) 224 - panic("Cannot allocate SWIOTLB buffer"); 225 - rc = 0; 226 - } else 227 - rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); 228 - 229 - end: 230 - xen_io_tlb_end = xen_io_tlb_start + bytes; 231 - if (!rc) 232 - swiotlb_set_max_segment(PAGE_SIZE); 233 - 234 - return rc; 225 + rc = swiotlb_late_init_with_tbl(start, nslabs); 226 + if (rc) 227 + return rc; 228 + swiotlb_set_max_segment(PAGE_SIZE); 229 + return 0; 235 230 error: 236 231 if (repeat--) { 237 - xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ 238 - (xen_io_tlb_nslabs >> 1)); 232 + /* Min is 2MB */ 233 + nslabs = max(1024UL, (nslabs >> 1)); 239 234 pr_info("Lowering to %luMB\n", 240 - (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); 235 + (nslabs << IO_TLB_SHIFT) >> 20); 241 236 goto retry; 242 237 } 243 238 pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); 244 - if (early) 245 - panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); 246 - else 247 - free_pages((unsigned long)xen_io_tlb_start, order); 239 + 
free_pages((unsigned long)start, order); 248 240 return rc; 249 241 } 242 + 243 + #ifdef CONFIG_X86 244 + void __init xen_swiotlb_init_early(void) 245 + { 246 + unsigned long bytes = swiotlb_size_or_default(); 247 + unsigned long nslabs = bytes >> IO_TLB_SHIFT; 248 + unsigned int repeat = 3; 249 + char *start; 250 + int rc; 251 + 252 + retry: 253 + /* 254 + * Get IO TLB memory from any location. 255 + */ 256 + start = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); 257 + if (!start) 258 + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 259 + __func__, PAGE_ALIGN(bytes), PAGE_SIZE); 260 + 261 + /* 262 + * And replace that memory with pages under 4GB. 263 + */ 264 + rc = xen_swiotlb_fixup(start, nslabs); 265 + if (rc) { 266 + memblock_free(__pa(start), PAGE_ALIGN(bytes)); 267 + if (repeat--) { 268 + /* Min is 2MB */ 269 + nslabs = max(1024UL, (nslabs >> 1)); 270 + bytes = nslabs << IO_TLB_SHIFT; 271 + pr_info("Lowering to %luMB\n", bytes >> 20); 272 + goto retry; 273 + } 274 + panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); 275 + } 276 + 277 + if (swiotlb_init_with_tbl(start, nslabs, false)) 278 + panic("Cannot allocate SWIOTLB buffer"); 279 + swiotlb_set_max_segment(PAGE_SIZE); 280 + } 281 + #endif /* CONFIG_X86 */ 250 282 251 283 static void * 252 284 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, ··· 388 406 * Ensure that the address returned is DMA'ble 389 407 */ 390 408 if (unlikely(!dma_capable(dev, dev_addr, size, true))) { 391 - swiotlb_tbl_unmap_single(dev, map, size, size, dir, 409 + swiotlb_tbl_unmap_single(dev, map, size, dir, 392 410 attrs | DMA_ATTR_SKIP_CPU_SYNC); 393 411 return DMA_MAPPING_ERROR; 394 412 } ··· 427 445 428 446 /* NOTE: We use dev_addr here, not paddr! 
*/ 429 447 if (is_xen_swiotlb_buffer(hwdev, dev_addr)) 430 - swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs); 448 + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); 431 449 } 432 450 433 451 static void ··· 444 462 } 445 463 446 464 if (is_xen_swiotlb_buffer(dev, dma_addr)) 447 - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); 465 + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); 448 466 } 449 467 450 468 static void ··· 454 472 phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); 455 473 456 474 if (is_xen_swiotlb_buffer(dev, dma_addr)) 457 - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); 475 + swiotlb_sync_single_for_device(dev, paddr, size, dir); 458 476 459 477 if (!dev_is_dma_coherent(dev)) { 460 478 if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) ··· 542 560 static int 543 561 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) 544 562 { 545 - return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask; 563 + return xen_phys_to_dma(hwdev, io_tlb_default_mem->end - 1) <= mask; 546 564 } 547 565 548 566 const struct dma_map_ops xen_swiotlb_dma_ops = {
+49 -19
include/linux/swiotlb.h
··· 6 6 #include <linux/init.h> 7 7 #include <linux/types.h> 8 8 #include <linux/limits.h> 9 + #include <linux/spinlock.h> 9 10 10 11 struct device; 11 12 struct page; ··· 37 36 38 37 extern void swiotlb_init(int verbose); 39 38 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); 40 - extern unsigned long swiotlb_nr_tbl(void); 41 39 unsigned long swiotlb_size_or_default(void); 42 40 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); 43 41 extern int swiotlb_late_init_with_default_size(size_t default_size); 44 42 extern void __init swiotlb_update_mem_attributes(void); 45 - 46 - /* 47 - * Enumeration for sync targets 48 - */ 49 - enum dma_sync_target { 50 - SYNC_FOR_CPU = 0, 51 - SYNC_FOR_DEVICE = 1, 52 - }; 53 43 54 44 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, 55 45 size_t mapping_size, size_t alloc_size, ··· 49 57 extern void swiotlb_tbl_unmap_single(struct device *hwdev, 50 58 phys_addr_t tlb_addr, 51 59 size_t mapping_size, 52 - size_t alloc_size, 53 60 enum dma_data_direction dir, 54 61 unsigned long attrs); 55 62 56 - extern void swiotlb_tbl_sync_single(struct device *hwdev, 57 - phys_addr_t tlb_addr, 58 - size_t size, enum dma_data_direction dir, 59 - enum dma_sync_target target); 60 - 63 + void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, 64 + size_t size, enum dma_data_direction dir); 65 + void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, 66 + size_t size, enum dma_data_direction dir); 61 67 dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, 62 68 size_t size, enum dma_data_direction dir, unsigned long attrs); 63 69 64 70 #ifdef CONFIG_SWIOTLB 65 71 extern enum swiotlb_force swiotlb_force; 66 - extern phys_addr_t io_tlb_start, io_tlb_end; 72 + 73 + /** 74 + * struct io_tlb_mem - IO TLB Memory Pool Descriptor 75 + * 76 + * @start: The start address of the swiotlb memory pool. 
Used to do a quick 77 + * range check to see if the memory was in fact allocated by this 78 + * API. 79 + * @end: The end address of the swiotlb memory pool. Used to do a quick 80 + * range check to see if the memory was in fact allocated by this 81 + * API. 82 + * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and 83 + * @end. This is command line adjustable via setup_io_tlb_npages. 84 + * @used: The number of used IO TLB block. 85 + * @list: The free list describing the number of free entries available 86 + * from each index. 87 + * @index: The index to start searching in the next round. 88 + * @orig_addr: The original address corresponding to a mapped entry. 89 + * @alloc_size: Size of the allocated buffer. 90 + * @lock: The lock to protect the above data structures in the map and 91 + * unmap calls. 92 + * @debugfs: The dentry to debugfs. 93 + * @late_alloc: %true if allocated using the page allocator 94 + */ 95 + struct io_tlb_mem { 96 + phys_addr_t start; 97 + phys_addr_t end; 98 + unsigned long nslabs; 99 + unsigned long used; 100 + unsigned int index; 101 + spinlock_t lock; 102 + struct dentry *debugfs; 103 + bool late_alloc; 104 + struct io_tlb_slot { 105 + phys_addr_t orig_addr; 106 + size_t alloc_size; 107 + unsigned int list; 108 + } slots[]; 109 + }; 110 + extern struct io_tlb_mem *io_tlb_default_mem; 67 111 68 112 static inline bool is_swiotlb_buffer(phys_addr_t paddr) 69 113 { 70 - return paddr >= io_tlb_start && paddr < io_tlb_end; 114 + struct io_tlb_mem *mem = io_tlb_default_mem; 115 + 116 + return mem && paddr >= mem->start && paddr < mem->end; 71 117 } 72 118 73 119 void __init swiotlb_exit(void); 74 120 unsigned int swiotlb_max_segment(void); 75 121 size_t swiotlb_max_mapping_size(struct device *dev); 76 122 bool is_swiotlb_active(void); 77 - void __init swiotlb_adjust_size(unsigned long new_size); 123 + void __init swiotlb_adjust_size(unsigned long size); 78 124 #else 79 125 #define swiotlb_force SWIOTLB_NO_FORCE 80 126 
static inline bool is_swiotlb_buffer(phys_addr_t paddr) ··· 136 106 return false; 137 107 } 138 108 139 - static inline void swiotlb_adjust_size(unsigned long new_size) 109 + static inline void swiotlb_adjust_size(unsigned long size) 140 110 { 141 111 } 142 112 #endif /* CONFIG_SWIOTLB */
+2 -1
include/xen/swiotlb-xen.h
··· 10 10 void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, 11 11 size_t size, enum dma_data_direction dir); 12 12 13 - extern int xen_swiotlb_init(int verbose, bool early); 13 + int xen_swiotlb_init(void); 14 + void __init xen_swiotlb_init_early(void); 14 15 extern const struct dma_map_ops xen_swiotlb_dma_ops; 15 16 16 17 #endif /* __LINUX_SWIOTLB_XEN_H */
+4 -4
kernel/dma/direct.c
··· 344 344 phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); 345 345 346 346 if (unlikely(is_swiotlb_buffer(paddr))) 347 - swiotlb_tbl_sync_single(dev, paddr, sg->length, 348 - dir, SYNC_FOR_DEVICE); 347 + swiotlb_sync_single_for_device(dev, paddr, sg->length, 348 + dir); 349 349 350 350 if (!dev_is_dma_coherent(dev)) 351 351 arch_sync_dma_for_device(paddr, sg->length, ··· 370 370 arch_sync_dma_for_cpu(paddr, sg->length, dir); 371 371 372 372 if (unlikely(is_swiotlb_buffer(paddr))) 373 - swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, 374 - SYNC_FOR_CPU); 373 + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, 374 + dir); 375 375 376 376 if (dir == DMA_FROM_DEVICE) 377 377 arch_dma_mark_clean(paddr, sg->length);
+3 -3
kernel/dma/direct.h
··· 57 57 phys_addr_t paddr = dma_to_phys(dev, addr); 58 58 59 59 if (unlikely(is_swiotlb_buffer(paddr))) 60 - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); 60 + swiotlb_sync_single_for_device(dev, paddr, size, dir); 61 61 62 62 if (!dev_is_dma_coherent(dev)) 63 63 arch_sync_dma_for_device(paddr, size, dir); ··· 74 74 } 75 75 76 76 if (unlikely(is_swiotlb_buffer(paddr))) 77 - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); 77 + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); 78 78 79 79 if (dir == DMA_FROM_DEVICE) 80 80 arch_dma_mark_clean(paddr, size); ··· 114 114 dma_direct_sync_single_for_cpu(dev, addr, size, dir); 115 115 116 116 if (unlikely(is_swiotlb_buffer(phys))) 117 - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); 117 + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 118 118 } 119 119 #endif /* _KERNEL_DMA_DIRECT_H */
+193 -327
kernel/dma/swiotlb.c
··· 59 59 */ 60 60 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 61 61 62 + #define INVALID_PHYS_ADDR (~(phys_addr_t)0) 63 + 62 64 enum swiotlb_force swiotlb_force; 63 65 64 - /* 65 - * Used to do a quick range check in swiotlb_tbl_unmap_single and 66 - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this 67 - * API. 68 - */ 69 - phys_addr_t io_tlb_start, io_tlb_end; 70 - 71 - /* 72 - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and 73 - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. 74 - */ 75 - static unsigned long io_tlb_nslabs; 76 - 77 - /* 78 - * The number of used IO TLB block 79 - */ 80 - static unsigned long io_tlb_used; 81 - 82 - /* 83 - * This is a free list describing the number of free entries available from 84 - * each index 85 - */ 86 - static unsigned int *io_tlb_list; 87 - static unsigned int io_tlb_index; 66 + struct io_tlb_mem *io_tlb_default_mem; 88 67 89 68 /* 90 69 * Max segment that we can provide which (if pages are contingous) will ··· 71 92 */ 72 93 static unsigned int max_segment; 73 94 74 - /* 75 - * We need to save away the original address corresponding to a mapped entry 76 - * for the sync operations. 77 - */ 78 - #define INVALID_PHYS_ADDR (~(phys_addr_t)0) 79 - static phys_addr_t *io_tlb_orig_addr; 80 - 81 - /* 82 - * The mapped buffer's size should be validated during a sync operation. 
83 - */ 84 - static size_t *io_tlb_orig_size; 85 - 86 - /* 87 - * Protect the above data structures in the map and unmap calls 88 - */ 89 - static DEFINE_SPINLOCK(io_tlb_lock); 90 - 91 - static int late_alloc; 95 + static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; 92 96 93 97 static int __init 94 98 setup_io_tlb_npages(char *str) 95 99 { 96 100 if (isdigit(*str)) { 97 - io_tlb_nslabs = simple_strtoul(str, &str, 0); 98 101 /* avoid tail segment of size < IO_TLB_SEGSIZE */ 99 - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 102 + default_nslabs = 103 + ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); 100 104 } 101 105 if (*str == ',') 102 106 ++str; 103 - if (!strcmp(str, "force")) { 107 + if (!strcmp(str, "force")) 104 108 swiotlb_force = SWIOTLB_FORCE; 105 - } else if (!strcmp(str, "noforce")) { 109 + else if (!strcmp(str, "noforce")) 106 110 swiotlb_force = SWIOTLB_NO_FORCE; 107 - io_tlb_nslabs = 1; 108 - } 109 111 110 112 return 0; 111 113 } 112 114 early_param("swiotlb", setup_io_tlb_npages); 113 115 114 - static bool no_iotlb_memory; 115 - 116 - unsigned long swiotlb_nr_tbl(void) 117 - { 118 - return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs; 119 - } 120 - EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); 121 - 122 116 unsigned int swiotlb_max_segment(void) 123 117 { 124 - return unlikely(no_iotlb_memory) ? 0 : max_segment; 118 + return io_tlb_default_mem ? max_segment : 0; 125 119 } 126 120 EXPORT_SYMBOL_GPL(swiotlb_max_segment); 127 121 ··· 108 156 109 157 unsigned long swiotlb_size_or_default(void) 110 158 { 111 - unsigned long size; 112 - 113 - size = io_tlb_nslabs << IO_TLB_SHIFT; 114 - 115 - return size ? 
size : (IO_TLB_DEFAULT_SIZE); 159 + return default_nslabs << IO_TLB_SHIFT; 116 160 } 117 161 118 - void __init swiotlb_adjust_size(unsigned long new_size) 162 + void __init swiotlb_adjust_size(unsigned long size) 119 163 { 120 - unsigned long size; 121 - 122 164 /* 123 165 * If swiotlb parameter has not been specified, give a chance to 124 166 * architectures such as those supporting memory encryption to 125 167 * adjust/expand SWIOTLB size for their use. 126 168 */ 127 - if (!io_tlb_nslabs) { 128 - size = ALIGN(new_size, IO_TLB_SIZE); 129 - io_tlb_nslabs = size >> IO_TLB_SHIFT; 130 - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 131 - 132 - pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); 133 - } 169 + if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) 170 + return; 171 + size = ALIGN(size, IO_TLB_SIZE); 172 + default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); 173 + pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); 134 174 } 135 175 136 176 void swiotlb_print_info(void) 137 177 { 138 - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; 178 + struct io_tlb_mem *mem = io_tlb_default_mem; 139 179 140 - if (no_iotlb_memory) { 180 + if (!mem) { 141 181 pr_warn("No low mem\n"); 142 182 return; 143 183 } 144 184 145 - pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end, 146 - bytes >> 20); 185 + pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, 186 + (mem->nslabs << IO_TLB_SHIFT) >> 20); 147 187 } 148 188 149 189 static inline unsigned long io_tlb_offset(unsigned long val) ··· 156 212 */ 157 213 void __init swiotlb_update_mem_attributes(void) 158 214 { 215 + struct io_tlb_mem *mem = io_tlb_default_mem; 159 216 void *vaddr; 160 217 unsigned long bytes; 161 218 162 - if (no_iotlb_memory || late_alloc) 219 + if (!mem || mem->late_alloc) 163 220 return; 164 - 165 - vaddr = phys_to_virt(io_tlb_start); 166 - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); 221 + vaddr = 
phys_to_virt(mem->start); 222 + bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); 167 223 set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); 168 224 memset(vaddr, 0, bytes); 169 225 } 170 226 171 227 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) 172 228 { 173 - unsigned long i, bytes; 229 + unsigned long bytes = nslabs << IO_TLB_SHIFT, i; 230 + struct io_tlb_mem *mem; 174 231 size_t alloc_size; 175 232 176 - bytes = nslabs << IO_TLB_SHIFT; 233 + if (swiotlb_force == SWIOTLB_NO_FORCE) 234 + return 0; 177 235 178 - io_tlb_nslabs = nslabs; 179 - io_tlb_start = __pa(tlb); 180 - io_tlb_end = io_tlb_start + bytes; 236 + /* protect against double initialization */ 237 + if (WARN_ON_ONCE(io_tlb_default_mem)) 238 + return -ENOMEM; 181 239 182 - /* 183 - * Allocate and initialize the free list array. This array is used 184 - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 185 - * between io_tlb_start and io_tlb_end. 186 - */ 187 - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int)); 188 - io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE); 189 - if (!io_tlb_list) 240 + alloc_size = PAGE_ALIGN(struct_size(mem, slots, nslabs)); 241 + mem = memblock_alloc(alloc_size, PAGE_SIZE); 242 + if (!mem) 190 243 panic("%s: Failed to allocate %zu bytes align=0x%lx\n", 191 244 __func__, alloc_size, PAGE_SIZE); 192 - 193 - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)); 194 - io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE); 195 - if (!io_tlb_orig_addr) 196 - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", 197 - __func__, alloc_size, PAGE_SIZE); 198 - 199 - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)); 200 - io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE); 201 - if (!io_tlb_orig_size) 202 - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", 203 - __func__, alloc_size, PAGE_SIZE); 204 - 205 - for (i = 0; i < io_tlb_nslabs; i++) { 206 - io_tlb_list[i] = IO_TLB_SEGSIZE - 
io_tlb_offset(i); 207 - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; 208 - io_tlb_orig_size[i] = 0; 245 + mem->nslabs = nslabs; 246 + mem->start = __pa(tlb); 247 + mem->end = mem->start + bytes; 248 + mem->index = 0; 249 + spin_lock_init(&mem->lock); 250 + for (i = 0; i < mem->nslabs; i++) { 251 + mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); 252 + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; 253 + mem->slots[i].alloc_size = 0; 209 254 } 210 - io_tlb_index = 0; 211 - no_iotlb_memory = false; 212 255 256 + io_tlb_default_mem = mem; 213 257 if (verbose) 214 258 swiotlb_print_info(); 215 - 216 - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); 259 + swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); 217 260 return 0; 218 261 } 219 262 ··· 211 280 void __init 212 281 swiotlb_init(int verbose) 213 282 { 214 - size_t default_size = IO_TLB_DEFAULT_SIZE; 215 - unsigned char *vstart; 216 - unsigned long bytes; 283 + size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); 284 + void *tlb; 217 285 218 - if (!io_tlb_nslabs) { 219 - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); 220 - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 221 - } 222 - 223 - bytes = io_tlb_nslabs << IO_TLB_SHIFT; 224 - 225 - /* Get IO TLB memory from the low pages */ 226 - vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); 227 - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) 286 + if (swiotlb_force == SWIOTLB_NO_FORCE) 228 287 return; 229 288 230 - if (io_tlb_start) { 231 - memblock_free_early(io_tlb_start, 232 - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 233 - io_tlb_start = 0; 234 - } 289 + /* Get IO TLB memory from the low pages */ 290 + tlb = memblock_alloc_low(bytes, PAGE_SIZE); 291 + if (!tlb) 292 + goto fail; 293 + if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose)) 294 + goto fail_free_mem; 295 + return; 296 + 297 + fail_free_mem: 298 + memblock_free_early(__pa(tlb), bytes); 299 + fail: 235 300 pr_warn("Cannot allocate buffer"); 
236 - no_iotlb_memory = true; 237 301 } 238 302 239 303 /* ··· 239 313 int 240 314 swiotlb_late_init_with_default_size(size_t default_size) 241 315 { 242 - unsigned long bytes, req_nslabs = io_tlb_nslabs; 316 + unsigned long nslabs = 317 + ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); 318 + unsigned long bytes; 243 319 unsigned char *vstart = NULL; 244 320 unsigned int order; 245 321 int rc = 0; 246 322 247 - if (!io_tlb_nslabs) { 248 - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); 249 - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 250 - } 323 + if (swiotlb_force == SWIOTLB_NO_FORCE) 324 + return 0; 251 325 252 326 /* 253 327 * Get IO TLB memory from the low pages 254 328 */ 255 - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); 256 - io_tlb_nslabs = SLABS_PER_PAGE << order; 257 - bytes = io_tlb_nslabs << IO_TLB_SHIFT; 329 + order = get_order(nslabs << IO_TLB_SHIFT); 330 + nslabs = SLABS_PER_PAGE << order; 331 + bytes = nslabs << IO_TLB_SHIFT; 258 332 259 333 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { 260 334 vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, ··· 264 338 order--; 265 339 } 266 340 267 - if (!vstart) { 268 - io_tlb_nslabs = req_nslabs; 341 + if (!vstart) 269 342 return -ENOMEM; 270 - } 343 + 271 344 if (order != get_order(bytes)) { 272 345 pr_warn("only able to allocate %ld MB\n", 273 346 (PAGE_SIZE << order) >> 20); 274 - io_tlb_nslabs = SLABS_PER_PAGE << order; 347 + nslabs = SLABS_PER_PAGE << order; 275 348 } 276 - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); 349 + rc = swiotlb_late_init_with_tbl(vstart, nslabs); 277 350 if (rc) 278 351 free_pages((unsigned long)vstart, order); 279 352 280 353 return rc; 281 354 } 282 355 283 - static void swiotlb_cleanup(void) 284 - { 285 - io_tlb_end = 0; 286 - io_tlb_start = 0; 287 - io_tlb_nslabs = 0; 288 - max_segment = 0; 289 - } 290 - 291 356 int 292 357 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) 293 358 { 294 - unsigned long i, bytes; 359 + 
unsigned long bytes = nslabs << IO_TLB_SHIFT, i; 360 + struct io_tlb_mem *mem; 295 361 296 - bytes = nslabs << IO_TLB_SHIFT; 362 + if (swiotlb_force == SWIOTLB_NO_FORCE) 363 + return 0; 297 364 298 - io_tlb_nslabs = nslabs; 299 - io_tlb_start = virt_to_phys(tlb); 300 - io_tlb_end = io_tlb_start + bytes; 365 + /* protect against double initialization */ 366 + if (WARN_ON_ONCE(io_tlb_default_mem)) 367 + return -ENOMEM; 368 + 369 + mem = (void *)__get_free_pages(GFP_KERNEL, 370 + get_order(struct_size(mem, slots, nslabs))); 371 + if (!mem) 372 + return -ENOMEM; 373 + 374 + mem->nslabs = nslabs; 375 + mem->start = virt_to_phys(tlb); 376 + mem->end = mem->start + bytes; 377 + mem->index = 0; 378 + mem->late_alloc = 1; 379 + spin_lock_init(&mem->lock); 380 + for (i = 0; i < mem->nslabs; i++) { 381 + mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); 382 + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; 383 + mem->slots[i].alloc_size = 0; 384 + } 301 385 302 386 set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); 303 387 memset(tlb, 0, bytes); 304 388 305 - /* 306 - * Allocate and initialize the free list array. This array is used 307 - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 308 - * between io_tlb_start and io_tlb_end. 
309 - */ 310 - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, 311 - get_order(io_tlb_nslabs * sizeof(int))); 312 - if (!io_tlb_list) 313 - goto cleanup3; 314 - 315 - io_tlb_orig_addr = (phys_addr_t *) 316 - __get_free_pages(GFP_KERNEL, 317 - get_order(io_tlb_nslabs * 318 - sizeof(phys_addr_t))); 319 - if (!io_tlb_orig_addr) 320 - goto cleanup4; 321 - 322 - io_tlb_orig_size = (size_t *) 323 - __get_free_pages(GFP_KERNEL, 324 - get_order(io_tlb_nslabs * 325 - sizeof(size_t))); 326 - if (!io_tlb_orig_size) 327 - goto cleanup5; 328 - 329 - 330 - for (i = 0; i < io_tlb_nslabs; i++) { 331 - io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); 332 - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; 333 - io_tlb_orig_size[i] = 0; 334 - } 335 - io_tlb_index = 0; 336 - no_iotlb_memory = false; 337 - 389 + io_tlb_default_mem = mem; 338 390 swiotlb_print_info(); 339 - 340 - late_alloc = 1; 341 - 342 - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); 343 - 391 + swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); 344 392 return 0; 345 - 346 - cleanup5: 347 - free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * 348 - sizeof(phys_addr_t))); 349 - 350 - cleanup4: 351 - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * 352 - sizeof(int))); 353 - io_tlb_list = NULL; 354 - cleanup3: 355 - swiotlb_cleanup(); 356 - return -ENOMEM; 357 393 } 358 394 359 395 void __init swiotlb_exit(void) 360 396 { 361 - if (!io_tlb_orig_addr) 397 + struct io_tlb_mem *mem = io_tlb_default_mem; 398 + size_t size; 399 + 400 + if (!mem) 362 401 return; 363 402 364 - if (late_alloc) { 365 - free_pages((unsigned long)io_tlb_orig_size, 366 - get_order(io_tlb_nslabs * sizeof(size_t))); 367 - free_pages((unsigned long)io_tlb_orig_addr, 368 - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); 369 - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * 370 - sizeof(int))); 371 - free_pages((unsigned long)phys_to_virt(io_tlb_start), 372 - get_order(io_tlb_nslabs 
<< IO_TLB_SHIFT)); 373 - } else { 374 - memblock_free_late(__pa(io_tlb_orig_addr), 375 - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); 376 - memblock_free_late(__pa(io_tlb_orig_size), 377 - PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t))); 378 - memblock_free_late(__pa(io_tlb_list), 379 - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); 380 - memblock_free_late(io_tlb_start, 381 - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 382 - } 383 - swiotlb_cleanup(); 403 + size = struct_size(mem, slots, mem->nslabs); 404 + if (mem->late_alloc) 405 + free_pages((unsigned long)mem, get_order(size)); 406 + else 407 + memblock_free_late(__pa(mem), PAGE_ALIGN(size)); 408 + io_tlb_default_mem = NULL; 384 409 } 385 410 386 411 /* 387 412 * Bounce: copy the swiotlb buffer from or back to the original dma location 388 413 */ 389 - static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, 390 - size_t size, enum dma_data_direction dir) 414 + static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, 415 + enum dma_data_direction dir) 391 416 { 417 + struct io_tlb_mem *mem = io_tlb_default_mem; 418 + int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; 419 + phys_addr_t orig_addr = mem->slots[index].orig_addr; 420 + size_t alloc_size = mem->slots[index].alloc_size; 392 421 unsigned long pfn = PFN_DOWN(orig_addr); 393 422 unsigned char *vaddr = phys_to_virt(tlb_addr); 423 + 424 + if (orig_addr == INVALID_PHYS_ADDR) 425 + return; 426 + 427 + if (size > alloc_size) { 428 + dev_WARN_ONCE(dev, 1, 429 + "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", 430 + alloc_size, size); 431 + size = alloc_size; 432 + } 394 433 395 434 if (PageHighMem(pfn_to_page(pfn))) { 396 435 /* The buffer does not have a mapping. 
Map it in and copy */ ··· 408 517 return nr_slots(boundary_mask + 1); 409 518 } 410 519 411 - static unsigned int wrap_index(unsigned int index) 520 + static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) 412 521 { 413 - if (index >= io_tlb_nslabs) 522 + if (index >= mem->nslabs) 414 523 return 0; 415 524 return index; 416 525 } ··· 422 531 static int find_slots(struct device *dev, phys_addr_t orig_addr, 423 532 size_t alloc_size) 424 533 { 534 + struct io_tlb_mem *mem = io_tlb_default_mem; 425 535 unsigned long boundary_mask = dma_get_seg_boundary(dev); 426 536 dma_addr_t tbl_dma_addr = 427 - phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; 537 + phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; 428 538 unsigned long max_slots = get_max_slots(boundary_mask); 429 539 unsigned int iotlb_align_mask = 430 540 dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); ··· 444 552 if (alloc_size >= PAGE_SIZE) 445 553 stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); 446 554 447 - spin_lock_irqsave(&io_tlb_lock, flags); 448 - if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) 555 + spin_lock_irqsave(&mem->lock, flags); 556 + if (unlikely(nslots > mem->nslabs - mem->used)) 449 557 goto not_found; 450 558 451 - index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); 559 + index = wrap = wrap_index(mem, ALIGN(mem->index, stride)); 452 560 do { 453 561 if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != 454 562 (orig_addr & iotlb_align_mask)) { 455 - index = wrap_index(index + 1); 563 + index = wrap_index(mem, index + 1); 456 564 continue; 457 565 } 458 566 ··· 464 572 if (!iommu_is_span_boundary(index, nslots, 465 573 nr_slots(tbl_dma_addr), 466 574 max_slots)) { 467 - if (io_tlb_list[index] >= nslots) 575 + if (mem->slots[index].list >= nslots) 468 576 goto found; 469 577 } 470 - index = wrap_index(index + stride); 578 + index = wrap_index(mem, index + stride); 471 579 } while (index != wrap); 472 580 473 581 not_found: 474 
- spin_unlock_irqrestore(&io_tlb_lock, flags); 582 + spin_unlock_irqrestore(&mem->lock, flags); 475 583 return -1; 476 584 477 585 found: 478 586 for (i = index; i < index + nslots; i++) 479 - io_tlb_list[i] = 0; 587 + mem->slots[i].list = 0; 480 588 for (i = index - 1; 481 589 io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && 482 - io_tlb_list[i]; i--) 483 - io_tlb_list[i] = ++count; 590 + mem->slots[i].list; i--) 591 + mem->slots[i].list = ++count; 484 592 485 593 /* 486 594 * Update the indices to avoid searching in the next round. 487 595 */ 488 - if (index + nslots < io_tlb_nslabs) 489 - io_tlb_index = index + nslots; 596 + if (index + nslots < mem->nslabs) 597 + mem->index = index + nslots; 490 598 else 491 - io_tlb_index = 0; 492 - io_tlb_used += nslots; 599 + mem->index = 0; 600 + mem->used += nslots; 493 601 494 - spin_unlock_irqrestore(&io_tlb_lock, flags); 602 + spin_unlock_irqrestore(&mem->lock, flags); 495 603 return index; 496 604 } 497 605 ··· 499 607 size_t mapping_size, size_t alloc_size, 500 608 enum dma_data_direction dir, unsigned long attrs) 501 609 { 610 + struct io_tlb_mem *mem = io_tlb_default_mem; 502 611 unsigned int offset = swiotlb_align_offset(dev, orig_addr); 503 - unsigned int index, i; 612 + unsigned int i; 613 + int index; 504 614 phys_addr_t tlb_addr; 505 615 506 - if (no_iotlb_memory) 616 + if (!mem) 507 617 panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); 508 618 509 619 if (mem_encrypt_active()) ··· 522 628 if (!(attrs & DMA_ATTR_NO_WARN)) 523 629 dev_warn_ratelimited(dev, 524 630 "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", 525 - alloc_size, io_tlb_nslabs, io_tlb_used); 631 + alloc_size, mem->nslabs, mem->used); 526 632 return (phys_addr_t)DMA_MAPPING_ERROR; 527 633 } 528 634 ··· 532 638 * needed. 
533 639 */ 534 640 for (i = 0; i < nr_slots(alloc_size + offset); i++) { 535 - io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); 536 - io_tlb_orig_size[index+i] = alloc_size - (i << IO_TLB_SHIFT); 641 + mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); 642 + mem->slots[index + i].alloc_size = 643 + alloc_size - (i << IO_TLB_SHIFT); 537 644 } 538 - tlb_addr = slot_addr(io_tlb_start, index) + offset; 645 + tlb_addr = slot_addr(mem->start, index) + offset; 539 646 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 540 647 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) 541 - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); 648 + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); 542 649 return tlb_addr; 543 - } 544 - 545 - static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size) 546 - { 547 - if (*size > orig_size) { 548 - /* Warn and truncate mapping_size */ 549 - dev_WARN_ONCE(hwdev, 1, 550 - "Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n", 551 - orig_size, *size); 552 - *size = orig_size; 553 - } 554 650 } 555 651 556 652 /* 557 653 * tlb_addr is the physical address of the bounce buffer to unmap. 
558 654 */ 559 655 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, 560 - size_t mapping_size, size_t alloc_size, 561 - enum dma_data_direction dir, unsigned long attrs) 656 + size_t mapping_size, enum dma_data_direction dir, 657 + unsigned long attrs) 562 658 { 659 + struct io_tlb_mem *mem = io_tlb_default_mem; 563 660 unsigned long flags; 564 661 unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); 565 - int i, count, nslots = nr_slots(alloc_size + offset); 566 - int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; 567 - phys_addr_t orig_addr = io_tlb_orig_addr[index]; 568 - 569 - validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size); 662 + int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; 663 + int nslots = nr_slots(mem->slots[index].alloc_size + offset); 664 + int count, i; 570 665 571 666 /* 572 667 * First, sync the memory before unmapping the entry 573 668 */ 574 - if (orig_addr != INVALID_PHYS_ADDR && 575 - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 576 - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) 577 - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE); 669 + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 670 + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) 671 + swiotlb_bounce(hwdev, tlb_addr, mapping_size, DMA_FROM_DEVICE); 578 672 579 673 /* 580 674 * Return the buffer to the free list by setting the corresponding ··· 570 688 * While returning the entries to the free list, we merge the entries 571 689 * with slots below and above the pool being returned. 
572 690 */ 573 - spin_lock_irqsave(&io_tlb_lock, flags); 691 + spin_lock_irqsave(&mem->lock, flags); 574 692 if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) 575 - count = io_tlb_list[index + nslots]; 693 + count = mem->slots[index + nslots].list; 576 694 else 577 695 count = 0; 578 696 ··· 581 699 * superceeding slots 582 700 */ 583 701 for (i = index + nslots - 1; i >= index; i--) { 584 - io_tlb_list[i] = ++count; 585 - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; 586 - io_tlb_orig_size[i] = 0; 702 + mem->slots[i].list = ++count; 703 + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; 704 + mem->slots[i].alloc_size = 0; 587 705 } 588 706 589 707 /* ··· 591 709 * available (non zero) 592 710 */ 593 711 for (i = index - 1; 594 - io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; 712 + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; 595 713 i--) 596 - io_tlb_list[i] = ++count; 597 - io_tlb_used -= nslots; 598 - spin_unlock_irqrestore(&io_tlb_lock, flags); 714 + mem->slots[i].list = ++count; 715 + mem->used -= nslots; 716 + spin_unlock_irqrestore(&mem->lock, flags); 599 717 } 600 718 601 - void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, 602 - size_t size, enum dma_data_direction dir, 603 - enum dma_sync_target target) 719 + void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, 720 + size_t size, enum dma_data_direction dir) 604 721 { 605 - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; 606 - size_t orig_size = io_tlb_orig_size[index]; 607 - phys_addr_t orig_addr = io_tlb_orig_addr[index]; 722 + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) 723 + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); 724 + else 725 + BUG_ON(dir != DMA_FROM_DEVICE); 726 + } 608 727 609 - if (orig_addr == INVALID_PHYS_ADDR) 610 - return; 611 - 612 - validate_sync_size_and_truncate(hwdev, orig_size, &size); 613 - 614 - switch (target) { 615 - case SYNC_FOR_CPU: 616 - if (likely(dir == DMA_FROM_DEVICE || dir 
== DMA_BIDIRECTIONAL)) 617 - swiotlb_bounce(orig_addr, tlb_addr, 618 - size, DMA_FROM_DEVICE); 619 - else 620 - BUG_ON(dir != DMA_TO_DEVICE); 621 - break; 622 - case SYNC_FOR_DEVICE: 623 - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) 624 - swiotlb_bounce(orig_addr, tlb_addr, 625 - size, DMA_TO_DEVICE); 626 - else 627 - BUG_ON(dir != DMA_FROM_DEVICE); 628 - break; 629 - default: 630 - BUG(); 631 - } 728 + void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, 729 + size_t size, enum dma_data_direction dir) 730 + { 731 + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 732 + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); 733 + else 734 + BUG_ON(dir != DMA_TO_DEVICE); 632 735 } 633 736 634 737 /* ··· 637 770 /* Ensure that the address returned is DMA'ble */ 638 771 dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); 639 772 if (unlikely(!dma_capable(dev, dma_addr, size, true))) { 640 - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, 773 + swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, 641 774 attrs | DMA_ATTR_SKIP_CPU_SYNC); 642 775 dev_WARN_ONCE(dev, 1, 643 776 "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", ··· 657 790 658 791 bool is_swiotlb_active(void) 659 792 { 660 - /* 661 - * When SWIOTLB is initialized, even if io_tlb_start points to physical 662 - * address zero, io_tlb_end surely doesn't. 
663 - */ 664 - return io_tlb_end != 0; 793 + return io_tlb_default_mem != NULL; 665 794 } 795 + EXPORT_SYMBOL_GPL(is_swiotlb_active); 666 796 667 797 #ifdef CONFIG_DEBUG_FS 668 798 669 799 static int __init swiotlb_create_debugfs(void) 670 800 { 671 - struct dentry *root; 801 + struct io_tlb_mem *mem = io_tlb_default_mem; 672 802 673 - root = debugfs_create_dir("swiotlb", NULL); 674 - debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs); 675 - debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used); 803 + if (!mem) 804 + return 0; 805 + mem->debugfs = debugfs_create_dir("swiotlb", NULL); 806 + debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); 807 + debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used); 676 808 return 0; 677 809 } 678 810