Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

swiotlb: if swiotlb is full, fall back to a transient memory pool

Try to allocate a transient memory pool if no suitable slots can be found
and the respective SWIOTLB is allowed to grow. The transient pool is just
big enough for this one bounce buffer. It is inserted into a per-device
list of transient memory pools, and it is freed again when the bounce
buffer is unmapped.

Transient memory pools are kept in an RCU list. A memory barrier is
required after adding a new entry, because any address within a transient
buffer must be immediately recognized as belonging to the SWIOTLB, even if
it is passed to another CPU.

Deletion does not require any synchronization beyond RCU ordering
guarantees. After a buffer is unmapped, its physical addresses may no
longer be passed to the DMA API, so the memory range of the corresponding
stale entry in the RCU list never matches. If the memory range gets
allocated again, then it happens only after an RCU quiescent state.

Since bounce buffers can now be allocated from different pools, add a
parameter to swiotlb_find_slots() to let the caller know which memory pool
is used. Add swiotlb_find_pool() to find the memory pool corresponding to
an address. This function is now also used by is_swiotlb_buffer(), because
a simple boundary check is no longer sufficient.

The logic in swiotlb_alloc_tlb() is taken from __dma_direct_alloc_pages(),
simplified and enhanced to use coherent memory pools if needed.

Note that this is not the most efficient way to provide a bounce buffer,
but when a DMA buffer can't be mapped, something may (and will) actually
break. At that point it is better to make an allocation, even if it may be
an expensive operation.

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>

authored by

Petr Tesarik and committed by
Christoph Hellwig
79636caa 62708b2b

+345 -10
+6
include/linux/device.h
··· 626 626 * @dma_mem: Internal for coherent mem override. 627 627 * @cma_area: Contiguous memory area for dma allocations 628 628 * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. 629 + * @dma_io_tlb_pools: List of transient swiotlb memory pools. 630 + * @dma_io_tlb_lock: Protects changes to the list of active pools. 629 631 * @archdata: For arch-specific additions. 630 632 * @of_node: Associated device tree node. 631 633 * @fwnode: Associated device node supplied by platform firmware. ··· 733 731 #endif 734 732 #ifdef CONFIG_SWIOTLB 735 733 struct io_tlb_mem *dma_io_tlb_mem; 734 + #endif 735 + #ifdef CONFIG_SWIOTLB_DYNAMIC 736 + struct list_head dma_io_tlb_pools; 737 + spinlock_t dma_io_tlb_lock; 736 738 #endif 737 739 /* arch specific additions */ 738 740 struct dev_archdata archdata;
+2
include/linux/dma-mapping.h
··· 418 418 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) 419 419 #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) 420 420 421 + bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); 422 + 421 423 static inline void *dma_alloc_coherent(struct device *dev, size_t size, 422 424 dma_addr_t *dma_handle, gfp_t gfp) 423 425 {
+28 -1
include/linux/swiotlb.h
··· 80 80 * @area_nslabs: Number of slots in each area. 81 81 * @areas: Array of memory area descriptors. 82 82 * @slots: Array of slot descriptors. 83 + * @node: Member of the IO TLB memory pool list. 84 + * @rcu: RCU head for swiotlb_dyn_free(). 85 + * @transient: %true if transient memory pool. 83 86 */ 84 87 struct io_tlb_pool { 85 88 phys_addr_t start; ··· 94 91 unsigned int area_nslabs; 95 92 struct io_tlb_area *areas; 96 93 struct io_tlb_slot *slots; 94 + #ifdef CONFIG_SWIOTLB_DYNAMIC 95 + struct list_head node; 96 + struct rcu_head rcu; 97 + bool transient; 98 + #endif 97 99 }; 98 100 99 101 /** ··· 130 122 #endif 131 123 }; 132 124 125 + #ifdef CONFIG_SWIOTLB_DYNAMIC 126 + 127 + struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); 128 + 129 + #else 130 + 131 + static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, 132 + phys_addr_t paddr) 133 + { 134 + return &dev->dma_io_tlb_mem->defpool; 135 + } 136 + 137 + #endif 138 + 133 139 /** 134 140 * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb 135 141 * @dev: Device which has mapped the buffer. ··· 159 137 { 160 138 struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 161 139 162 - return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end; 140 + if (!mem) 141 + return false; 142 + 143 + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) 144 + return swiotlb_find_pool(dev, paddr); 145 + return paddr >= mem->defpool.start && paddr < mem->defpool.end; 163 146 } 164 147 165 148 static inline bool is_swiotlb_force_bounce(struct device *dev)
+1 -1
kernel/dma/direct.c
··· 66 66 return 0; 67 67 } 68 68 69 - static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) 69 + bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) 70 70 { 71 71 dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); 72 72
+308 -8
kernel/dma/swiotlb.c
··· 35 35 #include <linux/memblock.h> 36 36 #include <linux/mm.h> 37 37 #include <linux/pfn.h> 38 + #include <linux/rculist.h> 38 39 #include <linux/scatterlist.h> 39 40 #include <linux/set_memory.h> 40 41 #include <linux/spinlock.h> ··· 511 510 memset(mem, 0, sizeof(*mem)); 512 511 } 513 512 513 + #ifdef CONFIG_SWIOTLB_DYNAMIC 514 + 515 + /** 516 + * alloc_dma_pages() - allocate pages to be used for DMA 517 + * @gfp: GFP flags for the allocation. 518 + * @bytes: Size of the buffer. 519 + * 520 + * Allocate pages from the buddy allocator. If successful, make the allocated 521 + * pages decrypted that they can be used for DMA. 522 + * 523 + * Return: Decrypted pages, or %NULL on failure. 524 + */ 525 + static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) 526 + { 527 + unsigned int order = get_order(bytes); 528 + struct page *page; 529 + void *vaddr; 530 + 531 + page = alloc_pages(gfp, order); 532 + if (!page) 533 + return NULL; 534 + 535 + vaddr = page_address(page); 536 + if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) 537 + goto error; 538 + return page; 539 + 540 + error: 541 + __free_pages(page, order); 542 + return NULL; 543 + } 544 + 545 + /** 546 + * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer 547 + * @dev: Device for which a memory pool is allocated. 548 + * @bytes: Size of the buffer. 549 + * @phys_limit: Maximum allowed physical address of the buffer. 550 + * @gfp: GFP flags for the allocation. 551 + * 552 + * Return: Allocated pages, or %NULL on allocation failure. 553 + */ 554 + static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, 555 + u64 phys_limit, gfp_t gfp) 556 + { 557 + struct page *page; 558 + 559 + /* 560 + * Allocate from the atomic pools if memory is encrypted and 561 + * the allocation is atomic, because decrypting may block. 
562 + */ 563 + if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { 564 + void *vaddr; 565 + 566 + if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) 567 + return NULL; 568 + 569 + return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, 570 + dma_coherent_ok); 571 + } 572 + 573 + gfp &= ~GFP_ZONEMASK; 574 + if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) 575 + gfp |= __GFP_DMA; 576 + else if (phys_limit <= DMA_BIT_MASK(32)) 577 + gfp |= __GFP_DMA32; 578 + 579 + while ((page = alloc_dma_pages(gfp, bytes)) && 580 + page_to_phys(page) + bytes - 1 > phys_limit) { 581 + /* allocated, but too high */ 582 + __free_pages(page, get_order(bytes)); 583 + 584 + if (IS_ENABLED(CONFIG_ZONE_DMA32) && 585 + phys_limit < DMA_BIT_MASK(64) && 586 + !(gfp & (__GFP_DMA32 | __GFP_DMA))) 587 + gfp |= __GFP_DMA32; 588 + else if (IS_ENABLED(CONFIG_ZONE_DMA) && 589 + !(gfp & __GFP_DMA)) 590 + gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; 591 + else 592 + return NULL; 593 + } 594 + 595 + return page; 596 + } 597 + 598 + /** 599 + * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer 600 + * @vaddr: Virtual address of the buffer. 601 + * @bytes: Size of the buffer. 602 + */ 603 + static void swiotlb_free_tlb(void *vaddr, size_t bytes) 604 + { 605 + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && 606 + dma_free_from_pool(NULL, vaddr, bytes)) 607 + return; 608 + 609 + /* Intentional leak if pages cannot be encrypted again. */ 610 + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) 611 + __free_pages(virt_to_page(vaddr), get_order(bytes)); 612 + } 613 + 614 + /** 615 + * swiotlb_alloc_pool() - allocate a new IO TLB memory pool 616 + * @dev: Device for which a memory pool is allocated. 617 + * @nslabs: Desired number of slabs. 618 + * @phys_limit: Maximum DMA buffer physical address. 619 + * @gfp: GFP flags for the allocations. 620 + * 621 + * Allocate and initialize a new IO TLB memory pool. 622 + * 623 + * Return: New memory pool, or %NULL on allocation failure. 
624 + */ 625 + static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, 626 + unsigned int nslabs, u64 phys_limit, gfp_t gfp) 627 + { 628 + struct io_tlb_pool *pool; 629 + struct page *tlb; 630 + size_t pool_size; 631 + size_t tlb_size; 632 + 633 + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), 1) + 634 + array_size(sizeof(*pool->slots), nslabs); 635 + pool = kzalloc(pool_size, gfp); 636 + if (!pool) 637 + goto error; 638 + pool->areas = (void *)pool + sizeof(*pool); 639 + pool->slots = (void *)pool->areas + sizeof(*pool->areas); 640 + 641 + tlb_size = nslabs << IO_TLB_SHIFT; 642 + tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp); 643 + if (!tlb) 644 + goto error_tlb; 645 + 646 + swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, 1); 647 + return pool; 648 + 649 + error_tlb: 650 + kfree(pool); 651 + error: 652 + return NULL; 653 + } 654 + 655 + /** 656 + * swiotlb_dyn_free() - RCU callback to free a memory pool 657 + * @rcu: RCU head in the corresponding struct io_tlb_pool. 658 + */ 659 + static void swiotlb_dyn_free(struct rcu_head *rcu) 660 + { 661 + struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); 662 + size_t tlb_size = pool->end - pool->start; 663 + 664 + swiotlb_free_tlb(pool->vaddr, tlb_size); 665 + kfree(pool); 666 + } 667 + 668 + /** 669 + * swiotlb_find_pool() - find the IO TLB pool for a physical address 670 + * @dev: Device which has mapped the DMA buffer. 671 + * @paddr: Physical address within the DMA buffer. 672 + * 673 + * Find the IO TLB memory pool descriptor which contains the given physical 674 + * address, if any. 675 + * 676 + * Return: Memory pool which contains @paddr, or %NULL if none. 
677 + */ 678 + struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) 679 + { 680 + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 681 + struct io_tlb_pool *pool = &mem->defpool; 682 + 683 + if (paddr >= pool->start && paddr < pool->end) 684 + return pool; 685 + 686 + /* Pairs with smp_wmb() in swiotlb_find_slots(). */ 687 + smp_rmb(); 688 + 689 + rcu_read_lock(); 690 + list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { 691 + if (paddr >= pool->start && paddr < pool->end) 692 + goto out; 693 + } 694 + pool = NULL; 695 + out: 696 + rcu_read_unlock(); 697 + return pool; 698 + } 699 + 700 + /** 701 + * swiotlb_del_pool() - remove an IO TLB pool from a device 702 + * @dev: Owning device. 703 + * @pool: Memory pool to be removed. 704 + */ 705 + static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) 706 + { 707 + unsigned long flags; 708 + 709 + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); 710 + list_del_rcu(&pool->node); 711 + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); 712 + 713 + call_rcu(&pool->rcu, swiotlb_dyn_free); 714 + } 715 + 716 + #endif /* CONFIG_SWIOTLB_DYNAMIC */ 717 + 514 718 /** 515 719 * swiotlb_dev_init() - initialize swiotlb fields in &struct device 516 720 * @dev: Device to be initialized. 
··· 723 517 void swiotlb_dev_init(struct device *dev) 724 518 { 725 519 dev->dma_io_tlb_mem = &io_tlb_default_mem; 520 + #ifdef CONFIG_SWIOTLB_DYNAMIC 521 + INIT_LIST_HEAD(&dev->dma_io_tlb_pools); 522 + spin_lock_init(&dev->dma_io_tlb_lock); 523 + #endif 726 524 } 727 525 728 526 /* ··· 743 533 static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, 744 534 enum dma_data_direction dir) 745 535 { 746 - struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; 536 + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); 747 537 int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; 748 538 phys_addr_t orig_addr = mem->slots[index].orig_addr; 749 539 size_t alloc_size = mem->slots[index].alloc_size; ··· 1009 799 return -1; 1010 800 } 1011 801 802 + #ifdef CONFIG_SWIOTLB_DYNAMIC 803 + 1012 804 /** 1013 805 * swiotlb_find_slots() - search for slots in the whole swiotlb 1014 806 * @dev: Device which maps the buffer. ··· 1018 806 * @alloc_size: Total requested size of the bounce buffer, 1019 807 * including initial alignment padding. 1020 808 * @alloc_align_mask: Required alignment of the allocated buffer. 809 + * @retpool: Used memory pool, updated on return. 1021 810 * 1022 811 * Search through the whole software IO TLB to find a sequence of slots that 1023 812 * match the allocation constraints. ··· 1026 813 * Return: Index of the first allocated slot, or -1 on error. 
1027 814 */ 1028 815 static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, 1029 - size_t alloc_size, unsigned int alloc_align_mask) 816 + size_t alloc_size, unsigned int alloc_align_mask, 817 + struct io_tlb_pool **retpool) 1030 818 { 1031 - return swiotlb_pool_find_slots(dev, &dev->dma_io_tlb_mem->defpool, 819 + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 820 + struct io_tlb_pool *pool; 821 + unsigned long nslabs; 822 + unsigned long flags; 823 + u64 phys_limit; 824 + int index; 825 + 826 + pool = &mem->defpool; 827 + index = swiotlb_pool_find_slots(dev, pool, orig_addr, 828 + alloc_size, alloc_align_mask); 829 + if (index >= 0) 830 + goto found; 831 + 832 + if (!mem->can_grow) 833 + return -1; 834 + 835 + nslabs = nr_slots(alloc_size); 836 + phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); 837 + pool = swiotlb_alloc_pool(dev, nslabs, phys_limit, 838 + GFP_NOWAIT | __GFP_NOWARN); 839 + if (!pool) 840 + return -1; 841 + 842 + index = swiotlb_pool_find_slots(dev, pool, orig_addr, 843 + alloc_size, alloc_align_mask); 844 + if (index < 0) { 845 + swiotlb_dyn_free(&pool->rcu); 846 + return -1; 847 + } 848 + 849 + pool->transient = true; 850 + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); 851 + list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); 852 + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); 853 + 854 + /* Pairs with smp_rmb() in swiotlb_find_pool(). 
*/ 855 + smp_wmb(); 856 + found: 857 + *retpool = pool; 858 + return index; 859 + } 860 + 861 + #else /* !CONFIG_SWIOTLB_DYNAMIC */ 862 + 863 + static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, 864 + size_t alloc_size, unsigned int alloc_align_mask, 865 + struct io_tlb_pool **retpool) 866 + { 867 + *retpool = &dev->dma_io_tlb_mem->defpool; 868 + return swiotlb_pool_find_slots(dev, *retpool, 1032 869 orig_addr, alloc_size, alloc_align_mask); 1033 870 } 871 + 872 + #endif /* CONFIG_SWIOTLB_DYNAMIC */ 1034 873 1035 874 #ifdef CONFIG_DEBUG_FS 1036 875 ··· 1164 899 } 1165 900 1166 901 index = swiotlb_find_slots(dev, orig_addr, 1167 - alloc_size + offset, alloc_align_mask); 902 + alloc_size + offset, alloc_align_mask, &pool); 1168 903 if (index == -1) { 1169 904 if (!(attrs & DMA_ATTR_NO_WARN)) 1170 905 dev_warn_ratelimited(dev, ··· 1178 913 * This is needed when we sync the memory. Then we sync the buffer if 1179 914 * needed. 1180 915 */ 1181 - pool = &mem->defpool; 1182 916 for (i = 0; i < nr_slots(alloc_size + offset); i++) 1183 917 pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); 1184 918 tlb_addr = slot_addr(pool->start, index) + offset; ··· 1194 930 1195 931 static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) 1196 932 { 1197 - struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool; 933 + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); 1198 934 unsigned long flags; 1199 935 unsigned int offset = swiotlb_align_offset(dev, tlb_addr); 1200 936 int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; ··· 1241 977 dec_used(dev->dma_io_tlb_mem, nslots); 1242 978 } 1243 979 980 + #ifdef CONFIG_SWIOTLB_DYNAMIC 981 + 982 + /** 983 + * swiotlb_del_transient() - delete a transient memory pool 984 + * @dev: Device which mapped the buffer. 985 + * @tlb_addr: Physical address within a bounce buffer. 986 + * 987 + * Check whether the address belongs to a transient SWIOTLB memory pool. 
988 + * If yes, then delete the pool. 989 + * 990 + * Return: %true if @tlb_addr belonged to a transient pool that was released. 991 + */ 992 + static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) 993 + { 994 + struct io_tlb_pool *pool; 995 + 996 + pool = swiotlb_find_pool(dev, tlb_addr); 997 + if (!pool->transient) 998 + return false; 999 + 1000 + dec_used(dev->dma_io_tlb_mem, pool->nslabs); 1001 + swiotlb_del_pool(dev, pool); 1002 + return true; 1003 + } 1004 + 1005 + #else /* !CONFIG_SWIOTLB_DYNAMIC */ 1006 + 1007 + static inline bool swiotlb_del_transient(struct device *dev, 1008 + phys_addr_t tlb_addr) 1009 + { 1010 + return false; 1011 + } 1012 + 1013 + #endif /* CONFIG_SWIOTLB_DYNAMIC */ 1014 + 1244 1015 /* 1245 1016 * tlb_addr is the physical address of the bounce buffer to unmap. 1246 1017 */ ··· 1290 991 (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) 1291 992 swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); 1292 993 994 + if (swiotlb_del_transient(dev, tlb_addr)) 995 + return; 1293 996 swiotlb_release_slots(dev, tlb_addr); 1294 997 } 1295 998 ··· 1480 1179 if (!mem) 1481 1180 return NULL; 1482 1181 1483 - index = swiotlb_find_slots(dev, 0, size, 0); 1182 + index = swiotlb_find_slots(dev, 0, size, 0, &pool); 1484 1183 if (index == -1) 1485 1184 return NULL; 1486 1185 1487 - pool = &mem->defpool; 1488 1186 tlb_addr = slot_addr(pool->start, index); 1489 1187 1490 1188 return pfn_to_page(PFN_DOWN(tlb_addr));