Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dma-pool: add additional coherent pools to map to gfp mask

The single atomic pool is allocated from the lowest zone possible, since
memory from that zone is guaranteed to be usable for any DMA allocation.

Devices may allocate through the DMA API without strictly requiring
GFP_DMA memory. Since the atomic pool is used for all non-blockable
allocations, serving every such allocation from ZONE_DMA may
unnecessarily deplete the zone.

Provide multiple atomic pools (GFP_KERNEL, plus GFP_DMA and GFP_DMA32
where those zones are configured), so that each device can be served
from the pool that matches its optimal gfp mask.

When allocating non-blockable memory, determine the optimal gfp mask of
the device and use the appropriate atomic pool.

The pool is chosen from the device's coherent DMA mask. That mask will
remain the same between allocation and free and, thus, memory will be
freed to the same atomic pool it was allocated from.

__dma_atomic_pool_init() will be changed to return struct gen_pool *
later once dynamic expansion is added.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>

Authored by David Rientjes and committed by Christoph Hellwig
c84dc6e6 e860c299

+91 -54
+3 -2
drivers/iommu/dma-iommu.c
··· 952 952 953 953 /* Non-coherent atomic allocation? Easy */ 954 954 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 955 - dma_free_from_pool(cpu_addr, alloc_size)) 955 + dma_free_from_pool(dev, cpu_addr, alloc_size)) 956 956 return; 957 957 958 958 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { ··· 1035 1035 1036 1036 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 1037 1037 !gfpflags_allow_blocking(gfp) && !coherent) 1038 - cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp); 1038 + cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, 1039 + gfp); 1039 1040 else 1040 1041 cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); 1041 1042 if (!cpu_addr)
+2
include/linux/dma-direct.h
··· 67 67 } 68 68 69 69 u64 dma_direct_get_required_mask(struct device *dev); 70 + gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, 71 + u64 *phys_mask); 70 72 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 71 73 gfp_t gfp, unsigned long attrs); 72 74 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
+3 -3
include/linux/dma-mapping.h
··· 630 630 pgprot_t prot, const void *caller); 631 631 void dma_common_free_remap(void *cpu_addr, size_t size); 632 632 633 - bool dma_in_atomic_pool(void *start, size_t size); 634 - void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); 635 - bool dma_free_from_pool(void *start, size_t size); 633 + void *dma_alloc_from_pool(struct device *dev, size_t size, 634 + struct page **ret_page, gfp_t flags); 635 + bool dma_free_from_pool(struct device *dev, void *start, size_t size); 636 636 637 637 int 638 638 dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
+6 -6
kernel/dma/direct.c
··· 45 45 return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; 46 46 } 47 47 48 - static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, 49 - u64 *phys_limit) 48 + gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, 49 + u64 *phys_limit) 50 50 { 51 51 u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); 52 52 ··· 89 89 90 90 /* we always manually zero the memory once we are done: */ 91 91 gfp &= ~__GFP_ZERO; 92 - gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, 93 - &phys_limit); 92 + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, 93 + &phys_limit); 94 94 page = dma_alloc_contiguous(dev, alloc_size, gfp); 95 95 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { 96 96 dma_free_contiguous(dev, page, alloc_size); ··· 128 128 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 129 129 dma_alloc_need_uncached(dev, attrs) && 130 130 !gfpflags_allow_blocking(gfp)) { 131 - ret = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp); 131 + ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp); 132 132 if (!ret) 133 133 return NULL; 134 134 goto done; ··· 212 212 } 213 213 214 214 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 215 - dma_free_from_pool(cpu_addr, PAGE_ALIGN(size))) 215 + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) 216 216 return; 217 217 218 218 if (force_dma_unencrypted(dev))
+77 -43
kernel/dma/pool.c
··· 10 10 #include <linux/genalloc.h> 11 11 #include <linux/slab.h> 12 12 13 - static struct gen_pool *atomic_pool __ro_after_init; 13 + static struct gen_pool *atomic_pool_dma __ro_after_init; 14 + static struct gen_pool *atomic_pool_dma32 __ro_after_init; 15 + static struct gen_pool *atomic_pool_kernel __ro_after_init; 14 16 15 17 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K 16 18 static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; ··· 24 22 } 25 23 early_param("coherent_pool", early_coherent_pool); 26 24 27 - static gfp_t dma_atomic_pool_gfp(void) 25 + static int __init __dma_atomic_pool_init(struct gen_pool **pool, 26 + size_t pool_size, gfp_t gfp) 28 27 { 29 - if (IS_ENABLED(CONFIG_ZONE_DMA)) 30 - return GFP_DMA; 31 - if (IS_ENABLED(CONFIG_ZONE_DMA32)) 32 - return GFP_DMA32; 33 - return GFP_KERNEL; 34 - } 35 - 36 - static int __init dma_atomic_pool_init(void) 37 - { 38 - unsigned int pool_size_order = get_order(atomic_pool_size); 39 - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; 28 + const unsigned int order = get_order(pool_size); 29 + const unsigned long nr_pages = pool_size >> PAGE_SHIFT; 40 30 struct page *page; 41 31 void *addr; 42 32 int ret; 43 33 44 34 if (dev_get_cma_area(NULL)) 45 - page = dma_alloc_from_contiguous(NULL, nr_pages, 46 - pool_size_order, false); 35 + page = dma_alloc_from_contiguous(NULL, nr_pages, order, false); 47 36 else 48 - page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order); 37 + page = alloc_pages(gfp, order); 49 38 if (!page) 50 39 goto out; 51 40 52 - arch_dma_prep_coherent(page, atomic_pool_size); 41 + arch_dma_prep_coherent(page, pool_size); 53 42 54 - atomic_pool = gen_pool_create(PAGE_SHIFT, -1); 55 - if (!atomic_pool) 43 + *pool = gen_pool_create(PAGE_SHIFT, -1); 44 + if (!*pool) 56 45 goto free_page; 57 46 58 - addr = dma_common_contiguous_remap(page, atomic_pool_size, 47 + addr = dma_common_contiguous_remap(page, pool_size, 59 48 pgprot_dmacoherent(PAGE_KERNEL), 60 49 
__builtin_return_address(0)); 61 50 if (!addr) 62 51 goto destroy_genpool; 63 52 64 - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, 65 - page_to_phys(page), atomic_pool_size, -1); 53 + ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page), 54 + pool_size, -1); 66 55 if (ret) 67 56 goto remove_mapping; 68 - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL); 57 + gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL); 69 58 70 - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", 71 - atomic_pool_size / 1024); 59 + pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n", 60 + pool_size >> 10, &gfp); 72 61 return 0; 73 62 74 63 remove_mapping: 75 - dma_common_free_remap(addr, atomic_pool_size); 64 + dma_common_free_remap(addr, pool_size); 76 65 destroy_genpool: 77 - gen_pool_destroy(atomic_pool); 78 - atomic_pool = NULL; 66 + gen_pool_destroy(*pool); 67 + *pool = NULL; 79 68 free_page: 80 69 if (!dma_release_from_contiguous(NULL, page, nr_pages)) 81 - __free_pages(page, pool_size_order); 70 + __free_pages(page, order); 82 71 out: 83 - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", 84 - atomic_pool_size / 1024); 72 + pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n", 73 + pool_size >> 10, &gfp); 85 74 return -ENOMEM; 75 + } 76 + 77 + static int __init dma_atomic_pool_init(void) 78 + { 79 + int ret = 0; 80 + int err; 81 + 82 + ret = __dma_atomic_pool_init(&atomic_pool_kernel, atomic_pool_size, 83 + GFP_KERNEL); 84 + if (IS_ENABLED(CONFIG_ZONE_DMA)) { 85 + err = __dma_atomic_pool_init(&atomic_pool_dma, 86 + atomic_pool_size, GFP_DMA); 87 + if (!ret && err) 88 + ret = err; 89 + } 90 + if (IS_ENABLED(CONFIG_ZONE_DMA32)) { 91 + err = __dma_atomic_pool_init(&atomic_pool_dma32, 92 + atomic_pool_size, GFP_DMA32); 93 + if (!ret && err) 94 + ret = err; 95 + } 96 + return ret; 86 97 } 87 98 postcore_initcall(dma_atomic_pool_init); 88 99 
89 - bool dma_in_atomic_pool(void *start, size_t size) 100 + static inline struct gen_pool *dev_to_pool(struct device *dev) 90 101 { 91 - if (unlikely(!atomic_pool)) 92 - return false; 102 + u64 phys_mask; 103 + gfp_t gfp; 93 104 94 - return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); 105 + gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, 106 + &phys_mask); 107 + if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA) 108 + return atomic_pool_dma; 109 + if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32) 110 + return atomic_pool_dma32; 111 + return atomic_pool_kernel; 95 112 } 96 113 97 - void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) 114 + static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size) 98 115 { 116 + struct gen_pool *pool = dev_to_pool(dev); 117 + 118 + if (unlikely(!pool)) 119 + return false; 120 + return gen_pool_has_addr(pool, (unsigned long)start, size); 121 + } 122 + 123 + void *dma_alloc_from_pool(struct device *dev, size_t size, 124 + struct page **ret_page, gfp_t flags) 125 + { 126 + struct gen_pool *pool = dev_to_pool(dev); 99 127 unsigned long val; 100 128 void *ptr = NULL; 101 129 102 - if (!atomic_pool) { 103 - WARN(1, "coherent pool not initialised!\n"); 130 + if (!pool) { 131 + WARN(1, "%pGg atomic pool not initialised!\n", &flags); 104 132 return NULL; 105 133 } 106 134 107 - val = gen_pool_alloc(atomic_pool, size); 135 + val = gen_pool_alloc(pool, size); 108 136 if (val) { 109 - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); 137 + phys_addr_t phys = gen_pool_virt_to_phys(pool, val); 110 138 111 139 *ret_page = pfn_to_page(__phys_to_pfn(phys)); 112 140 ptr = (void *)val; ··· 146 114 return ptr; 147 115 } 148 116 149 - bool dma_free_from_pool(void *start, size_t size) 117 + bool dma_free_from_pool(struct device *dev, void *start, size_t size) 150 118 { 151 - if (!dma_in_atomic_pool(start, size)) 119 + struct gen_pool *pool = dev_to_pool(dev); 120 + 121 
+ if (!dma_in_atomic_pool(dev, start, size)) 152 122 return false; 153 - gen_pool_free(atomic_pool, (unsigned long)start, size); 123 + gen_pool_free(pool, (unsigned long)start, size); 154 124 return true; 155 125 }