Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.20-rc3 317 lines 8.6 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 2 * 3 * page_pool.c 4 * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> 5 * Copyright (C) 2016 Red Hat, Inc. 6 */ 7#include <linux/types.h> 8#include <linux/kernel.h> 9#include <linux/slab.h> 10 11#include <net/page_pool.h> 12#include <linux/dma-direction.h> 13#include <linux/dma-mapping.h> 14#include <linux/page-flags.h> 15#include <linux/mm.h> /* for __put_page() */ 16 17static int page_pool_init(struct page_pool *pool, 18 const struct page_pool_params *params) 19{ 20 unsigned int ring_qsize = 1024; /* Default */ 21 22 memcpy(&pool->p, params, sizeof(pool->p)); 23 24 /* Validate only known flags were used */ 25 if (pool->p.flags & ~(PP_FLAG_ALL)) 26 return -EINVAL; 27 28 if (pool->p.pool_size) 29 ring_qsize = pool->p.pool_size; 30 31 /* Sanity limit mem that can be pinned down */ 32 if (ring_qsize > 32768) 33 return -E2BIG; 34 35 /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. 36 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, 37 * which is the XDP_TX use-case. 38 */ 39 if ((pool->p.dma_dir != DMA_FROM_DEVICE) && 40 (pool->p.dma_dir != DMA_BIDIRECTIONAL)) 41 return -EINVAL; 42 43 if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) 44 return -ENOMEM; 45 46 return 0; 47} 48 49struct page_pool *page_pool_create(const struct page_pool_params *params) 50{ 51 struct page_pool *pool; 52 int err = 0; 53 54 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); 55 if (!pool) 56 return ERR_PTR(-ENOMEM); 57 58 err = page_pool_init(pool, params); 59 if (err < 0) { 60 pr_warn("%s() gave up with errno %d\n", __func__, err); 61 kfree(pool); 62 return ERR_PTR(err); 63 } 64 return pool; 65} 66EXPORT_SYMBOL(page_pool_create); 67 68/* fast path */ 69static struct page *__page_pool_get_cached(struct page_pool *pool) 70{ 71 struct ptr_ring *r = &pool->ring; 72 struct page *page; 73 74 /* Quicker fallback, avoid locks when ring is empty */ 75 if (__ptr_ring_empty(r)) 76 return NULL; 77 78 /* Test for safe-context, caller should provide this guarantee */ 79 if (likely(in_serving_softirq())) { 80 if (likely(pool->alloc.count)) { 81 /* Fast-path */ 82 page = pool->alloc.cache[--pool->alloc.count]; 83 return page; 84 } 85 /* Slower-path: Alloc array empty, time to refill 86 * 87 * Open-coded bulk ptr_ring consumer. 88 * 89 * Discussion: the ring consumer lock is not really 90 * needed due to the softirq/NAPI protection, but 91 * later need the ability to reclaim pages on the 92 * ring. Thus, keeping the locks. 93 */ 94 spin_lock(&r->consumer_lock); 95 while ((page = __ptr_ring_consume(r))) { 96 if (pool->alloc.count == PP_ALLOC_CACHE_REFILL) 97 break; 98 pool->alloc.cache[pool->alloc.count++] = page; 99 } 100 spin_unlock(&r->consumer_lock); 101 return page; 102 } 103 104 /* Slow-path: Get page from locked ring queue */ 105 page = ptr_ring_consume(&pool->ring); 106 return page; 107} 108 109/* slow path */ 110noinline 111static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, 112 gfp_t _gfp) 113{ 114 struct page *page; 115 gfp_t gfp = _gfp; 116 dma_addr_t dma; 117 118 /* We could always set __GFP_COMP, and avoid this branch, as 119 * prep_new_page() can handle order-0 with __GFP_COMP. 120 */ 121 if (pool->p.order) 122 gfp |= __GFP_COMP; 123 124 /* FUTURE development: 125 * 126 * Current slow-path essentially falls back to single page 127 * allocations, which doesn't improve performance. This code 128 * need bulk allocation support from the page allocator code. 129 */ 130 131 /* Cache was empty, do real allocation */ 132 page = alloc_pages_node(pool->p.nid, gfp, pool->p.order); 133 if (!page) 134 return NULL; 135 136 if (!(pool->p.flags & PP_FLAG_DMA_MAP)) 137 goto skip_dma_map; 138 139 /* Setup DMA mapping: use page->private for DMA-addr 140 * This mapping is kept for lifetime of page, until leaving pool. 141 */ 142 dma = dma_map_page(pool->p.dev, page, 0, 143 (PAGE_SIZE << pool->p.order), 144 pool->p.dma_dir); 145 if (dma_mapping_error(pool->p.dev, dma)) { 146 put_page(page); 147 return NULL; 148 } 149 set_page_private(page, dma); /* page->private = dma; */ 150 151skip_dma_map: 152 /* When page just alloc'ed is should/must have refcnt 1. */ 153 return page; 154} 155 156/* For using page_pool replace: alloc_pages() API calls, but provide 157 * synchronization guarantee for allocation side. 158 */ 159struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) 160{ 161 struct page *page; 162 163 /* Fast-path: Get a page from cache */ 164 page = __page_pool_get_cached(pool); 165 if (page) 166 return page; 167 168 /* Slow-path: cache empty, do real allocation */ 169 page = __page_pool_alloc_pages_slow(pool, gfp); 170 return page; 171} 172EXPORT_SYMBOL(page_pool_alloc_pages); 173 174/* Cleanup page_pool state from page */ 175static void __page_pool_clean_page(struct page_pool *pool, 176 struct page *page) 177{ 178 if (!(pool->p.flags & PP_FLAG_DMA_MAP)) 179 return; 180 181 /* DMA unmap */ 182 dma_unmap_page(pool->p.dev, page_private(page), 183 PAGE_SIZE << pool->p.order, pool->p.dma_dir); 184 set_page_private(page, 0); 185} 186 187/* Return a page to the page allocator, cleaning up our state */ 188static void __page_pool_return_page(struct page_pool *pool, struct page *page) 189{ 190 __page_pool_clean_page(pool, page); 191 put_page(page); 192 /* An optimization would be to call __free_pages(page, pool->p.order) 193 * knowing page is not part of page-cache (thus avoiding a 194 * __page_cache_release() call). 195 */ 196} 197 198static bool __page_pool_recycle_into_ring(struct page_pool *pool, 199 struct page *page) 200{ 201 int ret; 202 /* BH protection not needed if current is serving softirq */ 203 if (in_serving_softirq()) 204 ret = ptr_ring_produce(&pool->ring, page); 205 else 206 ret = ptr_ring_produce_bh(&pool->ring, page); 207 208 return (ret == 0) ? true : false; 209} 210 211/* Only allow direct recycling in special circumstances, into the 212 * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case. 213 * 214 * Caller must provide appropriate safe context. 215 */ 216static bool __page_pool_recycle_direct(struct page *page, 217 struct page_pool *pool) 218{ 219 if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) 220 return false; 221 222 /* Caller MUST have verified/know (page_ref_count(page) == 1) */ 223 pool->alloc.cache[pool->alloc.count++] = page; 224 return true; 225} 226 227void __page_pool_put_page(struct page_pool *pool, 228 struct page *page, bool allow_direct) 229{ 230 /* This allocator is optimized for the XDP mode that uses 231 * one-frame-per-page, but have fallbacks that act like the 232 * regular page allocator APIs. 233 * 234 * refcnt == 1 means page_pool owns page, and can recycle it. 235 */ 236 if (likely(page_ref_count(page) == 1)) { 237 /* Read barrier done in page_ref_count / READ_ONCE */ 238 239 if (allow_direct && in_serving_softirq()) 240 if (__page_pool_recycle_direct(page, pool)) 241 return; 242 243 if (!__page_pool_recycle_into_ring(pool, page)) { 244 /* Cache full, fallback to free pages */ 245 __page_pool_return_page(pool, page); 246 } 247 return; 248 } 249 /* Fallback/non-XDP mode: API user have elevated refcnt. 250 * 251 * Many drivers split up the page into fragments, and some 252 * want to keep doing this to save memory and do refcnt based 253 * recycling. Support this use case too, to ease drivers 254 * switching between XDP/non-XDP. 255 * 256 * In-case page_pool maintains the DMA mapping, API user must 257 * call page_pool_put_page once. In this elevated refcnt 258 * case, the DMA is unmapped/released, as driver is likely 259 * doing refcnt based recycle tricks, meaning another process 260 * will be invoking put_page. 261 */ 262 __page_pool_clean_page(pool, page); 263 put_page(page); 264} 265EXPORT_SYMBOL(__page_pool_put_page); 266 267static void __page_pool_empty_ring(struct page_pool *pool) 268{ 269 struct page *page; 270 271 /* Empty recycle ring */ 272 while ((page = ptr_ring_consume_bh(&pool->ring))) { 273 /* Verify the refcnt invariant of cached pages */ 274 if (!(page_ref_count(page) == 1)) 275 pr_crit("%s() page_pool refcnt %d violation\n", 276 __func__, page_ref_count(page)); 277 278 __page_pool_return_page(pool, page); 279 } 280} 281 282static void __page_pool_destroy_rcu(struct rcu_head *rcu) 283{ 284 struct page_pool *pool; 285 286 pool = container_of(rcu, struct page_pool, rcu); 287 288 WARN(pool->alloc.count, "API usage violation"); 289 290 __page_pool_empty_ring(pool); 291 ptr_ring_cleanup(&pool->ring, NULL); 292 kfree(pool); 293} 294 295/* Cleanup and release resources */ 296void page_pool_destroy(struct page_pool *pool) 297{ 298 struct page *page; 299 300 /* Empty alloc cache, assume caller made sure this is 301 * no-longer in use, and page_pool_alloc_pages() cannot be 302 * call concurrently. 303 */ 304 while (pool->alloc.count) { 305 page = pool->alloc.cache[--pool->alloc.count]; 306 __page_pool_return_page(pool, page); 307 } 308 309 /* No more consumers should exist, but producers could still 310 * be in-flight. 311 */ 312 __page_pool_empty_ring(pool); 313 314 /* An xdp_mem_allocator can still ref page_pool pointer */ 315 call_rcu(&pool->rcu, __page_pool_destroy_rcu); 316} 317EXPORT_SYMBOL(page_pool_destroy);