page_pool.h at v5.13
/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.h
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

/**
 * DOC: page_pool allocator
 *
 * This page_pool allocator is optimized for the XDP mode that
 * uses one frame per page, but has fallbacks that act like the
 * regular page allocator APIs.
 *
 * Basic use involves replacing alloc_pages() calls with the
 * page_pool_alloc_pages() call.  Drivers should likely use
 * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
 *
 * The API keeps track of in-flight pages, in order to let API users
 * know when it is safe to deallocate the page_pool object.  Thus, API
 * users must make sure to call page_pool_release_page() when a page
 * is "leaving" the page_pool, or call page_pool_put_page() where
 * appropriate, in order to maintain correct accounting.
 *
 * API users must only call page_pool_put_page() once per page, as it
 * will either recycle the page, or in case of an elevated refcnt,
 * release the DMA mapping and in-flight state accounting.  We hope
 * to lift this requirement in the future.
 */
#ifndef _NET_PAGE_POOL_H
#define _NET_PAGE_POOL_H

#include <linux/mm.h> /* Needed by ptr_ring */
#include <linux/ptr_ring.h>
#include <linux/dma-direction.h>

#define PP_FLAG_DMA_MAP		BIT(0)	/* Should page_pool do the DMA
					 * map/unmap
					 */
#define PP_FLAG_DMA_SYNC_DEV	BIT(1)	/* If set, all pages that the driver
					 * gets from page_pool will be
					 * DMA-synced-for-device according to
					 * the length provided by the device
					 * driver.
					 * Please note DMA-sync-for-CPU is still
					 * device driver responsibility
					 */
#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network
 * use-case.  The NAPI budget is 64 packets.  After a NAPI poll the RX
 * ring is usually refilled and the max consumed elements will be 64,
 * thus a natural max size of objects needed in the cache.
 *
 * Keeping room for more objects is due to the XDP_DROP use-case, as
 * XDP_DROP allows the opportunity to recycle objects directly into
 * this array, since it shares the same softirq/NAPI protection.  If
 * the cache is already full (or partly full) then the XDP_DROP
 * recycles would have to take a slower code path.
 */
#define PP_ALLOC_CACHE_SIZE	128
#define PP_ALLOC_CACHE_REFILL	64
struct pp_alloc_cache {
	u32 count;
	struct page *cache[PP_ALLOC_CACHE_SIZE];
};

struct page_pool_params {
	unsigned int	flags;
	unsigned int	order;
	unsigned int	pool_size;
	int		nid;  /* NUMA node id to allocate pages from */
	struct device	*dev; /* device, for DMA pre-mapping purposes */
	enum dma_data_direction dma_dir; /* DMA mapping direction */
	unsigned int	max_len; /* max DMA sync memory size */
	unsigned int	offset;  /* DMA addr offset */
};
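
/* Illustrative sketch (not part of the original header): a NIC driver
 * would typically fill in a struct page_pool_params and create one
 * page_pool per RX-queue.  The "rxq"/"dev" names and the pool size
 * below are made-up example values, not requirements.
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		.order		= 0,
 *		.pool_size	= 1024,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= dev->dev.parent,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.max_len	= PAGE_SIZE,
 *		.offset		= 0,
 *	};
 *	struct page_pool *pool;
 *
 *	pool = page_pool_create(&pp_params);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *	rxq->page_pool = pool;
 */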

struct page_pool {
	struct page_pool_params p;

	struct delayed_work release_dw;
	void (*disconnect)(void *);
	unsigned long defer_start;
	unsigned long defer_warn;

	u32 pages_state_hold_cnt;

	/*
	 * Data structure for allocation side
	 *
	 * The driver's allocation side usually already performs some
	 * kind of resource protection.  Piggyback on this protection,
	 * and require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per
	 * RX-queue, as the RX-queue is already protected by
	 * softirq/BH scheduling and napi_schedule.  The NAPI schedule
	 * guarantees that a single napi_struct will only be scheduled
	 * on a single CPU (see napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization-wise,
	 * because frees can happen on remote CPUs, with no association
	 * with the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer
	 * efficiently, in a way that doesn't bounce cache-lines.
	 *
	 * TODO: Implement bulk return of pages into this structure.
	 */
	struct ptr_ring ring;

	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue being
	 * protected by NAPI, due to the above pp_alloc_cache.  The
	 * purpose of this refcnt is to simplify drivers' error
	 * handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);

static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
{
	gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);

	return page_pool_alloc_pages(pool, gfp);
}

/* Get the stored DMA direction.  A driver might decide to treat this
 * locally and avoid the extra cache line from page_pool to determine
 * the direction.
 */
static
inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
{
	return pool->p.dma_dir;
}

struct page_pool *page_pool_create(const struct page_pool_params *params);

#ifdef CONFIG_PAGE_POOL
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
void page_pool_release_page(struct page_pool *pool, struct page *page);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *))
{
}

static inline void page_pool_release_page(struct page_pool *pool,
					  struct page *page)
{
}

static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
					   int count)
{
}
#endif

void page_pool_put_page(struct page_pool *pool, struct page *page,
			unsigned int dma_sync_size, bool allow_direct);

/* Same as above but will try to sync the entire area pool->max_len */
static inline void page_pool_put_full_page(struct page_pool *pool,
					   struct page *page, bool allow_direct)
{
	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
	 */
#ifdef CONFIG_PAGE_POOL
	page_pool_put_page(pool, page, -1, allow_direct);
#endif
}

/* Same as above but the caller must guarantee safe context, e.g. NAPI */
static inline void page_pool_recycle_direct(struct page_pool *pool,
					    struct page *page)
{
	page_pool_put_full_page(pool, page, true);
}
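
/* Illustrative sketch (not part of the original header): typical use
 * from a driver's RX/NAPI path.  The "rxq", "desc" and "rx_offset"
 * names are hypothetical.
 *
 *	Refill an RX descriptor with a (pre-DMA-mapped) page:
 *
 *	page = page_pool_dev_alloc_pages(rxq->page_pool);
 *	if (!page)
 *		return -ENOMEM;
 *	desc->addr = page_pool_get_dma_addr(page) + rxq->rx_offset;
 *
 *	Early free, e.g. XDP_DROP, while still running under NAPI:
 *
 *	page_pool_recycle_direct(rxq->page_pool, page);
 *
 *	If the page instead leaves the pool (e.g. attached to an skb and
 *	passed up the stack), release it from the in-flight accounting:
 *
 *	page_pool_release_page(rxq->page_pool, page);
 */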

/* The DMA address is stored in page->dma_addr[].  When dma_addr_t is
 * wider than unsigned long (64-bit DMA on a 32-bit arch), the upper
 * 32 bits live in dma_addr[1].  The shift is written as "<< 16 << 16"
 * so the (dead) branch does not trigger a shift-count warning when
 * dma_addr_t is only 32 bits wide.
 */
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
	dma_addr_t ret = page->dma_addr[0];

	if (sizeof(dma_addr_t) > sizeof(unsigned long))
		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
	return ret;
}

static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
	page->dma_addr[0] = addr;
	if (sizeof(dma_addr_t) > sizeof(unsigned long))
		page->dma_addr[1] = upper_32_bits(addr);
}

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Drop a user reference on the pool; returns true when this was the
 * last reference.
 */
static inline bool page_pool_put(struct page_pool *pool)
{
	return refcount_dec_and_test(&pool->user_cnt);
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);
static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
{
	if (unlikely(pool->p.nid != new_nid))
		page_pool_update_nid(pool, new_nid);
}

/* Lock the ptr_ring producer side; use the BH-disabling variant unless
 * we are already running in softirq context.
 */
static inline void page_pool_ring_lock(struct page_pool *pool)
	__acquires(&pool->ring.producer_lock)
{
	if (in_serving_softirq())
		spin_lock(&pool->ring.producer_lock);
	else
		spin_lock_bh(&pool->ring.producer_lock);
}

static inline void page_pool_ring_unlock(struct page_pool *pool)
	__releases(&pool->ring.producer_lock)
{
	if (in_serving_softirq())
		spin_unlock(&pool->ring.producer_lock);
	else
		spin_unlock_bh(&pool->ring.producer_lock);
}

#endif /* _NET_PAGE_POOL_H */
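
/* Illustrative sketch (not part of the original header): NUMA hinting
 * and teardown.  The "rxq" context is hypothetical.
 *
 *	From the NAPI poll loop, keep future allocations on the local node:
 *
 *	page_pool_nid_changed(rxq->page_pool, numa_mem_id());
 *
 *	On queue teardown, destroy the pool; pages still in flight are
 *	tracked and the pool is only freed once they have all been
 *	returned via page_pool_put_page()/page_pool_release_page():
 *
 *	page_pool_destroy(rxq->page_pool);
 *	rxq->page_pool = NULL;
 */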