Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'page_pool-DMA-sync'

Lorenzo Bianconi says:

====================
add DMA-sync-for-device capability to page_pool API

Introduce the possibility to sync DMA memory for device in the page_pool API.
This feature allows syncing the proper DMA size rather than always the full buffer
(dma_sync_single_for_device can be very costly).
Please note DMA-sync-for-CPU is still device driver responsibility.
Relying on the page_pool DMA sync, the mvneta driver improves XDP_DROP pps by
about 170Kpps:

- XDP_DROP DMA sync managed by mvneta driver: ~420Kpps
- XDP_DROP DMA sync managed by page_pool API: ~585Kpps

Do not change naming convention for the moment since the changes will hit other
drivers as well. I will address it in another series.

Changes since v4:
- do not allow the driver to set max_len to 0
- convert PP_FLAG_DMA_MAP/PP_FLAG_DMA_SYNC_DEV to BIT() macro

Changes since v3:
- move dma_sync_for_device before putting the page in ptr_ring in
__page_pool_recycle_into_ring since ptr_ring can be consumed
concurrently. Simplify the code moving dma_sync_for_device
before running __page_pool_recycle_direct/__page_pool_recycle_into_ring

Changes since v2:
- rely on PP_FLAG_DMA_SYNC_DEV flag instead of dma_sync

Changes since v1:
- rename sync to dma_sync
- set dma_sync_size to 0xFFFFFFFF in page_pool_recycle_direct and
page_pool_put_page routines
- Improve documentation
====================

Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

+67 -17
+15 -9
drivers/net/ethernet/marvell/mvneta.c
··· 1846 1846 struct mvneta_rx_queue *rxq, 1847 1847 gfp_t gfp_mask) 1848 1848 { 1849 - enum dma_data_direction dma_dir; 1850 1849 dma_addr_t phys_addr; 1851 1850 struct page *page; 1852 1851 ··· 1855 1856 return -ENOMEM; 1856 1857 1857 1858 phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction; 1858 - dma_dir = page_pool_get_dma_dir(rxq->page_pool); 1859 - dma_sync_single_for_device(pp->dev->dev.parent, phys_addr, 1860 - MVNETA_MAX_RX_BUF_SIZE, dma_dir); 1861 1859 mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq); 1862 1860 1863 1861 return 0; ··· 2093 2097 err = xdp_do_redirect(pp->dev, xdp, prog); 2094 2098 if (err) { 2095 2099 ret = MVNETA_XDP_DROPPED; 2096 - xdp_return_buff(xdp); 2100 + __page_pool_put_page(rxq->page_pool, 2101 + virt_to_head_page(xdp->data), 2102 + xdp->data_end - xdp->data_hard_start, 2103 + true); 2097 2104 } else { 2098 2105 ret = MVNETA_XDP_REDIR; 2099 2106 } ··· 2105 2106 case XDP_TX: 2106 2107 ret = mvneta_xdp_xmit_back(pp, xdp); 2107 2108 if (ret != MVNETA_XDP_TX) 2108 - xdp_return_buff(xdp); 2109 + __page_pool_put_page(rxq->page_pool, 2110 + virt_to_head_page(xdp->data), 2111 + xdp->data_end - xdp->data_hard_start, 2112 + true); 2109 2113 break; 2110 2114 default: 2111 2115 bpf_warn_invalid_xdp_action(act); ··· 2117 2115 trace_xdp_exception(pp->dev, prog, act); 2118 2116 /* fall through */ 2119 2117 case XDP_DROP: 2120 - page_pool_recycle_direct(rxq->page_pool, 2121 - virt_to_head_page(xdp->data)); 2118 + __page_pool_put_page(rxq->page_pool, 2119 + virt_to_head_page(xdp->data), 2120 + xdp->data_end - xdp->data_hard_start, 2121 + true); 2122 2122 ret = MVNETA_XDP_DROPPED; 2123 2123 break; 2124 2124 } ··· 3069 3065 struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog); 3070 3066 struct page_pool_params pp_params = { 3071 3067 .order = 0, 3072 - .flags = PP_FLAG_DMA_MAP, 3068 + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, 3073 3069 .pool_size = size, 3074 3070 .nid = cpu_to_node(0), 3075 3071 .dev = pp->dev->dev.parent, 
3076 3072 .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, 3073 + .offset = pp->rx_offset_correction, 3074 + .max_len = MVNETA_MAX_RX_BUF_SIZE, 3077 3075 }; 3078 3076 int err; 3079 3077
+18 -6
include/net/page_pool.h
··· 34 34 #include <linux/ptr_ring.h> 35 35 #include <linux/dma-direction.h> 36 36 37 - #define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */ 38 - #define PP_FLAG_ALL PP_FLAG_DMA_MAP 37 + #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA 38 + * map/unmap 39 + */ 40 + #define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets 41 + * from page_pool will be 42 + * DMA-synced-for-device according to 43 + * the length provided by the device 44 + * driver. 45 + * Please note DMA-sync-for-CPU is still 46 + * device driver responsibility 47 + */ 48 + #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) 39 49 40 50 /* 41 51 * Fast allocation side cache array/stack ··· 75 65 int nid; /* Numa node id to allocate from pages from */ 76 66 struct device *dev; /* device, for DMA pre-mapping purposes */ 77 67 enum dma_data_direction dma_dir; /* DMA mapping direction */ 68 + unsigned int max_len; /* max DMA sync memory size */ 69 + unsigned int offset; /* DMA addr offset */ 78 70 }; 79 71 80 72 struct page_pool { ··· 163 151 #endif 164 152 165 153 /* Never call this directly, use helpers below */ 166 - void __page_pool_put_page(struct page_pool *pool, 167 - struct page *page, bool allow_direct); 154 + void __page_pool_put_page(struct page_pool *pool, struct page *page, 155 + unsigned int dma_sync_size, bool allow_direct); 168 156 169 157 static inline void page_pool_put_page(struct page_pool *pool, 170 158 struct page *page, bool allow_direct) ··· 173 161 * allow registering MEM_TYPE_PAGE_POOL, but shield linker. 
174 162 */ 175 163 #ifdef CONFIG_PAGE_POOL 176 - __page_pool_put_page(pool, page, allow_direct); 164 + __page_pool_put_page(pool, page, -1, allow_direct); 177 165 #endif 178 166 } 179 167 /* Very limited use-cases allow recycle direct */ 180 168 static inline void page_pool_recycle_direct(struct page_pool *pool, 181 169 struct page *page) 182 170 { 183 - __page_pool_put_page(pool, page, true); 171 + __page_pool_put_page(pool, page, -1, true); 184 172 } 185 173 186 174 /* Disconnects a page (from a page_pool). API users can have a need
+34 -2
net/core/page_pool.c
··· 47 47 (pool->p.dma_dir != DMA_BIDIRECTIONAL)) 48 48 return -EINVAL; 49 49 50 + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { 51 + /* In order to request DMA-sync-for-device the page 52 + * needs to be mapped 53 + */ 54 + if (!(pool->p.flags & PP_FLAG_DMA_MAP)) 55 + return -EINVAL; 56 + 57 + if (!pool->p.max_len) 58 + return -EINVAL; 59 + 60 + /* pool->p.offset has to be set according to the address 61 + * offset used by the DMA engine to start copying rx data 62 + */ 63 + } 64 + 50 65 if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) 51 66 return -ENOMEM; 52 67 ··· 130 115 return page; 131 116 } 132 117 118 + static void page_pool_dma_sync_for_device(struct page_pool *pool, 119 + struct page *page, 120 + unsigned int dma_sync_size) 121 + { 122 + dma_sync_size = min(dma_sync_size, pool->p.max_len); 123 + dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, 124 + pool->p.offset, dma_sync_size, 125 + pool->p.dma_dir); 126 + } 127 + 133 128 /* slow path */ 134 129 noinline 135 130 static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, ··· 183 158 return NULL; 184 159 } 185 160 page->dma_addr = dma; 161 + 162 + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 163 + page_pool_dma_sync_for_device(pool, page, pool->p.max_len); 186 164 187 165 skip_dma_map: 188 166 /* Track how many pages are held 'in-flight' */ ··· 320 292 return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid; 321 293 } 322 294 323 - void __page_pool_put_page(struct page_pool *pool, 324 - struct page *page, bool allow_direct) 295 + void __page_pool_put_page(struct page_pool *pool, struct page *page, 296 + unsigned int dma_sync_size, bool allow_direct) 325 297 { 326 298 /* This allocator is optimized for the XDP mode that uses 327 299 * one-frame-per-page, but have fallbacks that act like the ··· 332 304 if (likely(page_ref_count(page) == 1 && 333 305 pool_page_reusable(pool, page))) { 334 306 /* Read barrier done in page_ref_count / READ_ONCE */ 307 + 308 + if 
(pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 309 + page_pool_dma_sync_for_device(pool, page, 310 + dma_sync_size); 335 311 336 312 if (allow_direct && in_serving_softirq()) 337 313 if (__page_pool_recycle_direct(page, pool))