Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-avoid-the-memory-waste-in-some-ethernet-drivers'

Kevin Hao says:

====================
net: Avoid the memory waste in some Ethernet drivers

In the current implementation of napi_alloc_frag(), it doesn't have any
alignment guarantee for the returned buffer address. We would have to use
some ugly workarounds to make sure that we can get an aligned buffer
address for some Ethernet drivers. This patch series tries to introduce
some helper functions to make sure that an aligned buffer is returned.
Then we can drop the ugly workarounds and avoid the unnecessary memory
waste.
====================

Link: https://lore.kernel.org/r/20210204105638.1584-1-haokexin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+61 -27
+1 -2
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
··· 764 764 /* Prepare the HW SGT structure */ 765 765 sgt_buf_size = priv->tx_data_offset + 766 766 sizeof(struct dpaa2_sg_entry) * num_dma_bufs; 767 - sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); 767 + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); 768 768 if (unlikely(!sgt_buf)) { 769 769 err = -ENOMEM; 770 770 goto sgt_buf_alloc_failed; 771 771 } 772 - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); 773 772 memset(sgt_buf, 0, sgt_buf_size); 774 773 775 774 sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+1 -2
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
··· 488 488 dma_addr_t iova; 489 489 u8 *buf; 490 490 491 - buf = napi_alloc_frag(pool->rbsize + OTX2_ALIGN); 491 + buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN); 492 492 if (unlikely(!buf)) 493 493 return -ENOMEM; 494 494 495 - buf = PTR_ALIGN(buf, OTX2_ALIGN); 496 495 iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, 497 496 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 498 497 if (unlikely(dma_mapping_error(pfvf->dev, iova))) {
+10 -2
include/linux/gfp.h
··· 583 583 584 584 struct page_frag_cache; 585 585 extern void __page_frag_cache_drain(struct page *page, unsigned int count); 586 - extern void *page_frag_alloc(struct page_frag_cache *nc, 587 - unsigned int fragsz, gfp_t gfp_mask); 586 + extern void *page_frag_alloc_align(struct page_frag_cache *nc, 587 + unsigned int fragsz, gfp_t gfp_mask, 588 + unsigned int align_mask); 589 + 590 + static inline void *page_frag_alloc(struct page_frag_cache *nc, 591 + unsigned int fragsz, gfp_t gfp_mask) 592 + { 593 + return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); 594 + } 595 + 588 596 extern void page_frag_free(void *addr); 589 597 590 598 #define __free_page(page) __free_pages((page), 0)
+34 -2
include/linux/skbuff.h
··· 2818 2818 2819 2819 unsigned int skb_rbtree_purge(struct rb_root *root); 2820 2820 2821 - void *netdev_alloc_frag(unsigned int fragsz); 2821 + void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); 2822 + 2823 + /** 2824 + * netdev_alloc_frag - allocate a page fragment 2825 + * @fragsz: fragment size 2826 + * 2827 + * Allocates a frag from a page for receive buffer. 2828 + * Uses GFP_ATOMIC allocations. 2829 + */ 2830 + static inline void *netdev_alloc_frag(unsigned int fragsz) 2831 + { 2832 + return __netdev_alloc_frag_align(fragsz, ~0u); 2833 + } 2834 + 2835 + static inline void *netdev_alloc_frag_align(unsigned int fragsz, 2836 + unsigned int align) 2837 + { 2838 + WARN_ON_ONCE(!is_power_of_2(align)); 2839 + return __netdev_alloc_frag_align(fragsz, -align); 2840 + } 2822 2841 2823 2842 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, 2824 2843 gfp_t gfp_mask); ··· 2896 2877 page_frag_free(addr); 2897 2878 } 2898 2879 2899 - void *napi_alloc_frag(unsigned int fragsz); 2880 + void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); 2881 + 2882 + static inline void *napi_alloc_frag(unsigned int fragsz) 2883 + { 2884 + return __napi_alloc_frag_align(fragsz, ~0u); 2885 + } 2886 + 2887 + static inline void *napi_alloc_frag_align(unsigned int fragsz, 2888 + unsigned int align) 2889 + { 2890 + WARN_ON_ONCE(!is_power_of_2(align)); 2891 + return __napi_alloc_frag_align(fragsz, -align); 2892 + } 2893 + 2900 2894 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, 2901 2895 unsigned int length, gfp_t gfp_mask); 2902 2896 static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+5 -3
mm/page_alloc.c
··· 5137 5137 } 5138 5138 EXPORT_SYMBOL(__page_frag_cache_drain); 5139 5139 5140 - void *page_frag_alloc(struct page_frag_cache *nc, 5141 - unsigned int fragsz, gfp_t gfp_mask) 5140 + void *page_frag_alloc_align(struct page_frag_cache *nc, 5141 + unsigned int fragsz, gfp_t gfp_mask, 5142 + unsigned int align_mask) 5142 5143 { 5143 5144 unsigned int size = PAGE_SIZE; 5144 5145 struct page *page; ··· 5191 5190 } 5192 5191 5193 5192 nc->pagecnt_bias--; 5193 + offset &= align_mask; 5194 5194 nc->offset = offset; 5195 5195 5196 5196 return nc->va + offset; 5197 5197 } 5198 - EXPORT_SYMBOL(page_frag_alloc); 5198 + EXPORT_SYMBOL(page_frag_alloc_align); 5199 5199 5200 5200 /* 5201 5201 * Frees a page fragment allocated out of either a compound or order 0 page.
+10 -16
net/core/skbuff.c
··· 374 374 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); 375 375 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); 376 376 377 - static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 377 + static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask, 378 + unsigned int align_mask) 378 379 { 379 380 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); 380 381 381 - return page_frag_alloc(&nc->page, fragsz, gfp_mask); 382 + return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask); 382 383 } 383 384 384 - void *napi_alloc_frag(unsigned int fragsz) 385 + void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) 385 386 { 386 387 fragsz = SKB_DATA_ALIGN(fragsz); 387 388 388 - return __napi_alloc_frag(fragsz, GFP_ATOMIC); 389 + return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); 389 390 } 390 - EXPORT_SYMBOL(napi_alloc_frag); 391 + EXPORT_SYMBOL(__napi_alloc_frag_align); 391 392 392 - /** 393 - * netdev_alloc_frag - allocate a page fragment 394 - * @fragsz: fragment size 395 - * 396 - * Allocates a frag from a page for receive buffer. 397 - * Uses GFP_ATOMIC allocations. 
398 - */ 399 - void *netdev_alloc_frag(unsigned int fragsz) 393 + void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) 400 394 { 401 395 struct page_frag_cache *nc; 402 396 void *data; ··· 398 404 fragsz = SKB_DATA_ALIGN(fragsz); 399 405 if (in_irq() || irqs_disabled()) { 400 406 nc = this_cpu_ptr(&netdev_alloc_cache); 401 - data = page_frag_alloc(nc, fragsz, GFP_ATOMIC); 407 + data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); 402 408 } else { 403 409 local_bh_disable(); 404 - data = __napi_alloc_frag(fragsz, GFP_ATOMIC); 410 + data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); 405 411 local_bh_enable(); 406 412 } 407 413 return data; 408 414 } 409 - EXPORT_SYMBOL(netdev_alloc_frag); 415 + EXPORT_SYMBOL(__netdev_alloc_frag_align); 410 416 411 417 /** 412 418 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device