Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

page_pool: Allow drivers to hint on SKB recycling

Up to now several high speed NICs have custom mechanisms of recycling
the allocated memory they use for their payloads.
Our page_pool API already has recycling capabilities that are always
used when we are running in 'XDP mode'. So let's tweak the API and the
kernel network stack slightly and allow the recycling to happen even
during the standard operation.
The API doesn't take into account 'split page' policies used by those
drivers currently, but can be extended once we have users for that.

The idea is to be able to intercept the packet on skb_release_data().
If it's a buffer coming from our page_pool API, recycle it back to the
pool for further usage; otherwise just release the packet entirely.

To achieve that we introduce a bit in struct sk_buff (pp_recycle:1) and
a field in struct page (page->pp) to store the page_pool pointer.
Storing the information in page->pp allows us to recycle both SKBs and
their fragments.
We could have skipped the skb bit entirely, since identical information
can be derived from struct page. However, in an effort to affect the free
path as little as possible, reading a single bit in the skb, which is
already in cache, is better than trying to derive identical information
from the data stored in the page.

The driver or page_pool has to take care of the sync operations on its own
during the buffer recycling since the buffer is, after opting-in to the
recycling, never unmapped.

Since the gain on the drivers depends on the architecture, we are not
enabling recycling by default if the page_pool API is used on a driver.
In order to enable recycling the driver must call skb_mark_for_recycle()
to store the information we need for recycling in page->pp and to
enable the recycling bit, or page_pool_store_mem_info() for a fragment.

Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Co-developed-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Ilias Apalodimas and committed by
David S. Miller
6a5bcd84 c420c989

+77 -7
+30 -3
include/linux/skbuff.h
··· 37 37 #include <linux/in6.h> 38 38 #include <linux/if_packet.h> 39 39 #include <net/flow.h> 40 + #include <net/page_pool.h> 40 41 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 41 42 #include <linux/netfilter/nf_conntrack_common.h> 42 43 #endif ··· 668 667 * @head_frag: skb was allocated from page fragments, 669 668 * not allocated by kmalloc() or vmalloc(). 670 669 * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves 670 + * @pp_recycle: mark the packet for recycling instead of freeing (implies 671 + * page_pool support on driver) 671 672 * @active_extensions: active extensions (skb_ext_id types) 672 673 * @ndisc_nodetype: router type (from link layer) 673 674 * @ooo_okay: allow the mapping of a socket to a queue to be changed ··· 794 791 fclone:2, 795 792 peeked:1, 796 793 head_frag:1, 797 - pfmemalloc:1; 794 + pfmemalloc:1, 795 + pp_recycle:1; /* page_pool recycle indicator */ 798 796 #ifdef CONFIG_SKB_EXTENSIONS 799 797 __u8 active_extensions; 800 798 #endif 799 + 801 800 /* fields enclosed in headers_start/headers_end are copied 802 801 * using a single memcpy() in __copy_skb_header() 803 802 */ ··· 3093 3088 */ 3094 3089 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) 3095 3090 { 3096 - put_page(skb_frag_page(frag)); 3091 + struct page *page = skb_frag_page(frag); 3092 + 3093 + #ifdef CONFIG_PAGE_POOL 3094 + if (recycle && page_pool_return_skb_page(page)) 3095 + return; 3096 + #endif 3097 + put_page(page); 3097 3098 } 3098 3099 3099 3100 /** ··· 3111 3100 */ 3112 3101 static inline void skb_frag_unref(struct sk_buff *skb, int f) 3113 3102 { 3114 - __skb_frag_unref(&skb_shinfo(skb)->frags[f], false); 3103 + __skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); 3115 3104 } 3116 3105 3117 3106 /** ··· 4708 4697 #else 4709 4698 return 0; 4710 4699 #endif 4700 + } 4701 + 4702 + #ifdef CONFIG_PAGE_POOL 4703 + static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page, 4704 + struct page_pool *pp) 4705 + { 4706 + 
skb->pp_recycle = 1; 4707 + page_pool_store_mem_info(page, pp); 4708 + } 4709 + #endif 4710 + 4711 + static inline bool skb_pp_recycle(struct sk_buff *skb, void *data) 4712 + { 4713 + if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) 4714 + return false; 4715 + return page_pool_return_skb_page(virt_to_page(data)); 4711 4716 } 4712 4717 4713 4718 #endif /* __KERNEL__ */
+9
include/net/page_pool.h
··· 146 146 return pool->p.dma_dir; 147 147 } 148 148 149 + bool page_pool_return_skb_page(struct page *page); 150 + 149 151 struct page_pool *page_pool_create(const struct page_pool_params *params); 150 152 151 153 #ifdef CONFIG_PAGE_POOL ··· 251 249 spin_unlock(&pool->ring.producer_lock); 252 250 else 253 251 spin_unlock_bh(&pool->ring.producer_lock); 252 + } 253 + 254 + /* Store mem_info on struct page and use it while recycling skb frags */ 255 + static inline 256 + void page_pool_store_mem_info(struct page *page, struct page_pool *pp) 257 + { 258 + page->pp = pp; 254 259 } 255 260 256 261 #endif /* _NET_PAGE_POOL_H */
+22
net/core/page_pool.c
··· 628 628 } 629 629 } 630 630 EXPORT_SYMBOL(page_pool_update_nid); 631 + 632 + bool page_pool_return_skb_page(struct page *page) 633 + { 634 + struct page_pool *pp; 635 + 636 + page = compound_head(page); 637 + if (unlikely(page->pp_magic != PP_SIGNATURE)) 638 + return false; 639 + 640 + pp = page->pp; 641 + 642 + /* Driver set this to memory recycling info. Reset it on recycle. 643 + * This will *not* work for NIC using a split-page memory model. 644 + * The page will be returned to the pool here regardless of the 645 + * 'flipped' fragment being in use or not. 646 + */ 647 + page->pp = NULL; 648 + page_pool_put_full_page(pp, page, false); 649 + 650 + return true; 651 + } 652 + EXPORT_SYMBOL(page_pool_return_skb_page);
+16 -4
net/core/skbuff.c
··· 70 70 #include <net/xfrm.h> 71 71 #include <net/mpls.h> 72 72 #include <net/mptcp.h> 73 + #include <net/page_pool.h> 73 74 74 75 #include <linux/uaccess.h> 75 76 #include <trace/events/skb.h> ··· 646 645 { 647 646 unsigned char *head = skb->head; 648 647 649 - if (skb->head_frag) 648 + if (skb->head_frag) { 649 + if (skb_pp_recycle(skb, head)) 650 + return; 650 651 skb_free_frag(head); 651 - else 652 + } else { 652 653 kfree(head); 654 + } 653 655 } 654 656 655 657 static void skb_release_data(struct sk_buff *skb) ··· 668 664 skb_zcopy_clear(skb, true); 669 665 670 666 for (i = 0; i < shinfo->nr_frags; i++) 671 - __skb_frag_unref(&shinfo->frags[i], false); 667 + __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); 672 668 673 669 if (shinfo->frag_list) 674 670 kfree_skb_list(shinfo->frag_list); ··· 1050 1046 n->nohdr = 0; 1051 1047 n->peeked = 0; 1052 1048 C(pfmemalloc); 1049 + C(pp_recycle); 1053 1050 n->destructor = NULL; 1054 1051 C(tail); 1055 1052 C(end); ··· 3500 3495 fragto = &skb_shinfo(tgt)->frags[merge]; 3501 3496 3502 3497 skb_frag_size_add(fragto, skb_frag_size(fragfrom)); 3503 - __skb_frag_unref(fragfrom, false); 3498 + __skb_frag_unref(fragfrom, skb->pp_recycle); 3504 3499 } 3505 3500 3506 3501 /* Reposition in the original skb */ ··· 5288 5283 *fragstolen = false; 5289 5284 5290 5285 if (skb_cloned(to)) 5286 + return false; 5287 + 5288 + /* The page pool signature of struct page will eventually figure out 5289 + * which pages can be recycled or not but for now let's prohibit slab 5290 + * allocated and page_pool allocated SKBs from being coalesced. 5291 + */ 5292 + if (to->pp_recycle != from->pp_recycle) 5291 5293 return false; 5292 5294 5293 5295 if (len <= skb_tailroom(to)) {