Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netvm: propagate page->pfmemalloc from skb_alloc_page to skb

The skb->pfmemalloc flag gets set to true iff during the slab allocation
of data in __alloc_skb that the PFMEMALLOC reserves were used. If
page splitting is used, it is possible that pages will be allocated from
the PFMEMALLOC reserve without propagating this information to the skb.
This patch propagates page->pfmemalloc from pages allocated for fragments
to the skb.

It works by reintroducing and expanding the skb_alloc_page() API to take
an skb. If the page was allocated from pfmemalloc reserves, it is
automatically copied. If the driver allocates the page before the skb, it
should call skb_propagate_pfmemalloc() after the skb is allocated to
ensure the flag is copied properly.

Failure to do so is not critical. The resulting driver may perform slower
if it is used for swap-over-NBD or swap-over-NFS but it should not result
in failure.

[davem@davemloft.net: API rename and consistency]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Mel Gorman and committed by Linus Torvalds.
0614002b c48a11c7

+62 -8
+1 -1
drivers/net/ethernet/chelsio/cxgb4/sge.c
··· 528 528 #endif 529 529 530 530 while (n--) { 531 - pg = alloc_page(gfp); 531 + pg = __skb_alloc_page(gfp, NULL); 532 532 if (unlikely(!pg)) { 533 533 q->alloc_failed++; 534 534 break;
+1 -1
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
··· 653 653 654 654 alloc_small_pages: 655 655 while (n--) { 656 - page = alloc_page(gfp | __GFP_NOWARN | __GFP_COLD); 656 + page = __skb_alloc_page(gfp | __GFP_NOWARN, NULL); 657 657 if (unlikely(!page)) { 658 658 fl->alloc_failed++; 659 659 break;
+1 -1
drivers/net/ethernet/intel/igb/igb_main.c
··· 6235 6235 return true; 6236 6236 6237 6237 if (!page) { 6238 - page = alloc_page(GFP_ATOMIC | __GFP_COLD); 6238 + page = __skb_alloc_page(GFP_ATOMIC, bi->skb); 6239 6239 bi->page = page; 6240 6240 if (unlikely(!page)) { 6241 6241 rx_ring->rx_stats.alloc_failed++;
+2 -2
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 1141 1141 1142 1142 /* alloc new page for storage */ 1143 1143 if (likely(!page)) { 1144 - page = alloc_pages(GFP_ATOMIC | __GFP_COLD | __GFP_COMP, 1145 - ixgbe_rx_pg_order(rx_ring)); 1144 + page = __skb_alloc_pages(GFP_ATOMIC | __GFP_COLD | __GFP_COMP, 1145 + bi->skb, ixgbe_rx_pg_order(rx_ring)); 1146 1146 if (unlikely(!page)) { 1147 1147 rx_ring->rx_stats.alloc_rx_page_failed++; 1148 1148 return false;
-1
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
··· 352 352 adapter->alloc_rx_buff_failed++; 353 353 goto no_buffers; 354 354 } 355 - 356 355 bi->skb = skb; 357 356 } 358 357 if (!bi->dma) {
+1 -1
drivers/net/usb/cdc-phonet.c
··· 130 130 struct page *page; 131 131 int err; 132 132 133 - page = alloc_page(gfp_flags); 133 + page = __skb_alloc_page(gfp_flags | __GFP_NOMEMALLOC, NULL); 134 134 if (!page) 135 135 return -ENOMEM; 136 136
+1 -1
drivers/usb/gadget/f_phonet.c
··· 301 301 struct page *page; 302 302 int err; 303 303 304 - page = alloc_page(gfp_flags); 304 + page = __skb_alloc_page(gfp_flags | __GFP_NOMEMALLOC, NULL); 305 305 if (!page) 306 306 return -ENOMEM; 307 307
+55
include/linux/skbuff.h
··· 1774 1774 return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); 1775 1775 } 1776 1776 1777 + /** 1778 + * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data 1779 + * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1780 + * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1781 + * @order: size of the allocation 1782 + * 1783 + * Allocate a new page. 1784 + * 1785 + * %NULL is returned if there is no free memory. 1786 + */ 1787 + static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, 1788 + struct sk_buff *skb, 1789 + unsigned int order) 1790 + { 1791 + struct page *page; 1792 + 1793 + gfp_mask |= __GFP_COLD; 1794 + 1795 + if (!(gfp_mask & __GFP_NOMEMALLOC)) 1796 + gfp_mask |= __GFP_MEMALLOC; 1797 + 1798 + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); 1799 + if (skb && page && page->pfmemalloc) 1800 + skb->pfmemalloc = true; 1801 + 1802 + return page; 1803 + } 1804 + 1805 + /** 1806 + * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data 1807 + * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1808 + * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1809 + * 1810 + * Allocate a new page. 1811 + * 1812 + * %NULL is returned if there is no free memory. 
1813 + */ 1814 + static inline struct page *__skb_alloc_page(gfp_t gfp_mask, 1815 + struct sk_buff *skb) 1816 + { 1817 + return __skb_alloc_pages(gfp_mask, skb, 0); 1818 + } 1819 + 1820 + /** 1821 + * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page 1822 + * @page: The page that was allocated from skb_alloc_page 1823 + * @skb: The skb that may need pfmemalloc set 1824 + */ 1825 + static inline void skb_propagate_pfmemalloc(struct page *page, 1826 + struct sk_buff *skb) 1827 + { 1828 + if (page && page->pfmemalloc) 1829 + skb->pfmemalloc = true; 1830 + } 1831 + 1777 1832 /** 1778 1833 * skb_frag_page - retrieve the page refered to by a paged fragment 1779 1834 * @frag: the paged fragment