Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2020-12-09

This series contains updates to igb, ixgbe, i40e, and ice drivers.

Sven Auhagen fixes issues with igb XDP: return the correct error value from XDP
xmit back, increase header padding to include space for a double VLAN tag, add
an extack error message when the Rx buffer is too small for the frame size, set
metasize if it is set in xdp, change xdp_do_flush_map to xdp_do_flush, and
update trans_start to avoid a possible Tx timeout.

Björn fixes an issue where an Rx buffer can be reused prematurely with
XDP redirect for ixgbe, i40e, and ice drivers.

The following are changes since commit 323a391a220c4a234cb1e678689d7f4c3b73f863:
can: isotp: isotp_setsockopt(): block setsockopt on bound sockets
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue 1GbE
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+90 -34
+20 -7
drivers/net/ethernet/intel/i40e/i40e_txrx.c
··· 1850 1850 * the adapter for another receive 1851 1851 * 1852 1852 * @rx_buffer: buffer containing the page 1853 + * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call 1853 1854 * 1854 1855 * If page is reusable, rx_buffer->page_offset is adjusted to point to 1855 1856 * an unused region in the page. ··· 1873 1872 * 1874 1873 * In either case, if the page is reusable its refcount is increased. 1875 1874 **/ 1876 - static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) 1875 + static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, 1876 + int rx_buffer_pgcnt) 1877 1877 { 1878 1878 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 1879 1879 struct page *page = rx_buffer->page; ··· 1885 1883 1886 1884 #if (PAGE_SIZE < 8192) 1887 1885 /* if we are only owner of page we can reuse it */ 1888 - if (unlikely((page_count(page) - pagecnt_bias) > 1)) 1886 + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 1889 1887 return false; 1890 1888 #else 1891 1889 #define I40E_LAST_OFFSET \ ··· 1944 1942 * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use 1945 1943 * @rx_ring: rx descriptor ring to transact packets on 1946 1944 * @size: size of buffer to add to skb 1945 + * @rx_buffer_pgcnt: buffer page refcount 1947 1946 * 1948 1947 * This function will pull an Rx buffer from the ring and synchronize it 1949 1948 * for use by the CPU. 
1950 1949 */ 1951 1950 static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, 1952 - const unsigned int size) 1951 + const unsigned int size, 1952 + int *rx_buffer_pgcnt) 1953 1953 { 1954 1954 struct i40e_rx_buffer *rx_buffer; 1955 1955 1956 1956 rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); 1957 + *rx_buffer_pgcnt = 1958 + #if (PAGE_SIZE < 8192) 1959 + page_count(rx_buffer->page); 1960 + #else 1961 + 0; 1962 + #endif 1957 1963 prefetch_page_address(rx_buffer->page); 1958 1964 1959 1965 /* we are reusing so sync this buffer for CPU use */ ··· 2112 2102 * i40e_put_rx_buffer - Clean up used buffer and either recycle or free 2113 2103 * @rx_ring: rx descriptor ring to transact packets on 2114 2104 * @rx_buffer: rx buffer to pull data from 2105 + * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call 2115 2106 * 2116 2107 * This function will clean up the contents of the rx_buffer. It will 2117 2108 * either recycle the buffer or unmap it and free the associated resources. 
2118 2109 */ 2119 2110 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, 2120 - struct i40e_rx_buffer *rx_buffer) 2111 + struct i40e_rx_buffer *rx_buffer, 2112 + int rx_buffer_pgcnt) 2121 2113 { 2122 - if (i40e_can_reuse_rx_page(rx_buffer)) { 2114 + if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2123 2115 /* hand second half of page back to the ring */ 2124 2116 i40e_reuse_rx_page(rx_ring, rx_buffer); 2125 2117 } else { ··· 2348 2336 while (likely(total_rx_packets < (unsigned int)budget)) { 2349 2337 struct i40e_rx_buffer *rx_buffer; 2350 2338 union i40e_rx_desc *rx_desc; 2339 + int rx_buffer_pgcnt; 2351 2340 unsigned int size; 2352 2341 u64 qword; 2353 2342 ··· 2391 2378 break; 2392 2379 2393 2380 i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); 2394 - rx_buffer = i40e_get_rx_buffer(rx_ring, size); 2381 + rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2395 2382 2396 2383 /* retrieve a buffer from the ring */ 2397 2384 if (!skb) { ··· 2434 2421 break; 2435 2422 } 2436 2423 2437 - i40e_put_rx_buffer(rx_ring, rx_buffer); 2424 + i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2438 2425 cleaned_count++; 2439 2426 2440 2427 if (i40e_is_non_eop(rx_ring, rx_desc, skb))
+22 -9
drivers/net/ethernet/intel/ice/ice_txrx.c
··· 762 762 /** 763 763 * ice_can_reuse_rx_page - Determine if page can be reused for another Rx 764 764 * @rx_buf: buffer containing the page 765 + * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call 765 766 * 766 767 * If page is reusable, we have a green light for calling ice_reuse_rx_page, 767 768 * which will assign the current buffer to the buffer that next_to_alloc is 768 769 * pointing to; otherwise, the DMA mapping needs to be destroyed and 769 770 * page freed 770 771 */ 771 - static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) 772 + static bool 773 + ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) 772 774 { 773 775 unsigned int pagecnt_bias = rx_buf->pagecnt_bias; 774 776 struct page *page = rx_buf->page; ··· 781 779 782 780 #if (PAGE_SIZE < 8192) 783 781 /* if we are only owner of page we can reuse it */ 784 - if (unlikely((page_count(page) - pagecnt_bias) > 1)) 782 + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) 785 783 return false; 786 784 #else 787 785 #define ICE_LAST_OFFSET \ ··· 866 864 * @rx_ring: Rx descriptor ring to transact packets on 867 865 * @skb: skb to be used 868 866 * @size: size of buffer to add to skb 867 + * @rx_buf_pgcnt: rx_buf page refcount 869 868 * 870 869 * This function will pull an Rx buffer from the ring and synchronize it 871 870 * for use by the CPU. 
872 871 */ 873 872 static struct ice_rx_buf * 874 873 ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, 875 - const unsigned int size) 874 + const unsigned int size, int *rx_buf_pgcnt) 876 875 { 877 876 struct ice_rx_buf *rx_buf; 878 877 879 878 rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; 879 + *rx_buf_pgcnt = 880 + #if (PAGE_SIZE < 8192) 881 + page_count(rx_buf->page); 882 + #else 883 + 0; 884 + #endif 880 885 prefetchw(rx_buf->page); 881 886 *skb = rx_buf->skb; 882 887 ··· 1015 1006 * ice_put_rx_buf - Clean up used buffer and either recycle or free 1016 1007 * @rx_ring: Rx descriptor ring to transact packets on 1017 1008 * @rx_buf: Rx buffer to pull data from 1009 + * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() 1018 1010 * 1019 1011 * This function will update next_to_clean and then clean up the contents 1020 1012 * of the rx_buf. It will either recycle the buffer or unmap it and free 1021 1013 * the associated resources. 1022 1014 */ 1023 - static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) 1015 + static void 1016 + ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, 1017 + int rx_buf_pgcnt) 1024 1018 { 1025 1019 u16 ntc = rx_ring->next_to_clean + 1; 1026 1020 ··· 1034 1022 if (!rx_buf) 1035 1023 return; 1036 1024 1037 - if (ice_can_reuse_rx_page(rx_buf)) { 1025 + if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { 1038 1026 /* hand second half of page back to the ring */ 1039 1027 ice_reuse_rx_page(rx_ring, rx_buf); 1040 1028 } else { ··· 1109 1097 struct sk_buff *skb; 1110 1098 unsigned int size; 1111 1099 u16 stat_err_bits; 1100 + int rx_buf_pgcnt; 1112 1101 u16 vlan_tag = 0; 1113 1102 u8 rx_ptype; 1114 1103 ··· 1132 1119 dma_rmb(); 1133 1120 1134 1121 if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { 1135 - ice_put_rx_buf(rx_ring, NULL); 1122 + ice_put_rx_buf(rx_ring, NULL, 0); 1136 1123 cleaned_count++; 1137 1124 continue; 1138 1125 } ··· 1141 1128 
ICE_RX_FLX_DESC_PKT_LEN_M; 1142 1129 1143 1130 /* retrieve a buffer from the ring */ 1144 - rx_buf = ice_get_rx_buf(rx_ring, &skb, size); 1131 + rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt); 1145 1132 1146 1133 if (!size) { 1147 1134 xdp.data = NULL; ··· 1181 1168 total_rx_pkts++; 1182 1169 1183 1170 cleaned_count++; 1184 - ice_put_rx_buf(rx_ring, rx_buf); 1171 + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); 1185 1172 continue; 1186 1173 construct_skb: 1187 1174 if (skb) { ··· 1200 1187 break; 1201 1188 } 1202 1189 1203 - ice_put_rx_buf(rx_ring, rx_buf); 1190 + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); 1204 1191 cleaned_count++; 1205 1192 1206 1193 /* skip if it is NOP desc */
+5
drivers/net/ethernet/intel/igb/igb.h
··· 138 138 /* this is the size past which hardware will drop packets when setting LPE=0 */ 139 139 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 140 140 141 + #define IGB_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) 142 + 141 143 /* Supported Rx Buffer Sizes */ 142 144 #define IGB_RXBUFFER_256 256 143 145 #define IGB_RXBUFFER_1536 1536 ··· 249 247 #define IGB_SFF_ADDRESSING_MODE 0x4 250 248 #define IGB_SFF_8472_UNSUP 0x00 251 249 250 + /* TX resources are shared between XDP and netstack 251 + * and we need to tag the buffer type to distinguish them 252 + */ 252 253 enum igb_tx_buf_type { 253 254 IGB_TYPE_SKB = 0, 254 255 IGB_TYPE_XDP,
+26 -11
drivers/net/ethernet/intel/igb/igb_main.c
··· 2824 2824 } 2825 2825 } 2826 2826 2827 - static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog) 2827 + static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) 2828 2828 { 2829 - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 2829 + int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; 2830 2830 struct igb_adapter *adapter = netdev_priv(dev); 2831 + struct bpf_prog *prog = bpf->prog, *old_prog; 2831 2832 bool running = netif_running(dev); 2832 - struct bpf_prog *old_prog; 2833 2833 bool need_reset; 2834 2834 2835 2835 /* verify igb ring attributes are sufficient for XDP */ 2836 2836 for (i = 0; i < adapter->num_rx_queues; i++) { 2837 2837 struct igb_ring *ring = adapter->rx_ring[i]; 2838 2838 2839 - if (frame_size > igb_rx_bufsz(ring)) 2839 + if (frame_size > igb_rx_bufsz(ring)) { 2840 + NL_SET_ERR_MSG_MOD(bpf->extack, 2841 + "The RX buffer size is too small for the frame size"); 2842 + netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", 2843 + igb_rx_bufsz(ring), frame_size); 2840 2844 return -EINVAL; 2845 + } 2841 2846 } 2842 2847 2843 2848 old_prog = xchg(&adapter->xdp_prog, prog); ··· 2874 2869 { 2875 2870 switch (xdp->command) { 2876 2871 case XDP_SETUP_PROG: 2877 - return igb_xdp_setup(dev, xdp->prog); 2872 + return igb_xdp_setup(dev, xdp); 2878 2873 default: 2879 2874 return -EINVAL; 2880 2875 } ··· 2915 2910 */ 2916 2911 tx_ring = adapter->xdp_prog ? 
igb_xdp_tx_queue_mapping(adapter) : NULL; 2917 2912 if (unlikely(!tx_ring)) 2918 - return -ENXIO; 2913 + return IGB_XDP_CONSUMED; 2919 2914 2920 2915 nq = txring_txq(tx_ring); 2921 2916 __netif_tx_lock(nq, cpu); 2917 + /* Avoid transmit queue timeout since we share it with the slow path */ 2918 + nq->trans_start = jiffies; 2922 2919 ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); 2923 2920 __netif_tx_unlock(nq); 2924 2921 ··· 2952 2945 2953 2946 nq = txring_txq(tx_ring); 2954 2947 __netif_tx_lock(nq, cpu); 2948 + 2949 + /* Avoid transmit queue timeout since we share it with the slow path */ 2950 + nq->trans_start = jiffies; 2955 2951 2956 2952 for (i = 0; i < n; i++) { 2957 2953 struct xdp_frame *xdpf = frames[i]; ··· 3960 3950 /* set default work limits */ 3961 3951 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; 3962 3952 3963 - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 3964 - VLAN_HLEN; 3953 + adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; 3965 3954 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 3966 3955 3967 3956 spin_lock_init(&adapter->nfc_lock); ··· 6500 6491 static int igb_change_mtu(struct net_device *netdev, int new_mtu) 6501 6492 { 6502 6493 struct igb_adapter *adapter = netdev_priv(netdev); 6503 - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 6494 + int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; 6504 6495 6505 6496 if (adapter->xdp_prog) { 6506 6497 int i; ··· 6509 6500 struct igb_ring *ring = adapter->rx_ring[i]; 6510 6501 6511 6502 if (max_frame > igb_rx_bufsz(ring)) { 6512 - netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n"); 6503 + netdev_warn(adapter->netdev, 6504 + "Requested MTU size is not supported with XDP. 
Max frame size is %d\n", 6505 + max_frame); 6513 6506 return -EINVAL; 6514 6507 } 6515 6508 } ··· 8362 8351 SKB_DATA_ALIGN(xdp->data_end - 8363 8352 xdp->data_hard_start); 8364 8353 #endif 8354 + unsigned int metasize = xdp->data - xdp->data_meta; 8365 8355 struct sk_buff *skb; 8366 8356 8367 8357 /* prefetch first cache line of first page */ ··· 8376 8364 /* update pointers within the skb to store the data */ 8377 8365 skb_reserve(skb, xdp->data - xdp->data_hard_start); 8378 8366 __skb_put(skb, xdp->data_end - xdp->data); 8367 + 8368 + if (metasize) 8369 + skb_metadata_set(skb, metasize); 8379 8370 8380 8371 /* pull timestamp out of packet data */ 8381 8372 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { ··· 8786 8771 rx_ring->skb = skb; 8787 8772 8788 8773 if (xdp_xmit & IGB_XDP_REDIR) 8789 - xdp_do_flush_map(); 8774 + xdp_do_flush(); 8790 8775 8791 8776 if (xdp_xmit & IGB_XDP_TX) { 8792 8777 struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+17 -7
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 1945 1945 return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); 1946 1946 } 1947 1947 1948 - static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) 1948 + static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, 1949 + int rx_buffer_pgcnt) 1949 1950 { 1950 1951 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 1951 1952 struct page *page = rx_buffer->page; ··· 1957 1956 1958 1957 #if (PAGE_SIZE < 8192) 1959 1958 /* if we are only owner of page we can reuse it */ 1960 - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) 1959 + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 1961 1960 return false; 1962 1961 #else 1963 1962 /* The last offset is a bit aggressive in that we assume the ··· 2022 2021 static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, 2023 2022 union ixgbe_adv_rx_desc *rx_desc, 2024 2023 struct sk_buff **skb, 2025 - const unsigned int size) 2024 + const unsigned int size, 2025 + int *rx_buffer_pgcnt) 2026 2026 { 2027 2027 struct ixgbe_rx_buffer *rx_buffer; 2028 2028 2029 2029 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 2030 + *rx_buffer_pgcnt = 2031 + #if (PAGE_SIZE < 8192) 2032 + page_count(rx_buffer->page); 2033 + #else 2034 + 0; 2035 + #endif 2030 2036 prefetchw(rx_buffer->page); 2031 2037 *skb = rx_buffer->skb; 2032 2038 ··· 2063 2055 2064 2056 static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, 2065 2057 struct ixgbe_rx_buffer *rx_buffer, 2066 - struct sk_buff *skb) 2058 + struct sk_buff *skb, 2059 + int rx_buffer_pgcnt) 2067 2060 { 2068 - if (ixgbe_can_reuse_rx_page(rx_buffer)) { 2061 + if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2069 2062 /* hand second half of page back to the ring */ 2070 2063 ixgbe_reuse_rx_page(rx_ring, rx_buffer); 2071 2064 } else { ··· 2312 2303 union ixgbe_adv_rx_desc *rx_desc; 2313 2304 struct ixgbe_rx_buffer *rx_buffer; 2314 2305 struct sk_buff *skb; 2306 + int rx_buffer_pgcnt; 2315 2307 
unsigned int size; 2316 2308 2317 2309 /* return some buffers to hardware, one at a time is too slow */ ··· 2332 2322 */ 2333 2323 dma_rmb(); 2334 2324 2335 - rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); 2325 + rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt); 2336 2326 2337 2327 /* retrieve a buffer from the ring */ 2338 2328 if (!skb) { ··· 2377 2367 break; 2378 2368 } 2379 2369 2380 - ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb); 2370 + ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt); 2381 2371 cleaned_count++; 2382 2372 2383 2373 /* place incomplete frames back on ring for completion */