Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: allow gso_max_size to exceed 65536

The code for gso_max_size was added originally to allow for debugging and
working around buggy devices that couldn't support TSO with blocks 64K in
size. The original reason for limiting it to 64K was that this was the
existing limit of the IPv4 and non-jumbogram IPv6 length fields.

With the addition of Big TCP we can remove this limit and allow the value
to potentially go up to UINT_MAX and instead be limited by the tso_max_size
value.

So in order to support this we need to go through and clean up the
remaining users of the gso_max_size value so that the values will cap at
64K for non-TCPv6 flows. In addition we can clean up the GSO_MAX_SIZE value
so that 64K becomes GSO_LEGACY_MAX_SIZE and UINT_MAX will now be the upper
limit for GSO_MAX_SIZE.

v6: (edumazet) fixed a compile error if CONFIG_IPV6=n,
in a new sk_trim_gso_size() helper.
netif_set_tso_max_size() caps the requested TSO size
with GSO_MAX_SIZE.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Alexander Duyck and committed by David S. Miller
(commit 7c4e983c, parent 89527be8).

Total diffstat: +40 -17 lines
+2 -1
drivers/net/ethernet/amd/xgbe/xgbe.h
··· 151 151 #define XGBE_TX_MAX_BUF_SIZE (0x3fff & ~(64 - 1)) 152 152 153 153 /* Descriptors required for maximum contiguous TSO/GSO packet */ 154 - #define XGBE_TX_MAX_SPLIT ((GSO_MAX_SIZE / XGBE_TX_MAX_BUF_SIZE) + 1) 154 + #define XGBE_TX_MAX_SPLIT \ 155 + ((GSO_LEGACY_MAX_SIZE / XGBE_TX_MAX_BUF_SIZE) + 1) 155 156 156 157 /* Maximum possible descriptors needed for an SKB: 157 158 * - Maximum number of SKB frags
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 2038 2038 { 2039 2039 int nr_frags = skb_shinfo(skb)->nr_frags; 2040 2040 2041 - return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE; 2041 + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_MAX_SIZE; 2042 2042 } 2043 2043 2044 2044 static void
+2 -1
drivers/net/ethernet/sfc/ef100_nic.c
··· 1008 1008 } 1009 1009 return 0; 1010 1010 case ESE_EF100_DP_GZ_TSO_MAX_PAYLOAD_LEN: 1011 - nic_data->tso_max_payload_len = min_t(u64, reader->value, GSO_MAX_SIZE); 1011 + nic_data->tso_max_payload_len = min_t(u64, reader->value, 1012 + GSO_LEGACY_MAX_SIZE); 1012 1013 netif_set_tso_max_size(efx->net_dev, 1013 1014 nic_data->tso_max_payload_len); 1014 1015 return 0;
+2 -1
drivers/net/ethernet/sfc/falcon/tx.c
··· 98 98 /* Possibly more for PCIe page boundaries within input fragments */ 99 99 if (PAGE_SIZE > EF4_PAGE_SIZE) 100 100 max_descs += max_t(unsigned int, MAX_SKB_FRAGS, 101 - DIV_ROUND_UP(GSO_MAX_SIZE, EF4_PAGE_SIZE)); 101 + DIV_ROUND_UP(GSO_LEGACY_MAX_SIZE, 102 + EF4_PAGE_SIZE)); 102 103 103 104 return max_descs; 104 105 }
+2 -1
drivers/net/ethernet/sfc/tx_common.c
··· 416 416 /* Possibly more for PCIe page boundaries within input fragments */ 417 417 if (PAGE_SIZE > EFX_PAGE_SIZE) 418 418 max_descs += max_t(unsigned int, MAX_SKB_FRAGS, 419 - DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); 419 + DIV_ROUND_UP(GSO_LEGACY_MAX_SIZE, 420 + EFX_PAGE_SIZE)); 420 421 421 422 return max_descs; 422 423 }
+2 -1
drivers/net/ethernet/synopsys/dwc-xlgmac.h
··· 38 38 #define XLGMAC_RX_DESC_MAX_DIRTY (XLGMAC_RX_DESC_CNT >> 3) 39 39 40 40 /* Descriptors required for maximum contiguous TSO/GSO packet */ 41 - #define XLGMAC_TX_MAX_SPLIT ((GSO_MAX_SIZE / XLGMAC_TX_MAX_BUF_SIZE) + 1) 41 + #define XLGMAC_TX_MAX_SPLIT \ 42 + ((GSO_LEGACY_MAX_SIZE / XLGMAC_TX_MAX_BUF_SIZE) + 1) 42 43 43 44 /* Maximum possible descriptors needed for a SKB */ 44 45 #define XLGMAC_TX_MAX_DESC_NR (MAX_SKB_FRAGS + XLGMAC_TX_MAX_SPLIT + 2)
+1 -1
drivers/net/hyperv/rndis_filter.c
··· 1349 1349 struct net_device_context *net_device_ctx = netdev_priv(net); 1350 1350 struct ndis_offload hwcaps; 1351 1351 struct ndis_offload_params offloads; 1352 - unsigned int gso_max_size = GSO_MAX_SIZE; 1352 + unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE; 1353 1353 int ret; 1354 1354 1355 1355 /* Find HW offload capabilities */
+1 -1
drivers/scsi/fcoe/fcoe.c
··· 667 667 668 668 if (netdev->features & NETIF_F_FSO) { 669 669 lport->seq_offload = 1; 670 - lport->lso_max = netdev->gso_max_size; 670 + lport->lso_max = min(netdev->gso_max_size, GSO_LEGACY_MAX_SIZE); 671 671 FCOE_NETDEV_DBG(netdev, "Supports LSO for max len 0x%x\n", 672 672 lport->lso_max); 673 673 } else {
+3 -1
include/linux/netdevice.h
··· 2272 2272 const struct rtnl_link_ops *rtnl_link_ops; 2273 2273 2274 2274 /* for setting kernel sock attribute on TCP connection setup */ 2275 - #define GSO_MAX_SIZE 65536 2275 + #define GSO_LEGACY_MAX_SIZE 65536u 2276 + #define GSO_MAX_SIZE UINT_MAX 2277 + 2276 2278 unsigned int gso_max_size; 2277 2279 #define TSO_LEGACY_MAX_SIZE 65536 2278 2280 #define TSO_MAX_SIZE UINT_MAX
+1 -1
net/bpf/test_run.c
··· 1001 1001 cb->pkt_len = skb->len; 1002 1002 } else { 1003 1003 if (__skb->wire_len < skb->len || 1004 - __skb->wire_len > GSO_MAX_SIZE) 1004 + __skb->wire_len > GSO_LEGACY_MAX_SIZE) 1005 1005 return -EINVAL; 1006 1006 cb->pkt_len = __skb->wire_len; 1007 1007 }
+4 -3
net/core/dev.c
··· 2998 2998 * @size: max skb->len of a TSO frame 2999 2999 * 3000 3000 * Set the limit on the size of TSO super-frames the device can handle. 3001 - * Unless explicitly set the stack will assume the value of %GSO_MAX_SIZE. 3001 + * Unless explicitly set the stack will assume the value of 3002 + * %GSO_LEGACY_MAX_SIZE. 3002 3003 */ 3003 3004 void netif_set_tso_max_size(struct net_device *dev, unsigned int size) 3004 3005 { 3005 - dev->tso_max_size = size; 3006 + dev->tso_max_size = min(GSO_MAX_SIZE, size); 3006 3007 if (size < READ_ONCE(dev->gso_max_size)) 3007 3008 netif_set_gso_max_size(dev, size); 3008 3009 } ··· 10596 10595 10597 10596 dev_net_set(dev, &init_net); 10598 10597 10599 - dev->gso_max_size = GSO_MAX_SIZE; 10598 + dev->gso_max_size = GSO_LEGACY_MAX_SIZE; 10600 10599 dev->gso_max_segs = GSO_MAX_SEGS; 10601 10600 dev->gro_max_size = GRO_MAX_SIZE; 10602 10601 dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
+1 -1
net/core/rtnetlink.c
··· 2817 2817 if (tb[IFLA_GSO_MAX_SIZE]) { 2818 2818 u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); 2819 2819 2820 - if (max_size > GSO_MAX_SIZE || max_size > dev->tso_max_size) { 2820 + if (max_size > dev->tso_max_size) { 2821 2821 err = -EINVAL; 2822 2822 goto errout; 2823 2823 }
+14
net/core/sock.c
··· 2293 2293 } 2294 2294 EXPORT_SYMBOL_GPL(sk_free_unlock_clone); 2295 2295 2296 + static void sk_trim_gso_size(struct sock *sk) 2297 + { 2298 + if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE) 2299 + return; 2300 + #if IS_ENABLED(CONFIG_IPV6) 2301 + if (sk->sk_family == AF_INET6 && 2302 + sk_is_tcp(sk) && 2303 + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 2304 + return; 2305 + #endif 2306 + sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE; 2307 + } 2308 + 2296 2309 void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 2297 2310 { 2298 2311 u32 max_segs = 1; ··· 2325 2312 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 2326 2313 /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ 2327 2314 sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); 2315 + sk_trim_gso_size(sk); 2328 2316 sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); 2329 2317 /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ 2330 2318 max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
+1 -1
net/ipv4/tcp_bbr.c
··· 310 310 */ 311 311 bytes = min_t(unsigned long, 312 312 sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), 313 - GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); 313 + GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); 314 314 segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); 315 315 316 316 return min(segs, 0x7FU);
+1 -1
net/ipv4/tcp_output.c
··· 1553 1553 * SO_SNDBUF values. 1554 1554 * Also allow first and last skb in retransmit queue to be split. 1555 1555 */ 1556 - limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE); 1556 + limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE); 1557 1557 if (unlikely((sk->sk_wmem_queued >> 1) > limit && 1558 1558 tcp_queue != TCP_FRAG_IN_WRITE_QUEUE && 1559 1559 skb != tcp_rtx_queue_head(sk) &&
+2 -1
net/sctp/output.c
··· 134 134 dst_hold(tp->dst); 135 135 sk_setup_caps(sk, tp->dst); 136 136 } 137 - packet->max_size = sk_can_gso(sk) ? READ_ONCE(tp->dst->dev->gso_max_size) 137 + packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size), 138 + GSO_LEGACY_MAX_SIZE) 138 139 : asoc->pathmtu; 139 140 rcu_read_unlock(); 140 141 }