Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

GSO: Add GSO type for fixed IPv4 ID

This patch adds support for TSO using IPv4 headers with a fixed IP ID
field. This is meant to allow us to do lossless GRO in the case of TCP
flows that use a fixed IP ID, such as those that convert IPv6 headers to
IPv4 headers.

In addition, I am adding a feature that for now I am referring to as TSO
with IP ID mangling. Basically, when this flag is enabled the device has the
option to either output the flow with incrementing IP IDs or with a fixed
IP ID regardless of what the original IP ID ordering was. This is useful
in cases where the DF bit is set and we do not care if the original IP ID
value is maintained.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexander Duyck and committed by
David S. Miller
cbc53e08 518f213d

+63 -24
+3
include/linux/netdev_features.h
··· 39 39 NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ 40 40 NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ 41 41 NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ 42 + NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ 42 43 NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ 43 44 NETIF_F_FSO_BIT, /* ... FCoE segmentation */ 44 45 NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ ··· 121 120 #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) 122 121 #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) 123 122 #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) 123 + #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) 124 124 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) 125 125 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) 126 126 #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) ··· 149 147 150 148 /* List of features with software fallbacks. */ 151 149 #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ 150 + NETIF_F_TSO_MANGLEID | \ 152 151 NETIF_F_TSO6 | NETIF_F_UFO) 153 152 154 153 /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be
+1
include/linux/netdevice.h
··· 3992 3992 BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); 3993 3993 BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); 3994 3994 BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); 3995 + BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); 3995 3996 BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); 3996 3997 BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); 3997 3998 BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT));
+11 -9
include/linux/skbuff.h
··· 465 465 /* This indicates the tcp segment has CWR set. */ 466 466 SKB_GSO_TCP_ECN = 1 << 3, 467 467 468 - SKB_GSO_TCPV6 = 1 << 4, 468 + SKB_GSO_TCP_FIXEDID = 1 << 4, 469 469 470 - SKB_GSO_FCOE = 1 << 5, 470 + SKB_GSO_TCPV6 = 1 << 5, 471 471 472 - SKB_GSO_GRE = 1 << 6, 472 + SKB_GSO_FCOE = 1 << 6, 473 473 474 - SKB_GSO_GRE_CSUM = 1 << 7, 474 + SKB_GSO_GRE = 1 << 7, 475 475 476 - SKB_GSO_IPIP = 1 << 8, 476 + SKB_GSO_GRE_CSUM = 1 << 8, 477 477 478 - SKB_GSO_SIT = 1 << 9, 478 + SKB_GSO_IPIP = 1 << 9, 479 479 480 - SKB_GSO_UDP_TUNNEL = 1 << 10, 480 + SKB_GSO_SIT = 1 << 10, 481 481 482 - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, 482 + SKB_GSO_UDP_TUNNEL = 1 << 11, 483 483 484 - SKB_GSO_TUNNEL_REMCSUM = 1 << 12, 484 + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, 485 + 486 + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, 485 487 }; 486 488 487 489 #if BITS_PER_LONG > 32
+29 -5
net/core/dev.c
··· 2825 2825 return vlan_features_check(skb, features); 2826 2826 } 2827 2827 2828 + static netdev_features_t gso_features_check(const struct sk_buff *skb, 2829 + struct net_device *dev, 2830 + netdev_features_t features) 2831 + { 2832 + u16 gso_segs = skb_shinfo(skb)->gso_segs; 2833 + 2834 + if (gso_segs > dev->gso_max_segs) 2835 + return features & ~NETIF_F_GSO_MASK; 2836 + 2837 + /* Make sure to clear the IPv4 ID mangling feature if 2838 + * the IPv4 header has the potential to be fragmented. 2839 + */ 2840 + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { 2841 + struct iphdr *iph = skb->encapsulation ? 2842 + inner_ip_hdr(skb) : ip_hdr(skb); 2843 + 2844 + if (!(iph->frag_off & htons(IP_DF))) 2845 + features &= ~NETIF_F_TSO_MANGLEID; 2846 + } 2847 + 2848 + return features; 2849 + } 2850 + 2828 2851 netdev_features_t netif_skb_features(struct sk_buff *skb) 2829 2852 { 2830 2853 struct net_device *dev = skb->dev; 2831 2854 netdev_features_t features = dev->features; 2832 - u16 gso_segs = skb_shinfo(skb)->gso_segs; 2833 2855 2834 - if (gso_segs > dev->gso_max_segs) 2835 - features &= ~NETIF_F_GSO_MASK; 2856 + if (skb_is_gso(skb)) 2857 + features = gso_features_check(skb, dev, features); 2836 2858 2837 2859 /* If encapsulation offload request, verify we are testing 2838 2860 * hardware encapsulation features instead of standard ··· 6998 6976 dev->features |= NETIF_F_SOFT_FEATURES; 6999 6977 dev->wanted_features = dev->features & dev->hw_features; 7000 6978 7001 - if (!(dev->flags & IFF_LOOPBACK)) { 6979 + if (!(dev->flags & IFF_LOOPBACK)) 7002 6980 dev->hw_features |= NETIF_F_NOCACHE_COPY; 7003 - } 6981 + 6982 + if (dev->hw_features & NETIF_F_TSO) 6983 + dev->hw_features |= NETIF_F_TSO_MANGLEID; 7004 6984 7005 6985 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 7006 6986 */
+1
net/core/ethtool.c
··· 79 79 [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", 80 80 [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", 81 81 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", 82 + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", 82 83 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 83 84 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 84 85 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+11 -8
net/ipv4/af_inet.c
··· 1195 1195 static struct sk_buff *inet_gso_segment(struct sk_buff *skb, 1196 1196 netdev_features_t features) 1197 1197 { 1198 + bool udpfrag = false, fixedid = false, encap; 1198 1199 struct sk_buff *segs = ERR_PTR(-EINVAL); 1199 1200 const struct net_offload *ops; 1200 1201 unsigned int offset = 0; 1201 - bool udpfrag, encap; 1202 1202 struct iphdr *iph; 1203 1203 int proto; 1204 1204 int nhoff; ··· 1217 1217 SKB_GSO_TCPV6 | 1218 1218 SKB_GSO_UDP_TUNNEL | 1219 1219 SKB_GSO_UDP_TUNNEL_CSUM | 1220 + SKB_GSO_TCP_FIXEDID | 1220 1221 SKB_GSO_TUNNEL_REMCSUM | 1221 1222 0))) 1222 1223 goto out; ··· 1249 1248 1250 1249 segs = ERR_PTR(-EPROTONOSUPPORT); 1251 1250 1252 - if (skb->encapsulation && 1253 - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) 1254 - udpfrag = proto == IPPROTO_UDP && encap; 1255 - else 1256 - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; 1251 + if (!skb->encapsulation || encap) { 1252 + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); 1253 + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); 1254 + 1255 + /* fixed ID is invalid if DF bit is not set */ 1256 + if (fixedid && !(iph->frag_off & htons(IP_DF))) 1257 + goto out; 1258 + } 1257 1259 1258 1260 ops = rcu_dereference(inet_offloads[proto]); 1259 1261 if (likely(ops && ops->callbacks.gso_segment)) ··· 1269 1265 do { 1270 1266 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); 1271 1267 if (udpfrag) { 1272 - iph->id = htons(id); 1273 1268 iph->frag_off = htons(offset >> 3); 1274 1269 if (skb->next) 1275 1270 iph->frag_off |= htons(IP_MF); 1276 1271 offset += skb->len - nhoff - ihl; 1277 - } else { 1272 + } else if (!fixedid) { 1278 1273 iph->id = htons(id++); 1279 1274 } 1280 1275 iph->tot_len = htons(skb->len - nhoff);
+1
net/ipv4/gre_offload.c
··· 32 32 SKB_GSO_UDP | 33 33 SKB_GSO_DODGY | 34 34 SKB_GSO_TCP_ECN | 35 + SKB_GSO_TCP_FIXEDID | 35 36 SKB_GSO_GRE | 36 37 SKB_GSO_GRE_CSUM | 37 38 SKB_GSO_IPIP |
+3 -1
net/ipv4/tcp_offload.c
··· 89 89 ~(SKB_GSO_TCPV4 | 90 90 SKB_GSO_DODGY | 91 91 SKB_GSO_TCP_ECN | 92 + SKB_GSO_TCP_FIXEDID | 92 93 SKB_GSO_TCPV6 | 93 94 SKB_GSO_GRE | 94 95 SKB_GSO_GRE_CSUM | ··· 99 98 SKB_GSO_UDP_TUNNEL_CSUM | 100 99 SKB_GSO_TUNNEL_REMCSUM | 101 100 0) || 102 - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 101 + !(type & (SKB_GSO_TCPV4 | 102 + SKB_GSO_TCPV6)))) 103 103 goto out; 104 104 105 105 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+2 -1
net/ipv6/ip6_offload.c
··· 73 73 SKB_GSO_UDP | 74 74 SKB_GSO_DODGY | 75 75 SKB_GSO_TCP_ECN | 76 + SKB_GSO_TCP_FIXEDID | 77 + SKB_GSO_TCPV6 | 76 78 SKB_GSO_GRE | 77 79 SKB_GSO_GRE_CSUM | 78 80 SKB_GSO_IPIP | ··· 82 80 SKB_GSO_UDP_TUNNEL | 83 81 SKB_GSO_UDP_TUNNEL_CSUM | 84 82 SKB_GSO_TUNNEL_REMCSUM | 85 - SKB_GSO_TCPV6 | 86 83 0))) 87 84 goto out; 88 85
+1
net/mpls/mpls_gso.c
··· 31 31 SKB_GSO_TCPV6 | 32 32 SKB_GSO_UDP | 33 33 SKB_GSO_DODGY | 34 + SKB_GSO_TCP_FIXEDID | 34 35 SKB_GSO_TCP_ECN))) 35 36 goto out; 36 37