Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gso: Support partial splitting at the frag_list pointer

Since commit 8a29111c7 ("net: gro: allow to build full sized skb")
gro may build buffers with a frag_list. This can hurt forwarding
because most NICs can't offload such packets; they need to be
segmented in software. This patch splits buffers with a frag_list
at the frag_list pointer into buffers that can be TSO offloaded.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Steffen Klassert and committed by
David S. Miller
07b26c94 e867e87a

+69 -26
+40 -11
net/core/skbuff.c
··· 3097 3097 sg = !!(features & NETIF_F_SG); 3098 3098 csum = !!can_checksum_protocol(features, proto); 3099 3099 3100 - /* GSO partial only requires that we trim off any excess that 3101 - * doesn't fit into an MSS sized block, so take care of that 3102 - * now. 3103 - */ 3104 - if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { 3100 + if (sg && csum && (mss != GSO_BY_FRAGS)) { 3101 + if (!(features & NETIF_F_GSO_PARTIAL)) { 3102 + struct sk_buff *iter; 3103 + 3104 + if (!list_skb || 3105 + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) 3106 + goto normal; 3107 + 3108 + /* Split the buffer at the frag_list pointer. 3109 + * This is based on the assumption that all 3110 + * buffers in the chain excluding the last 3111 + * containing the same amount of data. 3112 + */ 3113 + skb_walk_frags(head_skb, iter) { 3114 + if (skb_headlen(iter)) 3115 + goto normal; 3116 + 3117 + len -= iter->len; 3118 + } 3119 + } 3120 + 3121 + /* GSO partial only requires that we trim off any excess that 3122 + * doesn't fit into an MSS sized block, so take care of that 3123 + * now. 3124 + */ 3105 3125 partial_segs = len / mss; 3106 3126 if (partial_segs > 1) 3107 3127 mss *= partial_segs; ··· 3129 3109 partial_segs = 0; 3130 3110 } 3131 3111 3112 + normal: 3132 3113 headroom = skb_headroom(head_skb); 3133 3114 pos = skb_headlen(head_skb); 3134 3115 ··· 3321 3300 */ 3322 3301 segs->prev = tail; 3323 3302 3324 - /* Update GSO info on first skb in partial sequence. 
*/ 3325 3303 if (partial_segs) { 3304 + struct sk_buff *iter; 3326 3305 int type = skb_shinfo(head_skb)->gso_type; 3306 + unsigned short gso_size = skb_shinfo(head_skb)->gso_size; 3327 3307 3328 3308 /* Update type to add partial and then remove dodgy if set */ 3329 - type |= SKB_GSO_PARTIAL; 3309 + type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; 3330 3310 type &= ~SKB_GSO_DODGY; 3331 3311 3332 3312 /* Update GSO info and prepare to start updating headers on 3333 3313 * our way back down the stack of protocols. 3334 3314 */ 3335 - skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; 3336 - skb_shinfo(segs)->gso_segs = partial_segs; 3337 - skb_shinfo(segs)->gso_type = type; 3338 - SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; 3315 + for (iter = segs; iter; iter = iter->next) { 3316 + skb_shinfo(iter)->gso_size = gso_size; 3317 + skb_shinfo(iter)->gso_segs = partial_segs; 3318 + skb_shinfo(iter)->gso_type = type; 3319 + SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; 3320 + } 3321 + 3322 + if (tail->len - doffset <= gso_size) 3323 + skb_shinfo(tail)->gso_size = 0; 3324 + else if (tail != segs) 3325 + skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); 3339 3326 } 3340 3327 3341 3328 /* Following permits correct backpressure, for protocols
+10 -4
net/ipv4/af_inet.c
··· 1192 1192 struct sk_buff *inet_gso_segment(struct sk_buff *skb, 1193 1193 netdev_features_t features) 1194 1194 { 1195 - bool udpfrag = false, fixedid = false, encap; 1195 + bool udpfrag = false, fixedid = false, gso_partial, encap; 1196 1196 struct sk_buff *segs = ERR_PTR(-EINVAL); 1197 1197 const struct net_offload *ops; 1198 1198 unsigned int offset = 0; ··· 1245 1245 if (IS_ERR_OR_NULL(segs)) 1246 1246 goto out; 1247 1247 1248 + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); 1249 + 1248 1250 skb = segs; 1249 1251 do { 1250 1252 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); ··· 1261 1259 iph->id = htons(id); 1262 1260 id += skb_shinfo(skb)->gso_segs; 1263 1261 } 1264 - tot_len = skb_shinfo(skb)->gso_size + 1265 - SKB_GSO_CB(skb)->data_offset + 1266 - skb->head - (unsigned char *)iph; 1262 + 1263 + if (gso_partial) 1264 + tot_len = skb_shinfo(skb)->gso_size + 1265 + SKB_GSO_CB(skb)->data_offset + 1266 + skb->head - (unsigned char *)iph; 1267 + else 1268 + tot_len = skb->len - nhoff; 1267 1269 } else { 1268 1270 if (!fixedid) 1269 1271 iph->id = htons(id++);
+4 -2
net/ipv4/gre_offload.c
··· 24 24 __be16 protocol = skb->protocol; 25 25 u16 mac_len = skb->mac_len; 26 26 int gre_offset, outer_hlen; 27 - bool need_csum, ufo; 27 + bool need_csum, ufo, gso_partial; 28 28 29 29 if (!skb->encapsulation) 30 30 goto out; ··· 69 69 goto out; 70 70 } 71 71 72 + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); 73 + 72 74 outer_hlen = skb_tnl_header_len(skb); 73 75 gre_offset = outer_hlen - tnl_hlen; 74 76 skb = segs; ··· 98 96 greh = (struct gre_base_hdr *)skb_transport_header(skb); 99 97 pcsum = (__sum16 *)(greh + 1); 100 98 101 - if (skb_is_gso(skb)) { 99 + if (gso_partial) { 102 100 unsigned int partial_adj; 103 101 104 102 /* Adjust checksum to account for the fact that
+7 -6
net/ipv4/tcp_offload.c
··· 90 90 goto out; 91 91 } 92 92 93 - /* GSO partial only requires splitting the frame into an MSS 94 - * multiple and possibly a remainder. So update the mss now. 95 - */ 96 - if (features & NETIF_F_GSO_PARTIAL) 97 - mss = skb->len - (skb->len % mss); 98 - 99 93 copy_destructor = gso_skb->destructor == tcp_wfree; 100 94 ooo_okay = gso_skb->ooo_okay; 101 95 /* All segments but the first should have ooo_okay cleared */ ··· 101 107 102 108 /* Only first segment might have ooo_okay set */ 103 109 segs->ooo_okay = ooo_okay; 110 + 111 + /* GSO partial and frag_list segmentation only requires splitting 112 + * the frame into an MSS multiple and possibly a remainder, both 113 + * cases return a GSO skb. So update the mss now. 114 + */ 115 + if (skb_is_gso(segs)) 116 + mss *= skb_shinfo(segs)->gso_segs; 104 117 105 118 delta = htonl(oldlen + (thlen + mss)); 106 119
+4 -2
net/ipv4/udp_offload.c
··· 21 21 __be16 new_protocol, bool is_ipv6) 22 22 { 23 23 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 24 - bool remcsum, need_csum, offload_csum, ufo; 24 + bool remcsum, need_csum, offload_csum, ufo, gso_partial; 25 25 struct sk_buff *segs = ERR_PTR(-EINVAL); 26 26 struct udphdr *uh = udp_hdr(skb); 27 27 u16 mac_offset = skb->mac_header; ··· 88 88 goto out; 89 89 } 90 90 91 + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); 92 + 91 93 outer_hlen = skb_tnl_header_len(skb); 92 94 udp_offset = outer_hlen - tnl_hlen; 93 95 skb = segs; ··· 119 117 * will be using a length value equal to only one MSS sized 120 118 * segment instead of the entire frame. 121 119 */ 122 - if (skb_is_gso(skb)) { 120 + if (gso_partial) { 123 121 uh->len = htons(skb_shinfo(skb)->gso_size + 124 122 SKB_GSO_CB(skb)->data_offset + 125 123 skb->head - (unsigned char *)uh);
+4 -1
net/ipv6/ip6_offload.c
··· 69 69 int offset = 0; 70 70 bool encap, udpfrag; 71 71 int nhoff; 72 + bool gso_partial; 72 73 73 74 skb_reset_network_header(skb); 74 75 nhoff = skb_network_header(skb) - skb_mac_header(skb); ··· 102 101 if (IS_ERR(segs)) 103 102 goto out; 104 103 104 + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); 105 + 105 106 for (skb = segs; skb; skb = skb->next) { 106 107 ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); 107 - if (skb_is_gso(skb)) 108 + if (gso_partial) 108 109 payload_len = skb_shinfo(skb)->gso_size + 109 110 SKB_GSO_CB(skb)->data_offset + 110 111 skb->head - (unsigned char *)(ipv6h + 1);