[PATCH] tcp: fix TSO sizing bugs

MSS changes can be lost since we preemptively initialize the tso_segs count
for an SKB before we are 100% committed to sending it out.

So, by the time we send it out, the tso_size information can be stale due
to PMTU events. This mucks up all of the logic in our send engine, and can
even trigger the BUG() in tcp_tso_should_defer().
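
The heart of the fix in the diff below is to delay the final sizing decision:
tcp_init_tso_segs() now re-derives the segment count whenever an SKB was sized
against an MSS that is no longer the one in effect. A minimal userspace sketch
of that transmit-time consistency check; struct fake_skb and the helper names
are illustrative stand-ins, not kernel code:

#include <stdio.h>

/* Illustrative stand-in for the skb_shinfo(skb)->tso_segs / tso_size pair. */
struct fake_skb {
        unsigned int len;
        unsigned int tso_segs;
        unsigned int tso_size;
};

/* Same round-up division the patched tcp_set_skb_tso_segs() performs. */
static void set_tso_segs(struct fake_skb *skb, unsigned int mss_now)
{
        if (skb->len <= mss_now) {
                skb->tso_segs = 1;
                skb->tso_size = 0;
        } else {
                skb->tso_segs = (skb->len + mss_now - 1) / mss_now;
                skb->tso_size = mss_now;
        }
}

/* Mirrors the patched tcp_init_tso_segs(): recompute if the count was never
 * set, or if the MSS it was based on is no longer the one in effect. */
static unsigned int init_tso_segs(struct fake_skb *skb, unsigned int mss_now)
{
        if (!skb->tso_segs ||
            (skb->tso_segs > 1 && skb->tso_size != mss_now))
                set_tso_segs(skb, mss_now);
        return skb->tso_segs;
}

int main(void)
{
        struct fake_skb skb = { .len = 4380 };

        set_tso_segs(&skb, 1460);                 /* sized at queue time */
        printf("segs=%u size=%u\n", skb.tso_segs, skb.tso_size);

        /* A PMTU event shrinks the MSS before the skb actually goes out;
         * the transmit-time re-check resizes it instead of handing stale
         * tso_segs/tso_size down to the driver. */
        printf("segs=%u\n", init_tso_segs(&skb, 1200));
        return 0;
}

Run as-is this prints segs=3 size=1460 and then segs=4, the same re-sizing the
patched tcp_init_tso_segs() performs when the MSS shrinks underneath a queued SKB.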

Another problem we have is that we're storing the tp->mss_cache, not the
SACK block normalized MSS, as the tso_size. That's wrong too.
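
For that second problem, callers now pass down the MSS actually in effect for
this transmit (e.g. tcp_current_mss(sk, 1), which accounts for the option space
taken by any SACK blocks being echoed) instead of letting tcp_set_skb_tso_segs()
read tp->mss_cache. A rough userspace sketch of why the two values lead to
different segment counts; current_mss() and the option-size constants below are
illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

/* Local copies of the SACK option sizes, for illustration only:
 * 2 bytes of kind/length padded to 4, plus 8 bytes per SACK block. */
#define SACK_BASE_ALIGNED       4
#define SACK_PERBLOCK           8

/* Sketch of the "SACK block normalized" MSS: the cached MSS minus the
 * option space consumed when SACK blocks ride along on the segment. */
static unsigned int current_mss(unsigned int mss_cache, unsigned int eff_sacks)
{
        unsigned int mss_now = mss_cache;

        if (eff_sacks)
                mss_now -= SACK_BASE_ALIGNED + eff_sacks * SACK_PERBLOCK;
        return mss_now;
}

int main(void)
{
        unsigned int mss_cache = 1460, len = 14600;
        unsigned int mss_now = current_mss(mss_cache, 2);  /* 1460 - 20 = 1440 */

        /* Segment counts based on the raw cache vs. the per-packet MSS
         * disagree; the patch makes every sizing decision use the latter. */
        printf("cache:      %u segs of %u\n",
               (len + mss_cache - 1) / mss_cache, mss_cache);
        printf("normalized: %u segs of %u\n",
               (len + mss_now - 1) / mss_now, mss_now);
        return 0;
}

With two SACK blocks in play the normalized MSS drops from 1460 to 1440, so the
same payload splits into 11 segments rather than 10; an SKB sized off the raw
cache therefore disagrees with the rest of the send path, which works in
mss_now units.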

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by David S. Miller, committed by Linus Torvalds (846998ae, 0c3dba15)

+28 -28
net/ipv4/tcp_output.c
···
         sk->sk_send_head = skb;
 }

-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        if (skb->len <= tp->mss_cache ||
+        if (skb->len <= mss_now ||
             !(sk->sk_route_caps & NETIF_F_TSO)) {
                 /* Avoid the costly divide in the normal
                  * non-TSO case.
···
         } else {
                 unsigned int factor;

-                factor = skb->len + (tp->mss_cache - 1);
-                factor /= tp->mss_cache;
+                factor = skb->len + (mss_now - 1);
+                factor /= mss_now;
                 skb_shinfo(skb)->tso_segs = factor;
-                skb_shinfo(skb)->tso_size = tp->mss_cache;
+                skb_shinfo(skb)->tso_size = mss_now;
         }
 }
···
  * packet to the list. This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
+static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *buff;
···
         }

         /* Fix up tso_factor for both original and new SKB. */
-        tcp_set_skb_tso_segs(sk, skb);
-        tcp_set_skb_tso_segs(sk, buff);
+        tcp_set_skb_tso_segs(sk, skb, mss_now);
+        tcp_set_skb_tso_segs(sk, buff, mss_now);

         if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
                 tp->lost_out += tcp_skb_pcount(skb);
···
          * factor and mss.
          */
         if (tcp_skb_pcount(skb) > 1)
-                tcp_set_skb_tso_segs(sk, skb);
+                tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));

         return 0;
 }
···
 /* This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
         int tso_segs = tcp_skb_pcount(skb);

-        if (!tso_segs) {
-                tcp_set_skb_tso_segs(sk, skb);
+        if (!tso_segs ||
+            (tso_segs > 1 &&
+             skb_shinfo(skb)->tso_size != mss_now)) {
+                tcp_set_skb_tso_segs(sk, skb, mss_now);
                 tso_segs = tcp_skb_pcount(skb);
         }
         return tso_segs;
···
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned int cwnd_quota;

-        tcp_init_tso_segs(sk, skb);
+        tcp_init_tso_segs(sk, skb, cur_mss);

         if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
                 return 0;
···
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
 {
         struct sk_buff *buff;
         int nlen = skb->len - len;
···
         skb_split(skb, buff, len);

         /* Fix up tso_factor for both original and new SKB. */
-        tcp_set_skb_tso_segs(sk, skb);
-        tcp_set_skb_tso_segs(sk, buff);
+        tcp_set_skb_tso_segs(sk, skb, mss_now);
+        tcp_set_skb_tso_segs(sk, buff, mss_now);

         /* Link BUFF into the send queue. */
         skb_header_release(buff);
···
         if (unlikely(!skb))
                 return 0;

-        tso_segs = tcp_init_tso_segs(sk, skb);
+        tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
         cwnd_quota = tcp_cwnd_test(tp, skb);
         if (unlikely(!cwnd_quota))
                 goto out;
···
                         limit = skb->len - trim;
                 }
                 if (skb->len > limit) {
-                        if (tso_fragment(sk, skb, limit))
+                        if (tso_fragment(sk, skb, limit, mss_now))
                                 break;
                 }
         } else if (unlikely(skb->len > mss_now)) {
-                if (unlikely(tcp_fragment(sk, skb, mss_now)))
+                if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
                         break;
         }
···
                 skb = sk->sk_send_head;
                 if (!skb)
                         break;
-                tso_segs = tcp_init_tso_segs(sk, skb);
+                tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
         }

         if (likely(sent_pkts)) {
···
         BUG_ON(!skb || skb->len < mss_now);

-        tso_segs = tcp_init_tso_segs(sk, skb);
+        tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
         cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);

         if (likely(cwnd_quota)) {
···
                         limit = skb->len - trim;
                 }
                 if (skb->len > limit) {
-                        if (unlikely(tso_fragment(sk, skb, limit)))
+                        if (unlikely(tso_fragment(sk, skb, limit, mss_now)))
                                 return;
                 }
         } else if (unlikely(skb->len > mss_now)) {
-                if (unlikely(tcp_fragment(sk, skb, mss_now)))
+                if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
                         return;
         }
···
                 int old_factor = tcp_skb_pcount(skb);
                 int new_factor;

-                if (tcp_fragment(sk, skb, cur_mss))
+                if (tcp_fragment(sk, skb, cur_mss, cur_mss))
                         return -ENOMEM; /* We'll try again later. */

                 /* New SKB created, account for it. */
···
                     skb->len > mss) {
                         seg_size = min(seg_size, mss);
                         TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-                        if (tcp_fragment(sk, skb, seg_size))
+                        if (tcp_fragment(sk, skb, seg_size, mss))
                                 return -1;
                         /* SWS override triggered forced fragmentation.
                          * Disable TSO, the connection is too sick. */
···
                                 sk->sk_route_caps &= ~NETIF_F_TSO;
                         }
                 } else if (!tcp_skb_pcount(skb))
-                        tcp_set_skb_tso_segs(sk, skb);
+                        tcp_set_skb_tso_segs(sk, skb, mss);

                 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
                 TCP_SKB_CB(skb)->when = tcp_time_stamp;