Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: refactor F-RTO

This patch series refactors the F-RTO feature (RFC4138/5682).

This is to simplify the loss recovery processing. Existing F-RTO
was developed during the experimental stage (RFC4138) and has
many experimental features. It takes a separate code path from
the traditional timeout processing by overloading CA_Disorder
instead of using CA_Loss state. This complicates CA_Disorder state
handling because it's also used for handling dubious ACKs and undos.
While the algorithm in the RFC does not change the congestion control,
the implementation intercepts congestion control in various places
(e.g., frto_cwnd in tcp_ack()).

The new code implements newer F-RTO RFC5682 using CA_Loss processing
path. F-RTO becomes a small extension in the timeout processing
and interfaces with congestion control and Eifel undo modules.
It lets the congestion control (module) determine how many to send
independently. F-RTO only chooses what to send in order to detect
spurious retransmission. If the timeout is found spurious, it invokes
existing Eifel undo algorithms like DSACK or TCP timestamp based
detection.

The first patch removes all F-RTO code except sysctl_tcp_frto, which is
kept for the new implementation. Since CA_EVENT_FRTO is removed, TCP
westwood now computes ssthresh on regular timeout CA_EVENT_LOSS event.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yuchung Cheng and committed by
David S. Miller
9b44190d e306e2c1

+10 -421
-17
Documentation/networking/ip-sysctl.txt
··· 239 239 interacts badly with the packet counting of the SACK enabled TCP 240 240 flow. 241 241 242 - tcp_frto_response - INTEGER 243 - When F-RTO has detected that a TCP retransmission timeout was 244 - spurious (i.e, the timeout would have been avoided had TCP set a 245 - longer retransmission timeout), TCP has several options what to do 246 - next. Possible values are: 247 - 0 Rate halving based; a smooth and conservative response, 248 - results in halved cwnd and ssthresh after one RTT 249 - 1 Very conservative response; not recommended because even 250 - though being valid, it interacts poorly with the rest of 251 - Linux TCP, halves cwnd and ssthresh immediately 252 - 2 Aggressive response; undoes congestion control measures 253 - that are now known to be unnecessary (ignoring the 254 - possibility of a lost retransmission that would require 255 - TCP to be more cautious), cwnd and ssthresh are restored 256 - to the values prior timeout 257 - Default: 0 (rate halving based) 258 - 259 242 tcp_keepalive_time - INTEGER 260 243 How often TCP sends out keepalive messages when keepalive is enabled. 261 244 Default: 2hours.
+2 -4
include/linux/tcp.h
··· 187 187 u32 window_clamp; /* Maximal window to advertise */ 188 188 u32 rcv_ssthresh; /* Current window clamp */ 189 189 190 - u32 frto_highmark; /* snd_nxt when RTO occurred */ 191 190 u16 advmss; /* Advertised MSS */ 192 - u8 frto_counter; /* Number of new acks after RTO */ 191 + u8 unused; 193 192 u8 nonagle : 4,/* Disable Nagle algorithm? */ 194 193 thin_lto : 1,/* Use linear timeouts for thin streams */ 195 194 thin_dupack : 1,/* Fast retransmit on first dupack */ 196 - repair : 1, 197 - unused : 1; 195 + repair : 1; 198 196 u8 repair_queue; 199 197 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ 200 198 syn_data:1, /* SYN includes data */
-4
include/net/tcp.h
··· 272 272 extern int sysctl_tcp_adv_win_scale; 273 273 extern int sysctl_tcp_tw_reuse; 274 274 extern int sysctl_tcp_frto; 275 - extern int sysctl_tcp_frto_response; 276 275 extern int sysctl_tcp_low_latency; 277 276 extern int sysctl_tcp_dma_copybreak; 278 277 extern int sysctl_tcp_nometrics_save; ··· 423 424 bool fastopen); 424 425 extern int tcp_child_process(struct sock *parent, struct sock *child, 425 426 struct sk_buff *skb); 426 - extern bool tcp_use_frto(struct sock *sk); 427 - extern void tcp_enter_frto(struct sock *sk); 428 427 extern void tcp_enter_loss(struct sock *sk, int how); 429 428 extern void tcp_clear_retrans(struct tcp_sock *tp); 430 429 extern void tcp_update_metrics(struct sock *sk); ··· 753 756 CA_EVENT_TX_START, /* first transmit when no packets in flight */ 754 757 CA_EVENT_CWND_RESTART, /* congestion window restart */ 755 758 CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ 756 - CA_EVENT_FRTO, /* fast recovery timeout */ 757 759 CA_EVENT_LOSS, /* loss timeout */ 758 760 CA_EVENT_FAST_ACK, /* in sequence ack */ 759 761 CA_EVENT_SLOW_ACK, /* other ack */
-7
net/ipv4/sysctl_net_ipv4.c
··· 592 592 .proc_handler = proc_dointvec 593 593 }, 594 594 { 595 - .procname = "tcp_frto_response", 596 - .data = &sysctl_tcp_frto_response, 597 - .maxlen = sizeof(int), 598 - .mode = 0644, 599 - .proc_handler = proc_dointvec 600 - }, 601 - { 602 595 .procname = "tcp_low_latency", 603 596 .data = &sysctl_tcp_low_latency, 604 597 .maxlen = sizeof(int),
+4 -371
net/ipv4/tcp_input.c
··· 93 93 int sysctl_tcp_rfc1337 __read_mostly; 94 94 int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 95 95 int sysctl_tcp_frto __read_mostly = 2; 96 - int sysctl_tcp_frto_response __read_mostly; 97 96 98 97 int sysctl_tcp_thin_dupack __read_mostly; 99 98 ··· 107 108 #define FLAG_DATA_SACKED 0x20 /* New SACK. */ 108 109 #define FLAG_ECE 0x40 /* ECE in this ACK */ 109 110 #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 110 - #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ 111 111 #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 112 112 #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 113 - #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ 114 113 #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 115 114 116 115 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 117 116 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 118 117 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) 119 118 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) 120 - #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) 121 119 122 120 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 123 121 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) ··· 1155 1159 tcp_highest_sack_seq(tp))) 1156 1160 state->reord = min(fack_count, 1157 1161 state->reord); 1158 - 1159 - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ 1160 - if (!after(end_seq, tp->frto_highmark)) 1161 - state->flag |= FLAG_ONLY_ORIG_SACKED; 1162 1162 } 1163 1163 1164 1164 if (sacked & TCPCB_LOST) { ··· 1547 1555 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1548 1556 u32 prior_snd_una) 1549 1557 { 1550 - const struct inet_connection_sock *icsk = inet_csk(sk); 1551 1558 struct tcp_sock *tp = tcp_sk(sk); 1552 1559 const unsigned char *ptr = (skb_transport_header(ack_skb) + 
1553 1560 TCP_SKB_CB(ack_skb)->sacked); ··· 1719 1728 start_seq, end_seq, dup_sack); 1720 1729 1721 1730 advance_sp: 1722 - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct 1723 - * due to in-order walk 1724 - */ 1725 - if (after(end_seq, tp->frto_highmark)) 1726 - state.flag &= ~FLAG_ONLY_ORIG_SACKED; 1727 - 1728 1731 i++; 1729 1732 } 1730 1733 ··· 1735 1750 tcp_verify_left_out(tp); 1736 1751 1737 1752 if ((state.reord < tp->fackets_out) && 1738 - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && 1739 - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) 1753 + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1740 1754 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); 1741 1755 1742 1756 out: ··· 1807 1823 static inline void tcp_reset_reno_sack(struct tcp_sock *tp) 1808 1824 { 1809 1825 tp->sacked_out = 0; 1810 - } 1811 - 1812 - static int tcp_is_sackfrto(const struct tcp_sock *tp) 1813 - { 1814 - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); 1815 - } 1816 - 1817 - /* F-RTO can only be used if TCP has never retransmitted anything other than 1818 - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) 1819 - */ 1820 - bool tcp_use_frto(struct sock *sk) 1821 - { 1822 - const struct tcp_sock *tp = tcp_sk(sk); 1823 - const struct inet_connection_sock *icsk = inet_csk(sk); 1824 - struct sk_buff *skb; 1825 - 1826 - if (!sysctl_tcp_frto) 1827 - return false; 1828 - 1829 - /* MTU probe and F-RTO won't really play nicely along currently */ 1830 - if (icsk->icsk_mtup.probe_size) 1831 - return false; 1832 - 1833 - if (tcp_is_sackfrto(tp)) 1834 - return true; 1835 - 1836 - /* Avoid expensive walking of rexmit queue if possible */ 1837 - if (tp->retrans_out > 1) 1838 - return false; 1839 - 1840 - skb = tcp_write_queue_head(sk); 1841 - if (tcp_skb_is_last(sk, skb)) 1842 - return true; 1843 - skb = tcp_write_queue_next(sk, skb); /* Skips head */ 1844 - tcp_for_write_queue_from(skb, sk) 
{ 1845 - if (skb == tcp_send_head(sk)) 1846 - break; 1847 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1848 - return false; 1849 - /* Short-circuit when first non-SACKed skb has been checked */ 1850 - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 1851 - break; 1852 - } 1853 - return true; 1854 - } 1855 - 1856 - /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO 1857 - * recovery a bit and use heuristics in tcp_process_frto() to detect if 1858 - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to 1859 - * keep retrans_out counting accurate (with SACK F-RTO, other than head 1860 - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS 1861 - * bits are handled if the Loss state is really to be entered (in 1862 - * tcp_enter_frto_loss). 1863 - * 1864 - * Do like tcp_enter_loss() would; when RTO expires the second time it 1865 - * does: 1866 - * "Reduce ssthresh if it has not yet been made inside this window." 1867 - */ 1868 - void tcp_enter_frto(struct sock *sk) 1869 - { 1870 - const struct inet_connection_sock *icsk = inet_csk(sk); 1871 - struct tcp_sock *tp = tcp_sk(sk); 1872 - struct sk_buff *skb; 1873 - 1874 - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || 1875 - tp->snd_una == tp->high_seq || 1876 - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && 1877 - !icsk->icsk_retransmits)) { 1878 - tp->prior_ssthresh = tcp_current_ssthresh(sk); 1879 - /* Our state is too optimistic in ssthresh() call because cwnd 1880 - * is not reduced until tcp_enter_frto_loss() when previous F-RTO 1881 - * recovery has not yet completed. Pattern would be this: RTO, 1882 - * Cumulative ACK, RTO (2xRTO for the same segment does not end 1883 - * up here twice). 1884 - * RFC4138 should be more specific on what to do, even though 1885 - * RTO is quite unlikely to occur after the first Cumulative ACK 1886 - * due to back-off and complexity of triggering events ... 
1887 - */ 1888 - if (tp->frto_counter) { 1889 - u32 stored_cwnd; 1890 - stored_cwnd = tp->snd_cwnd; 1891 - tp->snd_cwnd = 2; 1892 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1893 - tp->snd_cwnd = stored_cwnd; 1894 - } else { 1895 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1896 - } 1897 - /* ... in theory, cong.control module could do "any tricks" in 1898 - * ssthresh(), which means that ca_state, lost bits and lost_out 1899 - * counter would have to be faked before the call occurs. We 1900 - * consider that too expensive, unlikely and hacky, so modules 1901 - * using these in ssthresh() must deal these incompatibility 1902 - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 1903 - */ 1904 - tcp_ca_event(sk, CA_EVENT_FRTO); 1905 - } 1906 - 1907 - tp->undo_marker = tp->snd_una; 1908 - tp->undo_retrans = 0; 1909 - 1910 - skb = tcp_write_queue_head(sk); 1911 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1912 - tp->undo_marker = 0; 1913 - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 1914 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1915 - tp->retrans_out -= tcp_skb_pcount(skb); 1916 - } 1917 - tcp_verify_left_out(tp); 1918 - 1919 - /* Too bad if TCP was application limited */ 1920 - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); 1921 - 1922 - /* Earlier loss recovery underway (see RFC4138; Appendix B). 1923 - * The last condition is necessary at least in tp->frto_counter case. 1924 - */ 1925 - if (tcp_is_sackfrto(tp) && (tp->frto_counter || 1926 - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && 1927 - after(tp->high_seq, tp->snd_una)) { 1928 - tp->frto_highmark = tp->high_seq; 1929 - } else { 1930 - tp->frto_highmark = tp->snd_nxt; 1931 - } 1932 - tcp_set_ca_state(sk, TCP_CA_Disorder); 1933 - tp->high_seq = tp->snd_nxt; 1934 - tp->frto_counter = 1; 1935 - } 1936 - 1937 - /* Enter Loss state after F-RTO was applied. 
Dupack arrived after RTO, 1938 - * which indicates that we should follow the traditional RTO recovery, 1939 - * i.e. mark everything lost and do go-back-N retransmission. 1940 - */ 1941 - static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) 1942 - { 1943 - struct tcp_sock *tp = tcp_sk(sk); 1944 - struct sk_buff *skb; 1945 - 1946 - tp->lost_out = 0; 1947 - tp->retrans_out = 0; 1948 - if (tcp_is_reno(tp)) 1949 - tcp_reset_reno_sack(tp); 1950 - 1951 - tcp_for_write_queue(skb, sk) { 1952 - if (skb == tcp_send_head(sk)) 1953 - break; 1954 - 1955 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1956 - /* 1957 - * Count the retransmission made on RTO correctly (only when 1958 - * waiting for the first ACK and did not get it)... 1959 - */ 1960 - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { 1961 - /* For some reason this R-bit might get cleared? */ 1962 - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) 1963 - tp->retrans_out += tcp_skb_pcount(skb); 1964 - /* ...enter this if branch just for the first segment */ 1965 - flag |= FLAG_DATA_ACKED; 1966 - } else { 1967 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1968 - tp->undo_marker = 0; 1969 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1970 - } 1971 - 1972 - /* Marking forward transmissions that were made after RTO lost 1973 - * can cause unnecessary retransmissions in some scenarios, 1974 - * SACK blocks will mitigate that in some but not in all cases. 1975 - * We used to not mark them but it was causing break-ups with 1976 - * receivers that do only in-order receival. 1977 - * 1978 - * TODO: we could detect presence of such receiver and select 1979 - * different behavior per flow. 
1980 - */ 1981 - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { 1982 - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1983 - tp->lost_out += tcp_skb_pcount(skb); 1984 - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; 1985 - } 1986 - } 1987 - tcp_verify_left_out(tp); 1988 - 1989 - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; 1990 - tp->snd_cwnd_cnt = 0; 1991 - tp->snd_cwnd_stamp = tcp_time_stamp; 1992 - tp->frto_counter = 0; 1993 - 1994 - tp->reordering = min_t(unsigned int, tp->reordering, 1995 - sysctl_tcp_reordering); 1996 - tcp_set_ca_state(sk, TCP_CA_Loss); 1997 - tp->high_seq = tp->snd_nxt; 1998 - TCP_ECN_queue_cwr(tp); 1999 - 2000 - tcp_clear_all_retrans_hints(tp); 2001 1826 } 2002 1827 2003 1828 static void tcp_clear_retrans_partial(struct tcp_sock *tp) ··· 1883 2090 tcp_set_ca_state(sk, TCP_CA_Loss); 1884 2091 tp->high_seq = tp->snd_nxt; 1885 2092 TCP_ECN_queue_cwr(tp); 1886 - /* Abort F-RTO algorithm if one is in progress */ 1887 - tp->frto_counter = 0; 1888 2093 } 1889 2094 1890 2095 /* If ACK arrived pointing to a remembered SACK, it means that our ··· 2065 2274 { 2066 2275 struct tcp_sock *tp = tcp_sk(sk); 2067 2276 __u32 packets_out; 2068 - 2069 - /* Do not perform any recovery during F-RTO algorithm */ 2070 - if (tp->frto_counter) 2071 - return false; 2072 2277 2073 2278 /* Trick#1: The loss is proven. 
*/ 2074 2279 if (tp->lost_out) ··· 2547 2760 2548 2761 tcp_verify_left_out(tp); 2549 2762 2550 - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) 2763 + if (!tcp_any_retrans_done(sk)) 2551 2764 tp->retrans_stamp = 0; 2552 2765 2553 2766 if (flag & FLAG_ECE) ··· 2985 3198 flag |= FLAG_RETRANS_DATA_ACKED; 2986 3199 ca_seq_rtt = -1; 2987 3200 seq_rtt = -1; 2988 - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) 2989 - flag |= FLAG_NONHEAD_RETRANS_ACKED; 2990 3201 } else { 2991 3202 ca_seq_rtt = now - scb->when; 2992 3203 last_ackt = skb->tstamp; ··· 3193 3408 return flag; 3194 3409 } 3195 3410 3196 - /* A very conservative spurious RTO response algorithm: reduce cwnd and 3197 - * continue in congestion avoidance. 3198 - */ 3199 - static void tcp_conservative_spur_to_response(struct tcp_sock *tp) 3200 - { 3201 - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 3202 - tp->snd_cwnd_cnt = 0; 3203 - TCP_ECN_queue_cwr(tp); 3204 - tcp_moderate_cwnd(tp); 3205 - } 3206 - 3207 - /* A conservative spurious RTO response algorithm: reduce cwnd using 3208 - * PRR and continue in congestion avoidance. 3209 - */ 3210 - static void tcp_cwr_spur_to_response(struct sock *sk) 3211 - { 3212 - tcp_enter_cwr(sk, 0); 3213 - } 3214 - 3215 - static void tcp_undo_spur_to_response(struct sock *sk, int flag) 3216 - { 3217 - if (flag & FLAG_ECE) 3218 - tcp_cwr_spur_to_response(sk); 3219 - else 3220 - tcp_undo_cwr(sk, true); 3221 - } 3222 - 3223 - /* F-RTO spurious RTO detection algorithm (RFC4138) 3224 - * 3225 - * F-RTO affects during two new ACKs following RTO (well, almost, see inline 3226 - * comments). State (ACK number) is kept in frto_counter. When ACK advances 3227 - * window (but not to or beyond highest sequence sent before RTO): 3228 - * On First ACK, send two new segments out. 3229 - * On Second ACK, RTO was likely spurious. Do spurious response (response 3230 - * algorithm is not part of the F-RTO detection algorithm 3231 - * given in RFC4138 but can be selected separately). 
3232 - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss 3233 - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding 3234 - * of Nagle, this is done using frto_counter states 2 and 3, when a new data 3235 - * segment of any size sent during F-RTO, state 2 is upgraded to 3. 3236 - * 3237 - * Rationale: if the RTO was spurious, new ACKs should arrive from the 3238 - * original window even after we transmit two new data segments. 3239 - * 3240 - * SACK version: 3241 - * on first step, wait until first cumulative ACK arrives, then move to 3242 - * the second step. In second step, the next ACK decides. 3243 - * 3244 - * F-RTO is implemented (mainly) in four functions: 3245 - * - tcp_use_frto() is used to determine if TCP is can use F-RTO 3246 - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is 3247 - * called when tcp_use_frto() showed green light 3248 - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm 3249 - * - tcp_enter_frto_loss() is called if there is not enough evidence 3250 - * to prove that the RTO is indeed spurious. It transfers the control 3251 - * from F-RTO to the conventional RTO recovery 3252 - */ 3253 - static bool tcp_process_frto(struct sock *sk, int flag) 3254 - { 3255 - struct tcp_sock *tp = tcp_sk(sk); 3256 - 3257 - tcp_verify_left_out(tp); 3258 - 3259 - /* Duplicate the behavior from Loss state (fastretrans_alert) */ 3260 - if (flag & FLAG_DATA_ACKED) 3261 - inet_csk(sk)->icsk_retransmits = 0; 3262 - 3263 - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || 3264 - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) 3265 - tp->undo_marker = 0; 3266 - 3267 - if (!before(tp->snd_una, tp->frto_highmark)) { 3268 - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 
2 : 3), flag); 3269 - return true; 3270 - } 3271 - 3272 - if (!tcp_is_sackfrto(tp)) { 3273 - /* RFC4138 shortcoming in step 2; should also have case c): 3274 - * ACK isn't duplicate nor advances window, e.g., opposite dir 3275 - * data, winupdate 3276 - */ 3277 - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) 3278 - return true; 3279 - 3280 - if (!(flag & FLAG_DATA_ACKED)) { 3281 - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), 3282 - flag); 3283 - return true; 3284 - } 3285 - } else { 3286 - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { 3287 - if (!tcp_packets_in_flight(tp)) { 3288 - tcp_enter_frto_loss(sk, 2, flag); 3289 - return true; 3290 - } 3291 - 3292 - /* Prevent sending of new data. */ 3293 - tp->snd_cwnd = min(tp->snd_cwnd, 3294 - tcp_packets_in_flight(tp)); 3295 - return true; 3296 - } 3297 - 3298 - if ((tp->frto_counter >= 2) && 3299 - (!(flag & FLAG_FORWARD_PROGRESS) || 3300 - ((flag & FLAG_DATA_SACKED) && 3301 - !(flag & FLAG_ONLY_ORIG_SACKED)))) { 3302 - /* RFC4138 shortcoming (see comment above) */ 3303 - if (!(flag & FLAG_FORWARD_PROGRESS) && 3304 - (flag & FLAG_NOT_DUP)) 3305 - return true; 3306 - 3307 - tcp_enter_frto_loss(sk, 3, flag); 3308 - return true; 3309 - } 3310 - } 3311 - 3312 - if (tp->frto_counter == 1) { 3313 - /* tcp_may_send_now needs to see updated state */ 3314 - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; 3315 - tp->frto_counter = 2; 3316 - 3317 - if (!tcp_may_send_now(sk)) 3318 - tcp_enter_frto_loss(sk, 2, flag); 3319 - 3320 - return true; 3321 - } else { 3322 - switch (sysctl_tcp_frto_response) { 3323 - case 2: 3324 - tcp_undo_spur_to_response(sk, flag); 3325 - break; 3326 - case 1: 3327 - tcp_conservative_spur_to_response(tp); 3328 - break; 3329 - default: 3330 - tcp_cwr_spur_to_response(sk); 3331 - break; 3332 - } 3333 - tp->frto_counter = 0; 3334 - tp->undo_marker = 0; 3335 - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); 3336 - } 3337 - return false; 3338 - } 3339 - 3340 3411 
/* RFC 5961 7 [ACK Throttling] */ 3341 3412 static void tcp_send_challenge_ack(struct sock *sk) 3342 3413 { ··· 3257 3616 int prior_packets; 3258 3617 int prior_sacked = tp->sacked_out; 3259 3618 int pkts_acked = 0; 3260 - bool frto_cwnd = false; 3261 3619 3262 3620 /* If the ack is older than previous acks 3263 3621 * then we can probably ignore it. ··· 3330 3690 3331 3691 pkts_acked = prior_packets - tp->packets_out; 3332 3692 3333 - if (tp->frto_counter) 3334 - frto_cwnd = tcp_process_frto(sk, flag); 3335 - /* Guarantee sacktag reordering detection against wrap-arounds */ 3336 - if (before(tp->frto_highmark, tp->snd_una)) 3337 - tp->frto_highmark = 0; 3338 - 3339 3693 if (tcp_ack_is_dubious(sk, flag)) { 3340 3694 /* Advance CWND, if state allows this. */ 3341 - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3342 - tcp_may_raise_cwnd(sk, flag)) 3695 + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) 3343 3696 tcp_cong_avoid(sk, ack, prior_in_flight); 3344 3697 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3345 3698 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3346 3699 is_dupack, flag); 3347 3700 } else { 3348 - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3701 + if (flag & FLAG_DATA_ACKED) 3349 3702 tcp_cong_avoid(sk, ack, prior_in_flight); 3350 3703 } 3351 3704
-3
net/ipv4/tcp_minisocks.c
··· 422 422 newtp->snd_cwnd = TCP_INIT_CWND; 423 423 newtp->snd_cwnd_cnt = 0; 424 424 425 - newtp->frto_counter = 0; 426 - newtp->frto_highmark = 0; 427 - 428 425 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && 429 426 !try_module_get(newicsk->icsk_ca_ops->owner)) 430 427 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
+2 -9
net/ipv4/tcp_output.c
··· 78 78 tcp_advance_send_head(sk, skb); 79 79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 80 80 81 - /* Don't override Nagle indefinitely with F-RTO */ 82 - if (tp->frto_counter == 2) 83 - tp->frto_counter = 3; 84 - 85 81 tp->packets_out += tcp_skb_pcount(skb); 86 82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 87 83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) ··· 1466 1470 if (nonagle & TCP_NAGLE_PUSH) 1467 1471 return true; 1468 1472 1469 - /* Don't use the nagle rule for urgent data (or for the final FIN). 1470 - * Nagle can be ignored during F-RTO too (see RFC4138). 1471 - */ 1472 - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1473 - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1473 + /* Don't use the nagle rule for urgent data (or for the final FIN). */ 1474 + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1474 1475 return true; 1475 1476 1476 1477 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+1 -5
net/ipv4/tcp_timer.c
··· 416 416 NET_INC_STATS_BH(sock_net(sk), mib_idx); 417 417 } 418 418 419 - if (tcp_use_frto(sk)) { 420 - tcp_enter_frto(sk); 421 - } else { 422 - tcp_enter_loss(sk, 0); 423 - } 419 + tcp_enter_loss(sk, 0); 424 420 425 421 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { 426 422 /* Retransmission failed because of local congestion,
+1 -1
net/ipv4/tcp_westwood.c
··· 236 236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 237 237 break; 238 238 239 - case CA_EVENT_FRTO: 239 + case CA_EVENT_LOSS: 240 240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 241 241 /* Update RTT_min when next ack arrives */ 242 242 w->reset_rtt_min = 1;