Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: refactor F-RTO

This patch series refactors the F-RTO feature (RFC4138/5682).

This is to simplify the loss recovery processing. Existing F-RTO
was developed during the experimental stage (RFC4138) and has
many experimental features. It takes a separate code path from
the traditional timeout processing by overloading CA_Disorder
instead of using CA_Loss state. This complicates CA_Disorder state
handling because it's also used for handling dubious ACKs and undos.
While the algorithm in the RFC does not change the congestion control,
the implementation intercepts congestion control in various places
(e.g., frto_cwnd in tcp_ack()).

The new code implements newer F-RTO RFC5682 using CA_Loss processing
path. F-RTO becomes a small extension in the timeout processing
and interfaces with congestion control and Eifel undo modules.
It lets the congestion control (module) determine how many to send
independently. F-RTO only chooses what to send in order to detect
spurious retransmission. If the timeout is found spurious, it invokes
existing Eifel undo algorithms like DSACK or TCP timestamp based
detection.

The first patch removes all F-RTO code except sysctl_tcp_frto, which is
kept for the new implementation. Since CA_EVENT_FRTO is removed, TCP
westwood now computes ssthresh on regular timeout CA_EVENT_LOSS event.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yuchung Cheng and committed by
David S. Miller
9b44190d e306e2c1

+10 -421
-17
Documentation/networking/ip-sysctl.txt
··· 239 239 interacts badly with the packet counting of the SACK enabled TCP 240 240 flow. 241 241 242 - tcp_frto_response - INTEGER 243 - When F-RTO has detected that a TCP retransmission timeout was 244 - spurious (i.e, the timeout would have been avoided had TCP set a 245 - longer retransmission timeout), TCP has several options what to do 246 - next. Possible values are: 247 - 0 Rate halving based; a smooth and conservative response, 248 - results in halved cwnd and ssthresh after one RTT 249 - 1 Very conservative response; not recommended because even 250 - though being valid, it interacts poorly with the rest of 251 - Linux TCP, halves cwnd and ssthresh immediately 252 - 2 Aggressive response; undoes congestion control measures 253 - that are now known to be unnecessary (ignoring the 254 - possibility of a lost retransmission that would require 255 - TCP to be more cautious), cwnd and ssthresh are restored 256 - to the values prior timeout 257 - Default: 0 (rate halving based) 258 - 259 242 tcp_keepalive_time - INTEGER 260 243 How often TCP sends out keepalive messages when keepalive is enabled. 261 244 Default: 2hours.
+2 -4
include/linux/tcp.h
··· 187 187 u32 window_clamp; /* Maximal window to advertise */ 188 188 u32 rcv_ssthresh; /* Current window clamp */ 189 189 190 - u32 frto_highmark; /* snd_nxt when RTO occurred */ 191 190 u16 advmss; /* Advertised MSS */ 192 - u8 frto_counter; /* Number of new acks after RTO */ 191 + u8 unused; 193 192 u8 nonagle : 4,/* Disable Nagle algorithm? */ 194 193 thin_lto : 1,/* Use linear timeouts for thin streams */ 195 194 thin_dupack : 1,/* Fast retransmit on first dupack */ 196 - repair : 1, 197 - unused : 1; 195 + repair : 1; 198 196 u8 repair_queue; 199 197 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ 200 198 syn_data:1, /* SYN includes data */
-4
include/net/tcp.h
··· 272 272 extern int sysctl_tcp_adv_win_scale; 273 273 extern int sysctl_tcp_tw_reuse; 274 274 extern int sysctl_tcp_frto; 275 - extern int sysctl_tcp_frto_response; 276 275 extern int sysctl_tcp_low_latency; 277 276 extern int sysctl_tcp_dma_copybreak; 278 277 extern int sysctl_tcp_nometrics_save; ··· 423 424 bool fastopen); 424 425 extern int tcp_child_process(struct sock *parent, struct sock *child, 425 426 struct sk_buff *skb); 426 - extern bool tcp_use_frto(struct sock *sk); 427 - extern void tcp_enter_frto(struct sock *sk); 428 427 extern void tcp_enter_loss(struct sock *sk, int how); 429 428 extern void tcp_clear_retrans(struct tcp_sock *tp); 430 429 extern void tcp_update_metrics(struct sock *sk); ··· 753 756 CA_EVENT_TX_START, /* first transmit when no packets in flight */ 754 757 CA_EVENT_CWND_RESTART, /* congestion window restart */ 755 758 CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ 756 - CA_EVENT_FRTO, /* fast recovery timeout */ 757 759 CA_EVENT_LOSS, /* loss timeout */ 758 760 CA_EVENT_FAST_ACK, /* in sequence ack */ 759 761 CA_EVENT_SLOW_ACK, /* other ack */
-7
net/ipv4/sysctl_net_ipv4.c
··· 592 592 .proc_handler = proc_dointvec 593 593 }, 594 594 { 595 - .procname = "tcp_frto_response", 596 - .data = &sysctl_tcp_frto_response, 597 - .maxlen = sizeof(int), 598 - .mode = 0644, 599 - .proc_handler = proc_dointvec 600 - }, 601 - { 602 595 .procname = "tcp_low_latency", 603 596 .data = &sysctl_tcp_low_latency, 604 597 .maxlen = sizeof(int),
+4 -371
net/ipv4/tcp_input.c
··· 93 93 int sysctl_tcp_rfc1337 __read_mostly; 94 94 int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 95 95 int sysctl_tcp_frto __read_mostly = 2; 96 - int sysctl_tcp_frto_response __read_mostly; 97 96 98 97 int sysctl_tcp_thin_dupack __read_mostly; 99 98 ··· 107 108 #define FLAG_DATA_SACKED 0x20 /* New SACK. */ 108 109 #define FLAG_ECE 0x40 /* ECE in this ACK */ 109 110 #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 110 - #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ 111 111 #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 112 112 #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 113 - #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ 114 113 #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 115 114 116 115 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 117 116 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 118 117 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) 119 118 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) 120 - #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) 121 119 122 120 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 123 121 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) ··· 1155 1159 tcp_highest_sack_seq(tp))) 1156 1160 state->reord = min(fack_count, 1157 1161 state->reord); 1158 - 1159 - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ 1160 - if (!after(end_seq, tp->frto_highmark)) 1161 - state->flag |= FLAG_ONLY_ORIG_SACKED; 1162 1162 } 1163 1163 1164 1164 if (sacked & TCPCB_LOST) { ··· 1547 1555 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1548 1556 u32 prior_snd_una) 1549 1557 { 1550 - const struct inet_connection_sock *icsk = inet_csk(sk); 1551 1558 struct tcp_sock *tp = tcp_sk(sk); 1552 1559 const unsigned char *ptr = (skb_transport_header(ack_skb) + 
1553 1560 TCP_SKB_CB(ack_skb)->sacked); ··· 1719 1728 start_seq, end_seq, dup_sack); 1720 1729 1721 1730 advance_sp: 1722 - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct 1723 - * due to in-order walk 1724 - */ 1725 - if (after(end_seq, tp->frto_highmark)) 1726 - state.flag &= ~FLAG_ONLY_ORIG_SACKED; 1727 - 1728 1731 i++; 1729 1732 } 1730 1733 ··· 1735 1750 tcp_verify_left_out(tp); 1736 1751 1737 1752 if ((state.reord < tp->fackets_out) && 1738 - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && 1739 - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) 1753 + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1740 1754 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); 1741 1755 1742 1756 out: ··· 1807 1823 static inline void tcp_reset_reno_sack(struct tcp_sock *tp) 1808 1824 { 1809 1825 tp->sacked_out = 0; 1810 - } 1811 - 1812 - static int tcp_is_sackfrto(const struct tcp_sock *tp) 1813 - { 1814 - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); 1815 - } 1816 - 1817 - /* F-RTO can only be used if TCP has never retransmitted anything other than 1818 - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) 1819 - */ 1820 - bool tcp_use_frto(struct sock *sk) 1821 - { 1822 - const struct tcp_sock *tp = tcp_sk(sk); 1823 - const struct inet_connection_sock *icsk = inet_csk(sk); 1824 - struct sk_buff *skb; 1825 - 1826 - if (!sysctl_tcp_frto) 1827 - return false; 1828 - 1829 - /* MTU probe and F-RTO won't really play nicely along currently */ 1830 - if (icsk->icsk_mtup.probe_size) 1831 - return false; 1832 - 1833 - if (tcp_is_sackfrto(tp)) 1834 - return true; 1835 - 1836 - /* Avoid expensive walking of rexmit queue if possible */ 1837 - if (tp->retrans_out > 1) 1838 - return false; 1839 - 1840 - skb = tcp_write_queue_head(sk); 1841 - if (tcp_skb_is_last(sk, skb)) 1842 - return true; 1843 - skb = tcp_write_queue_next(sk, skb); /* Skips head */ 1844 - tcp_for_write_queue_from(skb, sk) 
{ 1845 - if (skb == tcp_send_head(sk)) 1846 - break; 1847 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1848 - return false; 1849 - /* Short-circuit when first non-SACKed skb has been checked */ 1850 - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 1851 - break; 1852 - } 1853 - return true; 1854 - } 1855 - 1856 - /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO 1857 - * recovery a bit and use heuristics in tcp_process_frto() to detect if 1858 - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to 1859 - * keep retrans_out counting accurate (with SACK F-RTO, other than head 1860 - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS 1861 - * bits are handled if the Loss state is really to be entered (in 1862 - * tcp_enter_frto_loss). 1863 - * 1864 - * Do like tcp_enter_loss() would; when RTO expires the second time it 1865 - * does: 1866 - * "Reduce ssthresh if it has not yet been made inside this window." 1867 - */ 1868 - void tcp_enter_frto(struct sock *sk) 1869 - { 1870 - const struct inet_connection_sock *icsk = inet_csk(sk); 1871 - struct tcp_sock *tp = tcp_sk(sk); 1872 - struct sk_buff *skb; 1873 - 1874 - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || 1875 - tp->snd_una == tp->high_seq || 1876 - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && 1877 - !icsk->icsk_retransmits)) { 1878 - tp->prior_ssthresh = tcp_current_ssthresh(sk); 1879 - /* Our state is too optimistic in ssthresh() call because cwnd 1880 - * is not reduced until tcp_enter_frto_loss() when previous F-RTO 1881 - * recovery has not yet completed. Pattern would be this: RTO, 1882 - * Cumulative ACK, RTO (2xRTO for the same segment does not end 1883 - * up here twice). 1884 - * RFC4138 should be more specific on what to do, even though 1885 - * RTO is quite unlikely to occur after the first Cumulative ACK 1886 - * due to back-off and complexity of triggering events ... 
1887 - */ 1888 - if (tp->frto_counter) { 1889 - u32 stored_cwnd; 1890 - stored_cwnd = tp->snd_cwnd; 1891 - tp->snd_cwnd = 2; 1892 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1893 - tp->snd_cwnd = stored_cwnd; 1894 - } else { 1895 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1896 - } 1897 - /* ... in theory, cong.control module could do "any tricks" in 1898 - * ssthresh(), which means that ca_state, lost bits and lost_out 1899 - * counter would have to be faked before the call occurs. We 1900 - * consider that too expensive, unlikely and hacky, so modules 1901 - * using these in ssthresh() must deal these incompatibility 1902 - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 1903 - */ 1904 - tcp_ca_event(sk, CA_EVENT_FRTO); 1905 - } 1906 - 1907 - tp->undo_marker = tp->snd_una; 1908 - tp->undo_retrans = 0; 1909 - 1910 - skb = tcp_write_queue_head(sk); 1911 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1912 - tp->undo_marker = 0; 1913 - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 1914 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1915 - tp->retrans_out -= tcp_skb_pcount(skb); 1916 - } 1917 - tcp_verify_left_out(tp); 1918 - 1919 - /* Too bad if TCP was application limited */ 1920 - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); 1921 - 1922 - /* Earlier loss recovery underway (see RFC4138; Appendix B). 1923 - * The last condition is necessary at least in tp->frto_counter case. 1924 - */ 1925 - if (tcp_is_sackfrto(tp) && (tp->frto_counter || 1926 - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && 1927 - after(tp->high_seq, tp->snd_una)) { 1928 - tp->frto_highmark = tp->high_seq; 1929 - } else { 1930 - tp->frto_highmark = tp->snd_nxt; 1931 - } 1932 - tcp_set_ca_state(sk, TCP_CA_Disorder); 1933 - tp->high_seq = tp->snd_nxt; 1934 - tp->frto_counter = 1; 1935 - } 1936 - 1937 - /* Enter Loss state after F-RTO was applied. 
Dupack arrived after RTO, 1938 - * which indicates that we should follow the traditional RTO recovery, 1939 - * i.e. mark everything lost and do go-back-N retransmission. 1940 - */ 1941 - static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) 1942 - { 1943 - struct tcp_sock *tp = tcp_sk(sk); 1944 - struct sk_buff *skb; 1945 - 1946 - tp->lost_out = 0; 1947 - tp->retrans_out = 0; 1948 - if (tcp_is_reno(tp)) 1949 - tcp_reset_reno_sack(tp); 1950 - 1951 - tcp_for_write_queue(skb, sk) { 1952 - if (skb == tcp_send_head(sk)) 1953 - break; 1954 - 1955 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1956 - /* 1957 - * Count the retransmission made on RTO correctly (only when 1958 - * waiting for the first ACK and did not get it)... 1959 - */ 1960 - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { 1961 - /* For some reason this R-bit might get cleared? */ 1962 - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) 1963 - tp->retrans_out += tcp_skb_pcount(skb); 1964 - /* ...enter this if branch just for the first segment */ 1965 - flag |= FLAG_DATA_ACKED; 1966 - } else { 1967 - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) 1968 - tp->undo_marker = 0; 1969 - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1970 - } 1971 - 1972 - /* Marking forward transmissions that were made after RTO lost 1973 - * can cause unnecessary retransmissions in some scenarios, 1974 - * SACK blocks will mitigate that in some but not in all cases. 1975 - * We used to not mark them but it was causing break-ups with 1976 - * receivers that do only in-order receival. 1977 - * 1978 - * TODO: we could detect presence of such receiver and select 1979 - * different behavior per flow. 
1980 - */ 1981 - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { 1982 - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1983 - tp->lost_out += tcp_skb_pcount(skb); 1984 - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; 1985 - } 1986 - } 1987 - tcp_verify_left_out(tp); 1988 - 1989 - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; 1990 - tp->snd_cwnd_cnt = 0; 1991 - tp->snd_cwnd_stamp = tcp_time_stamp; 1992 - tp->frto_counter = 0; 1993 - 1994 - tp->reordering = min_t(unsigned int, tp->reordering, 1995 - sysctl_tcp_reordering); 1996 - tcp_set_ca_state(sk, TCP_CA_Loss); 1997 - tp->high_seq = tp->snd_nxt; 1998 - TCP_ECN_queue_cwr(tp); 1999 - 2000 - tcp_clear_all_retrans_hints(tp); 2001 1826 } 2002 1827 2003 1828 static void tcp_clear_retrans_partial(struct tcp_sock *tp) ··· 1883 2090 tcp_set_ca_state(sk, TCP_CA_Loss); 1884 2091 tp->high_seq = tp->snd_nxt; 1885 2092 TCP_ECN_queue_cwr(tp); 1886 - /* Abort F-RTO algorithm if one is in progress */ 1887 - tp->frto_counter = 0; 1888 2093 } 1889 2094 1890 2095 /* If ACK arrived pointing to a remembered SACK, it means that our ··· 2065 2274 { 2066 2275 struct tcp_sock *tp = tcp_sk(sk); 2067 2276 __u32 packets_out; 2068 - 2069 - /* Do not perform any recovery during F-RTO algorithm */ 2070 - if (tp->frto_counter) 2071 - return false; 2072 2277 2073 2278 /* Trick#1: The loss is proven. 
*/ 2074 2279 if (tp->lost_out) ··· 2547 2760 2548 2761 tcp_verify_left_out(tp); 2549 2762 2550 - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) 2763 + if (!tcp_any_retrans_done(sk)) 2551 2764 tp->retrans_stamp = 0; 2552 2765 2553 2766 if (flag & FLAG_ECE) ··· 2985 3198 flag |= FLAG_RETRANS_DATA_ACKED; 2986 3199 ca_seq_rtt = -1; 2987 3200 seq_rtt = -1; 2988 - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) 2989 - flag |= FLAG_NONHEAD_RETRANS_ACKED; 2990 3201 } else { 2991 3202 ca_seq_rtt = now - scb->when; 2992 3203 last_ackt = skb->tstamp; ··· 3193 3408 return flag; 3194 3409 } 3195 3410 3196 - /* A very conservative spurious RTO response algorithm: reduce cwnd and 3197 - * continue in congestion avoidance. 3198 - */ 3199 - static void tcp_conservative_spur_to_response(struct tcp_sock *tp) 3200 - { 3201 - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 3202 - tp->snd_cwnd_cnt = 0; 3203 - TCP_ECN_queue_cwr(tp); 3204 - tcp_moderate_cwnd(tp); 3205 - } 3206 - 3207 - /* A conservative spurious RTO response algorithm: reduce cwnd using 3208 - * PRR and continue in congestion avoidance. 3209 - */ 3210 - static void tcp_cwr_spur_to_response(struct sock *sk) 3211 - { 3212 - tcp_enter_cwr(sk, 0); 3213 - } 3214 - 3215 - static void tcp_undo_spur_to_response(struct sock *sk, int flag) 3216 - { 3217 - if (flag & FLAG_ECE) 3218 - tcp_cwr_spur_to_response(sk); 3219 - else 3220 - tcp_undo_cwr(sk, true); 3221 - } 3222 - 3223 - /* F-RTO spurious RTO detection algorithm (RFC4138) 3224 - * 3225 - * F-RTO affects during two new ACKs following RTO (well, almost, see inline 3226 - * comments). State (ACK number) is kept in frto_counter. When ACK advances 3227 - * window (but not to or beyond highest sequence sent before RTO): 3228 - * On First ACK, send two new segments out. 3229 - * On Second ACK, RTO was likely spurious. Do spurious response (response 3230 - * algorithm is not part of the F-RTO detection algorithm 3231 - * given in RFC4138 but can be selected separately). 
3232 - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss 3233 - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding 3234 - * of Nagle, this is done using frto_counter states 2 and 3, when a new data 3235 - * segment of any size sent during F-RTO, state 2 is upgraded to 3. 3236 - * 3237 - * Rationale: if the RTO was spurious, new ACKs should arrive from the 3238 - * original window even after we transmit two new data segments. 3239 - * 3240 - * SACK version: 3241 - * on first step, wait until first cumulative ACK arrives, then move to 3242 - * the second step. In second step, the next ACK decides. 3243 - * 3244 - * F-RTO is implemented (mainly) in four functions: 3245 - * - tcp_use_frto() is used to determine if TCP is can use F-RTO 3246 - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is 3247 - * called when tcp_use_frto() showed green light 3248 - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm 3249 - * - tcp_enter_frto_loss() is called if there is not enough evidence 3250 - * to prove that the RTO is indeed spurious. It transfers the control 3251 - * from F-RTO to the conventional RTO recovery 3252 - */ 3253 - static bool tcp_process_frto(struct sock *sk, int flag) 3254 - { 3255 - struct tcp_sock *tp = tcp_sk(sk); 3256 - 3257 - tcp_verify_left_out(tp); 3258 - 3259 - /* Duplicate the behavior from Loss state (fastretrans_alert) */ 3260 - if (flag & FLAG_DATA_ACKED) 3261 - inet_csk(sk)->icsk_retransmits = 0; 3262 - 3263 - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || 3264 - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) 3265 - tp->undo_marker = 0; 3266 - 3267 - if (!before(tp->snd_una, tp->frto_highmark)) { 3268 - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 
2 : 3), flag); 3269 - return true; 3270 - } 3271 - 3272 - if (!tcp_is_sackfrto(tp)) { 3273 - /* RFC4138 shortcoming in step 2; should also have case c): 3274 - * ACK isn't duplicate nor advances window, e.g., opposite dir 3275 - * data, winupdate 3276 - */ 3277 - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) 3278 - return true; 3279 - 3280 - if (!(flag & FLAG_DATA_ACKED)) { 3281 - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), 3282 - flag); 3283 - return true; 3284 - } 3285 - } else { 3286 - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { 3287 - if (!tcp_packets_in_flight(tp)) { 3288 - tcp_enter_frto_loss(sk, 2, flag); 3289 - return true; 3290 - } 3291 - 3292 - /* Prevent sending of new data. */ 3293 - tp->snd_cwnd = min(tp->snd_cwnd, 3294 - tcp_packets_in_flight(tp)); 3295 - return true; 3296 - } 3297 - 3298 - if ((tp->frto_counter >= 2) && 3299 - (!(flag & FLAG_FORWARD_PROGRESS) || 3300 - ((flag & FLAG_DATA_SACKED) && 3301 - !(flag & FLAG_ONLY_ORIG_SACKED)))) { 3302 - /* RFC4138 shortcoming (see comment above) */ 3303 - if (!(flag & FLAG_FORWARD_PROGRESS) && 3304 - (flag & FLAG_NOT_DUP)) 3305 - return true; 3306 - 3307 - tcp_enter_frto_loss(sk, 3, flag); 3308 - return true; 3309 - } 3310 - } 3311 - 3312 - if (tp->frto_counter == 1) { 3313 - /* tcp_may_send_now needs to see updated state */ 3314 - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; 3315 - tp->frto_counter = 2; 3316 - 3317 - if (!tcp_may_send_now(sk)) 3318 - tcp_enter_frto_loss(sk, 2, flag); 3319 - 3320 - return true; 3321 - } else { 3322 - switch (sysctl_tcp_frto_response) { 3323 - case 2: 3324 - tcp_undo_spur_to_response(sk, flag); 3325 - break; 3326 - case 1: 3327 - tcp_conservative_spur_to_response(tp); 3328 - break; 3329 - default: 3330 - tcp_cwr_spur_to_response(sk); 3331 - break; 3332 - } 3333 - tp->frto_counter = 0; 3334 - tp->undo_marker = 0; 3335 - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); 3336 - } 3337 - return false; 3338 - } 3339 - 3340 3411 
/* RFC 5961 7 [ACK Throttling] */ 3341 3412 static void tcp_send_challenge_ack(struct sock *sk) 3342 3413 { ··· 3257 3616 int prior_packets; 3258 3617 int prior_sacked = tp->sacked_out; 3259 3618 int pkts_acked = 0; 3260 - bool frto_cwnd = false; 3261 3619 3262 3620 /* If the ack is older than previous acks 3263 3621 * then we can probably ignore it. ··· 3330 3690 3331 3691 pkts_acked = prior_packets - tp->packets_out; 3332 3692 3333 - if (tp->frto_counter) 3334 - frto_cwnd = tcp_process_frto(sk, flag); 3335 - /* Guarantee sacktag reordering detection against wrap-arounds */ 3336 - if (before(tp->frto_highmark, tp->snd_una)) 3337 - tp->frto_highmark = 0; 3338 - 3339 3693 if (tcp_ack_is_dubious(sk, flag)) { 3340 3694 /* Advance CWND, if state allows this. */ 3341 - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3342 - tcp_may_raise_cwnd(sk, flag)) 3695 + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) 3343 3696 tcp_cong_avoid(sk, ack, prior_in_flight); 3344 3697 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3345 3698 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3346 3699 is_dupack, flag); 3347 3700 } else { 3348 - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3701 + if (flag & FLAG_DATA_ACKED) 3349 3702 tcp_cong_avoid(sk, ack, prior_in_flight); 3350 3703 } 3351 3704
-3
net/ipv4/tcp_minisocks.c
··· 422 422 newtp->snd_cwnd = TCP_INIT_CWND; 423 423 newtp->snd_cwnd_cnt = 0; 424 424 425 - newtp->frto_counter = 0; 426 - newtp->frto_highmark = 0; 427 - 428 425 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && 429 426 !try_module_get(newicsk->icsk_ca_ops->owner)) 430 427 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
+2 -9
net/ipv4/tcp_output.c
··· 78 78 tcp_advance_send_head(sk, skb); 79 79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 80 80 81 - /* Don't override Nagle indefinitely with F-RTO */ 82 - if (tp->frto_counter == 2) 83 - tp->frto_counter = 3; 84 - 85 81 tp->packets_out += tcp_skb_pcount(skb); 86 82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 87 83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) ··· 1466 1470 if (nonagle & TCP_NAGLE_PUSH) 1467 1471 return true; 1468 1472 1469 - /* Don't use the nagle rule for urgent data (or for the final FIN). 1470 - * Nagle can be ignored during F-RTO too (see RFC4138). 1471 - */ 1472 - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1473 - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1473 + /* Don't use the nagle rule for urgent data (or for the final FIN). */ 1474 + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1474 1475 return true; 1475 1476 1476 1477 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+1 -5
net/ipv4/tcp_timer.c
··· 416 416 NET_INC_STATS_BH(sock_net(sk), mib_idx); 417 417 } 418 418 419 - if (tcp_use_frto(sk)) { 420 - tcp_enter_frto(sk); 421 - } else { 422 - tcp_enter_loss(sk, 0); 423 - } 419 + tcp_enter_loss(sk, 0); 424 420 425 421 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { 426 422 /* Retransmission failed because of local congestion,
+1 -1
net/ipv4/tcp_westwood.c
··· 236 236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 237 237 break; 238 238 239 - case CA_EVENT_FRTO: 239 + case CA_EVENT_LOSS: 240 240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 241 241 /* Update RTT_min when next ack arrives */ 242 242 w->reset_rtt_min = 1;