Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: retire FACK loss detection

FACK loss detection has been disabled by default, and its successor,
RACK, subsumes FACK and handles reordering better.
This patch removes FACK to simplify TCP loss recovery.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Yuchung Cheng and committed by David S. Miller
713bafea e4ec1384

+12 -77
+1 -2
Documentation/networking/ip-sysctl.txt
··· 289 289 Default: 1 (fallback enabled) 290 290 291 291 tcp_fack - BOOLEAN 292 - Enable FACK congestion avoidance and fast retransmission. 293 - The value is not used, if tcp_sack is not enabled. 292 + This is a legacy option, it has no effect anymore. 294 293 295 294 tcp_fin_timeout - INTEGER 296 295 The length of time an orphaned (no longer referenced by any
-1
include/linux/tcp.h
··· 85 85 86 86 /*These are used to set the sack_ok field in struct tcp_options_received */ 87 87 #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ 88 - #define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ 89 88 #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ 90 89 91 90 struct tcp_options_received {
+1 -13
include/net/tcp.h
··· 384 384 void tcp_init_metrics(struct sock *sk); 385 385 void tcp_metrics_init(void); 386 386 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); 387 - void tcp_disable_fack(struct tcp_sock *tp); 388 387 void tcp_close(struct sock *sk, long timeout); 389 388 void tcp_init_sock(struct sock *sk); 390 389 void tcp_init_transfer(struct sock *sk, int bpf_op); ··· 775 776 }; 776 777 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ 777 778 778 - __u8 sacked; /* State flags for SACK/FACK. */ 779 + __u8 sacked; /* State flags for SACK. */ 779 780 #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ 780 781 #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ 781 782 #define TCPCB_LOST 0x04 /* SKB is lost */ ··· 1065 1066 * 1066 1067 * tcp_is_sack - SACK enabled 1067 1068 * tcp_is_reno - No SACK 1068 - * tcp_is_fack - FACK enabled, implies SACK enabled 1069 1069 */ 1070 1070 static inline int tcp_is_sack(const struct tcp_sock *tp) 1071 1071 { ··· 1074 1076 static inline bool tcp_is_reno(const struct tcp_sock *tp) 1075 1077 { 1076 1078 return !tcp_is_sack(tp); 1077 - } 1078 - 1079 - static inline bool tcp_is_fack(const struct tcp_sock *tp) 1080 - { 1081 - return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; 1082 - } 1083 - 1084 - static inline void tcp_enable_fack(struct tcp_sock *tp) 1085 - { 1086 - tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; 1087 1079 } 1088 1080 1089 1081 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
-1
include/uapi/linux/snmp.h
··· 191 191 LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ 192 192 LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ 193 193 LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ 194 - LINUX_MIB_TCPFACKREORDER, /* TCPFACKReorder */ 195 194 LINUX_MIB_TCPSACKREORDER, /* TCPSACKReorder */ 196 195 LINUX_MIB_TCPRENOREORDER, /* TCPRenoReorder */ 197 196 LINUX_MIB_TCPTSREORDER, /* TCPTSReorder */
-1
net/ipv4/proc.c
··· 212 212 SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), 213 213 SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), 214 214 SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), 215 - SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER), 216 215 SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), 217 216 SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), 218 217 SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
-2
net/ipv4/tcp.c
··· 2509 2509 return -EINVAL; 2510 2510 2511 2511 tp->rx_opt.sack_ok |= TCP_SACK_SEEN; 2512 - if (sock_net(sk)->ipv4.sysctl_tcp_fack) 2513 - tcp_enable_fack(tp); 2514 2512 break; 2515 2513 case TCPOPT_TIMESTAMP: 2516 2514 if (opt.opt_val != 0)
+7 -46
net/ipv4/tcp_input.c
··· 842 842 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 843 843 } 844 844 845 - /* 846 - * Packet counting of FACK is based on in-order assumptions, therefore TCP 847 - * disables it when reordering is detected 848 - */ 849 - void tcp_disable_fack(struct tcp_sock *tp) 850 - { 851 - /* RFC3517 uses different metric in lost marker => reset on change */ 852 - if (tcp_is_fack(tp)) 853 - tp->lost_skb_hint = NULL; 854 - tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; 855 - } 856 - 857 845 /* Take a notice that peer is sending D-SACKs */ 858 846 static void tcp_dsack_seen(struct tcp_sock *tp) 859 847 { ··· 869 881 tp->sacked_out, 870 882 tp->undo_marker ? tp->undo_retrans : 0); 871 883 #endif 872 - tcp_disable_fack(tp); 873 884 } 874 885 875 886 tp->rack.reord = 1; ··· 878 891 mib_idx = LINUX_MIB_TCPTSREORDER; 879 892 else if (tcp_is_reno(tp)) 880 893 mib_idx = LINUX_MIB_TCPRENOREORDER; 881 - else if (tcp_is_fack(tp)) 882 - mib_idx = LINUX_MIB_TCPFACKREORDER; 883 894 else 884 895 mib_idx = LINUX_MIB_TCPSACKREORDER; 885 896 ··· 955 970 * 3. Loss detection event of two flavors: 956 971 * A. Scoreboard estimator decided the packet is lost. 957 972 * A'. Reno "three dupacks" marks head of queue lost. 958 - * A''. Its FACK modification, head until snd.fack is lost. 959 973 * B. SACK arrives sacking SND.NXT at the moment, when the 960 974 * segment was retransmitted. 961 975 * 4. D-SACK added new rule: D-SACK changes any tag to S. ··· 1232 1248 fack_count += pcount; 1233 1249 1234 1250 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ 1235 - if (!tcp_is_fack(tp) && tp->lost_skb_hint && 1251 + if (tp->lost_skb_hint && 1236 1252 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) 1237 1253 tp->lost_cnt_hint += pcount; 1238 1254 ··· 2035 2051 * counter when SACK is enabled (without SACK, sacked_out is used for 2036 2052 * that purpose). 
2037 2053 * 2038 - * Instead, with FACK TCP uses fackets_out that includes both SACKed 2039 - * segments up to the highest received SACK block so far and holes in 2040 - * between them. 2041 - * 2042 2054 * With reordering, holes may still be in flight, so RFC3517 recovery 2043 2055 * uses pure sacked_out (total number of SACKed segments) even though 2044 2056 * it violates the RFC that uses duplicate ACKs, often these are equal ··· 2044 2064 */ 2045 2065 static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) 2046 2066 { 2047 - return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2067 + return tp->sacked_out + 1; 2048 2068 } 2049 2069 2050 - /* Linux NewReno/SACK/FACK/ECN state machine. 2070 + /* Linux NewReno/SACK/ECN state machine. 2051 2071 * -------------------------------------- 2052 2072 * 2053 2073 * "Open" Normal state, no dubious events, fast path. ··· 2112 2132 * dynamically measured and adjusted. This is implemented in 2113 2133 * tcp_rack_mark_lost. 2114 2134 * 2115 - * FACK (Disabled by default. Subsumbed by RACK): 2116 - * It is the simplest heuristics. As soon as we decided 2117 - * that something is lost, we decide that _all_ not SACKed 2118 - * packets until the most forward SACK are lost. I.e. 2119 - * lost_out = fackets_out - sacked_out and left_out = fackets_out. 2120 - * It is absolutely correct estimate, if network does not reorder 2121 - * packets. And it loses any connection to reality when reordering 2122 - * takes place. We use FACK by default until reordering 2123 - * is suspected on the path to this destination. 2124 - * 2125 2135 * If the receiver does not support SACK: 2126 2136 * 2127 2137 * NewReno (RFC6582): in Recovery we assume that one segment ··· 2160 2190 } 2161 2191 2162 2192 /* Detect loss in event "A" above by marking head of queue up as lost. 
2163 - * For FACK or non-SACK(Reno) senders, the first "packets" number of segments 2193 + * For non-SACK(Reno) senders, the first "packets" number of segments 2164 2194 * are considered lost. For RFC3517 SACK, a segment is considered lost if it 2165 2195 * has at least tp->reordering SACKed seqments above it; "packets" refers to 2166 2196 * the maximum SACKed segments to pass before reaching this limit. ··· 2196 2226 break; 2197 2227 2198 2228 oldcnt = cnt; 2199 - if (tcp_is_fack(tp) || tcp_is_reno(tp) || 2229 + if (tcp_is_reno(tp) || 2200 2230 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 2201 2231 cnt += tcp_skb_pcount(skb); 2202 2232 2203 2233 if (cnt > packets) { 2204 - if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || 2234 + if (tcp_is_sack(tp) || 2205 2235 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || 2206 2236 (oldcnt >= packets)) 2207 2237 break; ··· 2232 2262 2233 2263 if (tcp_is_reno(tp)) { 2234 2264 tcp_mark_head_lost(sk, 1, 1); 2235 - } else if (tcp_is_fack(tp)) { 2236 - int lost = tp->fackets_out - tp->reordering; 2237 - if (lost <= 0) 2238 - lost = 1; 2239 - tcp_mark_head_lost(sk, lost, 0); 2240 2265 } else { 2241 2266 int sacked_upto = tp->sacked_out - tp->reordering; 2242 2267 if (sacked_upto >= 0) ··· 3164 3199 if (reord < prior_fackets && reord <= tp->fackets_out) 3165 3200 tcp_update_reordering(sk, tp->fackets_out - reord, 0); 3166 3201 3167 - delta = tcp_is_fack(tp) ? pkts_acked : 3168 - prior_sacked - tp->sacked_out; 3202 + delta = prior_sacked - tp->sacked_out; 3169 3203 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); 3170 3204 } 3171 3205 ··· 5671 5707 } else { 5672 5708 tp->tcp_header_len = sizeof(struct tcphdr); 5673 5709 } 5674 - 5675 - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) 5676 - tcp_enable_fack(tp); 5677 5710 5678 5711 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 5679 5712 tcp_initialize_rcv_mss(sk);
+1 -3
net/ipv4/tcp_metrics.c
··· 470 470 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 471 471 } 472 472 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 473 - if (val && tp->reordering != val) { 474 - tcp_disable_fack(tp); 473 + if (val && tp->reordering != val) 475 474 tp->reordering = val; 476 - } 477 475 478 476 crtt = tcp_metric_get(tm, TCP_METRIC_RTT); 479 477 rcu_read_unlock();
+1 -4
net/ipv4/tcp_minisocks.c
··· 509 509 keepalive_time_when(newtp)); 510 510 511 511 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; 512 - if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { 513 - if (sock_net(sk)->ipv4.sysctl_tcp_fack) 514 - tcp_enable_fack(newtp); 515 - } 512 + newtp->rx_opt.sack_ok = ireq->sack_ok; 516 513 newtp->window_clamp = req->rsk_window_clamp; 517 514 newtp->rcv_ssthresh = req->rsk_rcv_wnd; 518 515 newtp->rcv_wnd = req->rsk_rcv_wnd;
+1 -4
net/ipv4/tcp_output.c
··· 1257 1257 1258 1258 if (tp->lost_skb_hint && 1259 1259 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && 1260 - (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) 1260 + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 1261 1261 tp->lost_cnt_hint -= decr; 1262 1262 1263 1263 tcp_verify_left_out(tp); ··· 2961 2961 * retransmitted data is acknowledged. It tries to continue 2962 2962 * resending the rest of the retransmit queue, until either 2963 2963 * we've sent it all or the congestion window limit is reached. 2964 - * If doing SACK, the first ACK which comes back for a timeout 2965 - * based retransmit packet might feed us FACK information again. 2966 - * If so, we use it to avoid unnecessarily retransmissions. 2967 2964 */ 2968 2965 void tcp_xmit_retransmit_queue(struct sock *sk) 2969 2966 {