Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: accecn: detect loss of ACK w/ AccECN option and add TCP_ACCECN_OPTION_PERSIST

Detect, after the second retransmission, that a previously sent ACK
carrying the AccECN option was lost, i.e. that the peer is spuriously
retransmitting. Since this might be caused by a middlebox dropping ACKs
with options it does not recognize, disable the sending of the AccECN
option in all subsequent ACKs. This patch follows Section 3.2.3.2.2 of
the AccECN spec (RFC 9768), and a new field (accecn_opt_sent_w_dsack) is
added to indicate that an AccECN option was sent with duplicate SACK info.

Also, a new AccECN option sending mode is added to the tcp_ecn_option
sysctl (TCP_ACCECN_OPTION_PERSIST), which ignores the AccECN fallback
policy and persistently sends the AccECN option whenever it fits into the
TCP option space.

Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-13-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Chia-Yu Chang and committed by
Paolo Abeni
1247fb19 2ed66124

+26 -5
+3 -1
Documentation/networking/ip-sysctl.rst
··· 482 482 1 Send AccECN option sparingly according to the minimum option 483 483 rules outlined in draft-ietf-tcpm-accurate-ecn. 484 484 2 Send AccECN option on every packet whenever it fits into TCP 485 - option space. 485 + option space except when AccECN fallback is triggered. 486 + 3 Send AccECN option on every packet whenever it fits into TCP 487 + option space even when AccECN fallback is triggered. 486 488 = ============================================================ 487 489 488 490 Default: 2
+2 -1
include/linux/tcp.h
··· 291 291 u8 nonagle : 4,/* Disable Nagle algorithm? */ 292 292 rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ 293 293 u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ 294 - unused2:4; 294 + accecn_opt_sent_w_dsack:1,/* Sent ACCECN opt in previous ACK w/ D-SACK */ 295 + unused2:3; 295 296 u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ 296 297 est_ecnfield:2,/* ECN field for AccECN delivered estimates */ 297 298 accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
+2
include/net/tcp_ecn.h
··· 29 29 TCP_ACCECN_OPTION_DISABLED = 0, 30 30 TCP_ACCECN_OPTION_MINIMUM = 1, 31 31 TCP_ACCECN_OPTION_FULL = 2, 32 + TCP_ACCECN_OPTION_PERSIST = 3, 32 33 }; 33 34 34 35 /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ ··· 407 406 tp->received_ce_pending = 0; 408 407 __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); 409 408 __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); 409 + tp->accecn_opt_sent_w_dsack = 0; 410 410 tp->accecn_minlen = 0; 411 411 tp->accecn_opt_demand = 0; 412 412 tp->est_ecnfield = 0;
+1 -1
net/ipv4/sysctl_net_ipv4.c
··· 749 749 .mode = 0644, 750 750 .proc_handler = proc_dou8vec_minmax, 751 751 .extra1 = SYSCTL_ZERO, 752 - .extra2 = SYSCTL_TWO, 752 + .extra2 = SYSCTL_THREE, 753 753 }, 754 754 { 755 755 .procname = "tcp_ecn_option_beacon",
+12 -1
net/ipv4/tcp_input.c
··· 5046 5046 tcp_sack_extend(tp->duplicate_sack, seq, end_seq); 5047 5047 } 5048 5048 5049 - static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) 5049 + static void tcp_rcv_spurious_retrans(struct sock *sk, 5050 + const struct sk_buff *skb) 5050 5051 { 5052 + struct tcp_sock *tp = tcp_sk(sk); 5053 + 5051 5054 /* When the ACK path fails or drops most ACKs, the sender would 5052 5055 * timeout and spuriously retransmit the same segment repeatedly. 5053 5056 * If it seems our ACKs are not reaching the other side, ··· 5070 5067 /* Save last flowlabel after a spurious retrans. */ 5071 5068 tcp_save_lrcv_flowlabel(sk, skb); 5072 5069 #endif 5070 + /* Check DSACK info to detect that the previous ACK carrying the 5071 + * AccECN option was lost after the second retransmision, and then 5072 + * stop sending AccECN option in all subsequent ACKs. 5073 + */ 5074 + if (tcp_ecn_mode_accecn(tp) && 5075 + tp->accecn_opt_sent_w_dsack && 5076 + TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq) 5077 + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND); 5073 5078 } 5074 5079 5075 5080 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+6 -1
net/ipv4/tcp_output.c
··· 715 715 if (tp) { 716 716 tp->accecn_minlen = 0; 717 717 tp->accecn_opt_tstamp = tp->tcp_mstamp; 718 + tp->accecn_opt_sent_w_dsack = tp->rx_opt.dsack; 718 719 if (tp->accecn_opt_demand) 719 720 tp->accecn_opt_demand--; 720 721 } 722 + } else if (tp) { 723 + tp->accecn_opt_sent_w_dsack = 0; 721 724 } 722 725 723 726 if (unlikely(OPTION_SACK_ADVERTISE & options)) { ··· 1192 1189 if (tcp_ecn_mode_accecn(tp)) { 1193 1190 int ecn_opt = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option); 1194 1191 1195 - if (ecn_opt && tp->saw_accecn_opt && !tcp_accecn_opt_fail_send(tp) && 1192 + if (ecn_opt && tp->saw_accecn_opt && 1193 + (ecn_opt >= TCP_ACCECN_OPTION_PERSIST || 1194 + !tcp_accecn_opt_fail_send(tp)) && 1196 1195 (ecn_opt >= TCP_ACCECN_OPTION_FULL || tp->accecn_opt_demand || 1197 1196 tcp_accecn_option_beacon_check(sk))) { 1198 1197 opts->use_synack_ecn_bytes = 0;