Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: tcp: send consistent autoflowlabel in SYN_RECV state

This is a followup of commit c67b85558ff2 ("ipv6: tcp: send consistent
autoflowlabel in TIME_WAIT state"), but for SYN_RECV state.

In some cases, TCP sends a challenge ACK on behalf of a SYN_RECV request.
When this happens, we want to use the flow label that was used when
the prior SYNACK packet was sent, instead of another one.

After this patch, the following packetdrill test passes:

0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

+.2 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > (flowlabel 0x11) S. 0:0(0) ack 1 <...>
// Test if a challenge ack is properly sent (same flowlabel as prior SYNACK)
+.01 < . 4000000000:4000000000(0) ack 1 win 320
+0 > (flowlabel 0x11) . 1:1(0) ack 1

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20220831203729.458000-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
aa51b80e 1ab3d417

+14 -12
+14 -12
net/ipv6/tcp_ipv6.c
··· 858 858 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 859 859 u32 ack, u32 win, u32 tsval, u32 tsecr, 860 860 int oif, struct tcp_md5sig_key *key, int rst, 861 - u8 tclass, __be32 label, u32 priority) 861 + u8 tclass, __be32 label, u32 priority, u32 txhash) 862 862 { 863 863 const struct tcphdr *th = tcp_hdr(skb); 864 864 struct tcphdr *t1; ··· 949 949 } 950 950 951 951 if (sk) { 952 - if (sk->sk_state == TCP_TIME_WAIT) { 952 + if (sk->sk_state == TCP_TIME_WAIT) 953 953 mark = inet_twsk(sk)->tw_mark; 954 - /* autoflowlabel relies on buff->hash */ 955 - skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 956 - PKT_HASH_TYPE_L4); 957 - } else { 954 + else 958 955 mark = sk->sk_mark; 959 - } 960 956 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 957 + } 958 + if (txhash) { 959 + /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 960 + skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 961 961 } 962 962 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 963 963 fl6.fl6_dport = t1->dest; ··· 1085 1085 } 1086 1086 1087 1087 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1088 - ipv6_get_dsfield(ipv6h), label, priority); 1088 + ipv6_get_dsfield(ipv6h), label, priority, 0); 1089 1089 1090 1090 #ifdef CONFIG_TCP_MD5SIG 1091 1091 out: ··· 1096 1096 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1097 1097 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1098 1098 struct tcp_md5sig_key *key, u8 tclass, 1099 - __be32 label, u32 priority) 1099 + __be32 label, u32 priority, u32 txhash) 1100 1100 { 1101 1101 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1102 - tclass, label, priority); 1102 + tclass, label, priority, txhash); 1103 1103 } 1104 1104 1105 1105 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) ··· 1111 1111 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1112 1112 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1113 1113 
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1114 - tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1114 + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, 1115 + tw->tw_txhash); 1115 1116 1116 1117 inet_twsk_put(tw); 1117 1118 } ··· 1139 1138 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1140 1139 req->ts_recent, sk->sk_bound_dev_if, 1141 1140 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1142 - ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1141 + ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, 1142 + tcp_rsk(req)->txhash); 1143 1143 } 1144 1144 1145 1145