Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: add rcu protection around tp->fastopen_rsk

Both tcp_v4_err() and tcp_v6_err() do the following operations
while they do not own the socket lock:

fastopen = tp->fastopen_rsk;
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;

The problem is that without an appropriate barrier, the compiler
might reload tp->fastopen_rsk and trigger a NULL deref.

Request sockets are protected by RCU, so we can simply add
the missing annotations and barriers to solve the issue.

Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
d983ea6f 8caf8a91

+35 -24
+3 -3
include/linux/tcp.h
··· 393 393 /* fastopen_rsk points to request_sock that resulted in this big 394 394 * socket. Used to retransmit SYNACKs etc. 395 395 */ 396 - struct request_sock *fastopen_rsk; 396 + struct request_sock __rcu *fastopen_rsk; 397 397 u32 *saved_syn; 398 398 }; 399 399 ··· 447 447 448 448 static inline bool tcp_passive_fastopen(const struct sock *sk) 449 449 { 450 - return (sk->sk_state == TCP_SYN_RECV && 451 - tcp_sk(sk)->fastopen_rsk != NULL); 450 + return sk->sk_state == TCP_SYN_RECV && 451 + rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL; 452 452 } 453 453 454 454 static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+1 -1
net/core/request_sock.c
··· 96 96 97 97 fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; 98 98 99 - tcp_sk(sk)->fastopen_rsk = NULL; 99 + RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL); 100 100 spin_lock_bh(&fastopenq->lock); 101 101 fastopenq->qlen--; 102 102 tcp_rsk(req)->tfo_listener = false;
+2 -2
net/ipv4/inet_connection_sock.c
··· 906 906 percpu_counter_inc(sk->sk_prot->orphan_count); 907 907 908 908 if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { 909 - BUG_ON(tcp_sk(child)->fastopen_rsk != req); 909 + BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); 910 910 BUG_ON(sk != req->rsk_listener); 911 911 912 912 /* Paranoid, to prevent race condition if ··· 915 915 * Also to satisfy an assertion in 916 916 * tcp_v4_destroy_sock(). 917 917 */ 918 - tcp_sk(child)->fastopen_rsk = NULL; 918 + RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL); 919 919 } 920 920 inet_csk_destroy_sock(child); 921 921 }
+8 -3
net/ipv4/tcp.c
··· 543 543 544 544 /* Connected or passive Fast Open socket? */ 545 545 if (state != TCP_SYN_SENT && 546 - (state != TCP_SYN_RECV || tp->fastopen_rsk)) { 546 + (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { 547 547 int target = sock_rcvlowat(sk, 0, INT_MAX); 548 548 549 549 if (tp->urg_seq == tp->copied_seq && ··· 2487 2487 } 2488 2488 2489 2489 if (sk->sk_state == TCP_CLOSE) { 2490 - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 2490 + struct request_sock *req; 2491 + 2492 + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 2493 + lockdep_sock_is_held(sk)); 2491 2494 /* We could get here with a non-NULL req if the socket is 2492 2495 * aborted (e.g., closed with unread data) before 3WHS 2493 2496 * finishes. ··· 3834 3831 3835 3832 void tcp_done(struct sock *sk) 3836 3833 { 3837 - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 3834 + struct request_sock *req; 3838 3835 3836 + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 3837 + lockdep_sock_is_held(sk)); 3839 3838 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) 3840 3839 TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); 3841 3840
+1 -1
net/ipv4/tcp_fastopen.c
··· 253 253 */ 254 254 tp = tcp_sk(child); 255 255 256 - tp->fastopen_rsk = req; 256 + rcu_assign_pointer(tp->fastopen_rsk, req); 257 257 tcp_rsk(req)->tfo_listener = true; 258 258 259 259 /* RFC1323: The window in SYN & SYN/ACK segments is never
+9 -4
net/ipv4/tcp_input.c
··· 2666 2666 struct tcp_sock *tp = tcp_sk(sk); 2667 2667 bool recovered = !before(tp->snd_una, tp->high_seq); 2668 2668 2669 - if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) && 2669 + if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) && 2670 2670 tcp_try_undo_loss(sk, false)) 2671 2671 return; 2672 2672 ··· 2990 2990 /* If the retrans timer is currently being used by Fast Open 2991 2991 * for SYN-ACK retrans purpose, stay put. 2992 2992 */ 2993 - if (tp->fastopen_rsk) 2993 + if (rcu_access_pointer(tp->fastopen_rsk)) 2994 2994 return; 2995 2995 2996 2996 if (!tp->packets_out) { ··· 6087 6087 6088 6088 static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) 6089 6089 { 6090 + struct request_sock *req; 6091 + 6090 6092 tcp_try_undo_loss(sk, false); 6091 6093 6092 6094 /* Reset rtx states to prevent spurious retransmits_timed_out() */ ··· 6098 6096 /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, 6099 6097 * we no longer need req so release it. 6100 6098 */ 6101 - reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false); 6099 + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 6100 + lockdep_sock_is_held(sk)); 6101 + reqsk_fastopen_remove(sk, req, false); 6102 6102 6103 6103 /* Re-arm the timer because data may have been sent out. 6104 6104 * This is similar to the regular data transmission case ··· 6175 6171 6176 6172 tcp_mstamp_refresh(tp); 6177 6173 tp->rx_opt.saw_tstamp = 0; 6178 - req = tp->fastopen_rsk; 6174 + req = rcu_dereference_protected(tp->fastopen_rsk, 6175 + lockdep_sock_is_held(sk)); 6179 6176 if (req) { 6180 6177 bool req_stolen; 6181 6178
+2 -2
net/ipv4/tcp_ipv4.c
··· 478 478 icsk = inet_csk(sk); 479 479 tp = tcp_sk(sk); 480 480 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 481 - fastopen = tp->fastopen_rsk; 481 + fastopen = rcu_dereference(tp->fastopen_rsk); 482 482 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 483 483 if (sk->sk_state != TCP_LISTEN && 484 484 !between(seq, snd_una, tp->snd_nxt)) { ··· 2121 2121 if (inet_csk(sk)->icsk_bind_hash) 2122 2122 inet_put_port(sk); 2123 2123 2124 - BUG_ON(tp->fastopen_rsk); 2124 + BUG_ON(rcu_access_pointer(tp->fastopen_rsk)); 2125 2125 2126 2126 /* If socket is aborted during connect operation */ 2127 2127 tcp_free_fastopen_req(tp);
+1 -1
net/ipv4/tcp_minisocks.c
··· 541 541 newtp->rx_opt.mss_clamp = req->mss; 542 542 tcp_ecn_openreq_child(newtp, req); 543 543 newtp->fastopen_req = NULL; 544 - newtp->fastopen_rsk = NULL; 544 + RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); 545 545 546 546 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); 547 547
+1 -1
net/ipv4/tcp_output.c
··· 2482 2482 /* Don't do any loss probe on a Fast Open connection before 3WHS 2483 2483 * finishes. 2484 2484 */ 2485 - if (tp->fastopen_rsk) 2485 + if (rcu_access_pointer(tp->fastopen_rsk)) 2486 2486 return false; 2487 2487 2488 2488 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+6 -5
net/ipv4/tcp_timer.c
··· 386 386 * Timer for Fast Open socket to retransmit SYNACK. Note that the 387 387 * sk here is the child socket, not the parent (listener) socket. 388 388 */ 389 - static void tcp_fastopen_synack_timer(struct sock *sk) 389 + static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) 390 390 { 391 391 struct inet_connection_sock *icsk = inet_csk(sk); 392 392 int max_retries = icsk->icsk_syn_retries ? : 393 393 sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ 394 394 struct tcp_sock *tp = tcp_sk(sk); 395 - struct request_sock *req; 396 395 397 - req = tcp_sk(sk)->fastopen_rsk; 398 396 req->rsk_ops->syn_ack_timeout(req); 399 397 400 398 if (req->num_timeout >= max_retries) { ··· 433 435 struct tcp_sock *tp = tcp_sk(sk); 434 436 struct net *net = sock_net(sk); 435 437 struct inet_connection_sock *icsk = inet_csk(sk); 438 + struct request_sock *req; 436 439 437 - if (tp->fastopen_rsk) { 440 + req = rcu_dereference_protected(tp->fastopen_rsk, 441 + lockdep_sock_is_held(sk)); 442 + if (req) { 438 443 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 439 444 sk->sk_state != TCP_FIN_WAIT1); 440 - tcp_fastopen_synack_timer(sk); 445 + tcp_fastopen_synack_timer(sk, req); 441 446 /* Before we receive ACK to our SYN-ACK don't retransmit 442 447 * anything else (e.g., data or FIN segments). 443 448 */
+1 -1
net/ipv6/tcp_ipv6.c
··· 406 406 407 407 tp = tcp_sk(sk); 408 408 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 409 - fastopen = tp->fastopen_rsk; 409 + fastopen = rcu_dereference(tp->fastopen_rsk); 410 410 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 411 411 if (sk->sk_state != TCP_LISTEN && 412 412 !between(seq, snd_una, tp->snd_nxt)) {