Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: set TCP_USER_TIMEOUT locklessly

icsk->icsk_user_timeout can be set locklessly,
if all read sides use READ_ONCE().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
d58f2e15 d44fd4a7

+33 -29
+1 -1
include/linux/tcp.h
@@ -564,6 +564,6 @@
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+int tcp_sock_set_user_timeout(struct sock *sk, int val);
 
 #endif /* _LINUX_TCP_H */
+10 -13
net/ipv4/tcp.c
@@ -3296,10 +3296,15 @@
 }
 EXPORT_SYMBOL(tcp_sock_set_syncnt);
 
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
 {
-	lock_sock(sk);
+	/* Cap the max time in ms TCP will retry or probe the window
+	 * before giving up and aborting (ETIMEDOUT) a connection.
+	 */
+	if (val < 0)
+		return -EINVAL;
+
 	WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
-	release_sock(sk);
+	return 0;
 }
 EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3469,6 +3464,8 @@
 	switch (optname) {
 	case TCP_SYNCNT:
 		return tcp_sock_set_syncnt(sk, val);
+	case TCP_USER_TIMEOUT:
+		return tcp_sock_set_user_timeout(sk, val);
 	}
 
 	sockopt_lock_sock(sk);
@@ -3618,16 +3611,6 @@
 		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
 		break;
 #endif
-	case TCP_USER_TIMEOUT:
-		/* Cap the max time in ms TCP will retry or probe the window
-		 * before giving up and aborting (ETIMEDOUT) a connection.
-		 */
-		if (val < 0)
-			err = -EINVAL;
-		else
-			WRITE_ONCE(icsk->icsk_user_timeout, val);
-		break;
-
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
 							TCPF_LISTEN))) {
+22 -15
net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
 static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 elapsed, start_ts;
+	u32 elapsed, start_ts, user_timeout;
 	s32 remaining;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (!icsk->icsk_user_timeout)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout)
 		return icsk->icsk_rto;
 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-	remaining = icsk->icsk_user_timeout - elapsed;
+	remaining = user_timeout - elapsed;
 	if (remaining <= 0)
 		return 1; /* user timeout has passed; fire ASAP */
@@ -44,16 +43,17 @@
 u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 remaining;
+	u32 remaining, user_timeout;
 	s32 elapsed;
 
-	if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout || !icsk->icsk_probes_tstamp)
 		return when;
 
 	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
 	if (unlikely(elapsed < 0))
 		elapsed = 0;
-	remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+	remaining = msecs_to_jiffies(user_timeout) - elapsed;
 	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
 
 	return min_t(u32, remaining, when);
@@ -272,7 +270,7 @@
 	}
 	if (!expired)
 		expired = retransmits_timed_out(sk, retry_until,
-						icsk->icsk_user_timeout);
+						READ_ONCE(icsk->icsk_user_timeout));
 	tcp_fastopen_active_detect_blackhole(sk, expired);
 
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -386,13 +384,16 @@
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	if (!icsk->icsk_probes_tstamp)
+	if (!icsk->icsk_probes_tstamp) {
 		icsk->icsk_probes_tstamp = tcp_jiffies32;
-	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
-		 msecs_to_jiffies(icsk->icsk_user_timeout))
-		goto abort;
+	} else {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
 
+		if (user_timeout &&
+		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+		    msecs_to_jiffies(user_timeout))
+			goto abort;
+	}
 	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
 	if (sock_flag(sk, SOCK_DEAD)) {
 		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -739,13 +734,15 @@
 	elapsed = keepalive_time_elapsed(tp);
 
 	if (elapsed >= keepalive_time_when(tp)) {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
 		/* If the TCP_USER_TIMEOUT option is enabled, use that
 		 * to determine when to timeout instead.
 		 */
-		if ((icsk->icsk_user_timeout != 0 &&
-		     elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+		if ((user_timeout != 0 &&
+		     elapsed >= msecs_to_jiffies(user_timeout) &&
 		     icsk->icsk_probes_out > 0) ||
-		    (icsk->icsk_user_timeout == 0 &&
+		    (user_timeout == 0 &&
 		     icsk->icsk_probes_out >= keepalive_probes(tp))) {
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			tcp_write_err(sk);