Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: introduce icsk->icsk_keepalive_timer

sk->sk_timer has been used for TCP keepalives.

Keepalive timers are not in the fast path; we want to use the sk->sk_timer
storage for retransmit timers instead, for better cache locality.

Create icsk->icsk_keepalive_timer and change keepalive
code to no longer use sk->sk_timer.

Added space is reclaimed in the following patch.

This includes changes to MPTCP, which was also using sk_timer.

Alias icsk->mptcp_tout_timer and icsk->icsk_keepalive_timer (through an
anonymous union) for the sake of inet_sk_diag_fill().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251124175013.1473655-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
08dfe370 27e8257a

+35 -24
+1
Documentation/networking/net_cachelines/inet_connection_sock.rst
··· 14 14 struct inet_bind2_bucket icsk_bind2_hash read_mostly tcp_set_state,inet_put_port 15 15 struct timer_list icsk_retransmit_timer read_write inet_csk_reset_xmit_timer,tcp_connect 16 16 struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect 17 + struct timer_list icsk_keepalive_timer 17 18 u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one 18 19 u32 icsk_rto_min 19 20 u32 icsk_rto_max read_mostly tcp_reset_xmit_timer
+9 -2
include/net/inet_connection_sock.h
··· 57 57 * @icsk_bind_hash: Bind node 58 58 * @icsk_bind2_hash: Bind node in the bhash2 table 59 59 * @icsk_retransmit_timer: Resend (no ack) 60 + * @icsk_delack_timer: Delayed ACK timer 61 + * @icsk_keepalive_timer: Keepalive timer 62 + * @mptcp_tout_timer: mptcp timer 60 63 * @icsk_rto: Retransmit timeout 61 64 * @icsk_pmtu_cookie Last pmtu seen by socket 62 65 * @icsk_ca_ops Pluggable congestion control hook ··· 84 81 struct request_sock_queue icsk_accept_queue; 85 82 struct inet_bind_bucket *icsk_bind_hash; 86 83 struct inet_bind2_bucket *icsk_bind2_hash; 87 - struct timer_list icsk_retransmit_timer; 88 - struct timer_list icsk_delack_timer; 84 + struct timer_list icsk_retransmit_timer; 85 + struct timer_list icsk_delack_timer; 86 + union { 87 + struct timer_list icsk_keepalive_timer; 88 + struct timer_list mptcp_tout_timer; 89 + }; 89 90 __u32 icsk_rto; 90 91 __u32 icsk_rto_min; 91 92 u32 icsk_rto_max;
+3 -3
net/ipv4/inet_connection_sock.c
··· 739 739 740 740 timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); 741 741 timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); 742 - timer_setup(&sk->sk_timer, keepalive_handler, 0); 742 + timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0); 743 743 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 744 744 } 745 745 ··· 752 752 753 753 sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 754 754 sk_stop_timer(sk, &icsk->icsk_delack_timer); 755 - sk_stop_timer(sk, &sk->sk_timer); 755 + sk_stop_timer(sk, &icsk->icsk_keepalive_timer); 756 756 } 757 757 758 758 void inet_csk_clear_xmit_timers_sync(struct sock *sk) ··· 767 767 768 768 sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); 769 769 sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); 770 - sk_stop_timer_sync(sk, &sk->sk_timer); 770 + sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer); 771 771 } 772 772 773 773 struct dst_entry *inet_csk_route_req(const struct sock *sk,
+2 -2
net/ipv4/inet_diag.c
··· 293 293 r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); 294 294 r->idiag_expires = 295 295 jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); 296 - } else if (timer_pending(&sk->sk_timer)) { 296 + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 297 297 r->idiag_timer = 2; 298 298 r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); 299 299 r->idiag_expires = 300 - jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); 300 + jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies); 301 301 } 302 302 303 303 if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
+2 -2
net/ipv4/tcp_ipv4.c
··· 2873 2873 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2874 2874 timer_active = 4; 2875 2875 timer_expires = tcp_timeout_expires(sk); 2876 - } else if (timer_pending(&sk->sk_timer)) { 2876 + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 2877 2877 timer_active = 2; 2878 - timer_expires = sk->sk_timer.expires; 2878 + timer_expires = icsk->icsk_keepalive_timer.expires; 2879 2879 } else { 2880 2880 timer_active = 0; 2881 2881 timer_expires = jiffies;
+5 -4
net/ipv4/tcp_timer.c
··· 755 755 756 756 void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) 757 757 { 758 - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 758 + sk_reset_timer(sk, &inet_csk(sk)->icsk_keepalive_timer, jiffies + len); 759 759 } 760 760 761 761 static void tcp_delete_keepalive_timer(struct sock *sk) 762 762 { 763 - sk_stop_timer(sk, &sk->sk_timer); 763 + sk_stop_timer(sk, &inet_csk(sk)->icsk_keepalive_timer); 764 764 } 765 765 766 766 void tcp_set_keepalive(struct sock *sk, int val) ··· 777 777 778 778 static void tcp_keepalive_timer(struct timer_list *t) 779 779 { 780 - struct sock *sk = timer_container_of(sk, t, sk_timer); 781 - struct inet_connection_sock *icsk = inet_csk(sk); 780 + struct inet_connection_sock *icsk = 781 + timer_container_of(icsk, t, icsk_keepalive_timer); 782 + struct sock *sk = &icsk->icsk_inet.sk; 782 783 struct tcp_sock *tp = tcp_sk(sk); 783 784 u32 elapsed; 784 785
+2 -2
net/ipv6/tcp_ipv6.c
··· 2167 2167 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2168 2168 timer_active = 4; 2169 2169 timer_expires = tcp_timeout_expires(sp); 2170 - } else if (timer_pending(&sp->sk_timer)) { 2170 + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 2171 2171 timer_active = 2; 2172 - timer_expires = sp->sk_timer.expires; 2172 + timer_expires = icsk->icsk_keepalive_timer.expires; 2173 2173 } else { 2174 2174 timer_active = 0; 2175 2175 timer_expires = jiffies;
+6 -4
net/mptcp/protocol.c
··· 2374 2374 2375 2375 static void mptcp_tout_timer(struct timer_list *t) 2376 2376 { 2377 - struct sock *sk = timer_container_of(sk, t, sk_timer); 2377 + struct inet_connection_sock *icsk = 2378 + timer_container_of(icsk, t, mptcp_tout_timer); 2379 + struct sock *sk = &icsk->icsk_inet.sk; 2378 2380 2379 2381 mptcp_schedule_work(sk); 2380 2382 sock_put(sk); ··· 2830 2828 */ 2831 2829 timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; 2832 2830 2833 - sk_reset_timer(sk, &sk->sk_timer, timeout); 2831 + sk_reset_timer(sk, &inet_csk(sk)->mptcp_tout_timer, timeout); 2834 2832 } 2835 2833 2836 2834 static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) ··· 2976 2974 2977 2975 /* re-use the csk retrans timer for MPTCP-level retrans */ 2978 2976 timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); 2979 - timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); 2977 + timer_setup(&msk->sk.mptcp_tout_timer, mptcp_tout_timer, 0); 2980 2978 } 2981 2979 2982 2980 static void mptcp_ca_reset(struct sock *sk) ··· 3178 3176 might_sleep(); 3179 3177 3180 3178 mptcp_stop_rtx_timer(sk); 3181 - sk_stop_timer(sk, &sk->sk_timer); 3179 + sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer); 3182 3180 msk->pm.status = 0; 3183 3181 mptcp_release_sched(msk); 3184 3182
+1 -1
net/mptcp/protocol.h
··· 892 892 if (!inet_csk(sk)->icsk_mtup.probe_timestamp) 893 893 return; 894 894 895 - sk_stop_timer(sk, &sk->sk_timer); 895 + sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer); 896 896 inet_csk(sk)->icsk_mtup.probe_timestamp = 0; 897 897 } 898 898
+2 -2
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
··· 103 103 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 104 104 timer_active = 4; 105 105 timer_expires = icsk->icsk_retransmit_timer.expires; 106 - } else if (timer_pending(&sp->sk_timer)) { 106 + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 107 107 timer_active = 2; 108 - timer_expires = sp->sk_timer.expires; 108 + timer_expires = icsk->icsk_keepalive_timer.expires; 109 109 } else { 110 110 timer_active = 0; 111 111 timer_expires = bpf_jiffies64();
+2 -2
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
··· 103 103 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 104 104 timer_active = 4; 105 105 timer_expires = icsk->icsk_retransmit_timer.expires; 106 - } else if (timer_pending(&sp->sk_timer)) { 106 + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 107 107 timer_active = 2; 108 - timer_expires = sp->sk_timer.expires; 108 + timer_expires = icsk->icsk_keepalive_timer.expires; 109 109 } else { 110 110 timer_active = 0; 111 111 timer_expires = bpf_jiffies64();