Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: remove icsk->icsk_retransmit_timer

Now that sk->sk_timer is no longer used by TCP keepalive, we can use
its storage for the TCP and MPTCP retransmit timers for better
cache locality.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251124175013.1473655-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski
9a5e5334 08dfe370

+25 -30
-1
Documentation/networking/net_cachelines/inet_connection_sock.rst
··· 12 12 struct request_sock_queue icsk_accept_queue 13 13 struct inet_bind_bucket icsk_bind_hash read_mostly tcp_set_state 14 14 struct inet_bind2_bucket icsk_bind2_hash read_mostly tcp_set_state,inet_put_port 15 - struct timer_list icsk_retransmit_timer read_write inet_csk_reset_xmit_timer,tcp_connect 16 15 struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect 17 16 struct timer_list icsk_keepalive_timer 18 17 u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
+3 -5
include/net/inet_connection_sock.h
··· 56 56 * @icsk_accept_queue: FIFO of established children 57 57 * @icsk_bind_hash: Bind node 58 58 * @icsk_bind2_hash: Bind node in the bhash2 table 59 - * @icsk_retransmit_timer: Resend (no ack) 60 59 * @icsk_delack_timer: Delayed ACK timer 61 60 * @icsk_keepalive_timer: Keepalive timer 62 61 * @mptcp_tout_timer: mptcp timer ··· 83 84 struct request_sock_queue icsk_accept_queue; 84 85 struct inet_bind_bucket *icsk_bind_hash; 85 86 struct inet_bind2_bucket *icsk_bind2_hash; 86 - struct timer_list icsk_retransmit_timer; 87 87 struct timer_list icsk_delack_timer; 88 88 union { 89 89 struct timer_list icsk_keepalive_timer; ··· 191 193 192 194 static inline unsigned long tcp_timeout_expires(const struct sock *sk) 193 195 { 194 - return READ_ONCE(inet_csk(sk)->icsk_retransmit_timer.expires); 196 + return READ_ONCE(sk->tcp_retransmit_timer.expires); 195 197 } 196 198 197 199 static inline unsigned long ··· 207 209 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { 208 210 smp_store_release(&icsk->icsk_pending, 0); 209 211 #ifdef INET_CSK_CLEAR_TIMERS 210 - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 212 + sk_stop_timer(sk, &sk->tcp_retransmit_timer); 211 213 #endif 212 214 } else if (what == ICSK_TIME_DACK) { 213 215 smp_store_release(&icsk->icsk_ack.pending, 0); ··· 239 241 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || 240 242 what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { 241 243 smp_store_release(&icsk->icsk_pending, what); 242 - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when); 244 + sk_reset_timer(sk, &sk->tcp_retransmit_timer, when); 243 245 } else if (what == ICSK_TIME_DACK) { 244 246 smp_store_release(&icsk->icsk_ack.pending, 245 247 icsk->icsk_ack.pending | ICSK_ACK_TIMER);
+7 -2
include/net/sock.h
··· 305 305 * @sk_txrehash: enable TX hash rethink 306 306 * @sk_filter: socket filtering instructions 307 307 * @sk_timer: sock cleanup timer 308 + * @tcp_retransmit_timer: tcp retransmit timer 309 + * @mptcp_retransmit_timer: mptcp retransmit timer 308 310 * @sk_stamp: time stamp of last packet received 309 311 * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only 310 312 * @sk_tsflags: SO_TIMESTAMPING flags ··· 484 482 }; 485 483 struct sk_buff_head sk_write_queue; 486 484 struct page_frag sk_frag; 487 - struct timer_list sk_timer; 488 - 485 + union { 486 + struct timer_list sk_timer; 487 + struct timer_list tcp_retransmit_timer; 488 + struct timer_list mptcp_retransmit_timer; 489 + }; 489 490 unsigned long sk_pacing_rate; /* bytes per second */ 490 491 atomic_t sk_zckey; 491 492 atomic_t sk_tskey;
+3 -3
net/ipv4/inet_connection_sock.c
··· 737 737 { 738 738 struct inet_connection_sock *icsk = inet_csk(sk); 739 739 740 - timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); 740 + timer_setup(&sk->tcp_retransmit_timer, retransmit_handler, 0); 741 741 timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); 742 742 timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0); 743 743 icsk->icsk_pending = icsk->icsk_ack.pending = 0; ··· 750 750 smp_store_release(&icsk->icsk_pending, 0); 751 751 smp_store_release(&icsk->icsk_ack.pending, 0); 752 752 753 - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 753 + sk_stop_timer(sk, &sk->tcp_retransmit_timer); 754 754 sk_stop_timer(sk, &icsk->icsk_delack_timer); 755 755 sk_stop_timer(sk, &icsk->icsk_keepalive_timer); 756 756 } ··· 765 765 smp_store_release(&icsk->icsk_pending, 0); 766 766 smp_store_release(&icsk->icsk_ack.pending, 0); 767 767 768 - sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); 768 + sk_stop_timer_sync(sk, &sk->tcp_retransmit_timer); 769 769 sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); 770 770 sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer); 771 771 }
+3 -5
net/ipv4/tcp_timer.c
··· 698 698 return; 699 699 700 700 if (time_after(tcp_timeout_expires(sk), jiffies)) { 701 - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, 701 + sk_reset_timer(sk, &sk->tcp_retransmit_timer, 702 702 tcp_timeout_expires(sk)); 703 703 return; 704 704 } ··· 725 725 726 726 static void tcp_write_timer(struct timer_list *t) 727 727 { 728 - struct inet_connection_sock *icsk = 729 - timer_container_of(icsk, t, icsk_retransmit_timer); 730 - struct sock *sk = &icsk->icsk_inet.sk; 728 + struct sock *sk = timer_container_of(sk, t, tcp_retransmit_timer); 731 729 732 730 /* Avoid locking the socket when there is no pending event. */ 733 - if (!smp_load_acquire(&icsk->icsk_pending)) 731 + if (!smp_load_acquire(&inet_csk(sk)->icsk_pending)) 734 732 goto out; 735 733 736 734 bh_lock_sock(sk);
+5 -10
net/mptcp/protocol.c
··· 411 411 412 412 static void mptcp_stop_rtx_timer(struct sock *sk) 413 413 { 414 - struct inet_connection_sock *icsk = inet_csk(sk); 415 - 416 - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 414 + sk_stop_timer(sk, &sk->mptcp_retransmit_timer); 417 415 mptcp_sk(sk)->timer_ival = 0; 418 416 } 419 417 ··· 952 954 953 955 static bool mptcp_rtx_timer_pending(struct sock *sk) 954 956 { 955 - return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); 957 + return timer_pending(&sk->mptcp_retransmit_timer); 956 958 } 957 959 958 960 static void mptcp_reset_rtx_timer(struct sock *sk) 959 961 { 960 - struct inet_connection_sock *icsk = inet_csk(sk); 961 962 unsigned long tout; 962 963 963 964 /* prevent rescheduling on close */ ··· 964 967 return; 965 968 966 969 tout = mptcp_sk(sk)->timer_ival; 967 - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout); 970 + sk_reset_timer(sk, &sk->mptcp_retransmit_timer, jiffies + tout); 968 971 } 969 972 970 973 bool mptcp_schedule_work(struct sock *sk) ··· 2351 2354 2352 2355 static void mptcp_retransmit_timer(struct timer_list *t) 2353 2356 { 2354 - struct inet_connection_sock *icsk = timer_container_of(icsk, t, 2355 - icsk_retransmit_timer); 2356 - struct sock *sk = &icsk->icsk_inet.sk; 2357 + struct sock *sk = timer_container_of(sk, t, mptcp_retransmit_timer); 2357 2358 struct mptcp_sock *msk = mptcp_sk(sk); 2358 2359 2359 2360 bh_lock_sock(sk); ··· 2970 2975 spin_lock_init(&msk->fallback_lock); 2971 2976 2972 2977 /* re-use the csk retrans timer for MPTCP-level retrans */ 2973 - timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); 2978 + timer_setup(&sk->mptcp_retransmit_timer, mptcp_retransmit_timer, 0); 2974 2979 timer_setup(&msk->sk.mptcp_tout_timer, mptcp_tout_timer, 0); 2975 2980 } 2976 2981
+2 -2
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
··· 99 99 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 100 100 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 101 101 timer_active = 1; 102 - timer_expires = icsk->icsk_retransmit_timer.expires; 102 + timer_expires = sp->tcp_retransmit_timer.expires; 103 103 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 104 104 timer_active = 4; 105 - timer_expires = icsk->icsk_retransmit_timer.expires; 105 + timer_expires = sp->tcp_retransmit_timer.expires; 106 106 } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 107 107 timer_active = 2; 108 108 timer_expires = icsk->icsk_keepalive_timer.expires;
+2 -2
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
··· 99 99 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 100 100 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 101 101 timer_active = 1; 102 - timer_expires = icsk->icsk_retransmit_timer.expires; 102 + timer_expires = sp->tcp_retransmit_timer.expires; 103 103 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 104 104 timer_active = 4; 105 - timer_expires = icsk->icsk_retransmit_timer.expires; 105 + timer_expires = sp->tcp_retransmit_timer.expires; 106 106 } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 107 107 timer_active = 2; 108 108 timer_expires = icsk->icsk_keepalive_timer.expires;