Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inet: call inet6_ehashfn() once from inet6_hash_connect()

inet6_ehashfn() being called from __inet6_check_established()
has a big impact on performance, as shown in the Tested section.

After the prior patch, we can compute the hash for port 0
from inet6_hash_connect(), and derive each subsequent hash in
__inet_hash_connect() from this initial hash:

hash(saddr, lport, daddr, dport) == hash(saddr, 0, daddr, dport) + lport

Apply the same principle for __inet_check_established(),
although inet_ehashfn() has a smaller cost.

Tested:

Server: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog
Client: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog -c -H server

Before this patch:

utime_start=0.286131
utime_end=4.378886
stime_start=11.952556
stime_end=1991.655533
num_transactions=1446830
latency_min=0.001061085
latency_max=12.075275028
latency_mean=0.376375302
latency_stddev=1.361969596
num_samples=306383
throughput=151866.56

perf top:

50.01% [kernel] [k] __inet6_check_established
20.65% [kernel] [k] __inet_hash_connect
15.81% [kernel] [k] inet6_ehashfn
2.92% [kernel] [k] rcu_all_qs
2.34% [kernel] [k] __cond_resched
0.50% [kernel] [k] _raw_spin_lock
0.34% [kernel] [k] sched_balance_trigger
0.24% [kernel] [k] queued_spin_lock_slowpath

After this patch:

utime_start=0.315047
utime_end=9.257617
stime_start=7.041489
stime_end=1923.688387
num_transactions=3057968
latency_min=0.003041375
latency_max=7.056589232
latency_mean=0.141075048 # Better latency metrics
latency_stddev=0.526900516
num_samples=312996
throughput=320677.21 # 111 % increase, and 229 % for the series

perf top: inet6_ehashfn is no longer seen.

39.67% [kernel] [k] __inet_hash_connect
37.06% [kernel] [k] __inet6_check_established
4.79% [kernel] [k] rcu_all_qs
3.82% [kernel] [k] __cond_resched
1.76% [kernel] [k] sched_balance_domains
0.82% [kernel] [k] _raw_spin_lock
0.81% [kernel] [k] sched_balance_rq
0.81% [kernel] [k] sched_balance_trigger
0.76% [kernel] [k] queued_spin_lock_slowpath

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Tested-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://patch.msgid.link/20250305034550.879255-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
d4438ce6 9544d60a

+33 -14
+3 -1
include/net/inet_hashtables.h
··· 527 527 528 528 int __inet_hash_connect(struct inet_timewait_death_row *death_row, 529 529 struct sock *sk, u64 port_offset, 530 + u32 hash_port0, 530 531 int (*check_established)(struct inet_timewait_death_row *, 531 532 struct sock *, __u16, 532 533 struct inet_timewait_sock **, 533 - bool rcu_lookup)); 534 + bool rcu_lookup, 535 + u32 hash)); 534 536 535 537 int inet_hash_connect(struct inet_timewait_death_row *death_row, 536 538 struct sock *sk);
+1 -1
include/net/ip.h
··· 357 357 bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); 358 358 359 359 #ifdef CONFIG_SYSCTL 360 - static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) 360 + static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port) 361 361 { 362 362 if (!net->ipv4.sysctl_local_reserved_ports) 363 363 return false;
+18 -8
net/ipv4/inet_hashtables.c
··· 538 538 static int __inet_check_established(struct inet_timewait_death_row *death_row, 539 539 struct sock *sk, __u16 lport, 540 540 struct inet_timewait_sock **twp, 541 - bool rcu_lookup) 541 + bool rcu_lookup, 542 + u32 hash) 542 543 { 543 544 struct inet_hashinfo *hinfo = death_row->hashinfo; 544 545 struct inet_sock *inet = inet_sk(sk); ··· 550 549 int sdif = l3mdev_master_ifindex_by_index(net, dif); 551 550 INET_ADDR_COOKIE(acookie, saddr, daddr); 552 551 const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 553 - unsigned int hash = inet_ehashfn(net, daddr, lport, 554 - saddr, inet->inet_dport); 555 552 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 556 553 struct inet_timewait_sock *tw = NULL; 557 554 const struct hlist_nulls_node *node; ··· 1006 1007 1007 1008 int __inet_hash_connect(struct inet_timewait_death_row *death_row, 1008 1009 struct sock *sk, u64 port_offset, 1010 + u32 hash_port0, 1009 1011 int (*check_established)(struct inet_timewait_death_row *, 1010 1012 struct sock *, __u16, struct inet_timewait_sock **, 1011 - bool rcu_lookup)) 1013 + bool rcu_lookup, u32 hash)) 1012 1014 { 1013 1015 struct inet_hashinfo *hinfo = death_row->hashinfo; 1014 1016 struct inet_bind_hashbucket *head, *head2; ··· 1027 1027 1028 1028 if (port) { 1029 1029 local_bh_disable(); 1030 - ret = check_established(death_row, sk, port, NULL, false); 1030 + ret = check_established(death_row, sk, port, NULL, false, 1031 + hash_port0 + port); 1031 1032 local_bh_enable(); 1032 1033 return ret; 1033 1034 } ··· 1072 1071 rcu_read_unlock(); 1073 1072 goto next_port; 1074 1073 } 1075 - if (!check_established(death_row, sk, port, &tw, true)) 1074 + if (!check_established(death_row, sk, port, &tw, true, 1075 + hash_port0 + port)) 1076 1076 break; 1077 1077 rcu_read_unlock(); 1078 1078 goto next_port; ··· 1092 1090 goto next_port_unlock; 1093 1091 WARN_ON(hlist_empty(&tb->bhash2)); 1094 1092 if (!check_established(death_row, sk, 1095 - port, &tw, 
false)) 1093 + port, &tw, false, 1094 + hash_port0 + port)) 1096 1095 goto ok; 1097 1096 goto next_port_unlock; 1098 1097 } ··· 1200 1197 int inet_hash_connect(struct inet_timewait_death_row *death_row, 1201 1198 struct sock *sk) 1202 1199 { 1200 + const struct inet_sock *inet = inet_sk(sk); 1201 + const struct net *net = sock_net(sk); 1203 1202 u64 port_offset = 0; 1203 + u32 hash_port0; 1204 1204 1205 1205 if (!inet_sk(sk)->inet_num) 1206 1206 port_offset = inet_sk_port_offset(sk); 1207 - return __inet_hash_connect(death_row, sk, port_offset, 1207 + 1208 + hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, 1209 + inet->inet_daddr, inet->inet_dport); 1210 + 1211 + return __inet_hash_connect(death_row, sk, port_offset, hash_port0, 1208 1212 __inet_check_established); 1209 1213 } 1210 1214 EXPORT_SYMBOL_GPL(inet_hash_connect);
+11 -4
net/ipv6/inet6_hashtables.c
··· 264 264 static int __inet6_check_established(struct inet_timewait_death_row *death_row, 265 265 struct sock *sk, const __u16 lport, 266 266 struct inet_timewait_sock **twp, 267 - bool rcu_lookup) 267 + bool rcu_lookup, 268 + u32 hash) 268 269 { 269 270 struct inet_hashinfo *hinfo = death_row->hashinfo; 270 271 struct inet_sock *inet = inet_sk(sk); ··· 275 274 struct net *net = sock_net(sk); 276 275 const int sdif = l3mdev_master_ifindex_by_index(net, dif); 277 276 const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 278 - const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, 279 - inet->inet_dport); 280 277 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 281 278 struct inet_timewait_sock *tw = NULL; 282 279 const struct hlist_nulls_node *node; ··· 353 354 int inet6_hash_connect(struct inet_timewait_death_row *death_row, 354 355 struct sock *sk) 355 356 { 357 + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; 358 + const struct in6_addr *saddr = &sk->sk_v6_daddr; 359 + const struct inet_sock *inet = inet_sk(sk); 360 + const struct net *net = sock_net(sk); 356 361 u64 port_offset = 0; 362 + u32 hash_port0; 357 363 358 364 if (!inet_sk(sk)->inet_num) 359 365 port_offset = inet6_sk_port_offset(sk); 360 - return __inet_hash_connect(death_row, sk, port_offset, 366 + 367 + hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); 368 + 369 + return __inet_hash_connect(death_row, sk, port_offset, hash_port0, 361 370 __inet6_check_established); 362 371 } 363 372 EXPORT_SYMBOL_GPL(inet6_hash_connect);