Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'udp-4tuple-hash'

Philo Lu says:

====================
udp: Add 4-tuple hash for connected sockets

This patchset introduces 4-tuple hash for connected udp sockets, to make
connected udp lookup faster.

Stress test results (with 1 cpu fully used) are shown below, in pps:
(1) _un-connected_ socket as server
[a] w/o hash4: 1,825,176
[b] w/ hash4: 1,831,750 (+0.36%)

(2) 500 _connected_ sockets as server
[c] w/o hash4: 290,860 (only 16% of [a])
[d] w/ hash4: 1,889,658 (+3.1% compared with [b])
With hash4, compute_score() is skipped during lookup, so [d] is slightly
better than [b].

Patch1: Add a new counter for hslot2 named hash4_cnt, to avoid a cache line
miss during lookup.
Patch2: Add hslot/hlist_nulls for 4-tuple hash.
Patch3 and 4: Implement 4-tuple hash for ipv4 and ipv6.

The detailed motivation is described in Patch 3.

The 4-tuple hash increases the size of udp_sock and udp_hslot. Thus guard it
with CONFIG_BASE_SMALL, i.e., it's a no-op when CONFIG_BASE_SMALL is enabled.

Intentionally, the feature is not available for udplite. Though udplite
shares some structs and functions with udp, its connect() remains unchanged.
So all udplite sockets behave the same as un-connected udp sockets.
Besides, udplite also shares the additional memory consumption in udp_sock
and udptable.

changelogs:
v8 -> v9 (Paolo Abeni):
- Add explanation about udplite in cover letter
- Update tags for co-developers
- Add acked-by tags of Paolo and Willem

v7 -> v8:
- add EXPORT_SYMBOL for ipv6.ko build

v6 -> v7 (Kuniyuki Iwashima):
- export udp_ehashfn to be used by udpv6 rehash

v5 -> v6 (Paolo Abeni):
- move udp_table_hash4_init from patch2 to patch1
- use hlist_nulls for lookup-rehash race
- add test results in commit log
- add more comment, e.g., for rehash4 used in hash4
- add ipv6 support (Patch4), and refactor some functions for better
sharing, without functionality change

v4 -> v5 (Paolo Abeni):
- add CONFIG_BASE_SMALL with which udp hash4 does nothing

v3 -> v4 (Willem de Bruijn):
- fix mistakes in udp_pernet_table_alloc()

RFCv2 -> v3 (Gur Stavi):
- minor fix in udp_hashslot2() and udp_table_init()
- add rcu sync in rehash4()

RFCv1 -> RFCv2:
- add a new struct for hslot2
- remove the sockopt UDP_HASH4 because it has little side effect for
unconnected sockets
- add rehash in connect()
- re-organize the patch into 3 smaller ones
- other minor fix

v8:
https://lore.kernel.org/all/20241108054836.123484-1-lulie@linux.alibaba.com/
v7:
https://lore.kernel.org/all/20241105121225.12513-1-lulie@linux.alibaba.com/
v6:
https://lore.kernel.org/all/20241031124550.20227-1-lulie@linux.alibaba.com/
v5:
https://lore.kernel.org/all/20241018114535.35712-1-lulie@linux.alibaba.com/
v4:
https://lore.kernel.org/all/20241012012918.70888-1-lulie@linux.alibaba.com/
v3:
https://lore.kernel.org/all/20241010090351.79698-1-lulie@linux.alibaba.com/
RFCv2:
https://lore.kernel.org/all/20240924110414.52618-1-lulie@linux.alibaba.com/
RFCv1:
https://lore.kernel.org/all/20240913100941.8565-1-lulie@linux.alibaba.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+468 -42
+11
include/linux/udp.h
··· 56 56 int pending; /* Any pending frames ? */ 57 57 __u8 encap_type; /* Is this an Encapsulation socket? */ 58 58 59 + #if !IS_ENABLED(CONFIG_BASE_SMALL) 60 + /* For UDP 4-tuple hash */ 61 + __u16 udp_lrpa_hash; 62 + struct hlist_nulls_node udp_lrpa_node; 63 + #endif 64 + 59 65 /* 60 66 * Following member retains the information to create a UDP header 61 67 * when the socket is uncorked. ··· 211 205 212 206 #define udp_portaddr_for_each_entry_rcu(__sk, list) \ 213 207 hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) 208 + 209 + #if !IS_ENABLED(CONFIG_BASE_SMALL) 210 + #define udp_lrpa_for_each_entry_rcu(__up, node, list) \ 211 + hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node) 212 + #endif 214 213 215 214 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) 216 215
+131 -6
include/net/udp.h
··· 50 50 #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) 51 51 52 52 /** 53 - * struct udp_hslot - UDP hash slot 53 + * struct udp_hslot - UDP hash slot used by udp_table.hash/hash4 54 54 * 55 55 * @head: head of list of sockets 56 + * @nulls_head: head of list of sockets, only used by hash4 56 57 * @count: number of sockets in 'head' list 57 58 * @lock: spinlock protecting changes to head/count 58 59 */ 59 60 struct udp_hslot { 60 - struct hlist_head head; 61 + union { 62 + struct hlist_head head; 63 + /* hash4 uses hlist_nulls to avoid moving wrongly onto another 64 + * hlist, because rehash() can happen with lookup(). 65 + */ 66 + struct hlist_nulls_head nulls_head; 67 + }; 61 68 int count; 62 69 spinlock_t lock; 63 - } __attribute__((aligned(2 * sizeof(long)))); 70 + } __aligned(2 * sizeof(long)); 71 + 72 + /** 73 + * struct udp_hslot_main - UDP hash slot used by udp_table.hash2 74 + * 75 + * @hslot: basic hash slot 76 + * @hash4_cnt: number of sockets in hslot4 of the same 77 + * (local port, local address) 78 + */ 79 + struct udp_hslot_main { 80 + struct udp_hslot hslot; /* must be the first member */ 81 + #if !IS_ENABLED(CONFIG_BASE_SMALL) 82 + u32 hash4_cnt; 83 + #endif 84 + } __aligned(2 * sizeof(long)); 85 + #define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot)) 64 86 65 87 /** 66 88 * struct udp_table - UDP table 67 89 * 68 90 * @hash: hash table, sockets are hashed on (local port) 69 91 * @hash2: hash table, sockets are hashed on (local port, local address) 92 + * @hash4: hash table, connected sockets are hashed on 93 + * (local port, local address, remote port, remote address) 70 94 * @mask: number of slots in hash tables, minus 1 71 95 * @log: log2(number of slots in hash table) 72 96 */ 73 97 struct udp_table { 74 98 struct udp_hslot *hash; 75 - struct udp_hslot *hash2; 99 + struct udp_hslot_main *hash2; 100 + #if !IS_ENABLED(CONFIG_BASE_SMALL) 101 + struct udp_hslot *hash4; 102 + #endif 76 103 unsigned int mask; 77 104 
unsigned int log; 78 105 }; ··· 111 84 { 112 85 return &table->hash[udp_hashfn(net, num, table->mask)]; 113 86 } 87 + 114 88 /* 115 89 * For secondary hash, net_hash_mix() is performed before calling 116 90 * udp_hashslot2(), this explains difference with udp_hashslot() ··· 119 91 static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, 120 92 unsigned int hash) 121 93 { 122 - return &table->hash2[hash & table->mask]; 94 + return &table->hash2[hash & table->mask].hslot; 123 95 } 96 + 97 + #if IS_ENABLED(CONFIG_BASE_SMALL) 98 + static inline void udp_table_hash4_init(struct udp_table *table) 99 + { 100 + } 101 + 102 + static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, 103 + unsigned int hash) 104 + { 105 + BUILD_BUG(); 106 + return NULL; 107 + } 108 + 109 + static inline bool udp_hashed4(const struct sock *sk) 110 + { 111 + return false; 112 + } 113 + 114 + static inline unsigned int udp_hash4_slot_size(void) 115 + { 116 + return 0; 117 + } 118 + 119 + static inline bool udp_has_hash4(const struct udp_hslot *hslot2) 120 + { 121 + return false; 122 + } 123 + 124 + static inline void udp_hash4_inc(struct udp_hslot *hslot2) 125 + { 126 + } 127 + 128 + static inline void udp_hash4_dec(struct udp_hslot *hslot2) 129 + { 130 + } 131 + #else /* !CONFIG_BASE_SMALL */ 132 + 133 + /* Must be called with table->hash2 initialized */ 134 + static inline void udp_table_hash4_init(struct udp_table *table) 135 + { 136 + table->hash4 = (void *)(table->hash2 + (table->mask + 1)); 137 + for (int i = 0; i <= table->mask; i++) { 138 + table->hash2[i].hash4_cnt = 0; 139 + 140 + INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i); 141 + table->hash4[i].count = 0; 142 + spin_lock_init(&table->hash4[i].lock); 143 + } 144 + } 145 + 146 + static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, 147 + unsigned int hash) 148 + { 149 + return &table->hash4[hash & table->mask]; 150 + } 151 + 152 + static inline bool udp_hashed4(const struct sock 
*sk) 153 + { 154 + return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node); 155 + } 156 + 157 + static inline unsigned int udp_hash4_slot_size(void) 158 + { 159 + return sizeof(struct udp_hslot); 160 + } 161 + 162 + static inline bool udp_has_hash4(const struct udp_hslot *hslot2) 163 + { 164 + return UDP_HSLOT_MAIN(hslot2)->hash4_cnt; 165 + } 166 + 167 + static inline void udp_hash4_inc(struct udp_hslot *hslot2) 168 + { 169 + UDP_HSLOT_MAIN(hslot2)->hash4_cnt++; 170 + } 171 + 172 + static inline void udp_hash4_dec(struct udp_hslot *hslot2) 173 + { 174 + UDP_HSLOT_MAIN(hslot2)->hash4_cnt--; 175 + } 176 + #endif /* CONFIG_BASE_SMALL */ 124 177 125 178 extern struct proto udp_prot; 126 179 ··· 302 193 } 303 194 304 195 void udp_lib_unhash(struct sock *sk); 305 - void udp_lib_rehash(struct sock *sk, u16 new_hash); 196 + void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); 197 + u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, 198 + const __be32 faddr, const __be16 fport); 306 199 307 200 static inline void udp_lib_close(struct sock *sk, long timeout) 308 201 { 309 202 sk_common_release(sk); 310 203 } 204 + 205 + /* hash4 routines shared between UDPv4/6 */ 206 + #if IS_ENABLED(CONFIG_BASE_SMALL) 207 + static inline void udp_lib_hash4(struct sock *sk, u16 hash) 208 + { 209 + } 210 + 211 + static inline void udp4_hash4(struct sock *sk) 212 + { 213 + } 214 + #else /* !CONFIG_BASE_SMALL */ 215 + void udp_lib_hash4(struct sock *sk, u16 hash); 216 + void udp4_hash4(struct sock *sk); 217 + #endif /* CONFIG_BASE_SMALL */ 311 218 312 219 int udp_lib_get_port(struct sock *sk, unsigned short snum, 313 220 unsigned int hash2_nulladdr);
+220 -25
net/ipv4/udp.c
··· 410 410 return score; 411 411 } 412 412 413 - INDIRECT_CALLABLE_SCOPE 414 413 u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, 415 414 const __be32 faddr, const __be16 fport) 416 415 { ··· 418 419 return __inet_ehashfn(laddr, lport, faddr, fport, 419 420 udp_ehash_secret + net_hash_mix(net)); 420 421 } 422 + EXPORT_SYMBOL(udp_ehashfn); 421 423 422 424 /* called with rcu_read_lock() */ 423 425 static struct sock *udp4_lib_lookup2(const struct net *net, ··· 478 478 return result; 479 479 } 480 480 481 + #if IS_ENABLED(CONFIG_BASE_SMALL) 482 + static struct sock *udp4_lib_lookup4(const struct net *net, 483 + __be32 saddr, __be16 sport, 484 + __be32 daddr, unsigned int hnum, 485 + int dif, int sdif, 486 + struct udp_table *udptable) 487 + { 488 + return NULL; 489 + } 490 + 491 + static void udp_rehash4(struct udp_table *udptable, struct sock *sk, 492 + u16 newhash4) 493 + { 494 + } 495 + 496 + static void udp_unhash4(struct udp_table *udptable, struct sock *sk) 497 + { 498 + } 499 + #else /* !CONFIG_BASE_SMALL */ 500 + static struct sock *udp4_lib_lookup4(const struct net *net, 501 + __be32 saddr, __be16 sport, 502 + __be32 daddr, unsigned int hnum, 503 + int dif, int sdif, 504 + struct udp_table *udptable) 505 + { 506 + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 507 + const struct hlist_nulls_node *node; 508 + struct udp_hslot *hslot4; 509 + unsigned int hash4, slot; 510 + struct udp_sock *up; 511 + struct sock *sk; 512 + 513 + hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport); 514 + slot = hash4 & udptable->mask; 515 + hslot4 = &udptable->hash4[slot]; 516 + INET_ADDR_COOKIE(acookie, saddr, daddr); 517 + 518 + begin: 519 + /* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */ 520 + udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) { 521 + sk = (struct sock *)up; 522 + if (inet_match(net, sk, acookie, ports, dif, sdif)) 523 + return sk; 524 + } 525 + 526 + /* if the nulls value we got at the 
end of this lookup is not the 527 + * expected one, we must restart lookup. We probably met an item that 528 + * was moved to another chain due to rehash. 529 + */ 530 + if (get_nulls_value(node) != slot) 531 + goto begin; 532 + 533 + return NULL; 534 + } 535 + 536 + /* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */ 537 + static void udp_rehash4(struct udp_table *udptable, struct sock *sk, 538 + u16 newhash4) 539 + { 540 + struct udp_hslot *hslot4, *nhslot4; 541 + 542 + hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash); 543 + nhslot4 = udp_hashslot4(udptable, newhash4); 544 + udp_sk(sk)->udp_lrpa_hash = newhash4; 545 + 546 + if (hslot4 != nhslot4) { 547 + spin_lock_bh(&hslot4->lock); 548 + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node); 549 + hslot4->count--; 550 + spin_unlock_bh(&hslot4->lock); 551 + 552 + spin_lock_bh(&nhslot4->lock); 553 + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, 554 + &nhslot4->nulls_head); 555 + nhslot4->count++; 556 + spin_unlock_bh(&nhslot4->lock); 557 + } 558 + } 559 + 560 + static void udp_unhash4(struct udp_table *udptable, struct sock *sk) 561 + { 562 + struct udp_hslot *hslot2, *hslot4; 563 + 564 + if (udp_hashed4(sk)) { 565 + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 566 + hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash); 567 + 568 + spin_lock(&hslot4->lock); 569 + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node); 570 + hslot4->count--; 571 + spin_unlock(&hslot4->lock); 572 + 573 + spin_lock(&hslot2->lock); 574 + udp_hash4_dec(hslot2); 575 + spin_unlock(&hslot2->lock); 576 + } 577 + } 578 + 579 + void udp_lib_hash4(struct sock *sk, u16 hash) 580 + { 581 + struct udp_hslot *hslot, *hslot2, *hslot4; 582 + struct net *net = sock_net(sk); 583 + struct udp_table *udptable; 584 + 585 + /* Connected udp socket can re-connect to another remote address, 586 + * so rehash4 is needed. 
587 + */ 588 + udptable = net->ipv4.udp_table; 589 + if (udp_hashed4(sk)) { 590 + udp_rehash4(udptable, sk, hash); 591 + return; 592 + } 593 + 594 + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); 595 + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 596 + hslot4 = udp_hashslot4(udptable, hash); 597 + udp_sk(sk)->udp_lrpa_hash = hash; 598 + 599 + spin_lock_bh(&hslot->lock); 600 + if (rcu_access_pointer(sk->sk_reuseport_cb)) 601 + reuseport_detach_sock(sk); 602 + 603 + spin_lock(&hslot4->lock); 604 + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, 605 + &hslot4->nulls_head); 606 + hslot4->count++; 607 + spin_unlock(&hslot4->lock); 608 + 609 + spin_lock(&hslot2->lock); 610 + udp_hash4_inc(hslot2); 611 + spin_unlock(&hslot2->lock); 612 + 613 + spin_unlock_bh(&hslot->lock); 614 + } 615 + EXPORT_SYMBOL(udp_lib_hash4); 616 + 617 + /* call with sock lock */ 618 + void udp4_hash4(struct sock *sk) 619 + { 620 + struct net *net = sock_net(sk); 621 + unsigned int hash; 622 + 623 + if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY)) 624 + return; 625 + 626 + hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num, 627 + sk->sk_daddr, sk->sk_dport); 628 + 629 + udp_lib_hash4(sk, hash); 630 + } 631 + EXPORT_SYMBOL(udp4_hash4); 632 + #endif /* CONFIG_BASE_SMALL */ 633 + 481 634 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try 482 635 * harder than this. 
-DaveM 483 636 */ ··· 639 486 int sdif, struct udp_table *udptable, struct sk_buff *skb) 640 487 { 641 488 unsigned short hnum = ntohs(dport); 642 - unsigned int hash2, slot2; 643 489 struct udp_hslot *hslot2; 644 490 struct sock *result, *sk; 491 + unsigned int hash2; 645 492 646 493 hash2 = ipv4_portaddr_hash(net, daddr, hnum); 647 - slot2 = hash2 & udptable->mask; 648 - hslot2 = &udptable->hash2[slot2]; 494 + hslot2 = udp_hashslot2(udptable, hash2); 495 + 496 + if (udp_has_hash4(hslot2)) { 497 + result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum, 498 + dif, sdif, udptable); 499 + if (result) /* udp4_lib_lookup4 return sk or NULL */ 500 + return result; 501 + } 649 502 650 503 /* Lookup connected or non-wildcard socket */ 651 504 result = udp4_lib_lookup2(net, saddr, sport, ··· 678 519 679 520 /* Lookup wildcard sockets */ 680 521 hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 681 - slot2 = hash2 & udptable->mask; 682 - hslot2 = &udptable->hash2[slot2]; 522 + hslot2 = udp_hashslot2(udptable, hash2); 683 523 684 524 result = udp4_lib_lookup2(net, saddr, sport, 685 525 htonl(INADDR_ANY), hnum, dif, sdif, ··· 2093 1935 } 2094 1936 EXPORT_SYMBOL(udp_pre_connect); 2095 1937 1938 + static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1939 + { 1940 + int res; 1941 + 1942 + lock_sock(sk); 1943 + res = __ip4_datagram_connect(sk, uaddr, addr_len); 1944 + if (!res) 1945 + udp4_hash4(sk); 1946 + release_sock(sk); 1947 + return res; 1948 + } 1949 + 2096 1950 int __udp_disconnect(struct sock *sk, int flags) 2097 1951 { 2098 1952 struct inet_sock *inet = inet_sk(sk); ··· 2164 1994 hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 2165 1995 hslot2->count--; 2166 1996 spin_unlock(&hslot2->lock); 1997 + 1998 + udp_unhash4(udptable, sk); 2167 1999 } 2168 2000 spin_unlock_bh(&hslot->lock); 2169 2001 } ··· 2175 2003 /* 2176 2004 * inet_rcv_saddr was changed, we must rehash secondary hash 2177 2005 */ 2178 - void udp_lib_rehash(struct sock 
*sk, u16 newhash) 2006 + void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) 2179 2007 { 2180 2008 if (sk_hashed(sk)) { 2181 2009 struct udp_table *udptable = udp_get_table_prot(sk); ··· 2207 2035 spin_unlock(&nhslot2->lock); 2208 2036 } 2209 2037 2038 + if (udp_hashed4(sk)) { 2039 + udp_rehash4(udptable, sk, newhash4); 2040 + 2041 + if (hslot2 != nhslot2) { 2042 + spin_lock(&hslot2->lock); 2043 + udp_hash4_dec(hslot2); 2044 + spin_unlock(&hslot2->lock); 2045 + 2046 + spin_lock(&nhslot2->lock); 2047 + udp_hash4_inc(nhslot2); 2048 + spin_unlock(&nhslot2->lock); 2049 + } 2050 + } 2210 2051 spin_unlock_bh(&hslot->lock); 2211 2052 } 2212 2053 } ··· 2231 2046 u16 new_hash = ipv4_portaddr_hash(sock_net(sk), 2232 2047 inet_sk(sk)->inet_rcv_saddr, 2233 2048 inet_sk(sk)->inet_num); 2234 - udp_lib_rehash(sk, new_hash); 2049 + u16 new_hash4 = udp_ehashfn(sock_net(sk), 2050 + sk->sk_rcv_saddr, sk->sk_num, 2051 + sk->sk_daddr, sk->sk_dport); 2052 + 2053 + udp_lib_rehash(sk, new_hash, new_hash4); 2235 2054 } 2236 2055 2237 2056 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ··· 2457 2268 udptable->mask; 2458 2269 hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; 2459 2270 start_lookup: 2460 - hslot = &udptable->hash2[hash2]; 2271 + hslot = &udptable->hash2[hash2].hslot; 2461 2272 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); 2462 2273 } 2463 2274 ··· 2728 2539 struct udp_table *udptable = net->ipv4.udp_table; 2729 2540 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); 2730 2541 unsigned short hnum = ntohs(loc_port); 2731 - unsigned int hash2, slot2; 2732 2542 struct udp_hslot *hslot2; 2543 + unsigned int hash2; 2733 2544 __portpair ports; 2734 2545 struct sock *sk; 2735 2546 2736 2547 hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); 2737 - slot2 = hash2 & udptable->mask; 2738 - hslot2 = &udptable->hash2[slot2]; 2548 + hslot2 = udp_hashslot2(udptable, hash2); 2739 2549 ports = INET_COMBINED_PORTS(rmt_port, hnum); 2740 
2550 2741 2551 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { ··· 3128 2940 .owner = THIS_MODULE, 3129 2941 .close = udp_lib_close, 3130 2942 .pre_connect = udp_pre_connect, 3131 - .connect = ip4_datagram_connect, 2943 + .connect = udp_connect, 3132 2944 .disconnect = udp_disconnect, 3133 2945 .ioctl = udp_ioctl, 3134 2946 .init = udp_init_sock, ··· 3375 3187 batch_sks = 0; 3376 3188 3377 3189 for (; state->bucket <= udptable->mask; state->bucket++) { 3378 - struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; 3190 + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot; 3379 3191 3380 3192 if (hlist_empty(&hslot2->head)) 3381 3193 continue; ··· 3616 3428 3617 3429 void __init udp_table_init(struct udp_table *table, const char *name) 3618 3430 { 3619 - unsigned int i; 3431 + unsigned int i, slot_size; 3620 3432 3433 + slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) + 3434 + udp_hash4_slot_size(); 3621 3435 table->hash = alloc_large_system_hash(name, 3622 - 2 * sizeof(struct udp_hslot), 3436 + slot_size, 3623 3437 uhash_entries, 3624 3438 21, /* one slot per 2 MB */ 3625 3439 0, ··· 3630 3440 UDP_HTABLE_SIZE_MIN, 3631 3441 UDP_HTABLE_SIZE_MAX); 3632 3442 3633 - table->hash2 = table->hash + (table->mask + 1); 3443 + table->hash2 = (void *)(table->hash + (table->mask + 1)); 3634 3444 for (i = 0; i <= table->mask; i++) { 3635 3445 INIT_HLIST_HEAD(&table->hash[i].head); 3636 3446 table->hash[i].count = 0; 3637 3447 spin_lock_init(&table->hash[i].lock); 3638 3448 } 3639 3449 for (i = 0; i <= table->mask; i++) { 3640 - INIT_HLIST_HEAD(&table->hash2[i].head); 3641 - table->hash2[i].count = 0; 3642 - spin_lock_init(&table->hash2[i].lock); 3450 + INIT_HLIST_HEAD(&table->hash2[i].hslot.head); 3451 + table->hash2[i].hslot.count = 0; 3452 + spin_lock_init(&table->hash2[i].hslot.lock); 3643 3453 } 3454 + udp_table_hash4_init(table); 3644 3455 } 3645 3456 3646 3457 u32 udp_flow_hashrnd(void) ··· 3667 3476 static struct udp_table 
__net_init *udp_pernet_table_alloc(unsigned int hash_entries) 3668 3477 { 3669 3478 struct udp_table *udptable; 3479 + unsigned int slot_size; 3670 3480 int i; 3671 3481 3672 3482 udptable = kmalloc(sizeof(*udptable), GFP_KERNEL); 3673 3483 if (!udptable) 3674 3484 goto out; 3675 3485 3676 - udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot), 3486 + slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) + 3487 + udp_hash4_slot_size(); 3488 + udptable->hash = vmalloc_huge(hash_entries * slot_size, 3677 3489 GFP_KERNEL_ACCOUNT); 3678 3490 if (!udptable->hash) 3679 3491 goto free_table; 3680 3492 3681 - udptable->hash2 = udptable->hash + hash_entries; 3493 + udptable->hash2 = (void *)(udptable->hash + hash_entries); 3682 3494 udptable->mask = hash_entries - 1; 3683 3495 udptable->log = ilog2(hash_entries); 3684 3496 ··· 3690 3496 udptable->hash[i].count = 0; 3691 3497 spin_lock_init(&udptable->hash[i].lock); 3692 3498 3693 - INIT_HLIST_HEAD(&udptable->hash2[i].head); 3694 - udptable->hash2[i].count = 0; 3695 - spin_lock_init(&udptable->hash2[i].lock); 3499 + INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head); 3500 + udptable->hash2[i].hslot.count = 0; 3501 + spin_lock_init(&udptable->hash2[i].hslot.lock); 3696 3502 } 3503 + udp_table_hash4_init(udptable); 3697 3504 3698 3505 return udptable; 3699 3506
+106 -11
net/ipv6/udp.c
··· 110 110 u16 new_hash = ipv6_portaddr_hash(sock_net(sk), 111 111 &sk->sk_v6_rcv_saddr, 112 112 inet_sk(sk)->inet_num); 113 + u16 new_hash4; 113 114 114 - udp_lib_rehash(sk, new_hash); 115 + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { 116 + new_hash4 = udp_ehashfn(sock_net(sk), 117 + sk->sk_rcv_saddr, sk->sk_num, 118 + sk->sk_daddr, sk->sk_dport); 119 + } else { 120 + new_hash4 = udp6_ehashfn(sock_net(sk), 121 + &sk->sk_v6_rcv_saddr, sk->sk_num, 122 + &sk->sk_v6_daddr, sk->sk_dport); 123 + } 124 + 125 + udp_lib_rehash(sk, new_hash, new_hash4); 115 126 } 116 127 117 128 static int compute_score(struct sock *sk, const struct net *net, ··· 227 216 return result; 228 217 } 229 218 219 + #if IS_ENABLED(CONFIG_BASE_SMALL) 220 + static struct sock *udp6_lib_lookup4(const struct net *net, 221 + const struct in6_addr *saddr, __be16 sport, 222 + const struct in6_addr *daddr, 223 + unsigned int hnum, int dif, int sdif, 224 + struct udp_table *udptable) 225 + { 226 + return NULL; 227 + } 228 + 229 + static void udp6_hash4(struct sock *sk) 230 + { 231 + } 232 + #else /* !CONFIG_BASE_SMALL */ 233 + static struct sock *udp6_lib_lookup4(const struct net *net, 234 + const struct in6_addr *saddr, __be16 sport, 235 + const struct in6_addr *daddr, 236 + unsigned int hnum, int dif, int sdif, 237 + struct udp_table *udptable) 238 + { 239 + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 240 + const struct hlist_nulls_node *node; 241 + struct udp_hslot *hslot4; 242 + unsigned int hash4, slot; 243 + struct udp_sock *up; 244 + struct sock *sk; 245 + 246 + hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport); 247 + slot = hash4 & udptable->mask; 248 + hslot4 = &udptable->hash4[slot]; 249 + 250 + begin: 251 + udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) { 252 + sk = (struct sock *)up; 253 + if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) 254 + return sk; 255 + } 256 + 257 + /* if the nulls value we got at the end of this lookup is not the 258 + * 
expected one, we must restart lookup. We probably met an item that 259 + * was moved to another chain due to rehash. 260 + */ 261 + if (get_nulls_value(node) != slot) 262 + goto begin; 263 + 264 + return NULL; 265 + } 266 + 267 + static void udp6_hash4(struct sock *sk) 268 + { 269 + struct net *net = sock_net(sk); 270 + unsigned int hash; 271 + 272 + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { 273 + udp4_hash4(sk); 274 + return; 275 + } 276 + 277 + if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 278 + return; 279 + 280 + hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num, 281 + &sk->sk_v6_daddr, sk->sk_dport); 282 + 283 + udp_lib_hash4(sk, hash); 284 + } 285 + #endif /* CONFIG_BASE_SMALL */ 286 + 230 287 /* rcu_read_lock() must be held */ 231 288 struct sock *__udp6_lib_lookup(const struct net *net, 232 289 const struct in6_addr *saddr, __be16 sport, ··· 303 224 struct sk_buff *skb) 304 225 { 305 226 unsigned short hnum = ntohs(dport); 306 - unsigned int hash2, slot2; 307 227 struct udp_hslot *hslot2; 308 228 struct sock *result, *sk; 229 + unsigned int hash2; 309 230 310 231 hash2 = ipv6_portaddr_hash(net, daddr, hnum); 311 - slot2 = hash2 & udptable->mask; 312 - hslot2 = &udptable->hash2[slot2]; 232 + hslot2 = udp_hashslot2(udptable, hash2); 233 + 234 + if (udp_has_hash4(hslot2)) { 235 + result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum, 236 + dif, sdif, udptable); 237 + if (result) /* udp6_lib_lookup4 return sk or NULL */ 238 + return result; 239 + } 313 240 314 241 /* Lookup connected or non-wildcard sockets */ 315 242 result = udp6_lib_lookup2(net, saddr, sport, ··· 342 257 343 258 /* Lookup wildcard sockets */ 344 259 hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); 345 - slot2 = hash2 & udptable->mask; 346 - hslot2 = &udptable->hash2[slot2]; 260 + hslot2 = udp_hashslot2(udptable, hash2); 347 261 348 262 result = udp6_lib_lookup2(net, saddr, sport, 349 263 &in6addr_any, hnum, dif, sdif, ··· 943 859 udptable->mask; 944 860 
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; 945 861 start_lookup: 946 - hslot = &udptable->hash2[hash2]; 862 + hslot = &udptable->hash2[hash2].hslot; 947 863 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); 948 864 } 949 865 ··· 1149 1065 { 1150 1066 struct udp_table *udptable = net->ipv4.udp_table; 1151 1067 unsigned short hnum = ntohs(loc_port); 1152 - unsigned int hash2, slot2; 1153 1068 struct udp_hslot *hslot2; 1069 + unsigned int hash2; 1154 1070 __portpair ports; 1155 1071 struct sock *sk; 1156 1072 1157 1073 hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); 1158 - slot2 = hash2 & udptable->mask; 1159 - hslot2 = &udptable->hash2[slot2]; 1074 + hslot2 = udp_hashslot2(udptable, hash2); 1160 1075 ports = INET_COMBINED_PORTS(rmt_port, hnum); 1161 1076 1162 1077 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { ··· 1250 1167 return -EINVAL; 1251 1168 1252 1169 return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); 1170 + } 1171 + 1172 + static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1173 + { 1174 + int res; 1175 + 1176 + lock_sock(sk); 1177 + res = __ip6_datagram_connect(sk, uaddr, addr_len); 1178 + if (!res) 1179 + udp6_hash4(sk); 1180 + release_sock(sk); 1181 + return res; 1253 1182 } 1254 1183 1255 1184 /** ··· 1859 1764 .owner = THIS_MODULE, 1860 1765 .close = udp_lib_close, 1861 1766 .pre_connect = udpv6_pre_connect, 1862 - .connect = ip6_datagram_connect, 1767 + .connect = udpv6_connect, 1863 1768 .disconnect = udp_disconnect, 1864 1769 .ioctl = udp_ioctl, 1865 1770 .init = udpv6_init_sock,