Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: selftests: Stress reuseport listen

This patch adds a test that has 300 VIPs listening on port 443.
Each VIP:443 will have 80 listening socks by using SO_REUSEPORT.
Thus, it will have 24000 listening socks.

Before removing the port-only listening_hash, all socks are in the
same port 443 bucket and inet_reuseport_add_sock() spends much time
walking through the bucket. After removing the port-only listening_hash
and moving all usage to the port+addr lhash2, each bucket in the
ideal case has 80 sk, which is much smaller than before.

Here is the test result from a qemu:
Before: listen 24000 socks took 210.210485362 (~210s)
After: listen 24000 socks took 0.207173 (~210ms)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Martin KaFai Lau and committed by
Jakub Kicinski
ec8cb4f6 cae3873c

+132
+2
tools/testing/selftests/net/Makefile
··· 38 38 TEST_PROGS += vrf_strict_mode_test.sh 39 39 TEST_PROGS += arp_ndisc_evict_nocarrier.sh 40 40 TEST_PROGS += ndisc_unsolicited_na_test.sh 41 + TEST_PROGS += stress_reuseport_listen.sh 41 42 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh 42 43 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh 43 44 TEST_GEN_FILES = socket nettest ··· 57 56 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls 58 57 TEST_GEN_FILES += toeplitz 59 58 TEST_GEN_FILES += cmsg_sender 59 + TEST_GEN_FILES += stress_reuseport_listen 60 60 TEST_PROGS += test_vxlan_vnifiltering.sh 61 61 62 62 TEST_FILES := settings
+105
tools/testing/selftests/net/stress_reuseport_listen.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ 4 + 5 + /* Test listening on the same port 443 with multiple VIPS. 6 + * Each VIP:443 will have multiple sk listening on by using 7 + * SO_REUSEPORT. 8 + */ 9 + 10 + #include <unistd.h> 11 + #include <stdio.h> 12 + #include <stdlib.h> 13 + #include <error.h> 14 + #include <errno.h> 15 + #include <time.h> 16 + #include <arpa/inet.h> 17 + 18 + #define IP6_LADDR_START "2401:dead::1" 19 + #define IP6_LPORT 443 20 + #define NSEC_PER_SEC 1000000000L 21 + #define NSEC_PER_USEC 1000L 22 + 23 + static unsigned int nr_socks_per_vip; 24 + static unsigned int nr_vips; 25 + 26 + static int *bind_reuseport_sock6(void) 27 + { 28 + int *lfds, *cur_fd, err, optvalue = 1; 29 + struct sockaddr_in6 sa6 = {}; 30 + unsigned int i, j; 31 + 32 + sa6.sin6_family = AF_INET6; 33 + sa6.sin6_port = htons(IP6_LPORT); 34 + err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr); 35 + if (err != 1) 36 + error(1, err, "inet_pton(%s)", IP6_LADDR_START); 37 + 38 + lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0])); 39 + if (!lfds) 40 + error(1, errno, "cannot alloc array of lfds"); 41 + 42 + cur_fd = lfds; 43 + for (i = 0; i < nr_vips; i++) { 44 + for (j = 0; j < nr_socks_per_vip; j++) { 45 + *cur_fd = socket(AF_INET6, SOCK_STREAM, 0); 46 + if (*cur_fd == -1) 47 + error(1, errno, 48 + "lfds[%u,%u] = socket(AF_INET6)", i, j); 49 + 50 + err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT, 51 + &optvalue, sizeof(optvalue)); 52 + if (err) 53 + error(1, errno, 54 + "setsockopt(lfds[%u,%u], SO_REUSEPORT)", 55 + i, j); 56 + 57 + err = bind(*cur_fd, (struct sockaddr *)&sa6, 58 + sizeof(sa6)); 59 + if (err) 60 + error(1, errno, "bind(lfds[%u,%u])", i, j); 61 + cur_fd++; 62 + } 63 + sa6.sin6_addr.s6_addr32[3]++; 64 + } 65 + 66 + return lfds; 67 + } 68 + 69 + int main(int argc, const char *argv[]) 70 + { 71 + struct timespec start_ts, end_ts; 72 + unsigned long start_ns, end_ns; 73 + 
unsigned int nr_lsocks; 74 + int *lfds, i, err; 75 + 76 + if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0) 77 + error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n", 78 + argv[0]); 79 + 80 + nr_vips = atoi(argv[1]); 81 + nr_socks_per_vip = atoi(argv[2]); 82 + nr_lsocks = nr_vips * nr_socks_per_vip; 83 + lfds = bind_reuseport_sock6(); 84 + 85 + clock_gettime(CLOCK_MONOTONIC, &start_ts); 86 + for (i = 0; i < nr_lsocks; i++) { 87 + err = listen(lfds[i], 0); 88 + if (err) 89 + error(1, errno, "listen(lfds[%d])", i); 90 + } 91 + clock_gettime(CLOCK_MONOTONIC, &end_ts); 92 + 93 + start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec; 94 + end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec; 95 + 96 + printf("listen %d socks took %lu.%lu\n", nr_lsocks, 97 + (end_ns - start_ns) / NSEC_PER_SEC, 98 + (end_ns - start_ns) / NSEC_PER_USEC); 99 + 100 + for (i = 0; i < nr_lsocks; i++) 101 + close(lfds[i]); 102 + 103 + free(lfds); 104 + return 0; 105 + }
+25
tools/testing/selftests/net/stress_reuseport_listen.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
#
# Run ./stress_reuseport_listen inside a throw-away network namespace.

NS='stress_reuseport_listen_ns'
# fd limit to request: room for the 24000 listening socks plus some slack
NR_FILES=24100
SAVED_NR_FILES="$(ulimit -n)"
# 300 different vips listen on port 443
NR_VIPS=300
# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT
NR_SOCKS_PER_VIP=80

# Create the namespace, allow binding to addresses that are not
# configured on any interface, and raise the open-file limit.
setup() {
	ip netns add "$NS"
	ip netns exec "$NS" sysctl -q -w net.ipv6.ip_nonlocal_bind=1
	ulimit -n "$NR_FILES"
}

# Delete the namespace and put the open-file limit back.
cleanup() {
	ip netns del "$NS"
	ulimit -n "$SAVED_NR_FILES"
}

trap cleanup EXIT
setup

# Total 24000 listening socks
ip netns exec "$NS" ./stress_reuseport_listen "$NR_VIPS" "$NR_SOCKS_PER_VIP"