Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftest: Don't reuse port for SO_INCOMING_CPU test.

Jakub reported that ASSERT_EQ(cpu, i) in so_incoming_cpu.c seems to
fire somewhat randomly.

# # RUN so_incoming_cpu.before_reuseport.test3 ...
# # so_incoming_cpu.c:191:test3:Expected cpu (32) == i (0)
# # test3: Test terminated by assertion
# # FAIL so_incoming_cpu.before_reuseport.test3
# not ok 3 so_incoming_cpu.before_reuseport.test3

When the test failed, not-yet-accepted CLOSE_WAIT sockets received a
SYN with a "challenging" SEQ number, which was sent from an unexpected
CPU that did not create the receiver.

The test basically does:

1. for each cpu:
1-1. create a server
1-2. set SO_INCOMING_CPU

2. for each cpu:
2-1. set cpu affinity
2-2. create some clients
2-3. let clients connect() to the server on the same cpu
2-4. close() clients

3. for each server:
3-1. accept() all child sockets
3-2. check if all children have the same SO_INCOMING_CPU as the server

The root cause was the close() in 2-4. and net.ipv4.tcp_tw_reuse.

In one iteration of loop 2., close() changed the client state to
FIN_WAIT_2, and the peer transitioned to CLOSE_WAIT.

In another iteration of loop 2., connect() happened to select the same
port as the FIN_WAIT_2 socket, and the port was reused because the
default value of net.ipv4.tcp_tw_reuse is 2.

As a result, the new client sent a SYN to the CLOSE_WAIT socket from
a different CPU, and the receiver's sk_incoming_cpu was overwritten
with an unexpected CPU ID.

Also, the SYN had a different SEQ number, so the CLOSE_WAIT socket
responded with a Challenge ACK. The new client properly returned an RST
and effectively killed the CLOSE_WAIT socket.

This way, all clients were created successfully, but the error was
detected later by 3-2., ASSERT_EQ(cpu, i).

To avoid the failure, let's make sure that (i) the number of clients
is less than the number of available ports and (ii) such reuse never
happens.

Fixes: 6df96146b202 ("selftest: Add test for SO_INCOMING_CPU.")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Tested-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20240120031642.67014-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Kuniyuki Iwashima and committed by
Paolo Abeni
97de5a15 7267e8dc

+50 -18
+50 -18
tools/testing/selftests/net/so_incoming_cpu.c
··· 3 3 #define _GNU_SOURCE 4 4 #include <sched.h> 5 5 6 + #include <fcntl.h> 7 + 6 8 #include <netinet/in.h> 7 9 #include <sys/socket.h> 8 10 #include <sys/sysinfo.h> 9 11 10 12 #include "../kselftest_harness.h" 11 13 12 - #define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ 13 - #define NR_SERVER self->nproc 14 - #define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) 15 - 16 14 FIXTURE(so_incoming_cpu) 17 15 { 18 - int nproc; 19 16 int *servers; 20 17 union { 21 18 struct sockaddr addr; ··· 53 56 .when_to_set = AFTER_ALL_LISTEN, 54 57 }; 55 58 59 + static void write_sysctl(struct __test_metadata *_metadata, 60 + char *filename, char *string) 61 + { 62 + int fd, len, ret; 63 + 64 + fd = open(filename, O_WRONLY); 65 + ASSERT_NE(fd, -1); 66 + 67 + len = strlen(string); 68 + ret = write(fd, string, len); 69 + ASSERT_EQ(ret, len); 70 + } 71 + 72 + static void setup_netns(struct __test_metadata *_metadata) 73 + { 74 + ASSERT_EQ(unshare(CLONE_NEWNET), 0); 75 + ASSERT_EQ(system("ip link set lo up"), 0); 76 + 77 + write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001"); 78 + write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0"); 79 + } 80 + 81 + #define NR_PORT (60001 - 10000 - 1) 82 + #define NR_CLIENT_PER_SERVER_DEFAULT 32 83 + static int nr_client_per_server, nr_server, nr_client; 84 + 56 85 FIXTURE_SETUP(so_incoming_cpu) 57 86 { 58 - self->nproc = get_nprocs(); 59 - ASSERT_LE(2, self->nproc); 87 + setup_netns(_metadata); 60 88 61 - self->servers = malloc(sizeof(int) * NR_SERVER); 89 + nr_server = get_nprocs(); 90 + ASSERT_LE(2, nr_server); 91 + 92 + if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT) 93 + nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT; 94 + else 95 + nr_client_per_server = NR_PORT / nr_server; 96 + 97 + nr_client = nr_client_per_server * nr_server; 98 + 99 + self->servers = malloc(sizeof(int) * nr_server); 62 100 ASSERT_NE(self->servers, NULL); 63 101 64 102 self->in_addr.sin_family = AF_INET; ··· 106 
74 { 107 75 int i; 108 76 109 - for (i = 0; i < NR_SERVER; i++) 77 + for (i = 0; i < nr_server; i++) 110 78 close(self->servers[i]); 111 79 112 80 free(self->servers); ··· 142 110 if (variant->when_to_set == BEFORE_LISTEN) 143 111 set_so_incoming_cpu(_metadata, fd, cpu); 144 112 145 - /* We don't use CLIENT_PER_SERVER here not to block 113 + /* We don't use nr_client_per_server here not to block 146 114 * this test at connect() if SO_INCOMING_CPU is broken. 147 115 */ 148 - ret = listen(fd, NR_CLIENT); 116 + ret = listen(fd, nr_client); 149 117 ASSERT_EQ(ret, 0); 150 118 151 119 if (variant->when_to_set == AFTER_LISTEN) ··· 160 128 { 161 129 int i, ret; 162 130 163 - for (i = 0; i < NR_SERVER; i++) { 131 + for (i = 0; i < nr_server; i++) { 164 132 self->servers[i] = create_server(_metadata, self, variant, i); 165 133 166 134 if (i == 0) { ··· 170 138 } 171 139 172 140 if (variant->when_to_set == AFTER_ALL_LISTEN) { 173 - for (i = 0; i < NR_SERVER; i++) 141 + for (i = 0; i < nr_server; i++) 174 142 set_so_incoming_cpu(_metadata, self->servers[i], i); 175 143 } 176 144 } ··· 181 149 cpu_set_t cpu_set; 182 150 int i, j, fd, ret; 183 151 184 - for (i = 0; i < NR_SERVER; i++) { 152 + for (i = 0; i < nr_server; i++) { 185 153 CPU_ZERO(&cpu_set); 186 154 187 155 CPU_SET(i, &cpu_set); ··· 194 162 ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); 195 163 ASSERT_EQ(ret, 0); 196 164 197 - for (j = 0; j < CLIENT_PER_SERVER; j++) { 165 + for (j = 0; j < nr_client_per_server; j++) { 198 166 fd = socket(AF_INET, SOCK_STREAM, 0); 199 167 ASSERT_NE(fd, -1); 200 168 ··· 212 180 int i, j, fd, cpu, ret, total = 0; 213 181 socklen_t len = sizeof(int); 214 182 215 - for (i = 0; i < NR_SERVER; i++) { 216 - for (j = 0; j < CLIENT_PER_SERVER; j++) { 183 + for (i = 0; i < nr_server; i++) { 184 + for (j = 0; j < nr_client_per_server; j++) { 217 185 /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ 218 186 fd = accept(self->servers[i], &self->addr, &self->addrlen); 219 187 
ASSERT_NE(fd, -1); ··· 227 195 } 228 196 } 229 197 230 - ASSERT_EQ(total, NR_CLIENT); 198 + ASSERT_EQ(total, nr_client); 231 199 TH_LOG("SO_INCOMING_CPU is very likely to be " 232 200 "working correctly with %d sockets.", total); 233 201 }