Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: fix psock_fanout selftest hash collision

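Fix flaky PACKET_FANOUT_HASH results, which depended on whether the
two test flows happened to hash onto the same packet socket. When the
expected split is not observed, the test now retries with alternate
ports (up to five times) to find a pair that does not collide.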

Also add tests for PACKET_FANOUT_LB and PACKET_FANOUT_CPU, and replace
the PACKET_STATISTICS-based packet counting with counting frames in a
mapped packet ring (PACKET_RX_RING).

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Willem de Bruijn and committed by David S. Miller.
23a9072e b44540ea

+123 -43
+123 -43
tools/testing/selftests/net-afpacket/psock_fanout.c
··· 16 16 * The test currently runs for 17 17 * - PACKET_FANOUT_HASH 18 18 * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER 19 + * - PACKET_FANOUT_LB 20 + * - PACKET_FANOUT_CPU 19 21 * - PACKET_FANOUT_ROLLOVER 20 22 * 21 23 * Todo: 22 - * - datapath: PACKET_FANOUT_LB 23 - * - datapath: PACKET_FANOUT_CPU 24 24 * - functionality: PACKET_FANOUT_FLAG_DEFRAG 25 25 * 26 26 * License (GPLv2): ··· 39 39 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 40 40 */ 41 41 42 + #define _GNU_SOURCE /* for sched_setaffinity */ 43 + 42 44 #include <arpa/inet.h> 43 45 #include <errno.h> 46 + #include <fcntl.h> 44 47 #include <linux/filter.h> 45 48 #include <linux/if_packet.h> 46 49 #include <net/ethernet.h> 47 50 #include <netinet/ip.h> 48 51 #include <netinet/udp.h> 49 - #include <fcntl.h> 52 + #include <poll.h> 53 + #include <sched.h> 50 54 #include <stdint.h> 51 55 #include <stdio.h> 52 56 #include <stdlib.h> 53 57 #include <string.h> 58 + #include <sys/mman.h> 54 59 #include <sys/socket.h> 55 60 #include <sys/stat.h> 56 61 #include <sys/types.h> ··· 63 58 64 59 #define DATA_LEN 100 65 60 #define DATA_CHAR 'a' 61 + #define RING_NUM_FRAMES 20 62 + #define PORT_BASE 8000 66 63 67 64 static void pair_udp_open(int fds[], uint16_t port) 68 65 { ··· 169 162 return -1; 170 163 } 171 164 172 - val = sizeof(struct iphdr) + sizeof(struct udphdr) + DATA_LEN; 173 - val *= num_packets; 174 - /* hack: apparently, the above calculation is too small (TODO: fix) */ 175 - val *= 3; 176 - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) { 177 - perror("setsockopt SO_RCVBUF"); 178 - exit(1); 179 - } 180 - 181 165 sock_fanout_setfilter(fd); 182 166 return fd; 183 167 } 184 168 185 - static void sock_fanout_read(int fds[], const int expect[]) 169 + static char *sock_fanout_open_ring(int fd) 186 170 { 187 - struct tpacket_stats stats; 188 - socklen_t ssize; 171 + struct tpacket_req req = { 172 + .tp_block_size = getpagesize(), 173 + .tp_frame_size = getpagesize(), 174 + 
.tp_block_nr = RING_NUM_FRAMES, 175 + .tp_frame_nr = RING_NUM_FRAMES, 176 + }; 177 + char *ring; 178 + 179 + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, 180 + sizeof(req))) { 181 + perror("packetsock ring setsockopt"); 182 + exit(1); 183 + } 184 + 185 + ring = mmap(0, req.tp_block_size * req.tp_block_nr, 186 + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 187 + if (!ring) { 188 + fprintf(stderr, "packetsock ring mmap\n"); 189 + exit(1); 190 + } 191 + 192 + return ring; 193 + } 194 + 195 + static int sock_fanout_read_ring(int fd, void *ring) 196 + { 197 + struct tpacket_hdr *header = ring; 198 + int count = 0; 199 + 200 + while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) { 201 + count++; 202 + header = ring + (count * getpagesize()); 203 + } 204 + 205 + return count; 206 + } 207 + 208 + static int sock_fanout_read(int fds[], char *rings[], const int expect[]) 209 + { 189 210 int ret[2]; 190 211 191 - ssize = sizeof(stats); 192 - if (getsockopt(fds[0], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) { 193 - perror("getsockopt statistics 0"); 194 - exit(1); 195 - } 196 - ret[0] = stats.tp_packets - stats.tp_drops; 197 - ssize = sizeof(stats); 198 - if (getsockopt(fds[1], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) { 199 - perror("getsockopt statistics 1"); 200 - exit(1); 201 - } 202 - ret[1] = stats.tp_packets - stats.tp_drops; 212 + ret[0] = sock_fanout_read_ring(fds[0], rings[0]); 213 + ret[1] = sock_fanout_read_ring(fds[1], rings[1]); 203 214 204 215 fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n", 205 216 ret[0], ret[1], expect[0], expect[1]); ··· 225 200 if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && 226 201 (!(ret[0] == expect[1] && ret[1] == expect[0]))) { 227 202 fprintf(stderr, "ERROR: incorrect queue lengths\n"); 228 - exit(1); 203 + return 1; 229 204 } 205 + 206 + return 0; 230 207 } 231 208 232 209 /* Test illegal mode + flag combination */ ··· 280 253 } 281 254 } 282 255 283 - static void 
test_datapath(uint16_t typeflags, 284 - const int expect1[], const int expect2[]) 256 + static int test_datapath(uint16_t typeflags, int port_off, 257 + const int expect1[], const int expect2[]) 285 258 { 286 259 const int expect0[] = { 0, 0 }; 287 - int fds[2], fds_udp[2][2]; 260 + char *rings[2]; 261 + int fds[2], fds_udp[2][2], ret; 288 262 289 263 fprintf(stderr, "test: datapath 0x%hx\n", typeflags); 290 264 ··· 295 267 fprintf(stderr, "ERROR: failed open\n"); 296 268 exit(1); 297 269 } 298 - pair_udp_open(fds_udp[0], 8000); 299 - pair_udp_open(fds_udp[1], 8002); 300 - sock_fanout_read(fds, expect0); 270 + rings[0] = sock_fanout_open_ring(fds[0]); 271 + rings[1] = sock_fanout_open_ring(fds[1]); 272 + pair_udp_open(fds_udp[0], PORT_BASE); 273 + pair_udp_open(fds_udp[1], PORT_BASE + port_off); 274 + sock_fanout_read(fds, rings, expect0); 301 275 302 276 /* Send data, but not enough to overflow a queue */ 303 277 pair_udp_send(fds_udp[0], 15); 304 278 pair_udp_send(fds_udp[1], 5); 305 - sock_fanout_read(fds, expect1); 279 + ret = sock_fanout_read(fds, rings, expect1); 306 280 307 281 /* Send more data, overflow the queue */ 308 282 pair_udp_send(fds_udp[0], 15); 309 283 /* TODO: ensure consistent order between expect1 and expect2 */ 310 - sock_fanout_read(fds, expect2); 284 + ret |= sock_fanout_read(fds, rings, expect2); 311 285 286 + if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || 287 + munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { 288 + fprintf(stderr, "close rings\n"); 289 + exit(1); 290 + } 312 291 if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || 313 292 close(fds_udp[0][1]) || close(fds_udp[0][0]) || 314 293 close(fds[1]) || close(fds[0])) { 315 294 fprintf(stderr, "close datapath\n"); 316 295 exit(1); 317 296 } 297 + 298 + return ret; 299 + } 300 + 301 + static int set_cpuaffinity(int cpuid) 302 + { 303 + cpu_set_t mask; 304 + 305 + CPU_ZERO(&mask); 306 + CPU_SET(cpuid, &mask); 307 + if (sched_setaffinity(0, sizeof(mask), &mask)) { 308 + 
if (errno != EINVAL) { 309 + fprintf(stderr, "setaffinity %d\n", cpuid); 310 + exit(1); 311 + } 312 + return 1; 313 + } 314 + 315 + return 0; 318 316 } 319 317 320 318 int main(int argc, char **argv) 321 319 { 322 - const int expect_hash[2][2] = { { 15, 5 }, { 5, 0 } }; 323 - const int expect_hash_rb[2][2] = { { 15, 5 }, { 5, 10 } }; 324 - const int expect_rb[2][2] = { { 20, 0 }, { 0, 15 } }; 320 + const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; 321 + const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; 322 + const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; 323 + const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; 324 + const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; 325 + const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; 326 + int port_off = 2, tries = 5, ret; 325 327 326 328 test_control_single(); 327 329 test_control_group(); 328 330 329 - test_datapath(PACKET_FANOUT_HASH, expect_hash[0], expect_hash[1]); 330 - test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, 331 - expect_hash_rb[0], expect_hash_rb[1]); 332 - test_datapath(PACKET_FANOUT_ROLLOVER, expect_rb[0], expect_rb[1]); 331 + /* find a set of ports that do not collide onto the same socket */ 332 + ret = test_datapath(PACKET_FANOUT_HASH, port_off, 333 + expect_hash[0], expect_hash[1]); 334 + while (ret && tries--) { 335 + fprintf(stderr, "info: trying alternate ports (%d)\n", tries); 336 + ret = test_datapath(PACKET_FANOUT_HASH, ++port_off, 337 + expect_hash[0], expect_hash[1]); 338 + } 339 + 340 + ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, 341 + port_off, expect_hash_rb[0], expect_hash_rb[1]); 342 + ret |= test_datapath(PACKET_FANOUT_LB, 343 + port_off, expect_lb[0], expect_lb[1]); 344 + ret |= test_datapath(PACKET_FANOUT_ROLLOVER, 345 + port_off, expect_rb[0], expect_rb[1]); 346 + 347 + set_cpuaffinity(0); 348 + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, 349 + expect_cpu0[0], expect_cpu0[1]); 350 + if 
(!set_cpuaffinity(1)) 351 + /* TODO: test that choice alternates with previous */ 352 + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, 353 + expect_cpu1[0], expect_cpu1[1]); 354 + 355 + if (ret) 356 + return 1; 333 357 334 358 printf("OK. All tests passed\n"); 335 359 return 0;