Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests: net: Add busy_poll_test

Add an epoll busy poll test using netdevsim.

This test consists of:
- busy_poller (via busy_poller.c)
- busy_poll_test.sh which loads netdevsim, sets up network namespaces,
and runs busy_poller to receive data and socat to send data.

The selftest tests two different scenarios:
- busy poll (the pre-existing version in the kernel)
- busy poll with suspend enabled (what this series adds)

The data transmitted is a 1 MiB temporary file generated from /dev/urandom,
and the test passes if the md5sum of the input file given to
socat matches the md5sum of the output file written by busy_poller.

netdevsim was chosen instead of veth due to netdevsim's support for
netdev-genl.

For now, this test uses the functionality that netdevsim provides. In the
future, perhaps netdevsim can be extended to emulate device IRQs to more
thoroughly test all pre-existing kernel options (like defer_hard_irqs)
and suspend.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241109050245.191288-6-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Joe Damato and committed by
Jakub Kicinski
347fcdc4 8a6de262

+521
+1
tools/testing/selftests/net/.gitignore
··· 2 2 bind_bhash 3 3 bind_timewait 4 4 bind_wildcard 5 + busy_poller 5 6 cmsg_sender 6 7 diag_uid 7 8 epoll_busy_poll
+9
tools/testing/selftests/net/Makefile
··· 97 97 TEST_PROGS += vlan_hw_filter.sh 98 98 TEST_PROGS += bpf_offload.py 99 99 TEST_PROGS += ipv6_route_update_soft_lockup.sh 100 + TEST_PROGS += busy_poll_test.sh 101 + 102 + # YNL files, must be before "include ..lib.mk" 103 + YNL_GEN_FILES := busy_poller 104 + TEST_GEN_FILES += $(YNL_GEN_FILES) 100 105 101 106 TEST_FILES := settings 102 107 TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh ··· 111 106 TEST_INCLUDES := forwarding/lib.sh 112 107 113 108 include ../lib.mk 109 + 110 + # YNL build 111 + YNL_GENS := netdev 112 + include ynl.mk 114 113 115 114 $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap 116 115 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
+165
tools/testing/selftests/net/busy_poll_test.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Epoll busy poll selftest.
#
# Creates two linked netdevsim devices, moves each into its own network
# namespace (nssv = server, nscl = client), runs busy_poller in the server
# namespace and sends it a 1MiB random file with socat from the client
# namespace. A test passes when busy_poller exits successfully and the md5sum
# of the file it wrote matches the md5sum of the file that was sent.
source net_helper.sh

NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
NSIM_CL_ID=$((512 + RANDOM % 256))
NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID

NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device

SERVER_IP=192.168.1.1
CLIENT_IP=192.168.1.2
SERVER_PORT=48675

# busy poll config
MAX_EVENTS=8
BUSY_POLL_USECS=0
BUSY_POLL_BUDGET=16
PREFER_BUSY_POLL=1

# IRQ deferral config
# NOTE: NAPI_DEFER_HARD_IRQS and GRO_FLUSH_TIMEOUT are not passed to
# busy_poller by this script; only SUSPEND_TIMEOUT is used (via -s).
NAPI_DEFER_HARD_IRQS=100
GRO_FLUSH_TIMEOUT=50000
SUSPEND_TIMEOUT=20000000

# Set by the main flow once the namespaces exist; cleanup() relies on them
# being empty until then.
NSIM_SV_FD=
NSIM_SV_IFIDX=
NSIM_CL_FD=
NSIM_CL_IFIDX=

# Create both namespaces, move one netdevsim netdev into each, assign
# addresses and bring the links up. Any failing step aborts the script
# (set -e); the EXIT trap then tears down whatever was created.
setup_ns()
{
	set -e
	ip netns add nssv
	ip netns add nscl

	NSIM_SV_NAME=$(find "$NSIM_SV_SYS/net" -maxdepth 1 -type d ! \
		-path "$NSIM_SV_SYS/net" -exec basename {} \;)
	NSIM_CL_NAME=$(find "$NSIM_CL_SYS/net" -maxdepth 1 -type d ! \
		-path "$NSIM_CL_SYS/net" -exec basename {} \;)

	# ensure the server has 1 queue
	ethtool -L "$NSIM_SV_NAME" combined 1 2>/dev/null

	ip link set "$NSIM_SV_NAME" netns nssv
	ip link set "$NSIM_CL_NAME" netns nscl

	ip netns exec nssv ip addr add "${SERVER_IP}/24" dev "$NSIM_SV_NAME"
	ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev "$NSIM_CL_NAME"

	ip netns exec nssv ip link set dev "$NSIM_SV_NAME" up
	ip netns exec nscl ip link set dev "$NSIM_CL_NAME" up

	set +e
}

# Delete both test namespaces.
cleanup_ns()
{
	ip netns del nscl
	ip netns del nssv
}

# EXIT trap: best-effort teardown of everything this script may have created.
# Runs on every exit path (success, test failure, set -e abort), so each step
# tolerates the resource not existing. The script's exit status is not
# changed by this function.
cleanup()
{
	set +e

	# Only unlink if the link step could have been reached.
	if [ -n "$NSIM_SV_FD" ] && [ -n "$NSIM_SV_IFIDX" ]; then
		echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" 2>/dev/null \
			> "$NSIM_DEV_SYS_UNLINK"
	fi

	echo "$NSIM_CL_ID" 2>/dev/null > "$NSIM_DEV_SYS_DEL"
	echo "$NSIM_SV_ID" 2>/dev/null > "$NSIM_DEV_SYS_DEL"

	cleanup_ns 2>/dev/null

	modprobe -r netdevsim
}

# Run one transfer through busy_poller and verify the received data.
#   $1: irq suspend timeout handed to busy_poller via -s (default 0)
# Returns 0 on success, 1 if busy_poller failed or the checksums differ.
test_busypoll()
{
	local suspend_value=${1:-0}
	local tmp_file out_file
	local tmp_file_md5sum out_file_md5sum
	local poller_pid poller_rc
	local res

	tmp_file=$(mktemp)
	out_file=$(mktemp)

	# fill a test file with random data
	dd if=/dev/urandom of="${tmp_file}" bs=1M count=1 2> /dev/null

	timeout -k 1s 30s ip netns exec nssv ./busy_poller \
		-p"${SERVER_PORT}" \
		-b"${SERVER_IP}" \
		-m"${MAX_EVENTS}" \
		-u"${BUSY_POLL_USECS}" \
		-P"${PREFER_BUSY_POLL}" \
		-g"${BUSY_POLL_BUDGET}" \
		-i"${NSIM_SV_IFIDX}" \
		-s"${suspend_value}" \
		-o"${out_file}" &
	poller_pid=$!

	wait_local_port_listen nssv ${SERVER_PORT} tcp

	ip netns exec nscl socat -u "$tmp_file" "TCP:${SERVER_IP}:${SERVER_PORT}"

	# Collect the poller's own exit status rather than discarding it.
	wait "$poller_pid"
	poller_rc=$?

	tmp_file_md5sum=$(md5sum "$tmp_file" | cut -f1 -d' ')
	out_file_md5sum=$(md5sum "$out_file" | cut -f1 -d' ')

	if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then
		res=0
	else
		echo "md5sum mismatch"
		echo "input file md5sum: ${tmp_file_md5sum}";
		echo "output file md5sum: ${out_file_md5sum}";
		res=1
	fi

	if [ "$poller_rc" -ne 0 ]; then
		echo "busy_poller exited with status ${poller_rc}"
		res=1
	fi

	rm -f "$out_file" "$tmp_file"

	return $res
}

# Same as test_busypoll, but with irq suspend enabled.
test_busypoll_with_suspend()
{
	test_busypoll "${SUSPEND_TIMEOUT}"

	return $?
}

###
### Code start
###

modprobe netdevsim

# From here on, undo everything on any exit path.
trap cleanup EXIT

# linking

echo "$NSIM_SV_ID" > "$NSIM_DEV_SYS_NEW"
echo "$NSIM_CL_ID" > "$NSIM_DEV_SYS_NEW"
udevadm settle

setup_ns

# "exec {VAR}<file" lets bash pick a free descriptor and store it in VAR.
exec {NSIM_SV_FD}</var/run/netns/nssv
NSIM_SV_IFIDX=$(ip netns exec nssv cat "/sys/class/net/$NSIM_SV_NAME/ifindex")

exec {NSIM_CL_FD}</var/run/netns/nscl
NSIM_CL_IFIDX=$(ip netns exec nscl cat "/sys/class/net/$NSIM_CL_NAME/ifindex")

echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
	"$NSIM_DEV_SYS_LINK"

if [ $? -ne 0 ]; then
	echo "linking netdevsim1 with netdevsim2 should succeed"
	exit 1
fi

test_busypoll
if [ $? -ne 0 ]; then
	echo "test_busypoll failed"
	exit 1
fi

test_busypoll_with_suspend
if [ $? -ne 0 ]; then
	echo "test_busypoll_with_suspend failed"
	exit 1
fi

# Teardown (unlink, device removal, namespaces, module) happens in cleanup().
exit 0
+346
tools/testing/selftests/net/busy_poller.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <assert.h> 3 + #include <errno.h> 4 + #include <error.h> 5 + #include <fcntl.h> 6 + #include <inttypes.h> 7 + #include <limits.h> 8 + #include <stdlib.h> 9 + #include <stdio.h> 10 + #include <string.h> 11 + #include <unistd.h> 12 + #include <ynl.h> 13 + 14 + #include <arpa/inet.h> 15 + #include <netinet/in.h> 16 + 17 + #include <sys/epoll.h> 18 + #include <sys/ioctl.h> 19 + #include <sys/socket.h> 20 + #include <sys/types.h> 21 + 22 + #include <linux/genetlink.h> 23 + #include <linux/netlink.h> 24 + 25 + #include "netdev-user.h" 26 + 27 + /* The below ifdef blob is required because: 28 + * 29 + * - sys/epoll.h does not (yet) have the ioctl definitions included. So, 30 + * systems with older glibcs will not have them available. However, 31 + * sys/epoll.h does include the type definition for epoll_data, which is 32 + * needed by the user program (e.g. epoll_event.data.fd) 33 + * 34 + * - linux/eventpoll.h does not define the epoll_data type, it is simply an 35 + * opaque __u64. It does, however, include the ioctl definition. 36 + * 37 + * Including both headers is impossible (types would be redefined), so I've 38 + * opted instead to take sys/epoll.h, and include the blob below. 39 + * 40 + * Someday, when glibc is globally up to date, the blob below can be removed. 
41 + */ 42 + #if !defined(EPOLL_IOC_TYPE) 43 + struct epoll_params { 44 + uint32_t busy_poll_usecs; 45 + uint16_t busy_poll_budget; 46 + uint8_t prefer_busy_poll; 47 + 48 + /* pad the struct to a multiple of 64bits */ 49 + uint8_t __pad; 50 + }; 51 + 52 + #define EPOLL_IOC_TYPE 0x8A 53 + #define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) 54 + #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) 55 + #endif 56 + 57 + static uint32_t cfg_port = 8000; 58 + static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; 59 + static char *cfg_outfile; 60 + static int cfg_max_events = 8; 61 + static int cfg_ifindex; 62 + 63 + /* busy poll params */ 64 + static uint32_t cfg_busy_poll_usecs; 65 + static uint32_t cfg_busy_poll_budget; 66 + static uint32_t cfg_prefer_busy_poll; 67 + 68 + /* IRQ params */ 69 + static uint32_t cfg_defer_hard_irqs; 70 + static uint64_t cfg_gro_flush_timeout; 71 + static uint64_t cfg_irq_suspend_timeout; 72 + 73 + static void usage(const char *filepath) 74 + { 75 + error(1, 0, 76 + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", 77 + filepath); 78 + } 79 + 80 + static void parse_opts(int argc, char **argv) 81 + { 82 + int ret; 83 + int c; 84 + 85 + if (argc <= 1) 86 + usage(argv[0]); 87 + 88 + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { 89 + switch (c) { 90 + case 'u': 91 + cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); 92 + if (cfg_busy_poll_usecs == ULONG_MAX || 93 + cfg_busy_poll_usecs > UINT32_MAX) 94 + error(1, ERANGE, "busy_poll_usecs too large"); 95 + break; 96 + case 'P': 97 + cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); 98 + if (cfg_prefer_busy_poll == ULONG_MAX || 99 + cfg_prefer_busy_poll > 1) 100 + error(1, ERANGE, 101 + "prefer busy poll should be 0 or 1"); 102 + break; 103 + case 'g': 104 + cfg_busy_poll_budget = 
strtoul(optarg, NULL, 0); 105 + if (cfg_busy_poll_budget == ULONG_MAX || 106 + cfg_busy_poll_budget > UINT16_MAX) 107 + error(1, ERANGE, 108 + "busy poll budget must be [0, UINT16_MAX]"); 109 + break; 110 + case 'p': 111 + cfg_port = strtoul(optarg, NULL, 0); 112 + if (cfg_port > UINT16_MAX) 113 + error(1, ERANGE, "port must be <= 65535"); 114 + break; 115 + case 'b': 116 + ret = inet_aton(optarg, &cfg_bind_addr); 117 + if (ret == 0) 118 + error(1, errno, 119 + "bind address %s invalid", optarg); 120 + break; 121 + case 'o': 122 + cfg_outfile = strdup(optarg); 123 + if (!cfg_outfile) 124 + error(1, 0, "outfile invalid"); 125 + break; 126 + case 'm': 127 + cfg_max_events = strtol(optarg, NULL, 0); 128 + 129 + if (cfg_max_events == LONG_MIN || 130 + cfg_max_events == LONG_MAX || 131 + cfg_max_events <= 0) 132 + error(1, ERANGE, 133 + "max events must be > 0 and < LONG_MAX"); 134 + break; 135 + case 'd': 136 + cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); 137 + 138 + if (cfg_defer_hard_irqs == ULONG_MAX || 139 + cfg_defer_hard_irqs > INT32_MAX) 140 + error(1, ERANGE, 141 + "defer_hard_irqs must be <= INT32_MAX"); 142 + break; 143 + case 'r': 144 + cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); 145 + 146 + if (cfg_gro_flush_timeout == ULLONG_MAX) 147 + error(1, ERANGE, 148 + "gro_flush_timeout must be < ULLONG_MAX"); 149 + break; 150 + case 's': 151 + cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); 152 + 153 + if (cfg_irq_suspend_timeout == ULLONG_MAX) 154 + error(1, ERANGE, 155 + "irq_suspend_timeout must be < ULLONG_MAX"); 156 + break; 157 + case 'i': 158 + cfg_ifindex = strtoul(optarg, NULL, 0); 159 + if (cfg_ifindex == ULONG_MAX) 160 + error(1, ERANGE, 161 + "ifindex must be < ULONG_MAX"); 162 + break; 163 + } 164 + } 165 + 166 + if (!cfg_ifindex) 167 + usage(argv[0]); 168 + 169 + if (optind != argc) 170 + usage(argv[0]); 171 + } 172 + 173 + static void epoll_ctl_add(int epfd, int fd, uint32_t events) 174 + { 175 + struct epoll_event ev; 176 + 177 + 
ev.events = events; 178 + ev.data.fd = fd; 179 + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) 180 + error(1, errno, "epoll_ctl add fd: %d", fd); 181 + } 182 + 183 + static void setnonblock(int sockfd) 184 + { 185 + int flags; 186 + 187 + flags = fcntl(sockfd, F_GETFL, 0); 188 + 189 + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) 190 + error(1, errno, "unable to set socket to nonblocking mode"); 191 + } 192 + 193 + static void write_chunk(int fd, char *buf, ssize_t buflen) 194 + { 195 + ssize_t remaining = buflen; 196 + char *buf_offset = buf; 197 + ssize_t writelen = 0; 198 + ssize_t write_result; 199 + 200 + while (writelen < buflen) { 201 + write_result = write(fd, buf_offset, remaining); 202 + if (write_result == -1) 203 + error(1, errno, "unable to write data to outfile"); 204 + 205 + writelen += write_result; 206 + remaining -= write_result; 207 + buf_offset += write_result; 208 + } 209 + } 210 + 211 + static void setup_queue(void) 212 + { 213 + struct netdev_napi_get_list *napi_list = NULL; 214 + struct netdev_napi_get_req_dump *req = NULL; 215 + struct netdev_napi_set_req *set_req = NULL; 216 + struct ynl_sock *ys; 217 + struct ynl_error yerr; 218 + uint32_t napi_id; 219 + 220 + ys = ynl_sock_create(&ynl_netdev_family, &yerr); 221 + if (!ys) 222 + error(1, 0, "YNL: %s", yerr.msg); 223 + 224 + req = netdev_napi_get_req_dump_alloc(); 225 + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); 226 + napi_list = netdev_napi_get_dump(ys, req); 227 + 228 + /* assume there is 1 NAPI configured and take the first */ 229 + if (napi_list->obj._present.id) 230 + napi_id = napi_list->obj.id; 231 + else 232 + error(1, 0, "napi ID not present?"); 233 + 234 + set_req = netdev_napi_set_req_alloc(); 235 + netdev_napi_set_req_set_id(set_req, napi_id); 236 + netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); 237 + netdev_napi_set_req_set_gro_flush_timeout(set_req, 238 + cfg_gro_flush_timeout); 239 + 
netdev_napi_set_req_set_irq_suspend_timeout(set_req, 240 + cfg_irq_suspend_timeout); 241 + 242 + if (netdev_napi_set(ys, set_req)) 243 + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); 244 + 245 + netdev_napi_get_list_free(napi_list); 246 + netdev_napi_get_req_dump_free(req); 247 + netdev_napi_set_req_free(set_req); 248 + ynl_sock_destroy(ys); 249 + } 250 + 251 + static void run_poller(void) 252 + { 253 + struct epoll_event events[cfg_max_events]; 254 + struct epoll_params epoll_params = {0}; 255 + struct sockaddr_in server_addr; 256 + int i, epfd, nfds; 257 + ssize_t readlen; 258 + int outfile_fd; 259 + char buf[1024]; 260 + int sockfd; 261 + int conn; 262 + int val; 263 + 264 + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); 265 + if (outfile_fd == -1) 266 + error(1, errno, "unable to open outfile: %s", cfg_outfile); 267 + 268 + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 269 + if (sockfd == -1) 270 + error(1, errno, "unable to create listen socket"); 271 + 272 + server_addr.sin_family = AF_INET; 273 + server_addr.sin_port = htons(cfg_port); 274 + server_addr.sin_addr = cfg_bind_addr; 275 + 276 + /* these values are range checked during parse_opts, so casting is safe 277 + * here 278 + */ 279 + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; 280 + epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; 281 + epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; 282 + epoll_params.__pad = 0; 283 + 284 + val = 1; 285 + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) 286 + error(1, errno, "poller setsockopt reuseaddr"); 287 + 288 + setnonblock(sockfd); 289 + 290 + if (bind(sockfd, (struct sockaddr *)&server_addr, 291 + sizeof(struct sockaddr_in))) 292 + error(0, errno, "poller bind to port: %d\n", cfg_port); 293 + 294 + if (listen(sockfd, 1)) 295 + error(1, errno, "poller listen"); 296 + 297 + epfd = epoll_create1(0); 298 + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) 299 + error(1, errno, 
"unable to set busy poll params"); 300 + 301 + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); 302 + 303 + for (;;) { 304 + nfds = epoll_wait(epfd, events, cfg_max_events, -1); 305 + for (i = 0; i < nfds; i++) { 306 + if (events[i].data.fd == sockfd) { 307 + conn = accept(sockfd, NULL, NULL); 308 + if (conn == -1) 309 + error(1, errno, 310 + "accepting incoming connection failed"); 311 + 312 + setnonblock(conn); 313 + epoll_ctl_add(epfd, conn, 314 + EPOLLIN | EPOLLET | EPOLLRDHUP | 315 + EPOLLHUP); 316 + } else if (events[i].events & EPOLLIN) { 317 + for (;;) { 318 + readlen = read(events[i].data.fd, buf, 319 + sizeof(buf)); 320 + if (readlen > 0) 321 + write_chunk(outfile_fd, buf, 322 + readlen); 323 + else 324 + break; 325 + } 326 + } else { 327 + /* spurious event ? */ 328 + } 329 + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { 330 + epoll_ctl(epfd, EPOLL_CTL_DEL, 331 + events[i].data.fd, NULL); 332 + close(events[i].data.fd); 333 + close(outfile_fd); 334 + return; 335 + } 336 + } 337 + } 338 + } 339 + 340 + int main(int argc, char *argv[]) 341 + { 342 + parse_opts(argc, argv); 343 + setup_queue(); 344 + run_poller(); 345 + return 0; 346 + }