Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests: net: add netpoll basic functionality test

Add a basic selftest for the netpoll polling mechanism, specifically
targeting the netpoll poll() side.

The test creates a scenario where network transmission is running at
maximum speed, and netpoll needs to poll the NIC. This is achieved by:

1. Configuring a single RX/TX queue to create contention
2. Generating background traffic to saturate the interface
3. Sending netconsole messages to trigger netpoll polling
4. Using dynamic netconsole targets via configfs
5. Deleting and re-creating the netconsole target after some messages
6. Starting bpftrace in parallel to check that netpoll_poll_dev() is
called
7. Stopping once bpftrace exits and netpoll_poll_dev() was called.

The test validates a critical netpoll code path by monitoring traffic
flow and ensuring netpoll_poll_dev() is called when the normal TX path
is blocked.

This addresses a gap in netpoll test coverage for a path that is
tricky for the network stack.

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250714-netpoll_test-v7-3-c0220cfaa63e@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Breno Leitao and committed by
Jakub Kicinski
b3019343 fd2aadce

+397
+1
tools/testing/selftests/drivers/net/Makefile
··· 16 16 netcons_fragmented_msg.sh \ 17 17 netcons_overflow.sh \ 18 18 netcons_sysdata.sh \ 19 + netpoll_basic.py \ 19 20 ping.py \ 20 21 queues.py \ 21 22 stats.py \
+396
tools/testing/selftests/drivers/net/netpoll_basic.py
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Author: Breno Leitao <leitao@debian.org>
"""
This test aims to evaluate the netpoll polling mechanism (as in
netpoll_poll_dev()). It presents a complex scenario where the network
attempts to send a packet but fails, prompting it to poll the NIC from within
the netpoll TX side.

This has been a crucial path in netpoll that was previously untested. Jakub
suggested using a single RX/TX queue, pushing traffic to the NIC, and then
sending netpoll messages (via netconsole) to trigger the poll.

In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
so, the test passes, otherwise KsftXfailEx is raised instead of reporting a
pass. This test is very dependent on the driver and environment, given we are
trying to trigger a tricky scenario.
"""

import errno
import logging
import os
import random
import string
import threading
import time
from typing import Optional

from lib.py import (
    bpftrace,
    CmdExitFailure,
    defer,
    ethtool,
    GenerateTraffic,
    ksft_exit,
    ksft_pr,
    ksft_run,
    KsftFailEx,
    KsftSkipEx,
    NetDrvEpEnv,
    KsftXfailEx,
)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
NETCONS_REMOTE_PORT: int = 6666
NETCONS_LOCAL_PORT: int = 1514

# Max number of netcons messages to send. Each iteration will setup
# netconsole and send MAX_WRITES messages
ITERATIONS: int = 20
# Number of writes to /dev/kmsg per iteration
MAX_WRITES: int = 40
# MAPS contains the information coming from bpftrace it will have only one
# key: "hits", which tells the number of times netpoll_poll_dev() was called
MAPS: dict[str, int] = {}
# Thread to run bpftrace in parallel
BPF_THREAD: Optional[threading.Thread] = None
# Time bpftrace will be running in parallel.
BPFTRACE_TIMEOUT: int = 10


def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
    """
    Read the current RX/TX ring sizes using `ethtool -g`.

    The values are used to restore the ring sizes after the test.

    Returns:
        A (rx, tx) tuple taken from the "rx" and "tx" fields of the first
        JSON entry reported by ethtool.

    Raises:
        KsftSkipEx: if the ethtool output is empty or lacks "rx"/"tx".
    """
    try:
        ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
        rxs = ethtool_result["rx"]
        txs = ethtool_result["tx"]
    except (KeyError, IndexError) as exception:
        raise KsftSkipEx(
            f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
        ) from exception

    return rxs, txs


def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
    """
    Try to set the RX and TX ring sizes using `ethtool -G`.

    Args:
        interface_name: interface to configure.
        ring_size: (rx, tx) ring sizes to request.

    Returns:
        True if ethtool accepted the values, False if the command failed.
    """
    rxs = ring_size[0]
    txs = ring_size[1]

    logging.debug("Setting ring size to %d/%d", rxs, txs)
    try:
        ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
    except CmdExitFailure:
        # This might fail on a real device. Report the failure instead of
        # raising, so the caller can retry with a higher value or, worst
        # case, keep the ring size as it is.
        return False

    return True


def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
    """
    Read the number of RX, TX and combined queues using `ethtool -l`.

    Returns:
        A (rx, tx, combined) tuple. A field missing from the ethtool output
        is reported as -1.

    Raises:
        KsftSkipEx: if the ethtool output is empty.
    """

    try:
        ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
        rxq = ethtool_result.get("rx", -1)
        txq = ethtool_result.get("tx", -1)
        combined = ethtool_result.get("combined", -1)

    except IndexError as exception:
        raise KsftSkipEx(
            f"Failed to read queues numbers: {exception}. Not going to mess with them."
        ) from exception

    return rxq, txq, combined


def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
    """
    Set the number of RX, TX and combined queues using `ethtool -L`.

    Args:
        interface_name: interface to configure.
        queues: (rx, tx, combined) counts; an entry equal to -1 is left out
            of the ethtool command line.

    Raises:
        KsftSkipEx: if the ethtool command fails.
    """
    rxq, txq, combined = queues

    cmdline = f"-L {interface_name}"

    # -1 marks a queue type that was not reported when reading the counts,
    # so it is not passed back to ethtool.
    if rxq != -1:
        cmdline += f" rx {rxq}"
    if txq != -1:
        cmdline += f" tx {txq}"
    if combined != -1:
        cmdline += f" combined {combined}"

    logging.debug("calling: ethtool %s", cmdline)

    try:
        ethtool(cmdline)
    except CmdExitFailure as exception:
        raise KsftSkipEx(
            f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
        ) from exception


def netcons_generate_random_target_name() -> str:
    """Generate a random target name of the form 'netcons_<8 chars>'"""
    random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
    return f"netcons_{random_suffix}"


def netcons_create_target(
    config_data: dict[str, object],
    target_name: str,
) -> None:
    """
    Create a netconsole dynamic target and write its configuration.

    Args:
        config_data: attribute name -> value. Each value is converted with
            str() and written to the attribute file of the same name, in
            dictionary order.
        target_name: directory name to create under NETCONSOLE_CONFIGFS_PATH.

    Raises:
        KsftFailEx: if the directory cannot be created or an attribute
            cannot be written or read back.
    """
    logging.debug("Using netconsole name: %s", target_name)
    try:
        os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
        logging.debug(
            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
        )
    except OSError as exception:
        # An already existing target is not an error
        if exception.errno != errno.EEXIST:
            raise KsftFailEx(
                f"Failed to create netconsole target directory: {exception}"
            ) from exception

    try:
        for key, value in config_data.items():
            path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
            logging.debug("Writing %s to %s", key, path)
            with open(path, "w", encoding="utf-8") as file:
                # Always convert to string to write to file
                file.write(str(value))

        # Read all configuration values for debugging purposes
        for debug_key in config_data:
            with open(
                f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
                "r",
                encoding="utf-8",
            ) as file:
                content = file.read()
                logging.debug(
                    "%s/%s/%s : %s",
                    NETCONSOLE_CONFIGFS_PATH,
                    target_name,
                    debug_key,
                    content.strip(),
                )

    except Exception as exception:
        raise KsftFailEx(
            f"Failed to configure netconsole target: {exception}"
        ) from exception


def netcons_configure_target(
    cfg: NetDrvEpEnv, interface_name: str, target_name: str
) -> None:
    """Configure netconsole on the interface with the given target name"""
    # "enabled" is listed last so it is written after every other attribute
    # (netcons_create_target() writes the entries in dictionary order).
    config_data = {
        "extended": "1",
        "dev_name": interface_name,
        "local_port": NETCONS_LOCAL_PORT,
        "remote_port": NETCONS_REMOTE_PORT,
        "local_ip": cfg.addr,
        "remote_ip": cfg.remote_addr,
        "remote_mac": "00:00:00:00:00:00",  # Not important for this test
        "enabled": "1",
    }

    netcons_create_target(config_data, target_name)
    logging.debug(
        "Created netconsole target: %s on interface %s", target_name, interface_name
    )


def netcons_delete_target(name: str) -> None:
    """
    Delete a netconsole dynamic target, if it exists.

    Raises:
        KsftFailEx: if removing the target directory fails.
    """
    target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
    try:
        if os.path.exists(target_path):
            os.rmdir(target_path)
    except OSError as exception:
        raise KsftFailEx(
            f"Failed to delete netconsole target: {exception}"
        ) from exception


def netcons_load_module() -> None:
    """Try to load the netconsole module"""
    # Best effort: the exit status is not checked here. main() calls
    # test_check_dependencies() right after, which skips the test if the
    # netconsole configfs directory is still missing.
    os.system("modprobe netconsole")


def bpftrace_call() -> None:
    """Call bpftrace to find how many times netpoll_poll_dev() is called.
    Output is saved in the global variable `MAPS`"""

    # This is going to update the global variable, that will be seen by the
    # main function
    global MAPS  # pylint: disable=W0603

    # This will be passed to bpftrace as in bpftrace -e "expr"
    expr = "kprobe:netpoll_poll_dev { @hits = count(); }"

    MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
    logging.debug("BPFtrace output: %s", MAPS)


def bpftrace_start() -> None:
    """
    Start a thread that runs `bpftrace_call` in parallel.

    Raises:
        KsftSkipEx: if the thread is not alive right after being started.
    """
    global BPF_THREAD  # pylint: disable=W0603

    BPF_THREAD = threading.Thread(target=bpftrace_call)
    BPF_THREAD.start()
    if not BPF_THREAD.is_alive():
        raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")


def bpftrace_stop() -> None:
    """Wait for the bpftrace thread to finish, if it was started"""
    if BPF_THREAD:
        BPF_THREAD.join()


def bpftrace_any_hit(join: bool) -> bool:
    """
    Check if netpoll_poll_dev() was called, by looking at the global `MAPS`.

    Args:
        join: if True, wait for a still running bpftrace thread to finish
            before checking; if False, return False right away while the
            thread is still running.

    Returns:
        True if bpftrace reported at least one hit.

    Raises:
        KsftFailEx: if bpftrace was never started, or if it finished without
            producing the "hits" key.
    """
    if not BPF_THREAD:
        raise KsftFailEx("BPFtrace didn't start")

    if BPF_THREAD.is_alive():
        if join:
            # Wait for bpftrace to finish
            BPF_THREAD.join()
        else:
            # bpftrace is still running, so, we will not check the result yet
            return False

    logging.debug("MAPS coming from bpftrace = %s", MAPS)
    if "hits" not in MAPS:
        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")

    logging.debug("Got a total of %d hits", MAPS["hits"])
    return MAPS["hits"] > 0


def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Run do_netpoll_flush() while bpftrace watches netpoll_poll_dev().

    Raises:
        KsftXfailEx: if bpftrace finished without seeing any call to
            netpoll_poll_dev().
    """
    # Start bpftrace in parallel, so, it is watching
    # netpoll_poll_dev() while we are sending netconsole messages
    bpftrace_start()
    # NOTE(review): defer() presumably registers bpftrace_stop to run at
    # test cleanup - confirm against lib.py
    defer(bpftrace_stop)

    do_netpoll_flush(cfg, ifname, target_name)

    if bpftrace_any_hit(join=True):
        ksft_pr("netpoll_poll_dev() was called. Success")
        return

    raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")


def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Print messages to the console, trying to trigger a netpoll poll.

    Runs up to ITERATIONS rounds. Each round writes MAX_WRITES messages to
    /dev/kmsg and then deletes and re-creates the netconsole target. The
    loop stops early once the bpftrace thread is no longer running.

    Raises:
        KsftFailEx: if writing to /dev/kmsg failed 5 times in total.
    """
    netcons_configure_target(cfg, ifname, target_name)
    retry = 0

    for i in range(ITERATIONS):
        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
            # bpftrace is done, stop sending messages
            break

        msg = f"netcons test #{i}"
        # Open unbuffered: with Python's default buffering all the writes
        # below would be coalesced into a single write(2) issued when the
        # file is closed, so only one record would be sent per iteration and
        # a write error would escape the try/except below.
        with open("/dev/kmsg", "wb", buffering=0) as kmsg:
            for j in range(MAX_WRITES):
                try:
                    kmsg.write(f"{msg}-{j}\n".encode("utf-8"))
                except OSError as exception:
                    # in some cases, kmsg can be busy, so, we will retry
                    time.sleep(1)
                    retry += 1
                    if retry < 5:
                        logging.info("Failed to write to kmsg. Retrying")
                        # Tolerate a few failures: the failed message is
                        # dropped and we carry on with the next one
                        continue
                    raise KsftFailEx(
                        f"Failed to write to kmsg: {exception}"
                    ) from exception

        netcons_delete_target(target_name)
        netcons_configure_target(cfg, ifname, target_name)
        # If we sleep here, we will have a better chance of triggering
        # This number is based on a few tests I ran while developing this test
        time.sleep(0.4)


def configure_network(ifname: str) -> None:
    """
    Configure ring size and queue numbers to force congestion.

    The previous values are handed to defer() so they are set again later.
    """

    # Set defined queues to 1 to force congestion
    prev_queues = ethtool_get_queues_cnt(ifname)
    logging.debug("RX/TX/combined queues: %s", prev_queues)
    # Only set the queues to 1 if they exists in the device. I.e, they are > 0
    ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
    defer(ethtool_set_queues_cnt, ifname, prev_queues)

    # Try to set the ring size to some low value.
    # Do not fail if the hardware does not accept the desired values
    prev_ring_size = ethtool_get_ringsize(ifname)
    for size in [(1, 1), (128, 128), (256, 256)]:
        if ethtool_set_ringsize(ifname, size):
            # hardware accepted the desired ringsize
            logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
            break
    defer(ethtool_set_ringsize, ifname, prev_ring_size)


def test_netpoll(cfg: NetDrvEpEnv) -> None:
    """
    Test netpoll by sending traffic to the interface and then sending
    netconsole messages to trigger a poll
    """

    ifname = cfg.ifname
    configure_network(ifname)
    target_name = netcons_generate_random_target_name()
    traffic = None

    try:
        traffic = GenerateTraffic(cfg)
        do_netpoll_flush_monitored(cfg, ifname, target_name)
    finally:
        if traffic:
            traffic.stop()

        # Remove the netconsole target created for this test
        netcons_delete_target(target_name)


def test_check_dependencies() -> None:
    """
    Check if the dependencies are met.

    Raises:
        KsftSkipEx: if the netconsole configfs directory does not exist.
    """
    if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
        raise KsftSkipEx(
            f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
        )


def main() -> None:
    """Main function to run the test"""
    netcons_load_module()
    test_check_dependencies()
    with NetDrvEpEnv(__file__) as cfg:
        ksft_run(
            [test_netpoll],
            args=(cfg,),
        )
    ksft_exit()


if __name__ == "__main__":
    main()