Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Author: Breno Leitao <leitao@debian.org>
4"""
5 This test aims to evaluate the netpoll polling mechanism (as in
6 netpoll_poll_dev()). It presents a complex scenario where the network
7 attempts to send a packet but fails, prompting it to poll the NIC from within
8 the netpoll TX side.
9
10 This has been a crucial path in netpoll that was previously untested. Jakub
11 suggested using a single RX/TX queue, pushing traffic to the NIC, and then
12 sending netpoll messages (via netconsole) to trigger the poll.
13
14 In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
15 so, the test passes, otherwise it is marked XFAIL. This test is very dependent on
16 the driver and environment, given we are trying to trigger a tricky scenario.
17"""
18
19import errno
20import logging
21import os
22import random
23import string
24import threading
25import time
26from typing import Optional
27
28from lib.py import (
29 bpftrace,
30 CmdExitFailure,
31 defer,
32 ethtool,
33 GenerateTraffic,
34 ksft_exit,
35 ksft_pr,
36 ksft_run,
37 KsftFailEx,
38 KsftSkipEx,
39 NetDrvEpEnv,
40 KsftXfailEx,
41)
42
# Configure logging. With level INFO, the logging.debug() calls used
# throughout this file are not emitted unless the level is lowered.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# configfs directory under which netconsole dynamic targets are created
NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
# Ports written to the target's remote_port / local_port attributes
NETCONS_REMOTE_PORT: int = 6666
NETCONS_LOCAL_PORT: int = 1514

# Max number of iterations. Each iteration writes MAX_WRITES messages to
# /dev/kmsg and then re-creates the netconsole target
ITERATIONS: int = 20
# Number of writes to /dev/kmsg per iteration
MAX_WRITES: int = 40
# MAPS holds the result returned by bpftrace. It is expected to have a single
# key, "hits", which tells the number of times netpoll_poll_dev() was called
MAPS: dict[str, int] = {}
# Thread to run bpftrace in parallel
BPF_THREAD: Optional[threading.Thread] = None
# Timeout passed to bpftrace, i.e. how long it runs in parallel.
BPFTRACE_TIMEOUT: int = 10
65
66
def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
    """
    Query the current RX and TX ring sizes of an interface via `ethtool -g`.

    The returned (rx, tx) pair lets the caller restore the ring sizes after
    the test. Raises KsftSkipEx when the ethtool output is empty or lacks the
    "rx"/"tx" entries.
    """
    try:
        ring_info = ethtool(f"-g {interface_name}", json=True)[0]
        ring_sizes = (ring_info["rx"], ring_info["tx"])
    except (KeyError, IndexError) as exception:
        raise KsftSkipEx(
            f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
        ) from exception

    return ring_sizes
81
82
def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
    """
    Try to set the RX and TX ring sizes of an interface via `ethtool -G`.

    `ring_size` is an (rx, tx) pair. Returns True when ethtool succeeded and
    False when the command exited with a failure (CmdExitFailure).
    """
    rx_size, tx_size = ring_size[0], ring_size[1]

    logging.debug("Setting ring size to %d/%d", rx_size, tx_size)
    try:
        ethtool(f"-G {interface_name} rx {rx_size} tx {tx_size}")
    except CmdExitFailure:
        # A real device may reject the requested size. Report it so the
        # caller can retry with a higher value or leave the ring untouched.
        return False
    else:
        return True
97
98
def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
    """
    Query the RX, TX and combined queue counts via `ethtool -l`.

    Returns an (rx, tx, combined) tuple; any entry missing from the ethtool
    output is reported as -1. Raises KsftSkipEx when the output is empty.
    """
    try:
        channels = ethtool(f"-l {interface_name}", json=True)[0]
        queue_counts = (
            channels.get("rx", -1),
            channels.get("tx", -1),
            channels.get("combined", -1),
        )
    except IndexError as exception:
        raise KsftSkipEx(
            f"Failed to read queues numbers: {exception}. Not going to mess with them."
        ) from exception

    return queue_counts
114
115
def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
    """
    Apply RX, TX and combined queue counts via `ethtool -L`.

    `queues` is an (rx, tx, combined) tuple; entries equal to -1 are left out
    of the command line. Raises KsftSkipEx when ethtool exits with a failure.
    """
    rxq, txq, combined = queues

    # Only the queue kinds with a real value (not the -1 placeholder) are sent
    options = [
        f" {label} {count}"
        for label, count in (("rx", rxq), ("tx", txq), ("combined", combined))
        if count != -1
    ]
    cmdline = f"-L {interface_name}" + "".join(options)

    logging.debug("calling: ethtool %s", cmdline)

    try:
        ethtool(cmdline)
    except CmdExitFailure as exception:
        raise KsftSkipEx(
            f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
        ) from exception
137
138
def netcons_generate_random_target_name() -> str:
    """Return 'netcons_' followed by 8 random lowercase letters or digits."""
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = random.choices(alphabet, k=8)
    return "netcons_" + "".join(suffix_chars)
143
144
def netcons_create_target(
    config_data: dict[str, str],
    target_name: str,
) -> None:
    """
    Create a netconsole dynamic target and write its configuration.

    A directory named `target_name` is created under NETCONSOLE_CONFIGFS_PATH,
    then every key/value in `config_data` is written to the attribute file of
    the same name, in the dict's iteration order. Raises KsftFailEx when the
    directory cannot be created or an attribute cannot be written or read.
    """
    target_dir = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}"

    logging.debug("Using netconsole name: %s", target_name)
    try:
        os.makedirs(target_dir, exist_ok=True)
        logging.debug(
            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
        )
    except OSError as exception:
        # An already existing target is tolerated; anything else is fatal
        if exception.errno != errno.EEXIST:
            raise KsftFailEx(
                f"Failed to create netconsole target directory: {exception}"
            ) from exception

    try:
        for key, value in config_data.items():
            attr_path = f"{target_dir}/{key}"
            logging.debug("Writing %s to %s", key, attr_path)
            with open(attr_path, "w", encoding="utf-8") as attr_file:
                # Values may not be strings (e.g. ports), so convert them
                attr_file.write(str(value))

        # Read every attribute back, purely for debugging purposes
        for key in config_data:
            with open(f"{target_dir}/{key}", "r", encoding="utf-8") as attr_file:
                current_value = attr_file.read()
            logging.debug(
                "%s/%s/%s : %s",
                NETCONSOLE_CONFIGFS_PATH,
                target_name,
                key,
                current_value.strip(),
            )

    except Exception as exception:
        raise KsftFailEx(
            f"Failed to configure netconsole target: {exception}"
        ) from exception
190
191
def netcons_configure_target(
    cfg: NetDrvEpEnv, interface_name: str, target_name: str
) -> None:
    """
    Build the netconsole settings for `interface_name` and create the target.

    Local and remote addresses come from `cfg`; the ports come from the
    NETCONS_LOCAL_PORT / NETCONS_REMOTE_PORT constants.
    """
    # NOTE: key order is preserved when the attributes are written, and
    # "enabled" is kept last (presumably so the target is only armed once
    # everything else is set - confirm against netconsole docs).
    settings = dict(
        extended="1",
        dev_name=interface_name,
        local_port=NETCONS_LOCAL_PORT,
        remote_port=NETCONS_REMOTE_PORT,
        local_ip=cfg.addr,
        remote_ip=cfg.remote_addr,
        remote_mac="00:00:00:00:00:00",  # Not important for this test
        enabled="1",
    )

    netcons_create_target(settings, target_name)
    logging.debug(
        "Created netconsole target: %s on interface %s", target_name, interface_name
    )
211
212
def netcons_delete_target(name: str) -> None:
    """
    Remove the netconsole dynamic target directory called `name`.

    Nothing happens when the target does not exist. Raises KsftFailEx when
    the removal fails with an OSError.
    """
    target_dir = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
    try:
        if not os.path.exists(target_dir):
            return
        os.rmdir(target_dir)
    except OSError as exception:
        raise KsftFailEx(
            f"Failed to delete netconsole target: {exception}"
        ) from exception
223
224
def netcons_load_module() -> None:
    """Best-effort load of the netconsole module; the exit status is ignored."""
    modprobe_cmd = "modprobe netconsole"
    os.system(modprobe_cmd)
228
229
def bpftrace_call() -> None:
    """
    Run bpftrace to count how many times netpoll_poll_dev() is called.

    The result is stored in the global variable `MAPS`, where the main thread
    reads it once this function has finished.
    """
    global MAPS  # pylint: disable=W0603

    # Program handed to bpftrace, as in: bpftrace -e "<program>"
    probe_program = "kprobe:netpoll_poll_dev { @hits = count(); }"

    result = bpftrace(probe_program, timeout=BPFTRACE_TIMEOUT, json=True)
    MAPS = result
    logging.debug("BPFtrace output: %s", MAPS)
243
244
def bpftrace_start():
    """
    Run `bpftrace_call` in a separate thread, stored in the global BPF_THREAD.

    Raises KsftSkipEx if the thread is not alive right after being started.
    """
    global BPF_THREAD  # pylint: disable=W0603

    worker = threading.Thread(target=bpftrace_call)
    BPF_THREAD = worker
    worker.start()
    if worker.is_alive():
        return
    raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
253
254
def bpftrace_stop() -> None:
    """Wait for the bpftrace thread to finish, if one was ever started."""
    if not BPF_THREAD:
        return
    BPF_THREAD.join()
259
260
def bpftrace_any_hit(join: bool) -> bool:
    """
    Tell whether bpftrace saw netpoll_poll_dev() being called.

    The answer comes from the "hits" entry of the global `MAPS`. While the
    bpftrace thread is still running, `join=True` waits for it to finish,
    whereas `join=False` returns False right away. Raises KsftFailEx when
    bpftrace was never started or produced no "hits" entry.
    """
    if not BPF_THREAD:
        raise KsftFailEx("BPFtrace didn't start")

    if BPF_THREAD.is_alive():
        if not join:
            # bpftrace is still running and the result is not available yet
            return False
        # Wait for bpftrace to finish
        BPF_THREAD.join()

    logging.debug("MAPS coming from bpftrace = %s", MAPS)
    if "hits" not in MAPS:
        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")

    hit_count = MAPS["hits"]
    logging.debug("Got a total of %d hits", hit_count)
    return hit_count > 0
280
281
def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Send netconsole messages while bpftrace watches netpoll_poll_dev().

    Raises KsftXfailEx when bpftrace recorded no call to netpoll_poll_dev().
    """
    # bpftrace must already be watching when the messages are sent, so it is
    # started first; its thread is joined through the deferred cleanup
    bpftrace_start()
    defer(bpftrace_stop)

    do_netpoll_flush(cfg, ifname, target_name)

    if not bpftrace_any_hit(join=True):
        raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")

    ksft_pr("netpoll_poll_dev() was called. Success")
296
297
def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Print messages to the console, trying to trigger a netpoll poll.

    Runs up to ITERATIONS rounds. Each round writes MAX_WRITES messages to
    /dev/kmsg and then deletes and re-creates the netconsole target. The loop
    stops early once the bpftrace thread has finished or reported a hit.

    Raises KsftFailEx when writing to /dev/kmsg keeps failing after a few
    retries (the retry budget is shared across all rounds).
    """
    netcons_configure_target(cfg, ifname, target_name)
    retry = 0

    for i in range(ITERATIONS):
        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
            # bpftrace is done, stop sending messages
            break

        msg = f"netcons test #{i}"
        # Open /dev/kmsg unbuffered. With the default buffered text mode the
        # writes below would be coalesced and only reach the kernel when the
        # file is flushed or closed, so they would not be injected as
        # individual messages and the OSError handler could never see a
        # failing write.
        with open("/dev/kmsg", "wb", buffering=0) as kmsg:
            for j in range(MAX_WRITES):
                try:
                    kmsg.write(f"{msg}-{j}\n".encode("utf-8"))
                except OSError as exception:
                    # in some cases, kmsg can be busy, so, we will retry
                    time.sleep(1)
                    retry += 1
                    if retry < 5:
                        logging.info("Failed to write to kmsg. Retrying")
                        # Just retry a few times
                        continue
                    raise KsftFailEx(
                        f"Failed to write to kmsg: {exception}"
                    ) from exception

        netcons_delete_target(target_name)
        netcons_configure_target(cfg, ifname, target_name)
        # If we sleep here, we will have a better chance of triggering
        # This number is based on a few tests I ran while developing this test
        time.sleep(0.4)
330
331
def configure_network(ifname: str) -> None:
    """
    Shrink the interface to single queues and a small ring.

    The previous queue counts and ring sizes are read first and their
    restoration is registered with defer().
    """
    # Force congestion by using a single queue of each kind
    original_queues = ethtool_get_queues_cnt(ifname)
    logging.debug("RX/TX/combined queues: %s", original_queues)
    # Only queue kinds that exist on the device (count > 0) are set to 1
    single_queues = tuple(1 if count > 0 else count for count in original_queues)
    ethtool_set_queues_cnt(ifname, single_queues)
    defer(ethtool_set_queues_cnt, ifname, original_queues)

    # Try progressively larger ring sizes and stop at the first one the
    # hardware accepts. It is not an error if none of them is accepted.
    original_ring = ethtool_get_ringsize(ifname)
    candidate_sizes = [(1, 1), (128, 128), (256, 256)]
    for candidate in candidate_sizes:
        if not ethtool_set_ringsize(ifname, candidate):
            continue
        # hardware accepted the desired ringsize
        logging.debug("Set RX/TX ringsize to: %s from %s", candidate, original_ring)
        break
    defer(ethtool_set_ringsize, ifname, original_ring)
351
352
def test_netpoll(cfg: NetDrvEpEnv) -> None:
    """
    Exercise netpoll: generate traffic on the interface, then send netconsole
    messages to trigger a poll.

    Traffic generation is stopped and the netconsole target is deleted even
    when the monitored flush raises.
    """
    interface = cfg.ifname
    configure_network(interface)
    target_name = netcons_generate_random_target_name()

    traffic = None
    try:
        traffic = GenerateTraffic(cfg)
        do_netpoll_flush_monitored(cfg, interface, target_name)
    finally:
        if traffic:
            traffic.stop()

        # Remove the netconsole target created for this test
        netcons_delete_target(target_name)
373
374
def test_check_dependencies() -> None:
    """Raise KsftSkipEx unless the netconsole configfs directory exists."""
    if os.path.exists(NETCONSOLE_CONFIGFS_PATH):
        return

    raise KsftSkipEx(
        f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
    )
381
382
def main() -> None:
    """
    Entry point: load netconsole, check dependencies and run the test.

    The test cases run inside a NetDrvEpEnv context, and ksft_exit() is
    called once that context has been left.
    """
    netcons_load_module()
    test_check_dependencies()

    with NetDrvEpEnv(__file__) as cfg:
        test_cases = [test_netpoll]
        ksft_run(test_cases, args=(cfg,))

    ksft_exit()
393
394
# Run the test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()