Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5Test driver resilience vs page pool allocation failures.
6"""
7
8import errno
9import time
10import math
11import os
12from lib.py import ksft_run, ksft_exit, ksft_pr
13from lib.py import KsftSkipEx, KsftFailEx
14from lib.py import NetdevFamily, NlError
15from lib.py import NetDrvEpEnv
16from lib.py import cmd, tool, GenerateTraffic
17
18
def _write_fail_config(config):
    """
    Store every entry of config in the fail_function debugfs directory.

    Each key names a file directly under /sys/kernel/debug/fail_function/
    and each value is written to that file as its str() form followed by
    a newline. Errors from open()/write() propagate to the caller.
    """
    debugfs_prefix = "/sys/kernel/debug/fail_function/"
    for knob, setting in config.items():
        line = f"{setting!s}\n"
        with open(debugfs_prefix + knob, "w", encoding='ascii') as knob_file:
            knob_file.write(line)
24
25
def _enable_pp_allocation_fail():
    """
    Turn on error injection for page_pool_alloc_netmems.

    Raises KsftSkipEx when the fail_function debugfs directory is absent.
    Registers page_pool_alloc_netmems through the "inject" file unless an
    entry for it already exists, then writes the injection knobs.
    """
    debugfs_dir = "/sys/kernel/debug/fail_function"
    target_entry = "/sys/kernel/debug/fail_function/page_pool_alloc_netmems"

    if not os.path.exists(debugfs_dir):
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    # Only register the function when it has no entry yet.
    already_registered = os.path.exists(target_entry)
    if not already_registered:
        _write_fail_config({"inject": "page_pool_alloc_netmems"})

    # NOTE(review): knob meanings (interval/probability/times) come from the
    # kernel fault-injection framework -- confirm against its documentation.
    knobs = {
        "verbose": 0,
        "interval": 511,
        "probability": 100,
        "times": -1,
    }
    _write_fail_config(knobs)
39
40
def _disable_pp_allocation_fail():
    """
    Turn error injection back off.

    Does nothing when the fail_function debugfs directory is absent.
    Otherwise writes an empty string to "inject" if an entry for
    page_pool_alloc_netmems exists, and zeroes "probability" and "times".
    """
    debugfs_dir = "/sys/kernel/debug/fail_function"
    target_entry = "/sys/kernel/debug/fail_function/page_pool_alloc_netmems"

    if os.path.exists(debugfs_dir):
        if os.path.exists(target_entry):
            _write_fail_config({"inject": ""})

        reset_knobs = {
            "probability": 0,
            "times": 0,
        }
        _write_fail_config(reset_knobs)
52
53
def test_pp_alloc(cfg, netdevnl):
    """
    Configure page pool allocation fail injection while traffic is running.

    Flow: verify qstats reports 'rx-alloc-fail', start traffic, enable
    error injection, check that the failure counter grows fast enough
    relative to received packets, then try to resize the RX ring with
    ethtool and make sure traffic keeps flowing.

    Raises KsftSkipEx when 'rx-alloc-fail' is not reported (including when
    the qstats request returns EOPNOTSUPP), or when allocation failures do
    not increase at the expected rate. Raises KsftFailEx when fewer than
    4000 rx packets are counted across a one-second sleep at any traffic
    check.

    Cleanup (disable injection, stop traffic, restore the ring size) always
    runs, and each step runs even if an earlier cleanup step raised.
    """

    def get_stats():
        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]

    def check_traffic_flowing():
        stat1 = get_stats()
        time.sleep(1)
        stat2 = get_stats()
        if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
            raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])

    # A driver without qstats support is treated like one without the counter.
    try:
        stats = get_stats()
    except NlError as e:
        if e.nl_msg.error == -errno.EOPNOTSUPP:
            stats = {}
        else:
            raise
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    set_g = False
    traffic = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        s1 = get_stats()
        time.sleep(3)
        s2 = get_stats()

        seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
        if seen_fails < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        pkts = s2['rx-packets'] - s1['rx-packets']
        # Expecting one failure per 512 buffers, 3.1x safety margin
        want_fails = math.floor(pkts / 512 / 3.1)
        if seen_fails < want_fails:
            raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
                             "packets:", pkts)
        ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        # Double the RX ring when the maximum allows it, otherwise halve it.
        if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
            new_g = g['rx'] * 2
        elif 'rx' in g:
            new_g = g['rx'] // 2
        else:
            new_g = None

        if new_g:
            # The resize is allowed to fail; only a successful change is undone later.
            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
            if set_g:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_g)
        else:
            ksft_pr("ethtool -G change retval: did not try")

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        # Every cleanup step sits in its own try/finally so that an exception
        # in one step (e.g. a failed debugfs write) cannot skip the others and
        # leave traffic running or the ring size modified.
        try:
            _disable_pp_allocation_fail()
        finally:
            try:
                if traffic:
                    traffic.stop()
                    time.sleep(0.1)
            finally:
                # set_g is only True after g was read, so g is bound here.
                if set_g:
                    cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
132
133
def main() -> None:
    """Run test_pp_alloc through the ksft harness and report the result."""
    family = NetdevFamily()
    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        cases = [test_pp_alloc]
        ksft_run(cases, args=(cfg, family))
    ksft_exit()
141
142
# Only run the test when executed as a script, not when imported.
if __name__ == "__main__":
    main()