#!/usr/bin/env python3
# pylint: disable=R0902,R0912,R0914,R0915,R1702
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

import os
import re
import sys

from concurrent import futures
from datetime import datetime
from random import shuffle

from abi.helpers import AbiDebug

class SystemSymbols:
    """Stores arguments for the class and initializes class vars"""

    def graph_add_file(self, path, link=None):
        """
        Add a file path to the sysfs graph stored at self.root
        """

        if path in self.files:
            return

        name = ""
        ref = self.root
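        # Each path component becomes a nested dict node; the special
        # "__name" key keeps the canonical path first, followed by any
        # symlink aliases that point at it.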
        for edge in path.split("/"):
            name += edge + "/"
            if edge not in ref:
                ref[edge] = {"__name": [name.rstrip("/")]}

            ref = ref[edge]

        if link and link not in ref["__name"]:
            ref["__name"].append(link.rstrip("/"))

        self.files.add(path)

    def print_graph(self, root_prefix="", root=None, level=0):
        """Prints a reference tree graph using UTF-8 characters"""

        if not root:
            root = self.root
            level = 0

        # Prevent endless traversal
        if level > 5:
            return

        if level > 0:
            prefix = "├──"
            last_prefix = "└──"
        else:
            prefix = ""
            last_prefix = ""

        items = list(root.items())

        names = root.get("__name", [])
        for k, edge in items:
            if k == "__name":
                continue

            if not k:
                k = "/"

            if len(names) > 1:
                k += " links: " + ",".join(names[1:])

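            # The last child gets the closing "└──" corner; other children
            # keep "├──" and a "│" continuation prefix for their sub-trees.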
            if edge == items[-1][1]:
                print(root_prefix + last_prefix + k)
                p = root_prefix
                if level > 0:
                    p += " "
                self.print_graph(p, edge, level + 1)
            else:
                print(root_prefix + prefix + k)
                p = root_prefix + "│ "
                self.print_graph(p, edge, level + 1)

    def _walk(self, root):
        """
        Walk through sysfs to get all devnodes that aren't ignored.

        By default, uses /sys as the sysfs mount point. If another
        directory is used, it is replaced with /sys in the stored paths.
        """

        with os.scandir(root) as obj:
            for entry in obj:
                path = os.path.join(root, entry.name)
                if self.sysfs:
                    p = path.replace(self.sysfs, "/sys", 1)
                else:
                    p = path

                if self.re_ignore.search(p):
                    continue

                # Handle link first to avoid directory recursion
                if entry.is_symlink():
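                    # Resolve the link target; when walking the real /sys,
                    # also record the alias under self.aliases.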
                    real = os.path.realpath(path)
                    if not self.sysfs:
                        self.aliases[path] = real
                    else:
                        real = real.replace(self.sysfs, "/sys", 1)

                    # Add the real file location to the graph if it
                    # isn't ignored
                    if not self.re_ignore.search(real):
                        # Add link to the graph
                        self.graph_add_file(real, p)

                elif entry.is_file():
                    self.graph_add_file(p)

                elif entry.is_dir():
                    self._walk(path)

    def __init__(self, abi, sysfs="/sys", hints=False):
        """
        Initialize internal variables and get a list of all files inside
        sysfs that can currently be parsed.

        Please notice that there are several entries in sysfs that aren't
        documented as ABI. Ignore those.

        The real paths will be stored under self.files. Aliases will be
        stored separately, as self.aliases.
        """

        self.abi = abi
        self.log = abi.log

        if sysfs != "/sys":
            self.sysfs = sysfs.rstrip("/")
        else:
            self.sysfs = None

        self.hints = hints

        self.root = {}
        self.aliases = {}
        self.files = set()

        dont_walk = [
            # Those require root access and aren't documented as ABI
            f"^{sysfs}/kernel/debug",
            f"^{sysfs}/kernel/tracing",
            f"^{sysfs}/fs/pstore",
            f"^{sysfs}/fs/bpf",
            f"^{sysfs}/fs/fuse",

            # This is not documented as ABI
            f"^{sysfs}/module",

            f"^{sysfs}/fs/cgroup",  # this is big and has zero docs under ABI
            f"^{sysfs}/firmware",   # documented elsewhere: ACPI, DT bindings
            "sections|notes",       # aren't actually part of ABI

            # kernel-parameters.txt - not easy to parse
            "parameters",
        ]

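        # Combine all the patterns above into a single regex; _walk() uses
        # it to skip anything that matches.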
        self.re_ignore = re.compile("|".join(dont_walk))

        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
        self._walk(sysfs)

    def check_file(self, refs, found):
        """Check a chunk of sysfs files for missing ABI symbols"""

        res_list = []

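        # A KeyboardInterrupt is swallowed here so that an interrupted
        # worker still returns the partial results it has collected.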
        try:
            for names in refs:
                fname = names[0]

                res = {
                    "found": False,
                    "fname": fname,
                    "msg": "",
                }
                res_list.append(res)

                re_what = self.abi.get_regexes(fname)
                if not re_what:
                    self.abi.log.warning(f"missing rules for {fname}")
                    continue

                for name in names:
                    for r in re_what:
                        if self.abi.debug & AbiDebug.UNDEFINED:
                            self.log.debug("check if %s matches '%s'", name, r.pattern)
                        if r.match(name):
                            res["found"] = True
                            if found:
                                res["msg"] += f" {fname}: regex:\n\t"
                            continue

                if self.hints and not res["found"]:
                    res["msg"] += f" {fname} not found. Tested regexes:\n"
                    for r in re_what:
                        res["msg"] += " " + r.pattern + "\n"

        except KeyboardInterrupt:
            pass

        return res_list

    def _ref_interactor(self, root):
        """Recursive generator to iterate over the sysfs tree"""

        for k, v in root.items():
            if isinstance(v, dict):
                yield from self._ref_interactor(v)

            if root == self.root or k == "__name":
                continue

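            # Honour the optional search regex: only yield nodes whose
            # canonical name matches it.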
            if self.abi.re_string:
                fname = v["__name"][0]
                if self.abi.re_string.search(fname):
                    yield v
            else:
                yield v


    def get_fileref(self, all_refs, chunk_size):
        """Generator to group refs into chunks"""

        n = 0
        refs = []

        for ref in all_refs:
            refs.append(ref)

            n += 1
            if n >= chunk_size:
                yield refs
                n = 0
                refs = []

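        # Yield whatever is left over: the final chunk may be shorter than
        # chunk_size, or even empty (the caller skips empty chunks).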
        yield refs

    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
                                found=None, dry_run=None):
        """Search ABI for sysfs symbols missing documentation"""

        self.abi.parse_abi()

        if self.abi.debug & AbiDebug.GRAPH:
            self.print_graph()

        all_refs = []
        for ref in self._ref_interactor(self.root):
            all_refs.append(ref["__name"])

        if dry_run:
            print("Would check", file=sys.stderr)
            for ref in all_refs:
                print(", ".join(ref))

            return

        print("Starting to search symbols (it may take several minutes):",
              file=sys.stderr)
        start = datetime.now()
        old_elapsed = None

        # Python threads can't run CPU-bound work in parallel because of the
        # global interpreter lock (GIL). While Python 3.13 finally made the
        # GIL optional, there are still issues related to it. Also, we want
        # to keep backward compatibility with older versions of Python.
        #
        # So, use multiple processes instead. However, Python is very slow at
        # passing data from/to multiple processes, and it may consume lots of
        # memory if the data to be shared is not small. So, group the
        # workload in chunks that are big enough to generate performance
        # gains while not being so big that they would cause out-of-memory.

        num_refs = len(all_refs)
        print(f"Number of references to parse: {num_refs}", file=sys.stderr)

        if not max_workers:
            max_workers = os.cpu_count()
        elif max_workers > os.cpu_count():
            max_workers = os.cpu_count()

        max_workers = max(max_workers, 1)

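        # Clamp the chunk size so that every worker gets at least one chunk,
        # and never let it drop below one reference per chunk.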
        max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
        chunk_size = min(chunk_size, max_chunk_size)
        chunk_size = max(1, chunk_size)

        if max_workers > 1:
            executor = futures.ProcessPoolExecutor

            # Place references in a random order. This may help improve
            # performance by mixing complex/simple expressions when creating
            # chunks
            shuffle(all_refs)
        else:
            # Python has a high overhead with processes. When there's just
            # one worker, it is faster to not create a new process.
            # Yet, the user still deserves a progress print. So, use
            # Python's ThreadPoolExecutor: its threads all run in a single
            # process and, due to the GIL, effectively take turns. There are
            # no performance gains for non-IO tasks, but the loop below can
            # still be interrupted from time to time to display progress.
            executor = futures.ThreadPoolExecutor

        not_found = []
        f_list = []
        with executor(max_workers=max_workers) as exe:
            for refs in self.get_fileref(all_refs, chunk_size):
                if refs:
                    try:
                        f_list.append(exe.submit(self.check_file, refs, found))

                    except KeyboardInterrupt:
                        return

            total = len(f_list)

            if not total:
                if self.abi.re_string:
                    print(f"No ABI symbol matches {self.abi.search_string}")
                else:
                    self.abi.log.warning("No ABI symbols found")
                return

            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
                  file=sys.stderr)

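            # Wait for results with a short timeout so the progress line
            # below can be refreshed while jobs are still running.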
            while f_list:
                try:
                    t = futures.wait(f_list, timeout=1,
                                     return_when=futures.FIRST_COMPLETED)

                    done = t[0]

                    for fut in done:
                        res_list = fut.result()

                        for res in res_list:
                            if not res["found"]:
                                not_found.append(res["fname"])
                            if res["msg"]:
                                print(res["msg"])

                        f_list.remove(fut)
                except KeyboardInterrupt:
                    return

                except RuntimeError as e:
                    self.abi.log.warning(f"Future: {e}")
                    break

                if sys.stderr.isatty():
                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
                    if len(f_list) < total:
                        elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
                    if elapsed != old_elapsed:
                        print(elapsed + "\r", end="", flush=True,
                              file=sys.stderr)
                        old_elapsed = elapsed

        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
        print(elapsed, file=sys.stderr)

        for f in sorted(not_found):
            print(f"{f} not found.")