Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Parse ABI documentation and produce results from it.
8"""
9
10from argparse import Namespace
11import logging
12import os
13import re
14
15from pprint import pformat
16from random import randrange, seed
17
18# Import Python modules
19
20from helpers import AbiDebug, ABI_DIR
21
22
class AbiParser:
    """Main class to parse ABI files.

    After parse_abi() runs, the results are available on these attributes:

    - ``data``: maps a generated key (``abi_...`` for symbols,
      ``abi_file_...`` for files) to a dict holding ``what``, ``file``,
      ``path``, ``line_no`` and the content of each tag that was found;
    - ``what_symbols``: maps each ``What:`` string to the files (and line
      numbers) defining it, plus the ``xref`` key it belongs to;
    - ``file_refs``: maps a file name to its cross-reference label;
    - ``what_refs``: maps each ``What:`` string to its key inside ``data``.
    """

    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """Stores arguments for the class and initialize class vars"""

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used on parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """Displays a parse error if warning is enabled

        The message is prefixed with ``fdata.fname:fdata.ln``. If ``extra``
        is given, it is appended on a new, tab-indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """Create a reference table describing where each 'what' is located

        Records at ``self.what_symbols`` that ``what`` is defined at file
        ``fname``. A truthy ``ln`` is added once to the line list of that
        file; a truthy ``xref`` is stored as the symbol cross-reference key.
        """

        symbol = self.what_symbols.setdefault(what, {"file": {}})
        lines = symbol["file"].setdefault(fname, [])

        if ln and ln not in lines:
            lines.append(ln)

        if xref:
            symbol["xref"] = xref

    def _parse_line(self, fdata, line):
        """Parse a single line of an ABI file

        ``fdata`` carries the per-file parser state (current tag, current
        key, indentation of the description, ...) and is updated in place.
        Parsed content is stored at ``self.data[fdata.key]``.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # Report the offending tag, not the currently open one
                    self.warn(fdata, f"tag '{new}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Another "What:" for the symbol group being parsed
                    fdata.what.append(content.strip("\n"))
                else:
                    # First "What:" of a new group: close the previous one
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols. Note that this reseeds the module-level
                    # random generator.
                    seed(42)

                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # From now on, fdata.what aliases the stored list
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

        if fdata.tag and new_tag:
            fdata.tag = new_tag

            # NOTE(review): new_what is only set on the "What:" path, which
            # returns above, so this branch looks unreachable - confirm
            # before relying on it.
            if new_what:
                fdata.label = ""

                if "description" in self.data[fdata.key]:
                    self.data[fdata.key]["description"] += "\n\n"

                if fdata.file_ref not in self.data[fdata.key]["file"]:
                    self.data[fdata.key]["file"].append(fdata.file_ref)

                # debug is tested as a bitmask everywhere else in the class
                if self.debug & AbiDebug.WHAT_PARSING:
                    self.log.debug("what: %s", fdata.what)

            if not fdata.what:
                self.warn(fdata, "'What:' should come first:", line)
                return

            if new_tag == "description":
                fdata.space = None

                if content:
                    sep = sep.replace(":", " ")

                    # Rebuild the line with the tag replaced by blanks, so
                    # the indentation of the text that follows the tag can
                    # be measured after expanding tabs
                    c = " " * len(new_tag) + sep + content
                    c = c.expandtabs()

                    match = self.re_start_spc.match(c)
                    if match:
                        # Preserve initial spaces for the first line
                        fdata.space = match.group(1)
                        content = match.group(2) + "\n"

            self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            # Blank line inside a description
            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]
                else:
                    # Line is less indented than the first one: stop
                    # stripping indentation from here on
                    fdata.space = ""

            self.data[fdata.key][fdata.tag] += content
            return

        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_readme(self, nametag, fname):
        """Parse ABI README file

        The whole file content is appended to ``nametag["description"]``.
        Lines that start with one of the ABI tags are prefixed with a
        newline and a colon.
        """

        nametag["what"] = ["Introduction"]
        nametag["path"] = "README"
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                match = self.re_tag.match(line)
                if match:
                    new = match.group(1).lower()

                    match = self.re_valid.search(new)
                    if match:
                        nametag["description"] += "\n:" + line
                        continue

                nametag["description"] += line

    def parse_file(self, fname, path, basename):
        """Parse a single file

        ``fname`` is the file to open, ``path`` is its directory relative
        to the ABI directory and ``basename`` is the file name. A "File"
        entry is added to ``self.data``, plus one entry per group of
        ``What:`` symbols found on it.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        # It has to be an instance: assigning attributes to the Namespace
        # class itself would modify argparse.Namespace globally.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        fdata.nametag = {}
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["path"] = fdata.ftype
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        if basename == "README":
            self.parse_readme(fdata.nametag, fname)
            return

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

            if "description" in fdata.nametag:
                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

            # Close the last group of symbols found on the file
            if fdata.key:
                if "description" not in self.data.get(fdata.key, {}):
                    self.warn(fdata, f"{fdata.key} doesn't have a description")

                for w in fdata.what:
                    self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """Internal function to parse documentation ABI recursively"""

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            # os.scandir() yields entries in arbitrary order. Sort them, as
            # the suffix added to duplicated keys depends on parsing order.
            for entry in sorted(obj, key=lambda e: e.name):
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI"""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Print description as found inside ABI files"""

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    def xref(self, fname):
        """
        Converts a Documentation/ABI + basename into a ReST cross-reference

        Returns None if ``fname`` is unknown.
        """

        return self.file_refs.get(fname) or None

    def _xref_node_repl(self, match):
        """Replace a re_xref_node match by a reference to a known symbol

        The characters matched around the node (leading blank or "(",
        trailing delimiter) are preserved. Matches that are not a known
        ``What:`` symbol are returned unchanged.
        """

        node = match.group(1)
        target = self.what_refs.get(node)
        if not target:
            return match.group(0)

        escaped = self.re_escape.sub(r"\\\1", node)

        whole = match.group(0)
        start = match.start(1) - match.start(0)
        end = match.end(1) - match.start(0)

        return f"{whole[:start]}:ref:`{escaped} <{target}>`{whole[end:]}"

    def desc_rst(self, desc):
        """Enrich ReST output by creating cross-references"""

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl, as Perl automatically caches them at their
        # first usage. Here, we'll need to do the same, as otherwise the
        # performance penalty would be high

        out = []
        for d in desc.split("\n"):
            if d == "":
                out.append("\n")
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            for m in self.re_abi.findall(d):
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    ref = f":ref:`{new} <{xref}>`"

                    # A callable is used to insert the text literally,
                    # as it contains backslashes
                    d = re.sub(fr"\b{re.escape(abi)}\b",
                               lambda _m, ref=ref: ref, d)

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in (" ", "\t"):
                # The nodes start with "/" and come after a blank, "(" or
                # the start of the line, so a \b-anchored search for them
                # can't match. Replace them where re_xref_node found them.
                d = self.re_xref_node.sub(self._xref_node_repl, d)

            out.append(d + "\n")

        return "".join(out) + "\n\n"

    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
            filter_path=None):
        """Print ABI at stdout

        This is a generator: for each entry at ``self.data`` that passes
        the filters, it yields a ``(msg, file name, line number)`` tuple,
        where ``msg`` is the ReST text produced for the entry.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Symbol")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if wtype == "File":
                if not show_file:
                    continue
            else:
                if not show_symbols:
                    continue

            if filter_path:
                if v.get("path") != filter_path:
                    continue

            msg = ""

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    msg += part + "\n" + "-" * len(part) + "\n\n"

                msg += f".. _{key}:\n\n"

                # Escape into a new list: "names" is the list stored at
                # self.data, and changing it would make further calls
                # escape the symbols again
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]

                max_len = max((len(name) for name in names), default=0)

                msg += "+-" + "-" * max_len + "-+\n"
                for name in names:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += "+-" + "-" * max_len + "-+\n"
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                msg += names[0] + "\n" + "-" * len(names[0]) + "\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace('\n', '\n\t')
                msg += f"Users:\n\t{users}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about duplicated ABI entries"""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    m = fname + " lines "
                    m += ", ".join(str(x) for x in lines)
                    f.append(m)

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """ Searches for ABI symbols

        Prints, ordered by key, every symbol whose name matches the
        case-insensitive regular expression ``expr``.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for t in sorted(self.data.items(), key=lambda x: x[0]):
            v = t[1]

            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if regex.search(what):
                    found_keys += 1

                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
                    date = v.get("date", "").strip(" \t\n")
                    contact = v.get("contact", "").strip(" \t\n")
                    users = v.get("users", "").strip(" \t\n")
                    desc = v.get("description", "").strip(" \t\n")

                    files = [f[0] for f in v.get("file", ())]

                    what = str(found_keys) + ". " + what
                    title_tag = "-" * len(what)

                    print(f"\n{what}\n{title_tag}\n")

                    if kernelversion:
                        print(f"Kernel version:\t\t{kernelversion}")

                    if date:
                        print(f"Date:\t\t\t{date}")

                    if contact:
                        print(f"Contact:\t\t{contact}")

                    if users:
                        print(f"Users:\t\t\t{users}")

                    print("Defined on file(s):\t" + ", ".join(files))

                    if desc:
                        desc = desc.strip("\n")
                        print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")