scripts/lib/abi/abi_parser.py at v6.15 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / scripts / lib / abi / abi_parser.py
at v6.15 21 kB view raw
  1#!/usr/bin/env python3
  2# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
  3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
  4# SPDX-License-Identifier: GPL-2.0
  5
  6"""
  7Parse ABI documentation and produce results from it.
  8"""
  9
 10from argparse import Namespace
 11import logging
 12import os
 13import re
 14
 15from pprint import pformat
 16from random import randrange, seed
 17
 18# Import Python modules
 19
 20from helpers import AbiDebug, ABI_DIR
 21
 22
class AbiParser:
    """Main class to parse ABI files"""

    # Alternation with the tag names accepted at the beginning of a line.
    # It is compiled as self.re_valid in __init__() and applied to the
    # lower-cased text found before the first ":" of each line.
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"

    # Matches a /sys, /config, /proc, /dev or /kvd path that is preceded by
    # start-of-string, whitespace or "(" and followed by one of ",.:;)",
    # whitespace or end-of-string. Only the path itself is captured.
    # It is compiled as self.re_xref_node in __init__().
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
 28
 29    def __init__(self, directory, logger=None,
 30                 enable_lineno=False, show_warnings=True, debug=0):
 31        """Stores arguments for the class and initialize class vars"""
 32
 33        self.directory = directory
 34        self.enable_lineno = enable_lineno
 35        self.show_warnings = show_warnings
 36        self.debug = debug
 37
 38        if not logger:
 39            self.log = logging.getLogger("get_abi")
 40        else:
 41            self.log = logger
 42
 43        self.data = {}
 44        self.what_symbols = {}
 45        self.file_refs = {}
 46        self.what_refs = {}
 47
 48        # Ignore files that contain such suffixes
 49        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
 50
 51        # Regular expressions used on parser
 52        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
 53        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
 54        self.re_valid = re.compile(self.TAGS)
 55        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
 56        self.re_whitespace = re.compile(r"^\s+")
 57
 58        # Regular used on print
 59        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
 60        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
 61        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
 62        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
 63        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
 64        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
 65        self.re_xref_node = re.compile(self.XREF)
 66
 67    def warn(self, fdata, msg, extra=None):
 68        """Displays a parse error if warning is enabled"""
 69
 70        if not self.show_warnings:
 71            return
 72
 73        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
 74        if extra:
 75            msg += "\n\t\t" + extra
 76
 77        self.log.warning(msg)
 78
 79    def add_symbol(self, what, fname, ln=None, xref=None):
 80        """Create a reference table describing where each 'what' is located"""
 81
 82        if what not in self.what_symbols:
 83            self.what_symbols[what] = {"file": {}}
 84
 85        if fname not in self.what_symbols[what]["file"]:
 86            self.what_symbols[what]["file"][fname] = []
 87
 88        if ln and ln not in self.what_symbols[what]["file"][fname]:
 89            self.what_symbols[what]["file"][fname].append(ln)
 90
 91        if xref:
 92            self.what_symbols[what]["xref"] = xref
 93
    def _parse_line(self, fdata, line):
        """Parse a single line of an ABI file.

        This is one step of a per-file state machine. The state (current
        tag, current key, list of "What" names, description indentation,
        per-file entry) is kept at fdata, as initialized by parse_file().
        Results are stored at self.data, self.what_symbols and
        self.what_refs.

        fdata is the per-file state namespace and line is the text of the
        line being parsed, as read from the file.
        """

        new_what = False
        new_tag = False
        content = None

        # Split the line into "<name>", ":<spaces>" and the remaining text
        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            # re_valid is searched (not fully matched) inside the name
            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # NOTE(review): the message prints the tag currently
                    # being parsed (fdata.tag), not the rejected name
                    # ("new") - confirm which one is intended.
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                # A new symbol resets the description indentation
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines belong to the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # First "What:" of a new entry: close the previous one
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    # Derive the key from the symbol name, replacing runs
                    # of characters matched by re_unprintable with "_"
                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    # Append pseudo-random ASCII letters until the key is unique
                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # fdata.what aliases the list stored at self.data, so
                    # later appends update the entry too
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                # Register the symbol at the per-file entry
                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            # Any other valid tag, accepted only after a tag was already seen
            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is only set to True on the "what"
                # branch above, which always returns, so this block looks
                # unreachable - confirm before relying on it.
                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    if self.debug == AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        # Replace "description:" by the same amount of
                        # spaces, so the indentation of the text that
                        # follows the tag can be measured
                        sep = sep.replace(":", " ")

                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        # Continuation line of a description
        if fdata.tag == "description":
            content = line.expandtabs()

            # Lines with only whitespace become an empty line
            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                # Strip the indentation recorded for the first line; a line
                # that is indented differently stops further stripping
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            # NOTE(review): fdata.tag is "description" here, so the "what"
            # branch below looks unreachable.
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        # Continuation line of any other tag
        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)
265
266    def parse_readme(self, nametag, fname):
267        """Parse ABI README file"""
268
269        nametag["what"] = ["Introduction"]
270        nametag["path"] = "README"
271        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
272            for line in fp:
273                match = self.re_tag.match(line)
274                if match:
275                    new = match.group(1).lower()
276
277                    match = self.re_valid.search(new)
278                    if match:
279                        nametag["description"] += "\n:" + line
280                        continue
281
282                nametag["description"] += line
283
284    def parse_file(self, fname, path, basename):
285        """Parse a single file"""
286
287        ref = f"abi_file_{path}_{basename}"
288        ref = self.re_unprintable.sub("_", ref).strip("_")
289
290        # Store per-file state into a namespace variable. This will be used
291        # by the per-line parser state machine and by the warning function.
292        fdata = Namespace
293
294        fdata.fname = fname
295        fdata.name = basename
296
297        pos = fname.find(ABI_DIR)
298        if pos > 0:
299            f = fname[pos:]
300        else:
301            f = fname
302
303        fdata.file_ref = (f, ref)
304        self.file_refs[f] = ref
305
306        fdata.ln = 0
307        fdata.what_ln = 0
308        fdata.tag = ""
309        fdata.label = ""
310        fdata.what = []
311        fdata.key = None
312        fdata.xrefs = None
313        fdata.space = None
314        fdata.ftype = path.split("/")[0]
315
316        fdata.nametag = {}
317        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
318        fdata.nametag["type"] = "File"
319        fdata.nametag["path"] = fdata.ftype
320        fdata.nametag["file"] = [fdata.file_ref]
321        fdata.nametag["line_no"] = 1
322        fdata.nametag["description"] = ""
323        fdata.nametag["symbols"] = []
324
325        self.data[ref] = fdata.nametag
326
327        if self.debug & AbiDebug.WHAT_OPEN:
328            self.log.debug("Opening file %s", fname)
329
330        if basename == "README":
331            self.parse_readme(fdata.nametag, fname)
332            return
333
334        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
335            for line in fp:
336                fdata.ln += 1
337
338                self._parse_line(fdata, line)
339
340            if "description" in fdata.nametag:
341                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
342
343            if fdata.key:
344                if "description" not in self.data.get(fdata.key, {}):
345                    self.warn(fdata, f"{fdata.key} doesn't have a description")
346
347                for w in fdata.what:
348                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
349
350    def _parse_abi(self, root=None):
351        """Internal function to parse documentation ABI recursively"""
352
353        if not root:
354            root = self.directory
355
356        with os.scandir(root) as obj:
357            for entry in obj:
358                name = os.path.join(root, entry.name)
359
360                if entry.is_dir():
361                    self._parse_abi(name)
362                    continue
363
364                if not entry.is_file():
365                    continue
366
367                basename = os.path.basename(name)
368
369                if basename.startswith("."):
370                    continue
371
372                if basename.endswith(self.ignore_suffixes):
373                    continue
374
375                path = self.re_abi_dir.sub("", os.path.dirname(name))
376
377                self.parse_file(name, path, basename)
378
379    def parse_abi(self, root=None):
380        """Parse documentation ABI"""
381
382        self._parse_abi(root)
383
384        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
385            self.log.debug(pformat(self.data))
386
387    def desc_txt(self, desc):
388        """Print description as found inside ABI files"""
389
390        desc = desc.strip(" \t\n")
391
392        return desc + "\n\n"
393
394    def xref(self, fname):
395        """
396        Converts a Documentation/ABI + basename into a ReST cross-reference
397        """
398
399        xref = self.file_refs.get(fname)
400        if not xref:
401            return None
402        else:
403            return xref
404
405    def desc_rst(self, desc):
406        """Enrich ReST output by creating cross-references"""
407
408        # Remove title markups from the description
409        # Having titles inside ABI files will only work if extra
410        # care would be taken in order to strictly follow the same
411        # level order for each markup.
412        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
413        desc = desc.rstrip(" \t\n").lstrip("\n")
414
415        # Python's regex performance for non-compiled expressions is a lot
416        # than Perl, as Perl automatically caches them at their
417        # first usage. Here, we'll need to do the same, as otherwise the
418        # performance penalty is be high
419
420        new_desc = ""
421        for d in desc.split("\n"):
422            if d == "":
423                new_desc += "\n"
424                continue
425
426            # Use cross-references for doc files where needed
427            d = self.re_doc.sub(r":doc:`/\1`", d)
428
429            # Use cross-references for ABI generated docs where needed
430            matches = self.re_abi.findall(d)
431            for m in matches:
432                abi = m[0] + m[1]
433
434                xref = self.file_refs.get(abi)
435                if not xref:
436                    # This may happen if ABI is on a separate directory,
437                    # like parsing ABI testing and symbol is at stable.
438                    # The proper solution is to move this part of the code
439                    # for it to be inside sphinx/kernel_abi.py
440                    self.log.info("Didn't find ABI reference for '%s'", abi)
441                else:
442                    new = self.re_escape.sub(r"\\\1", m[1])
443                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
444
445            # Seek for cross reference symbols like /sys/...
446            # Need to be careful to avoid doing it on a code block
447            if d[0] not in [" ", "\t"]:
448                matches = self.re_xref_node.findall(d)
449                for m in matches:
450                    # Finding ABI here is more complex due to wildcards
451                    xref = self.what_refs.get(m)
452                    if xref:
453                        new = self.re_escape.sub(r"\\\1", m)
454                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
455
456            new_desc += d + "\n"
457
458        return new_desc + "\n\n"
459
460    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
461            filter_path=None):
462        """Print ABI at stdout"""
463
464        part = None
465        for key, v in sorted(self.data.items(),
466                             key=lambda x: (x[1].get("type", ""),
467                                            x[1].get("what"))):
468
469            wtype = v.get("type", "Symbol")
470            file_ref = v.get("file")
471            names = v.get("what", [""])
472
473            if wtype == "File":
474                if not show_file:
475                    continue
476            else:
477                if not show_symbols:
478                    continue
479
480            if filter_path:
481                if v.get("path") != filter_path:
482                    continue
483
484            msg = ""
485
486            if wtype != "File":
487                cur_part = names[0]
488                if cur_part.find("/") >= 0:
489                    match = self.re_what.match(cur_part)
490                    if match:
491                        symbol = match.group(1).rstrip("/")
492                        cur_part = "Symbols under " + symbol
493
494                if cur_part and cur_part != part:
495                    part = cur_part
496                    msg += part + "\n"+ "-" * len(part) +"\n\n"
497
498                msg += f".. _{key}:\n\n"
499
500                max_len = 0
501                for i in range(0, len(names)):           # pylint: disable=C0200
502                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
503
504                    max_len = max(max_len, len(names[i]))
505
506                msg += "+-" + "-" * max_len + "-+\n"
507                for name in names:
508                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
509                    msg += "+-" + "-" * max_len + "-+\n"
510                msg += "\n"
511
512            for ref in file_ref:
513                if wtype == "File":
514                    msg += f".. _{ref[1]}:\n\n"
515                else:
516                    base = os.path.basename(ref[0])
517                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
518
519            if wtype == "File":
520                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
521
522            desc = v.get("description")
523            if not desc and wtype != "File":
524                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
525
526            if desc:
527                if output_in_txt:
528                    msg += self.desc_txt(desc)
529                else:
530                    msg += self.desc_rst(desc)
531
532            symbols = v.get("symbols")
533            if symbols:
534                msg += "Has the following ABI:\n\n"
535
536                for w, label in symbols:
537                    # Escape special chars from content
538                    content = self.re_escape.sub(r"\\\1", w)
539
540                    msg += f"- :ref:`{content} <{label}>`\n\n"
541
542            users = v.get("users")
543            if users and users.strip(" \t\n"):
544                users = users.strip("\n").replace('\n', '\n\t')
545                msg += f"Users:\n\t{users}\n\n"
546
547            ln = v.get("line_no", 1)
548
549            yield (msg, file_ref[0][0], ln)
550
551    def check_issues(self):
552        """Warn about duplicated ABI entries"""
553
554        for what, v in self.what_symbols.items():
555            files = v.get("file")
556            if not files:
557                # Should never happen if the parser works properly
558                self.log.warning("%s doesn't have a file associated", what)
559                continue
560
561            if len(files) == 1:
562                continue
563
564            f = []
565            for fname, lines in sorted(files.items()):
566                if not lines:
567                    f.append(f"{fname}")
568                elif len(lines) == 1:
569                    f.append(f"{fname}:{lines[0]}")
570                else:
571                    m = fname + "lines "
572                    m += ", ".join(str(x) for x in lines)
573                    f.append(m)
574
575            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
576
577    def search_symbols(self, expr):
578        """ Searches for ABI symbols """
579
580        regex = re.compile(expr, re.I)
581
582        found_keys = 0
583        for t in sorted(self.data.items(), key=lambda x: [0]):
584            v = t[1]
585
586            wtype = v.get("type", "")
587            if wtype == "File":
588                continue
589
590            for what in v.get("what", [""]):
591                if regex.search(what):
592                    found_keys += 1
593
594                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
595                    date = v.get("date", "").strip(" \t\n")
596                    contact = v.get("contact", "").strip(" \t\n")
597                    users = v.get("users", "").strip(" \t\n")
598                    desc = v.get("description", "").strip(" \t\n")
599
600                    files = []
601                    for f in v.get("file", ()):
602                        files.append(f[0])
603
604                    what = str(found_keys) + ". " + what
605                    title_tag = "-" * len(what)
606
607                    print(f"\n{what}\n{title_tag}\n")
608
609                    if kernelversion:
610                        print(f"Kernel version:\t\t{kernelversion}")
611
612                    if date:
613                        print(f"Date:\t\t\t{date}")
614
615                    if contact:
616                        print(f"Contact:\t\t{contact}")
617
618                    if users:
619                        print(f"Users:\t\t\t{users}")
620
621                    print("Defined on file(s):\t" + ", ".join(files))
622
623                    if desc:
624                        desc = desc.strip("\n")
625                        print(f"\n{desc}\n")
626
627        if not found_keys:
628            print(f"Regular expression /{expr}/ not found.")