Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
4# pylint: disable=R0912,R0915
5
6"""
7Parse a source file or header, creating ReStructured Text cross references.
8
9It accepts an optional file to change the default symbol reference or to
10suppress symbols from the output.
11
12It is capable of identifying defines, functions, structs, typedefs,
13enums and enum symbols, and of creating cross-references for all of them.
14It is also capable of distinguishing #defines used for specifying Linux
15ioctls.
16
17The optional rules file contains a set of rules like:
18
19 ignore ioctl VIDIOC_ENUM_FMT
20 replace ioctl VIDIOC_DQBUF vidioc_qbuf
21 replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22"""
23
24import os
25import re
26import sys
27
28
class ParseDataStructs:
    """
    Create an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It identifies defines, ioctl defines, structs, typedefs, enums and
    enum symbols, and creates cross-references for all of them.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exceptions file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

       Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

       Replaces the reference generated for old_symbol with a new one.
       The new_reference can be:

        - A simple symbol name;
        - A full Sphinx reference.

    3. Namespace rules

        namespace <namespace>

       Sets the C namespace to be used during cross-reference generation.
       Can be overridden by replace rules.

    On ignore and replace rules, <type> can be:
        - ioctl: for defines whose value starts with _IO, e.g. ioctl
          definitions;
        - define: for other defines;
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    # Parser regexes with multiple ways to capture enums and structs.
    # NOTE: the struct patterns (like a few patterns further below) only
    # match identifiers that are at least two characters long.
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # Rules accepted inside an exceptions file
    _RE_RULE_IGNORE = re.compile(r"^ignore\s+(\w+)\s+(\S+)")
    _RE_RULE_REPLACE = re.compile(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)")
    _RE_RULE_NAMESPACE = re.compile(r"^namespace\s+(\S+)")

    # Per-line patterns used while scanning the C source
    _RE_BLOCK_COMMENT = re.compile(r"/\*.*?\*/")
    _RE_LINE_COMMENT = re.compile(r"(//.*)")
    _RE_ENUM_SYMBOL = re.compile(r"^\s*([_\w][\w\d_]+)\s*[\,=]?")
    _RE_IOCTL = re.compile(r"^\s*#\s*define\s+([\w_]+)\s+_IO")
    _RE_DEFINE = re.compile(r"^\s*#\s*define\s+([\w_]+)(\s+|$)")
    _RE_TYPEDEF = re.compile(
        r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);")

    # FIXME: the original code was written a long time before the Sphinx C
    # domain gained support for multiple namespaces. To avoid too much
    # churn on the existing hyperlinks, the code kept using "c:type"
    # instead of the right types. To change that, we need to change the
    # types not only here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); values greater than 1 also make
        parse_file() echo every statement it analyses.
        """
        self.debug = debug
        self.data = ""

        # One {symbol: (reference, line)} dictionary per symbol type
        self.symbols = {symbol_type: {} for symbol_type in self.DEF_SYMBOL_TYPES}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Basename of the last exceptions file read; used on diagnostics
        self.exceptions_name = None

    def read_exceptions(self, fname: str):
        """
        Read the rules from the exceptions file ``fname``.

        Ignore rules are appended to self.ignore, replace rules to
        self.replace, and a namespace rule sets self.namespace. Blank
        lines and lines starting with "#" are skipped. Nothing is done
        when ``fname`` is empty or None.

        Exits via sys.exit() if a line doesn't match any known rule.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        # apply_exceptions() needs the file name for its own messages
        self.exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, 1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = self._RE_RULE_IGNORE.match(line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = self._RE_RULE_REPLACE.match(line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = self._RE_RULE_NAMESPACE.match(line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Ignore rules drop the symbol from self.symbols; replace rules
        swap the stored reference, keeping the line where the symbol was
        found. A replace rule for an unknown symbol only prints a warning.

        Exits via sys.exit() if a rule uses an unknown symbol type.
        """
        name = self.exceptions_name or "<exceptions>"

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            self.symbols[c_type].pop(symbol, None)

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified
            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        ``ln`` is stored together with the reference; ``ref_name`` is the
        reference target and defaults to the lower-cased symbol.

        By default, underscores are replaced by "-" on ":ref" targets.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # Only ":ref" targets get their underscores turned into dashes
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type can be
            # unset here when only the namespace is defined.
            if ref_type and ref_type.startswith(":c:") and self.namespace:
                ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Reads a C source file and gets its identifiers.

        Every line read is appended to self.data via store_line(). The
        symbols found are kept at self.symbols, associated with the index
        of the line, as produced by enumerate(), where the statement
        ended. The rules from the optional ``exceptions`` file are read
        before parsing and applied at the end.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and keep accumulating until the statement ends
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if "/*" in line:
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = self._RE_LINE_COMMENT.sub("", line)

                if is_comment:
                    if "*/" in line:
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments.
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below.
                # The match is non-greedy to preserve code placed between
                # two comments.
                line = self._RE_BLOCK_COMMENT.sub("", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two-pass logic here due to:
                # 1. it makes easier to debug issues with not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = self._RE_ENUM_SYMBOL.match(line)
                    if match:
                        self.store_type(line_no, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = self._RE_IOCTL.match(line)
                if match:
                    self.store_type(line_no, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = self._RE_DEFINE.match(line)
                if match:
                    self.store_type(line_no, "define", match.group(1))
                    continue

                match = self._RE_TYPEDEF.match(line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(line_no, "typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(line_no, "enum", match.group(1))

                        # An enum closed on its own declaration line must
                        # not leave the parser waiting for a "}"
                        is_enum = "}" not in line[match.end():]
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(line_no, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, (ref, ln) in sorted(refs.items()):
                print(f" #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored source with Sphinx special characters escaped
        and each known symbol replaced by its cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The symbol has to be escaped just like the text was
                escaped = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])",
                                           r"\\\1", symbol))

                # A function replacement inserts the reference verbatim,
                # instead of parsing it as a regex replacement template
                text = re.sub(f"{start_delim}{escaped}{end_delim}",
                              lambda m, ref=replacement:
                              f"{m.group(1)}{ref}{m.group(2)}",
                              text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table.

        Symbol types are sorted per description and symbols are sorted
        alphabetically inside each type. Types without symbols are
        omitted. Returns the resulting text.
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (defs["description"], c_type)
            for c_type, defs in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:
            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the ReST output to ``file_out``.

        The output is titled with the basename of ``file_in``. It
        contains either the symbols TOC, when ``toc`` is true, or the
        cross-referenced source inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)