Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
4# pylint: disable=R0912,R0915
5
6"""
7Parse a source file or header, creating ReStructured Text cross references.
8
9It accepts an optional file to change the default symbol reference or to
10suppress symbols from the output.
11
It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols, and of creating cross-references for all of them.
It can also distinguish the #define macros that specify a Linux ioctl.
16
17The optional rules file contains a set of rules like:
18
19 ignore ioctl VIDIOC_ENUM_FMT
20 replace ioctl VIDIOC_DQBUF vidioc_qbuf
21 replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22"""
23
24import os
25import re
26import sys
27
28
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols, and of creating cross-references for all of
    them. It can also distinguish the #define macros that specify a Linux
    ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

     ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

     replace <type> <old_symbol> <new_reference>

    Replaces the reference used for old_symbol with a new one. The
    new_reference can be:
     - A simple symbol name;
     - A full Sphinx reference.

    In both cases, <type> can be:
     - ioctl: for defines whose value starts with _IO, e.g. ioctl definitions
     - define: for other defines
     - symbol: for symbols defined within enums;
     - typedef: for typedefs;
     - enum: for the name of a non-anonymous enum;
     - struct: for structs.

    Examples:

     ignore define __LINUX_MEDIA_H
     ignore ioctl VIDIOC_ENUM_FMT
     replace ioctl VIDIOC_DQBUF vidioc_qbuf
     replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # Per-line regexes used by parse_file(), compiled once
    _RE_ENUM_SYMBOL = re.compile(r"^\s*([_\w][\w\d_]+)\s*[\,=]?")
    _RE_IOCTL = re.compile(r"^\s*#\s*define\s+([\w_]+)\s+_IO")
    _RE_DEFINE = re.compile(r"^\s*#\s*define\s+([\w_]+)(\s+|$)")
    _RE_TYPEDEF = re.compile(
        r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);")

    # FIXME: the original code was written a long time before the Sphinx C
    # domain had multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        debug is stored as-is; parse_file() prints each processed line
        when it is greater than 1, and debug_print() is a no-op when it
        is false.
        """
        self.debug = debug
        self.data = ""

        # One dictionary per known symbol type: symbol name -> reference
        self.symbols = {symbol_type: {} for symbol_type in self.DEF_SYMBOL_TYPES}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is the reference text wrapped by the prefix and
        suffix configured for symbol_type at DEF_SYMBOL_TYPES.

        When ref_name is not given, the lowercase symbol name is used as
        the reference target. For ":ref" references, underscores at the
        target are replaced by "-" unless replace_underscores is False.

        Raises KeyError if symbol_type is not a key of DEF_SYMBOL_TYPES.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        if not ref_type:
            # No role configured: keep the bare symbol name
            ref_link = symbol
        elif symbol_type == "enum":
            # Enums link straight to the symbol, ignoring ref_name
            ref_link = f"{ref_type}:`{symbol}`"
        else:
            if not ref_name:
                ref_name = symbol.lower()

            # c-type references don't support hash
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """Stores a line at self.data, indented and with tabs expanded"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str):
        """
        Reads a C source file and gets its identifiers.

        Every line read is appended to self.data via store_line(). The
        identifiers found are recorded at self.symbols via store_type().
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: a trailing backslash splices
                # this line with the next one. Drop the backslash and keep
                # accumulating until a line without it is found.
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        # Comment still open: carry the text to next line
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = self._RE_ENUM_SYMBOL.match(line)
                    if match:
                        self.store_type("symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                # ioctls must be checked before generic defines, as the
                # generic regex also matches them
                match = self._RE_IOCTL.match(line)
                if match:
                    self.store_type("ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = self._RE_DEFINE.match(line)
                if match:
                    self.store_type("define", match.group(1))
                    continue

                match = self._RE_TYPEDEF.match(line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type("typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type("enum", match.group(1))
                        # An enum closed on the very same line must not
                        # leave the parser at enum mode, otherwise the
                        # next declarations would be stored as symbols
                        is_enum = "}" not in line
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type("struct", match.group(1))
                        break

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Does nothing when fname is empty. Blank lines and lines starting
        with "#" are skipped. Calls sys.exit() with a message on a line
        that is neither an ignore nor a replace rule, or on a rule that
        uses a type which is not a key of DEF_SYMBOL_TYPES. A replace rule
        for a symbol that was not parsed only prints a warning.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type, symbol = match.groups()

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol that was never stored is not an error
                    self.symbols[c_type].pop(symbol, None)
                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified
                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype, new = match.groups()

                # If the replacement rule doesn't have a type, get default.
                # The built-in table has no "real_type" entry; that lookup
                # is kept only as a fallback for tables providing it.
                if not reftype:
                    defs = self.DEF_SYMBOL_TYPES[c_type]
                    reftype = defs.get("ref_type") or defs.get("real_type")

                new_ref = f"{reftype}:`{old} <{new}>`"

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.

        Prints nothing when self.debug is false.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f" {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content with Sphinx special characters
        escaped and with each known symbol replaced by its reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was already escaped, so escape the symbol the
                # same way before turning it into a regex
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))

                # Use a callable so the replacement text is inserted
                # verbatim, instead of being parsed as a regex template
                text = re.sub(
                    fr'{start_delim}{symbol}{end_delim}',
                    lambda m, ref=replacement: m.group(1) + ref + m.group(2),
                    text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the table as a single string, with one section per
        non-empty symbol type, ordered by the type description.
        """
        # Add header
        text = [
            ".. contents:: Table of Contents",
            " :depth: 2",
            " :local:",
            "",
        ]

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:
            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _symbol, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the generated ReST content to file_out.

        The basename of file_in is used as the document title. When toc
        is true, the body is the result of gen_toc(); otherwise it is the
        result of gen_output(), placed inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)