Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
4
5
6"""
7Implementation of the ``kernel-include`` reST-directive.
8
9:copyright: Copyright (C) 2016 Markus Heiser
10:license: GPL Version 2, June 1991 see linux/COPYING for details.
11
The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations.
15
16.. hint::
17
   Including files from arbitrary locations (e.g. from ``/etc``) is a
   security risk for builders. This is why the ``include`` directive from
   docutils *prohibits* pathnames pointing to locations *above* the filesystem
   tree where the reST document with the include directive is placed.
22
23Substrings of the form $name or ${name} are replaced by the value of
24environment variable name. Malformed variable names and references to
25non-existing variables are left unchanged.
26
27**Supported Sphinx Include Options**:
28
29:param literal:
30 If present, the included file is inserted as a literal block.
31
32:param code:
33 Specify the language for syntax highlighting (e.g., 'c', 'python').
34
35:param encoding:
36 Specify the encoding of the included file (default: 'utf-8').
37
38:param tab-width:
39 Specify the number of spaces that a tab represents.
40
41:param start-line:
42 Line number at which to start including the file (1-based).
43
44:param end-line:
45 Line number at which to stop including the file (inclusive).
46
47:param start-after:
48 Include lines after the first line matching this text.
49
50:param end-before:
51 Include lines before the first line matching this text.
52
53:param number-lines:
54 Number the included lines (integer specifies start number).
55 Only effective with 'literal' or 'code' options.
56
57:param class:
58 Specify HTML class attribute for the included content.
59
60**Kernel-specific Extensions**:
61
62:param generate-cross-refs:
63 If present, instead of directly including the file, it calls
64 ParseDataStructs() to convert C data structures into cross-references
65 that link to comprehensive documentation in other ReST files.
66
67:param exception-file:
68 (Used with generate-cross-refs)
69
70 Path to a file containing rules for handling special cases:
71 - Ignore specific C data structures
72 - Use alternative reference names
73 - Specify different reference types
74
75:param warn-broken:
76 (Used with generate-cross-refs)
77
78 Enables warnings when auto-generated cross-references don't point to
79 existing documentation targets.
80"""
81
82# ==============================================================================
83# imports
84# ==============================================================================
85
86import os.path
87import re
88import sys
89
90from difflib import get_close_matches
91
92from docutils import io, nodes, statemachine
93from docutils.statemachine import ViewList
94from docutils.parsers.rst import Directive, directives
95from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
96
97from sphinx.util import logging
98
# Absolute path of the kernel source tree, taken from the "srctree"
# environment variable. A missing variable raises KeyError at import time.
# NOTE(review): presumably exported by the documentation build - confirm.
srctree = os.path.abspath(os.environ["srctree"])
# Make the in-tree Python libraries importable before importing kdoc below.
sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))

from kdoc.parse_data_structs import ParseDataStructs

# Version reported to Sphinx by setup()
__version__ = "1.0"
logger = logging.getLogger(__name__)

# Patterns for cross-reference markup. Only RE_LINENO_REF is referenced by
# the code in this part of the file; it matches TOC entries of the form
# "- LINENO_<number>: <reference>".
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
111
def ErrorString(exc):  # Shamelessly stolen from docutils
    """
    Format an exception as ``<ExceptionClassName>: <message>``.

    :param exc: the exception instance to describe.
    :return: a string with the exception class name followed by ``str(exc)``.
    """
    # The class name lives in ``__name__``; ``__name`` does not exist and
    # would raise AttributeError while reporting the original error.
    return f'{exc.__class__.__name__}: {exc}'
114
115
116# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive.

    Most of the code here came from the Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, subclassing the original class doesn't bring any benefit: it
    only has run() and the option list. Not all options are implemented
    here when checked against the latest Sphinx version, as more options
    were added over time.

    So, this class keeps its own list of supported options.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the content of ``path``, decoded with ``encoding``.

        The path is also added to the document's recorded dependencies.
        Problems opening or decoding the file are reported by raising the
        directive error created by ``self.severe()``.
        """
        settings = self.state.document.settings
        try:
            settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path)
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters.

        Returns the selected part of ``rawtext``; the input string itself
        is not modified, so callers must use the return value.
        """

        # Get to-be-included content. start-line/end-line are used directly
        # as Python slice bounds over the list of lines.
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated lines are pushed into the state machine input, so
        an empty node list is always returned.
        """
        parser = ParseDataStructs()

        if 'exception-file' in self.options:
            # The exception file is looked up relative to the directory of
            # the reST document that holds the directive.
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
        else:
            exceptions_file = None

        parser.parse_file(path, exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # apply_range() returns the selected text, so its result must
            # be kept. The range is applied to the generated output before
            # the parsed-literal header is prepended, so that start-line or
            # start-after can't strip the header itself.
            body = self.apply_range(parser.gen_output())
            rawtext = ".. parsed-literal::\n\n" + body

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)

        relpath = os.path.relpath(path, srctree)

        result = ViewList()
        for line in parser.gen_toc().split("\n"):
            match = RE_LINENO_REF.match(line)
            if not match:
                result.append(line, path)
                continue

            ln, ref = match.groups()
            ln = int(ln)

            # Filter line range if needed
            if startline and (ln < startline):
                continue

            if endline and (ln > endline):
                continue

            # Sphinx numerates starting with zero, but text editors
            # and other tools start from one
            realln = ln + 1
            result.append(f"- {ref}: {relpath}#{realln}", path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block.

        Returns a list with a single ``literal_block`` node holding
        ``rawtext``; when ``number-lines`` is set, the lines are numbered
        starting from the given value (default: 1).
        """

        # Convert tabs to spaces, if `tab_width` is positive.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext
        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)
        if "number-lines" in self.options:
            try:
                startline = int(self.options["number-lines"] or 1)
            except ValueError:
                raise self.error(":number-lines: with non-integer start value")
            # The number of included lines is needed to compute the last
            # line number; it has to be derived here from rawtext.
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)
            endline = startline + len(include_lines)
            if text.endswith("\n"):
                text = text[:-1]
            tokens = NumberLines([([], text)], startline, endline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    literal_block += nodes.Text(value, value)
        else:
            literal_block += nodes.Text(text, text)
        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block.

        ``rawtext`` is split into lines and handed to the docutils
        ``CodeBlock`` directive, using the ``code`` option as the language
        argument. Returns whatever ``CodeBlock.run()`` returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree. The logic does check it to prevent
        # directory traverse issues.
        #

        srctree = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if os.path.isfile(src_path):
            base = srctree
            path = src_path
        else:
            # self.warning() takes a single, already formatted, message
            raise self.warning(f'File "{path}" doesn\'t exist')

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # Kept from the docutils Include logic. The path was joined with the
        # absolute srctree above, so it is not expected to start with "<".
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments to related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)
402
403# ==============================================================================
404
# (source, message) pairs already warned about by check_missing_refs()
reported = set()
# Domain name -> list of its object type names; filled by fill_domain_info()
DOMAIN_INFO = {}
# Lower-cased reference name -> list of formatted suggestion strings
all_refs = {}
408
def fill_domain_info(env):
    """
    Collect, once, the object types of each Sphinx domain and an index of
    every known object name.

    DOMAIN_INFO maps a domain name to its object type names, and all_refs
    maps a lower-cased object name (without namespace, for the C domain)
    to human-readable descriptions of where that name is defined.
    """
    # Nothing to do when a previous call already filled the tables
    if DOMAIN_INFO:
        return

    for domain_name, domain_instance in env.domains.items():
        try:
            DOMAIN_INFO[domain_name] = list(domain_instance.object_types.keys())
        except AttributeError:
            # Ignore domains that we can't retrieve object types, if any
            continue

    for domain in DOMAIN_INFO:
        objects = env.get_domain(domain).get_objects()
        for name, _dispname, objtype, docname, _anchor, _priority in objects:
            key = name.lower()

            # C names may carry a namespace: index only the last component
            if domain == "c":
                key = key.rpartition(".")[2]

            description = f"\t{domain}:{objtype}:`{name}` (from {docname})"
            all_refs.setdefault(key, []).append(description)
437
def get_suggestions(app, env, node,
                    original_target, original_domain, original_reftype):
    """
    Propose alternatives for a reference target that could not be resolved.

    The lookup is case-insensitive and, for the C domain, ignores the
    namespace. An exact name match returns the entries stored in all_refs
    for that name; otherwise the entries of the closest names are returned.
    """
    wanted = original_target.lower()

    # Remove namespace if present
    if original_domain == "c":
        wanted = wanted.rpartition(".")[2]

    # If name exists, propose exact name match on different domains
    if wanted in all_refs:
        return all_refs[wanted]

    # If not found, get a close match, using difflib.
    # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
    # for a close match within a certain distance. We're using the defaults
    # here, e.g. cutoff=0.6, proposing 3 alternatives
    return [hint
            for close_name in get_close_matches(wanted, all_refs.keys())
            for hint in all_refs[close_name]]
463
def check_missing_refs(app, env, node, contnode):
    """
    Warn about broken references found in files parsed for cross-references.

    Only nodes whose source is listed in env._xref_files are reported, and
    each (source, message) pair is reported once. Always returns None.
    """
    source = node.source
    if not source:
        return None

    try:
        tracked_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if source not in tracked_files:
        return None

    fill_domain_info(env)

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"

    # Don't duplicate warnings
    seen_key = (source, msg)
    if seen_key in reported:
        return None
    reported.add(seen_key)

    hints = get_suggestions(app, env, node, target, domain, reftype)
    if hints:
        msg = msg + ". Possible alternatives:\n" + '\n'.join(hints)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None
500
def merge_xref_info(app, env, docnames, other):
    """
    Merge the _xref_files set of another environment into ``env``.

    Each process modifies its own env._xref_files, so the sets need to be
    merged back. Nothing is done when ``other`` has no _xref_files.
    """
    try:
        incoming = other._xref_files
    except AttributeError:
        return

    env._xref_files.update(incoming)
508
def init_xref_docs(app, env, docnames):
    """Start with an empty set of files for which cross references are generated."""
    build_env = app.env
    build_env._xref_files = set()
512
513# ==============================================================================
514
def setup(app):
    """
    Setup Sphinx extension.

    Registers the ``kernel-include`` directive and the event handlers used
    to track and check generated cross-references, and returns the
    extension metadata.
    """
    early_hooks = (
        ("env-before-read-docs", init_xref_docs),
        ("env-merge-info", merge_xref_info),
    )
    for event, handler in early_hooks:
        app.connect(event, handler)

    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    metadata = {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata