Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8kdoc_parser
9===========
10
11Read a C language source or header FILE and extract embedded
12documentation comments
13"""
14
15import sys
16import re
17from pprint import pformat
18
19from kdoc_re import NestedMatch, KernRe
20from kdoc_item import KdocItem
21
#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# They are declared in lowercase, outside any class, to make conversion
# from the earlier script easier (presumably the kernel-doc.pl script that
# the TODO comments in this file refer to - confirm).
#
# As those are evaluated at the beginning, no need to cache them
#
# NOTE(review): several definitions below combine KernRe objects with "+"
# or embed str(KernRe); this presumably concatenates the underlying
# patterns - confirm against kdoc_re.KernRe.
#

# Start of a kernel-doc comment: "/**" alone on the line.
# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# End-of-comment marker, anywhere in the line.
doc_end = KernRe(r'\*/', cache=False)
# Comment continuation: optional blanks, "*", then any amount of blanks.
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same, but consuming at most one space after the "*".
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment continuation followed by a captured word.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
# Case-insensitive and unanchored: matches a known name anywhere in the text.
known_sections = KernRe(known_section_names, flags = re.I)
# Group 1 is the "@name" or section name; the optional group 2 is the text
# after the colon, which must not start with another ":".
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment line; group 1 is whatever follows the "* " prefix.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline comments: start marker, "@name: text" line, end marker and the
# single-line "/** @name: text */" form.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL[_GPL](name) and EXPORT_SYMBOL_NS[_GPL](name, "ns");
# group 2 is the symbol name in both.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", optionally with ".member" / "->member" parts and a trailing "...";
# group 1 is everything after the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
# "DOC: title" block; group 1 is the title.
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Data declaration: group 1 is struct/union/enum/typedef, group 2 the
# (possibly empty) name.
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
# Function or macro declaration: group 1 is the name.
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)
74
#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
#
# Each entry is a (pattern, replacement) pair; apply_transforms() runs them
# in list order, so the order of the entries matters.
#
# NOTE(review): most entries pass re.S as the second *positional* argument,
# while other KernRe() calls in this file use the flags= keyword together
# with cache=. Confirm that KernRe's second positional parameter really is
# "flags". None of the patterns concerned contains a ".", so re.S has no
# effect on what they match either way.
#

# One macro argument: anything up to the next "," or ")".
struct_args_pattern = r'([^,)]+)'

struct_xforms = [
    # Strip attributes
    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
    (KernRe(r'\s*__packed\s*', re.S), ' '),
    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
    # The longer name must come first: the next pattern is a prefix of it.
    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
    #
    # Unwrap struct_group macros based on this definition:
    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
    # which has variants like: struct_group(NAME, MEMBERS...)
    # Only MEMBERS arguments require documentation.
    #
    # Parsing them happens in two steps:
    #
    # 1. drop struct group arguments that aren't at MEMBERS,
    #    storing them as STRUCT_GROUP(MEMBERS)
    #
    # 2. remove STRUCT_GROUP() ancillary macro.
    #
    # The original logic used to remove STRUCT_GROUP() using an
    # advanced regex:
    #
    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
    #
    # with two patterns that are incompatible with
    # Python re module, as it has:
    #
    #   - a recursive pattern: (?1)
    #   - an atomic grouping: (?>...)
    #
    # A simpler version was tried, but it didn't work either:
    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
    #
    # As it doesn't properly match the end parenthesis in some cases.
    #
    # So, a better solution was crafted: there's now a NestedMatch
    # class that ensures that delimiters after a search are properly
    # matched. So, the implementation to drop STRUCT_GROUP() is
    # handled separately (see struct_nested_prefixes below).
    #
    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
    #
    # Replace macros
    #
    # TODO: use NestedMatch for FOO($1, $2, ...) matches
    #
    # it is better to also move those to the NestedMatch logic,
    # to ensure that parenthesis will be properly matched.
    #
    # The first two entries rewrite to DECLARE_BITMAP(), which the third
    # entry then expands; they therefore have to precede it.
    #
    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
]
#
# Regexes here are guaranteed to have the end limiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
# These pairs are handed to NestedMatch.sub() by dump_struct(); note that
# the patterns are plain re.compile() objects rather than KernRe ones.
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]
163
#
# Transforms for function prototypes
#
# (pattern, replacement) pairs run in list order by apply_transforms() from
# dump_function(). Patterns starting with "^" only strip a keyword found at
# the very beginning of the prototype; the others match anywhere in it.
#
function_xforms = [
    # Storage-class and inlining keywords at the start of the prototype.
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    # Annotations that may show up anywhere; each needs a trailing space.
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    # Dropped wherever it occurs, including inside an identifier.
    (KernRe(r"_noprof"), ""),
    # Annotations carrying an argument list.
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    # Unlike the others, this one is expanded into its two arguments.
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]
193
#
# Transformation helper shared by the struct and function parsers.
#
def apply_transforms(xforms, text):
    """
    Run ``text`` through a list of transformations and return the result.

    ``xforms`` is an iterable of ``(pattern, replacement)`` pairs, where
    ``pattern`` offers a ``sub(replacement, string)`` method. The pairs are
    applied in order, each one working on the output of the previous one.
    """
    result = text
    for pattern, replacement in xforms:
        result = pattern.sub(replacement, result)
    return result
201
#
# White space normalisation helper
#
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Return ``s`` without leading/trailing white space and with every run of
    two or more white space characters replaced by a single space.
    """
    stripped = s.strip()
    return multi_space.sub(' ', stripped)
208
#
# Drop the struct/enum members that were flagged as "private".
#
def trim_private_members(text):
    """
    Return ``text`` without private members and without comments.

    A private region starts at a ``/* private: ...`` comment and runs either
    up to the end of the next ``/* public: ... */`` comment or, when there is
    none, to the end of the text. Once those regions are gone every remaining
    comment is removed too and the result is stripped.
    """
    private_to_public = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S)
    private_to_end = KernRe(r'/\*\s*private:.*', flags=re.S)
    any_comment = KernRe(r'\s*/\*.*?\*/\s*', flags=re.S)

    # Bounded regions first; whatever "private:" is left has no "public:".
    public_only = private_to_public.sub('', text)
    public_only = private_to_end.sub('', public_only)

    # The comments were only needed as markers; they can go now.
    without_comments = any_comment.sub('', public_only)
    return without_comments.strip()
223
class state:
    """
    Identifiers for the parser state machine, plus their printable names.
    """

    #
    # Parser states, numbered in declaration order:
    #
    #   NORMAL           normal code
    #   NAME             looking for function name
    #   DECLARATION      we have seen a declaration which might not be done
    #   BODY             the body of the comment
    #   SPECIAL_SECTION  doc section ending with a blank line
    #   PROTO            scanning prototype
    #   DOCBLOCK         documentation block
    #   INLINE_NAME      gathering doc outside main block
    #   INLINE_TEXT      reading the body of inline docs
    #
    (NORMAL, NAME, DECLARATION, BODY, SPECIAL_SECTION,
     PROTO, DOCBLOCK, INLINE_NAME, INLINE_TEXT) = range(9)

    # Printable names, indexed by the state value
    name = [
        "NORMAL", "NAME", "DECLARATION",
        "BODY", "SPECIAL_SECTION", "PROTO",
        "DOCBLOCK", "INLINE_NAME", "INLINE_TEXT",
    ]
251
252
SECTION_DEFAULT = "Description"  # default section

class KernelEntry:
    """
    Data collected for one kernel-doc comment while it is being parsed.

    It holds the text of the section currently being read, the sections and
    parameter descriptions already completed, and the warnings raised along
    the way. Warnings are stored instead of logged so that the output logic
    can report them only for the symbols that are actually output.
    """

    def __init__(self, config, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` attribute with an ``info()``
        method; ``ln`` is the line preceding the declaration, i.e. the
        declaration is recorded as starting at ``ln + 1``.
        """
        self.config = config

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Current section and the line where it started. Initialised here
        # so that dump_section() is safe to call before begin_section().
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Append one line of text to the section being accumulated."""
        self._contents.append(text)

    def contents(self):
        """Return the accumulated lines joined by, and ending with, a newline."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, log_msg, warning=True):
        """
        Emit an already formatted message.

        Informational messages go straight to ``config.log.info()``.
        Warnings are only stored at ``self.warnings``: their output is
        delegated to the output logic, as this way warnings are reported
        only for symbols that are output.
        """
        if not warning:
            self.config.log.info(log_msg)
            return

        self.warnings.append(log_msg)

    #
    # Begin a new section.
    #
    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Make ``title`` the current section, starting at ``line_no``.

        With ``dump`` set, the section being accumulated is stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Store the accumulated text under the current section name.

        A section whose name looks like ``@param`` is stored as a parameter
        description; anything else is stored at ``self.sections``, appended
        as a new paragraph when the section already has text. With
        ``start_new`` set, the entry goes back to an empty default section.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    #
                    # emit_msg() here takes one pre-formatted string. The
                    # file name is not known at this level, so only the
                    # line number can be reported.
                    #
                    self.emit_msg(f"line {self.new_start_line}: "
                                  f"duplicate section name '{name}'\n")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
352
353
354class KernelDoc:
355 """
356 Read a C language source or header FILE and extract embedded
357 documentation comments.
358 """
359
360 # Section names
361
362 section_context = "Context"
363 section_return = "Return"
364
365 undescribed = "-- undescribed --"
366
367 def __init__(self, config, fname):
368 """Initialize internal variables"""
369
370 self.fname = fname
371 self.config = config
372
373 # Initial state for the state machines
374 self.state = state.NORMAL
375
376 # Store entry currently being processed
377 self.entry = None
378
379 # Place all potential outputs into an array
380 self.entries = []
381
382 #
383 # We need Python 3.7 for its "dicts remember the insertion
384 # order" guarantee
385 #
386 if sys.version_info.major == 3 and sys.version_info.minor < 7:
387 self.emit_msg(0,
388 'Python 3.7 or later is required for correct results')
389
390 def emit_msg(self, ln, msg, warning=True):
391 """Emit a message"""
392
393 log_msg = f"{self.fname}:{ln} {msg}"
394
395 if self.entry:
396 self.entry.emit_msg(log_msg, warning)
397 return
398
399 if warning:
400 self.config.log.warning(log_msg)
401 else:
402 self.config.log.info(log_msg)
403
404 def dump_section(self, start_new=True):
405 """
406 Dumps section contents to arrays/hashes intended for that purpose.
407 """
408
409 if self.entry:
410 self.entry.dump_section(start_new)
411
412 # TODO: rename it to store_declaration after removal of kernel-doc.pl
413 def output_declaration(self, dtype, name, **args):
414 """
415 Stores the entry into an entry array.
416
417 The actual output and output filters will be handled elsewhere
418 """
419
420 item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
421 item.warnings = self.entry.warnings
422
423 # Drop empty sections
424 # TODO: improve empty sections logic to emit warnings
425 sections = self.entry.sections
426 for section in ["Description", "Return"]:
427 if section in sections and not sections[section].rstrip():
428 del sections[section]
429 item.set_sections(sections, self.entry.section_start_lines)
430 item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
431 self.entry.parametertypes,
432 self.entry.parameterdesc_start_lines)
433 self.entries.append(item)
434
435 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
436
437 def reset_state(self, ln):
438 """
439 Ancillary routine to create a new entry. It initializes all
440 variables used by the state machine.
441 """
442
443 self.entry = KernelEntry(self.config, ln)
444
445 # State flags
446 self.state = state.NORMAL
447
    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store one parameter (or member) and its description at self.entry.

        ``param`` is the parameter name as parsed, ``dtype`` its type (an
        empty string selects the "anonymous type" handling below) and
        ``org_arg`` the original declaration text, which is what gets
        recorded at ``parametertypes``. ``decl_type`` and
        ``declaration_name`` are only used to word the "not described"
        warning; ``ln`` is the line reported with it.
        """

        # An anonymous struct/union was just pushed: its closing "}" shows
        # up as a typeless parameter and must not be recorded.
        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        # Cut the name at the first "[" or ")", dropping the rest.
        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3: # there is a name provided, use that
                    param = param[:-3]
                # Only supply a description if none (or an empty one) exists.
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            # Names containing "." get the placeholder description but no
            # warning.
            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # NOTE(review): the comment that used to sit here described
        # stripping spaces from param so that names like "addr[6 + 2]" stay
        # in one piece. No such stripping happens in this method: the name
        # is stored as is and only runs of white space in org_arg are
        # collapsed. Confirm whether that text was a leftover.

        self.entry.parameterlist.append(param)
        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg
508
509
    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the raw argument (or member) list and ``splitter`` the
        string separating its items: callers in this file use ',' for
        functions and ';' for struct/union members. Each item is classified
        and handed to push_parameter(); ``ln``, ``decl_type`` and
        ``declaration_name`` are passed through for its warnings.
        """

        # temporarily replace all commas inside function pointer definition
        # (one comma per pass, hence the loop); they are restored below.
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            # Only the first run of white space is collapsed (count=1).
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                # Put back the commas hidden at the top of this method.
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                # The type is the declaration with the name taken out.
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                # Note that "args" is rebound here. That is harmless for the
                # enclosing loop, which iterates over the list produced by
                # args.split(splitter) before the loop started.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)
611
612 def check_sections(self, ln, decl_name, decl_type):
613 """
614 Check for errors inside sections, emitting warnings if not found
615 parameters are described.
616 """
617 for section in self.entry.sections:
618 if section not in self.entry.parameterlist and \
619 not known_sections.search(section):
620 if decl_type == 'function':
621 dname = f"{decl_type} parameter"
622 else:
623 dname = f"{decl_type} member"
624 self.emit_msg(ln,
625 f"Excess {dname} '{section}' description in '{decl_name}'")
626
627 def check_return_section(self, ln, declaration_name, return_type):
628 """
629 If the function doesn't return void, warns about the lack of a
630 return description.
631 """
632
633 if not self.config.wreturn:
634 return
635
636 # Ignore an empty return type (It's a macro)
637 # Ignore functions with a "void" return type (but not "void *")
638 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
639 return
640
641 if not self.entry.sections.get("Return", None):
642 self.emit_msg(ln,
643 f"No description found for return value of '{declaration_name}'")
644
645 #
646 # Split apart a structure prototype; returns (struct|union, name, members) or None
647 #
648 def split_struct_proto(self, proto):
649 type_pattern = r'(struct|union)'
650 qualifiers = [
651 "__attribute__",
652 "__packed",
653 "__aligned",
654 "____cacheline_aligned_in_smp",
655 "____cacheline_aligned",
656 ]
657 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
658
659 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
660 if r.search(proto):
661 return (r.group(1), r.group(2), r.group(3))
662 else:
663 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
664 if r.search(proto):
665 return (r.group(1), r.group(3), r.group(2))
666 return None
667 #
668 # Rewrite the members of a structure or union for easier formatting later on.
669 # Among other things, this function will turn a member like:
670 #
671 # struct { inner_members; } foo;
672 #
673 # into:
674 #
675 # struct foo; inner_members;
676 #
677 def rewrite_struct_members(self, members):
678 #
679 # Process struct/union members from the most deeply nested outward. The
680 # trick is in the ^{ below - it prevents a match of an outer struct/union
681 # until the inner one has been munged (removing the "{" in the process).
682 #
683 struct_members = KernRe(r'(struct|union)' # 0: declaration type
684 r'([^\{\};]+)' # 1: possible name
685 r'(\{)'
686 r'([^\{\}]*)' # 3: Contents of declaration
687 r'(\})'
688 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration
689 tuples = struct_members.findall(members)
690 while tuples:
691 for t in tuples:
692 newmember = ""
693 oldmember = "".join(t) # Reconstruct the original formatting
694 dtype, name, lbr, content, rbr, rest, semi = t
695 #
696 # Pass through each field name, normalizing the form and formatting.
697 #
698 for s_id in rest.split(','):
699 s_id = s_id.strip()
700 newmember += f"{dtype} {s_id}; "
701 #
702 # Remove bitfield/array/pointer info, getting the bare name.
703 #
704 s_id = KernRe(r'[:\[].*').sub('', s_id)
705 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
706 #
707 # Pass through the members of this inner structure/union.
708 #
709 for arg in content.split(';'):
710 arg = arg.strip()
711 #
712 # Look for (type)(*name)(args) - pointer to function
713 #
714 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
715 if r.match(arg):
716 dtype, name, extra = r.group(1), r.group(2), r.group(3)
717 # Pointer-to-function
718 if not s_id:
719 # Anonymous struct/union
720 newmember += f"{dtype}{name}{extra}; "
721 else:
722 newmember += f"{dtype}{s_id}.{name}{extra}; "
723 #
724 # Otherwise a non-function member.
725 #
726 else:
727 #
728 # Remove bitmap and array portions and spaces around commas
729 #
730 arg = KernRe(r':\s*\d+\s*').sub('', arg)
731 arg = KernRe(r'\[.*\]').sub('', arg)
732 arg = KernRe(r'\s*,\s*').sub(',', arg)
733 #
734 # Look for a normal decl - "type name[,name...]"
735 #
736 r = KernRe(r'(.*)\s+([\S+,]+)')
737 if r.search(arg):
738 for name in r.group(2).split(','):
739 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
740 if not s_id:
741 # Anonymous struct/union
742 newmember += f"{r.group(1)} {name}; "
743 else:
744 newmember += f"{r.group(1)} {s_id}.{name}; "
745 else:
746 newmember += f"{arg}; "
747 #
748 # At the end of the s_id loop, replace the original declaration with
749 # the munged version.
750 #
751 members = members.replace(oldmember, newmember)
752 #
753 # End of the tuple loop - search again and see if there are outer members
754 # that now turn up.
755 #
756 tuples = struct_members.findall(members)
757 return members
758
759 #
760 # Format the struct declaration into a standard form for inclusion in the
761 # resulting docs.
762 #
763 def format_struct_decl(self, declaration):
764 #
765 # Insert newlines, get rid of extra spaces.
766 #
767 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
768 declaration = KernRe(r'\}\s+;').sub('};', declaration)
769 #
770 # Format inline enums with each member on its own line.
771 #
772 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
773 while r.search(declaration):
774 declaration = r.sub(r'\1,\n\2', declaration)
775 #
776 # Now go through and supply the right number of tabs
777 # for each line.
778 #
779 def_args = declaration.split('\n')
780 level = 1
781 declaration = ""
782 for clause in def_args:
783 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
784 if clause:
785 if '}' in clause and level > 1:
786 level -= 1
787 if not clause.startswith('#'):
788 declaration += "\t" * level
789 declaration += "\t" + clause + "\n"
790 if "{" in clause and "}" not in clause:
791 level += 1
792 return declaration
793
794
    def dump_struct(self, ln, proto):
        """
        Store an entry for a struct or union.

        ``proto`` is the text of the declaration and ``ln`` the line used
        for warnings. Nothing is stored when the prototype cannot be parsed
        or when its name differs from the identifier found at the
        kernel-doc comment; a warning is emitted in both cases.
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = apply_transforms(struct_xforms, members)

        # STRUCT_GROUP(...) needs balanced-parenthesis matching, which the
        # plain regexes above cannot do; NestedMatch handles it.
        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        # "declaration" keeps the text as it was before nested members get
        # flattened; it is what ends up formatted as the definition below.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        # Remove whatever innermost "{...}" groups are still there.
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)
836
    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.

        ``proto`` is the text of the enum declaration and ``ln`` the line
        used for warnings. Warnings are emitted for enum values lacking a
        description and for descriptions that match no enum value.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        # Both patterns above accept an empty name.
        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        # Drop parenthesised expressions first, so that commas inside them
        # don't split a member in two.
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            # Keep just the leading identifier, dropping any "= value" part.
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)
908
909 def dump_declaration(self, ln, prototype):
910 """
911 Stores a data declaration inside self.entries array.
912 """
913
914 if self.entry.decl_type == "enum":
915 self.dump_enum(ln, prototype)
916 elif self.entry.decl_type == "typedef":
917 self.dump_typedef(ln, prototype)
918 elif self.entry.decl_type in ["union", "struct"]:
919 self.dump_struct(ln, prototype)
920 else:
921 # This would be a bug
922 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
923
    def dump_function(self, ln, prototype):
        """
        Stores a function or function macro inside self.entries array.

        ``prototype`` is the text of the prototype (or "#define") and ``ln``
        the line used for warnings. Nothing is stored when the prototype
        cannot be understood or when its name differs from the identifier
        found at the kernel-doc comment.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'
        #
        # Apply the initial transformations.
        #
        prototype = apply_transforms(function_xforms, prototype)
        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            # Tried in order: no return type, plain return type, and return
            # type ending with "*". The first one that matches wins.
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)
1023
1024
def dump_typedef(self, ln, proto):
    """
    Stores a typedef inside self.entries array.

    Function typedefs (with or without parentheses around the name) are
    tried first and are stored as 'function' declarations flagged with
    typedef=True.  Failing that, a plain "typedef ... name;" is stored
    as a 'typedef' declaration.  A message is emitted when the name does
    not match self.entry.identifier or when nothing can be parsed.
    """

    def name_mismatch(declaration_name):
        """Emit a message and return True if the name is unexpected."""
        if self.entry.identifier == declaration_name:
            return False
        self.emit_msg(ln,
                      f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
        return True

    #
    # Building blocks for the two function typedef forms.
    #
    typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
    typedef_ident = r'\*?\s*(\w\S+)\s*'
    typedef_args = r'\s*\((.*)\);'

    function_forms = (
        KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args),
        KernRe(typedef_type + typedef_ident + typedef_args),
    )
    #
    # The first function typedef form that matches wins.
    #
    for matcher in function_forms:
        if matcher.match(proto):
            return_type = matcher.group(1).strip()
            name = matcher.group(2)
            args = matcher.group(3)

            if name_mismatch(name):
                return

            self.create_parameter_list(ln, 'function', args, ',', name)
            self.output_declaration('function', name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
    #
    # Not a function typedef; fall back to the simple form.
    #
    simple = KernRe(r'typedef.*\s+(\w+)\s*;')
    if not simple.match(proto):
        self.emit_msg(ln, "error: Cannot parse typedef!")
        return

    name = simple.group(1)
    if name_mismatch(name):
        return

    self.output_declaration('typedef', name,
                            purpose=self.entry.declaration_purpose)
1077
@staticmethod
def process_export(function_set, line):
    """
    Process EXPORT_SYMBOL* tags.

    If line holds an export recognised by export_symbol or
    export_symbol_ns, add the exported name (minus any special suffix)
    to function_set and return True.  Otherwise return False and leave
    function_set alone.

    No instance state is used, hence the staticmethod decorator.
    """

    #
    # Only one EXPORT_SYMBOL* per line is recognised: having several on
    # the same line would violate the Kernel coding style anyway.
    #
    for matcher in (export_symbol, export_symbol_ns):
        if matcher.search(line):
            symbol = matcher.group(2)
            break
    else:
        return False
    #
    # Some exported symbols are documented under a different name
    # (a horrible hack): drop the known suffixes.  Slicing is used in
    # order to stay compatible with Python < 3.9.
    #
    for suffix in ['_noprof']:
        if symbol.endswith(suffix):
            symbol = symbol[:len(symbol) - len(suffix)]

    function_set.add(symbol)
    return True
1109
def process_normal(self, ln, line):
    """
    STATE_NORMAL: wait for the /** that opens a kernel-doc comment.

    Lines not matching doc_start are ignored.  On a match, a fresh entry
    is started and the state machine moves on to state.NAME, as the
    following line is expected to carry the name.
    """

    if doc_start.match(line):
        self.reset_state(ln)
        self.state = state.NAME
1123
def process_name(self, ln, line):
    """
    STATE_NAME: Looking for the "name - description" line

    Three outcomes are possible:
      - a DOC: block, which moves the parser to state.DOCBLOCK;
      - a declaration line, which sets identifier, declaration type and
        short description and moves to state.BODY or state.DECLARATION;
      - anything else, which results in a message being emitted.
    """
    #
    # DOC: blocks get special handling; untitled ones are called
    # "Introduction".
    #
    if doc_block.search(line):
        title = doc_block.group(1)
        self.entry.begin_section(ln, title if title else "Introduction")

        self.entry.identifier = self.entry.section
        self.state = state.DOCBLOCK
        return
    #
    # From here on only a normal kerneldoc declaration line is valid.
    #
    if not doc_decl.search(line):
        self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
        return

    self.entry.identifier = doc_decl.group(1)
    #
    # Data declarations are tested first, then function descriptions.
    #
    if doc_begin_data.search(line):
        self.entry.decl_type = doc_begin_data.group(1)
        self.entry.identifier = doc_begin_data.group(2)
    elif doc_begin_func.search(line):
        self.entry.identifier = doc_begin_func.group(1)
        self.entry.decl_type = "function"
    else:
        #
        # Neither matched: this is not a kernel-doc comment.
        #
        self.emit_msg(ln,
                      f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}")
        self.state = state.NORMAL
        return
    #
    # Prepare the new kerneldoc entry.  The default section has to be
    # set up here in case there are no @param blocks.
    #
    self.state = state.BODY
    self.entry.identifier = self.entry.identifier.strip(" ")
    self.entry.begin_section(ln + 1)
    #
    # The description *should* follow a "-" or ":" but is not always
    # there.  (Someday this could be captured by the parsing above.)
    #
    purpose = ""
    purpose_re = KernRe("[-:](.*)")
    if purpose_re.search(line):
        purpose = trim_whitespace(purpose_re.group(1))
        self.state = state.DECLARATION
    self.entry.declaration_purpose = purpose

    if self.config.wshort_desc and not self.entry.declaration_purpose:
        self.emit_msg(ln,
                      f"missing initial short description on line:\n{line}")

    if self.entry.decl_type != "enum" and not self.entry.identifier:
        self.emit_msg(ln,
                      f"wrong kernel-doc identifier on line:\n{line}")
        self.state = state.NORMAL

    if self.config.verbose:
        self.emit_msg(ln,
                      f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                      warning=False)
1201
1202 #
1203 # Helper function to determine if a new section is being started.
1204 #
def is_new_section(self, ln, line):
    """
    Detect the start of a new section and, if found, switch to it.

    When line matches doc_sect, the section being collected is dumped, a
    new one is started with whatever text follows the section name, and
    the state becomes state.BODY or state.SPECIAL_SECTION.

    Returns True if a new section was started, False otherwise.
    """
    if not doc_sect.search(line):
        return False
    #
    # Normalize the section name and select the state that goes with it.
    #
    newsection = doc_sect.group(1)
    lowered = newsection.lower()
    next_state = state.BODY

    if lowered == 'description':
        newsection = 'Description'
    elif lowered == 'context':
        newsection = 'Context'
        next_state = state.SPECIAL_SECTION
    elif lowered in ("@return", "@returns", "return", "returns"):
        newsection = "Return"
        next_state = state.SPECIAL_SECTION
    elif newsection[0] == '@':
        next_state = state.SPECIAL_SECTION

    self.state = next_state
    #
    # Close the previous section and open the new one with its initial
    # contents (which may be missing).
    #
    newcontents = doc_sect.group(2) or ""

    self.dump_section()
    self.entry.begin_section(ln, newsection)
    self.entry.leading_space = None
    self.entry.add_text(newcontents.lstrip())

    return True
1236
1237 #
1238 # Helper function to detect (and effect) the end of a kerneldoc comment.
1239 #
def is_comment_end(self, ln, line):
    """
    Detect the end of a kerneldoc comment and, if found, act on it.

    On a doc_end match the current section is dumped, the prototype
    buffer is cleared and the parser moves to state.PROTO.

    Returns True if the comment ended on this line, False otherwise.
    """
    if not doc_end.search(line):
        return False

    self.dump_section()
    #
    # Text glued to the comment terminator (doc_com + <text> + doc_end)
    # is suspicious.
    #
    if KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/').match(line):
        self.emit_msg(ln, f"suspicious ending line: {line}")

    self.entry.prototype = ""
    self.entry.new_start_line = ln + 1
    self.state = state.PROTO

    return True
1255
1256
def process_decl(self, ln, line):
    """
    STATE_DECLARATION: We've seen the beginning of a declaration

    Additional comment lines are appended to the short description
    until a blank line, a new section or the end of the comment shows
    up.
    """
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return
    #
    # Lines without the " * " beginning are unknown; report and ignore.
    #
    if not doc_content.search(line):
        self.emit_msg(ln, f"bad line: {line}")
        return

    cont = doc_content.group(1)
    #
    # A blank line ends the declaration part of the comment (and there
    # were no "special section" parameter descriptions).
    #
    if cont == "":
        self.state = state.BODY
        return
    #
    # More declaration text to soak up.
    #
    joined = self.entry.declaration_purpose + ' ' + cont
    self.entry.declaration_purpose = trim_whitespace(joined)
1284
1285
def process_special(self, ln, line):
    """
    STATE_SPECIAL_SECTION: a section ending with a blank line

    A blank comment line closes the section and returns to state.BODY.
    A new section header or the end of the comment is handled by
    is_new_section() / is_comment_end().  Any other comment line is
    added to the current section after removing the leading white space
    recorded in self.entry.leading_space.
    """
    #
    # If we have hit a blank line (only the " * " marker), then this
    # section is done.
    #
    if KernRe(r"\s*\*\s*$").match(line):
        self.entry.begin_section(ln, dump = True)
        self.state = state.BODY
        return
    #
    # Not a blank line, look for the other ways to end the section.
    #
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return
    #
    # OK, we should have a continuation of the text for this section.
    #
    if not doc_content.search(line):
        # Unknown line, ignore
        self.emit_msg(ln, f"bad line: {line}")
        return

    cont = doc_content.group(1)
    #
    # If the lines of text after the first in a special section have
    # leading white space, we need to trim it out or Sphinx will get
    # confused.  For the second line (the None case), see what we
    # find there and remember it.
    #
    if self.entry.leading_space is None:
        r = KernRe(r'^(\s+)')
        if r.match(cont):
            self.entry.leading_space = len(r.group(1))
        else:
            self.entry.leading_space = 0
    #
    # Before trimming any leading chars, be *sure* that they are
    # spaces: never trim more than the number of spaces this line
    # really starts with.  This is computed from the string itself
    # rather than by indexing up to leading_space, as the text may be
    # shorter than the remembered indentation (a malformed line such
    # as "foo *" yields an empty text), which would otherwise raise
    # IndexError.  We should maybe warn if the indentation shrinks.
    #
    blanks = len(cont) - len(cont.lstrip(" "))
    if blanks < self.entry.leading_space:
        self.entry.leading_space = blanks
    #
    # Add the trimmed result to the section and we're done.
    #
    self.entry.add_text(cont[self.entry.leading_space:])
1336
def process_body(self, ln, line):
    """
    STATE_BODY: the bulk of a kerneldoc comment.

    Unless the line starts a new section or ends the comment, its text
    is added to the current section.
    """
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return

    if not doc_content.search(line):
        # Unknown line, ignore
        self.emit_msg(ln, f"bad line: {line}")
        return

    self.entry.add_text(doc_content.group(1))
1350
def process_inline_name(self, ln, line):
    """
    STATE_INLINE_NAME: beginning of docbook comments within a prototype.

    An "@name:" line opens an inline section and moves to
    state.INLINE_TEXT.  The end of the comment, or any other comment
    text (which is reported), moves back to state.PROTO.  Lines matching
    nothing are silently ignored.
    """

    if doc_inline_sect.search(line):
        self.entry.begin_section(ln, doc_inline_sect.group(1))
        self.entry.add_text(doc_inline_sect.group(2).lstrip())
        self.state = state.INLINE_TEXT
        return

    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    if doc_content.search(line):
        self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
        self.state = state.PROTO
    # else ... ??
1365
def process_inline_text(self, ln, line):
    """
    STATE_INLINE_TEXT: docbook comments within a prototype.

    Text is collected into the current inline section until the comment
    ends, at which point the section is dumped and the parser goes back
    to state.PROTO.  Lines matching nothing are silently ignored.
    """

    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    if doc_content.search(line):
        self.entry.add_text(doc_content.group(1))
    # else ... ??
1375
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Handle syscall definitions

    Rewrites a SYSCALL_DEFINEn(name, type1, arg1, ...) declaration as a
    "long sys_name(type1 arg1, ...)" prototype and returns it.
    SYSCALL_DEFINE0 produces a "(void)" argument list.
    """

    # Strip newlines/CR's
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 means a syscall without arguments
    is_void = 'SYSCALL_DEFINE0' in proto

    # Replace SYSCALL_DEFINE with correct return type & function name
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    # The first comma (after the name) opens the argument list; with no
    # comma at all, a void syscall gets an explicit "(void)"
    if KernRe(r'long\s+(sys_.*?),').search(proto):
        proto = KernRe(',').sub('(', proto, count=1)
    elif is_void:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    # Nothing else to do for void syscalls
    if is_void:
        return proto

    # Now blank out all of the odd-numbered commas in the proto
    # so that argument types & names don't have a comma between them
    chars = list(proto)
    commas = 0
    for pos, char in enumerate(chars):
        if char != ',':
            continue
        commas += 1
        if commas % 2 == 1:
            chars[pos] = ' '

    return "".join(chars)
1414
def tracepoint_munge(self, ln, proto):
    """
    Handle tracepoint definitions

    Turns a TRACE_EVENT / DEFINE_SINGLE_EVENT / DEFINE_EVENT declaration
    into a "static inline void trace_<name>(<TP_PROTO args>)" prototype
    and prefixes self.entry.identifier with "trace_".  When either the
    name or the arguments cannot be found, a message is emitted and
    proto is returned unchanged.
    """

    tracepointname = None
    tracepointargs = None

    # (pattern, group holding the name); all of them are tried in this
    # order and the last one that matches wins
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )
    for pattern, name_group in name_patterns:
        matcher = KernRe(pattern)
        if matcher.search(proto):
            tracepointname = matcher.group(name_group)

    if tracepointname:
        tracepointname = tracepointname.lstrip()

    args_re = KernRe(r'TP_PROTO\((.*?)\)')
    if args_re.search(proto):
        tracepointargs = args_re.group(1)

    if tracepointname and tracepointargs:
        proto = f"static inline void trace_{tracepointname}({tracepointargs})"
        self.entry.identifier = f"trace_{self.entry.identifier}"
    else:
        self.emit_msg(ln,
                      f"Unrecognized tracepoint format:\n{proto}\n")

    return proto
1451
def process_proto_function(self, ln, line):
    """
    Ancillary routine to process a function prototype

    Prototype text is accumulated in self.entry.prototype, one line at
    a time.  Once a line containing '{' or ';' (or a #define) is seen,
    the accumulated text is cleaned up and handed to dump_function(),
    and the parser state is reset.
    """

    # strip C99-style comments to end of line
    # NOTE(review): re.S is passed positionally here, while other
    # KernRe() calls in this file use the flags= keyword; confirm that
    # it really ends up as the regex flags.
    line = KernRe(r"//.*$", re.S).sub('', line)

    is_define = KernRe(r'\s*#\s*define').match(line)
    #
    # Soak up the line's worth of prototype text, stopping at { if
    # present.  A #define restarts the prototype from scratch.
    #
    if is_define:
        self.entry.prototype = line
    elif not line.startswith('#'):   # skip other preprocessor stuff
        r = KernRe(r'([^\{]*)')
        if r.match(line):
            self.entry.prototype += r.group(1) + " "
    #
    # The prototype is only complete on a line with { or ;, or for a
    # #define.  Otherwise wait for more lines.
    #
    if not ('{' in line or ';' in line or is_define):
        return
    #
    # Strip comments and surrounding spaces.  The match has to be
    # non-greedy: with a greedy one, everything between the start of the
    # first comment and the end of the last one would be removed,
    # including any argument placed between two comments.
    #
    self.entry.prototype = KernRe(r'/\*.*?\*/').sub('', self.entry.prototype).strip()
    #
    # Handle self.entry.prototypes for function pointers like:
    #       int (*pcs_config)(struct foo)
    # by turning it into
    #       int pcs_config(struct foo)
    #
    r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
    self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
    #
    # Handle special declaration syntaxes
    #
    if 'SYSCALL_DEFINE' in self.entry.prototype:
        self.entry.prototype = self.syscall_munge(ln,
                                                  self.entry.prototype)
    else:
        r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
        if r.search(self.entry.prototype):
            self.entry.prototype = self.tracepoint_munge(ln,
                                                         self.entry.prototype)
    #
    # ... and we're done
    #
    self.dump_function(ln, self.entry.prototype)
    self.reset_state(ln)
1496
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process a type

    Pieces of the declaration are appended to self.entry.prototype
    while tracking the brace nesting in self.entry.brcount.  The first
    semicolon found outside of any braces completes the declaration,
    which is then passed to dump_declaration() before resetting the
    parser state.
    """

    # Strip C99-style comments and surrounding whitespace
    line = KernRe(r"//.*$", re.S).sub('', line).strip()
    if not line:
        return # nothing to see here

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"
    #
    # Break the line apart at every { } or ; and accumulate the pieces.
    #
    splitter = KernRe(r'(.*?)([{};])')
    for piece in splitter.split(line):
        if not piece:   # Ignore empty matches
            continue

        self.entry.prototype += piece

        if piece == ';':
            #
            # Only a semicolon outside of {brackets} ends the
            # declaration.
            #
            if self.entry.brcount <= 0:
                self.dump_declaration(ln, self.entry.prototype)
                self.reset_state(ln)
                return
        elif piece == '{':
            self.entry.brcount += 1
        elif piece == '}':
            self.entry.brcount -= 1
    #
    # End of line reached with the declaration still open: the newline
    # is represented by a space.
    #
    self.entry.prototype += ' '
1533
def process_proto(self, ln, line):
    """
    STATE_PROTO: reading a function/whatever prototype.

    One-line inline comments are stored right away and the start of a
    multi-line inline comment moves to state.INLINE_NAME.  Any other
    line is prototype text, handled according to the declaration type.
    """

    if doc_inline_oneline.search(line):
        self.entry.begin_section(ln, doc_inline_oneline.group(1))
        self.entry.add_text(doc_inline_oneline.group(2))
        self.dump_section()
        return

    if doc_inline_start.search(line):
        self.state = state.INLINE_NAME
        return

    if self.entry.decl_type == 'function':
        handler = self.process_proto_function
    else:
        handler = self.process_proto_type

    handler(ln, line)
1550
def process_docblock(self, ln, line):
    """
    STATE_DOCBLOCK: within a DOC: block.

    Comment text is added to the current section.  At the end of the
    comment the section is dumped, the block is output as a "doc"
    declaration and the parser state is reset.
    """

    if not doc_end.search(line):
        if doc_content.search(line):
            self.entry.add_text(doc_content.group(1))
        return

    self.dump_section()
    self.output_declaration("doc", self.entry.identifier)
    self.reset_state(ln)
1561
def parse_export(self):
    """
    Parses EXPORT_SYMBOL* macros from a single Kernel source file.

    Returns the set of exported symbol names found at self.fname, or
    None if the file cannot be read.
    """

    symbols = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:

            for text in fp:
                self.process_export(symbols, text)

    except IOError:
        return None

    else:
        return symbols
1580
#
# The state/action table telling us which function to invoke in
# each state.
#
# Keys are the parser states and values are the handlers for them.
# parse_kdoc() picks the entry for the current self.state and calls it
# as handler(self, ln, line), i.e. the instance is passed explicitly.
# Each handler may change self.state in order to select the handler
# used for the next line.
#
state_actions = {
    state.NORMAL:                   process_normal,
    state.NAME:                     process_name,
    state.BODY:                     process_body,
    state.DECLARATION:              process_decl,
    state.SPECIAL_SECTION:          process_special,
    state.INLINE_NAME:              process_inline_name,
    state.INLINE_TEXT:              process_inline_text,
    state.PROTO:                    process_proto,
    state.DOCBLOCK:                 process_docblock,
}
1596
def parse_kdoc(self):
    """
    Open and process each line of a C source file.

    A state machine drives the parsing: every line is handed to the
    function registered at state_actions for the current state, and
    that function may change the state as needed.

    Export symbols are collected during the same pass, from lines seen
    while in the NORMAL state.

    Returns a tuple with the set of exported symbols and self.entries.
    If the file cannot be read, an error is logged and whatever was
    collected so far is returned.
    """

    pending = ""
    pending_ln = None
    export_table = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, raw in enumerate(fp):

                line = raw.expandtabs().strip("\n")

                #
                # Inside prototypes, lines ending with a backslash are
                # joined with the next one; the result is reported at
                # the line where the continuation began.
                #
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        pending += line.rstrip("\\")
                        if not pending_ln:
                            pending_ln = ln
                        continue

                    if pending:
                        ln, line = pending_ln, pending + line
                        pending, pending_ln = "", None

                self.config.log.debug("%d %s: %s",
                                      ln, state.name[self.state],
                                      line)

                #
                # The original script read the file twice when
                # export_file pointed to the very same file.  Here,
                # exported symbols are picked up by this loop instead:
                # a line consumed as an export while in NORMAL state
                # needs no further processing.
                #
                if self.state == state.NORMAL and \
                   self.process_export(export_table, line):
                    continue

                # Hand this line to the appropriate state handler
                self.state_actions[self.state](self, ln, line)

    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries