Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8kdoc_parser
9===========
10
11Read a C language source or header FILE and extract embedded
12documentation comments
13"""
14
15import re
16from pprint import pformat
17
18from kdoc_re import NestedMatch, KernRe
19
20
#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make easier to
# convert from the python script.
# NOTE(review): "python script" presumably means the original kernel-doc.pl
# (Perl) script this code was ported from - confirm and reword.
#
# As those are evaluated at the beginning, no need to cache them
#

# Start of a kernel-doc comment: a line holding only "/**".
# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# End-of-comment marker "*/", anywhere in the line
doc_end = KernRe(r'\*/', cache=False)

# Comment line leader: optional spaces, "*", optional spaces
doc_com = KernRe(r'\s*\*\s*', cache=False)

# Comment line leader that consumes at most one space after the "*"
doc_com_body = KernRe(r'\s*\* ?', cache=False)

# Comment leader followed by a (captured) word
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
doc_sect = doc_com + \
    KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Body leader followed by the (captured) rest of the line
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)

# Comment leader followed by "DOC:" and an optional (captured) text
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)

# Inline comment markups: "/**" alone on a line, "* @name: text",
# "*/" alone on a line and the one-line form "/** @name: text */"
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)

# __attribute__((...)) annotations (case-insensitive)
attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)",
                   flags=re.I | re.S, cache=False)

# EXPORT_SYMBOL[_GPL](name) and EXPORT_SYMBOL_NS[_GPL](name, "namespace");
# group 2 captures the symbol name
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" markup, where name may carry ".member" / "->member" parts and
# a trailing "..."; group 1 captures everything after the "@"
type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
62
class state:
    """
    Numeric constants for the parser state machines, plus the tables
    holding their printable names.
    """

    # Parser states, numbered in declaration order:
    #
    #   NORMAL                - normal code
    #   NAME                  - looking for function name
    #   BODY_MAYBE            - body - or maybe more description
    #   BODY                  - the body of the comment
    #   BODY_WITH_BLANK_LINE  - the body which has a blank line
    #   PROTO                 - scanning prototype
    #   DOCBLOCK              - documentation block
    #   INLINE                - gathering doc outside main block
    (NORMAL, NAME, BODY_MAYBE, BODY, BODY_WITH_BLANK_LINE,
     PROTO, DOCBLOCK, INLINE) = range(8)

    # Printable name of each parser state, indexed by its value
    name = [
        "NORMAL", "NAME", "BODY_MAYBE", "BODY",
        "BODY_WITH_BLANK_LINE", "PROTO", "DOCBLOCK", "INLINE",
    ]

    # Inline documentation states, numbered in declaration order:
    #
    #   INLINE_NA     - not applicable ($state != INLINE)
    #   INLINE_NAME   - looking for member name (@foo:)
    #   INLINE_TEXT   - looking for member documentation
    #   INLINE_END    - done
    #   INLINE_ERROR  - error - Comment without header was found.
    #                   Spit a warning as it's not proper kernel-doc
    #                   and ignore the rest.
    (INLINE_NA, INLINE_NAME, INLINE_TEXT,
     INLINE_END, INLINE_ERROR) = range(5)

    # Name suffix of each inline state, indexed by its value
    inline_name = ["", "_NAME", "_TEXT", "_END", "_ERROR"]
105
SECTION_DEFAULT = "Description"  # default section


class KernelEntry:
    """
    Data gathered for one kernel-doc comment while it is being parsed:
    section contents, parameter descriptions and pending warnings.
    """

    def __init__(self, config, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` attribute (used by emit_msg());
        the declaration start line is stored as ``ln + 1``.
        """

        self.config = config

        self.contents = ""
        self.function = ""
        self.sectcheck = ""
        self.struct_actual = ""
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sectionlist = []
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # dump_section() reads both attributes below. Give them defaults
        # so that it can't fail with AttributeError when called before
        # the parser has assigned them.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0

        self.in_doc_sect = False
        self.declaration_start_line = ln + 1

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, log_msg, warning=True):
        """
        Emit a message.

        Informational messages go straight to the config logger.
        Warnings are only stored at self.warnings.
        """

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def _store_parameter(self, name, contents):
        """Record the description of parameter ``name``."""

        self.parameterdescs[name] = contents
        self.parameterdesc_start_lines[name] = self.new_start_line

        self.sectcheck += name + " "
        self.new_start_line = 0

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        The current section is stored either as a parameter description
        (when its name is a ``@name`` markup) or as a named section.
        When ``start_new`` is true, the current section is then reset to
        the default one, with empty contents.
        """

        name = self.section
        contents = self.contents

        if type_param.match(name):
            self._store_parameter(type_param.group(1), contents)

        elif name == "@...":
            self._store_parameter("...", contents)

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    # emit_msg() here takes a single, already formatted,
                    # message. Passing the line number as a separate
                    # first argument would store the number and drop
                    # the text.
                    self.emit_msg(f"line {self.new_start_line}: "
                                  f"duplicate section name '{name}'\n")
                self.sections[name] += contents
            else:
                self.sections[name] = contents
                self.sectionlist.append(name)
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self.contents = ""
196
197
class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    # Section names

    section_intro = "Introduction"
    section_context = "Context"
    section_return = "Return"

    # Placeholder stored as the description of parameters, members and
    # enum values that have no description at the kernel-doc comment
    undescribed = "-- undescribed --"
211
212 def __init__(self, config, fname):
213 """Initialize internal variables"""
214
215 self.fname = fname
216 self.config = config
217
218 # Initial state for the state machines
219 self.state = state.NORMAL
220 self.inline_doc_state = state.INLINE_NA
221
222 # Store entry currently being processed
223 self.entry = None
224
225 # Place all potential outputs into an array
226 self.entries = []
227
228 def emit_msg(self, ln, msg, warning=True):
229 """Emit a message"""
230
231 log_msg = f"{self.fname}:{ln} {msg}"
232
233 if self.entry:
234 self.entry.emit_msg(log_msg, warning)
235 return
236
237 if warning:
238 self.config.log.warning(log_msg)
239 else:
240 self.config.log.info(log_msg)
241
242 def dump_section(self, start_new=True):
243 """
244 Dumps section contents to arrays/hashes intended for that purpose.
245 """
246
247 if self.entry:
248 self.entry.dump_section(start_new)
249
250 # TODO: rename it to store_declaration after removal of kernel-doc.pl
251 def output_declaration(self, dtype, name, **args):
252 """
253 Stores the entry into an entry array.
254
255 The actual output and output filters will be handled elsewhere
256 """
257
258 # The implementation here is different than the original kernel-doc:
259 # instead of checking for output filters or actually output anything,
260 # it just stores the declaration content at self.entries, as the
261 # output will happen on a separate class.
262 #
263 # For now, we're keeping the same name of the function just to make
264 # easier to compare the source code of both scripts
265
266 args["declaration_start_line"] = self.entry.declaration_start_line
267 args["type"] = dtype
268 args["warnings"] = self.entry.warnings
269
270 # TODO: use colletions.OrderedDict to remove sectionlist
271
272 sections = args.get('sections', {})
273 sectionlist = args.get('sectionlist', [])
274
275 # Drop empty sections
276 # TODO: improve empty sections logic to emit warnings
277 for section in ["Description", "Return"]:
278 if section in sectionlist:
279 if not sections[section].rstrip():
280 del sections[section]
281 sectionlist.remove(section)
282
283 self.entries.append((name, args))
284
285 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
286
287 def reset_state(self, ln):
288 """
289 Ancillary routine to create a new entry. It initializes all
290 variables used by the state machine.
291 """
292
293 self.entry = KernelEntry(self.config, ln)
294
295 # State flags
296 self.state = state.NORMAL
297 self.inline_doc_state = state.INLINE_NA
298
299 def push_parameter(self, ln, decl_type, param, dtype,
300 org_arg, declaration_name):
301 """
302 Store parameters and their descriptions at self.entry.
303 """
304
305 if self.entry.anon_struct_union and dtype == "" and param == "}":
306 return # Ignore the ending }; from anonymous struct/union
307
308 self.entry.anon_struct_union = False
309
310 param = KernRe(r'[\[\)].*').sub('', param, count=1)
311
312 if dtype == "" and param.endswith("..."):
313 if KernRe(r'\w\.\.\.$').search(param):
314 # For named variable parameters of the form `x...`,
315 # remove the dots
316 param = param[:-3]
317 else:
318 # Handles unnamed variable parameters
319 param = "..."
320
321 if param not in self.entry.parameterdescs or \
322 not self.entry.parameterdescs[param]:
323
324 self.entry.parameterdescs[param] = "variable arguments"
325
326 elif dtype == "" and (not param or param == "void"):
327 param = "void"
328 self.entry.parameterdescs[param] = "no arguments"
329
330 elif dtype == "" and param in ["struct", "union"]:
331 # Handle unnamed (anonymous) union or struct
332 dtype = param
333 param = "{unnamed_" + param + "}"
334 self.entry.parameterdescs[param] = "anonymous\n"
335 self.entry.anon_struct_union = True
336
337 # Handle cache group enforcing variables: they do not need
338 # to be described in header files
339 elif "__cacheline_group" in param:
340 # Ignore __cacheline_group_begin and __cacheline_group_end
341 return
342
343 # Warn if parameter has no description
344 # (but ignore ones starting with # as these are not parameters
345 # but inline preprocessor statements)
346 if param not in self.entry.parameterdescs and not param.startswith("#"):
347 self.entry.parameterdescs[param] = self.undescribed
348
349 if "." not in param:
350 if decl_type == 'function':
351 dname = f"{decl_type} parameter"
352 else:
353 dname = f"{decl_type} member"
354
355 self.emit_msg(ln,
356 f"{dname} '{param}' not described in '{declaration_name}'")
357
358 # Strip spaces from param so that it is one continuous string on
359 # parameterlist. This fixes a problem where check_sections()
360 # cannot find a parameter like "addr[6 + 2]" because it actually
361 # appears as "addr[6", "+", "2]" on the parameter list.
362 # However, it's better to maintain the param string unchanged for
363 # output, so just weaken the string compare in check_sections()
364 # to ignore "[blah" in a parameter string.
365
366 self.entry.parameterlist.append(param)
367 org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
368 self.entry.parametertypes[param] = org_arg
369
370 def save_struct_actual(self, actual):
371 """
372 Strip all spaces from the actual param so that it looks like
373 one string item.
374 """
375
376 actual = KernRe(r'\s*').sub("", actual, count=1)
377
378 self.entry.struct_actual += actual + " "
379
380 def create_parameter_list(self, ln, decl_type, args,
381 splitter, declaration_name):
382 """
383 Creates a list of parameters, storing them at self.entry.
384 """
385
386 # temporarily replace all commas inside function pointer definition
387 arg_expr = KernRe(r'(\([^\),]+),')
388 while arg_expr.search(args):
389 args = arg_expr.sub(r"\1#", args)
390
391 for arg in args.split(splitter):
392 # Strip comments
393 arg = KernRe(r'\/\*.*\*\/').sub('', arg)
394
395 # Ignore argument attributes
396 arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
397
398 # Strip leading/trailing spaces
399 arg = arg.strip()
400 arg = KernRe(r'\s+').sub(' ', arg, count=1)
401
402 if arg.startswith('#'):
403 # Treat preprocessor directive as a typeless variable just to fill
404 # corresponding data structures "correctly". Catch it later in
405 # output_* subs.
406
407 # Treat preprocessor directive as a typeless variable
408 self.push_parameter(ln, decl_type, arg, "",
409 "", declaration_name)
410
411 elif KernRe(r'\(.+\)\s*\(').search(arg):
412 # Pointer-to-function
413
414 arg = arg.replace('#', ',')
415
416 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)')
417 if r.match(arg):
418 param = r.group(1)
419 else:
420 self.emit_msg(ln, f"Invalid param: {arg}")
421 param = arg
422
423 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
424 self.save_struct_actual(param)
425 self.push_parameter(ln, decl_type, param, dtype,
426 arg, declaration_name)
427
428 elif KernRe(r'\(.+\)\s*\[').search(arg):
429 # Array-of-pointers
430
431 arg = arg.replace('#', ',')
432 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)')
433 if r.match(arg):
434 param = r.group(1)
435 else:
436 self.emit_msg(ln, f"Invalid param: {arg}")
437 param = arg
438
439 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
440
441 self.save_struct_actual(param)
442 self.push_parameter(ln, decl_type, param, dtype,
443 arg, declaration_name)
444
445 elif arg:
446 arg = KernRe(r'\s*:\s*').sub(":", arg)
447 arg = KernRe(r'\s*\[').sub('[', arg)
448
449 args = KernRe(r'\s*,\s*').split(arg)
450 if args[0] and '*' in args[0]:
451 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
452
453 first_arg = []
454 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$')
455 if args[0] and r.match(args[0]):
456 args.pop(0)
457 first_arg.extend(r.group(1))
458 first_arg.append(r.group(2))
459 else:
460 first_arg = KernRe(r'\s+').split(args.pop(0))
461
462 args.insert(0, first_arg.pop())
463 dtype = ' '.join(first_arg)
464
465 for param in args:
466 if KernRe(r'^(\*+)\s*(.*)').match(param):
467 r = KernRe(r'^(\*+)\s*(.*)')
468 if not r.match(param):
469 self.emit_msg(ln, f"Invalid param: {param}")
470 continue
471
472 param = r.group(1)
473
474 self.save_struct_actual(r.group(2))
475 self.push_parameter(ln, decl_type, r.group(2),
476 f"{dtype} {r.group(1)}",
477 arg, declaration_name)
478
479 elif KernRe(r'(.*?):(\w+)').search(param):
480 r = KernRe(r'(.*?):(\w+)')
481 if not r.match(param):
482 self.emit_msg(ln, f"Invalid param: {param}")
483 continue
484
485 if dtype != "": # Skip unnamed bit-fields
486 self.save_struct_actual(r.group(1))
487 self.push_parameter(ln, decl_type, r.group(1),
488 f"{dtype}:{r.group(2)}",
489 arg, declaration_name)
490 else:
491 self.save_struct_actual(param)
492 self.push_parameter(ln, decl_type, param, dtype,
493 arg, declaration_name)
494
495 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck):
496 """
497 Check for errors inside sections, emitting warnings if not found
498 parameters are described.
499 """
500
501 sects = sectcheck.split()
502 prms = prmscheck.split()
503 err = False
504
505 for sx in range(len(sects)): # pylint: disable=C0200
506 err = True
507 for px in range(len(prms)): # pylint: disable=C0200
508 prm_clean = prms[px]
509 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean)
510 prm_clean = attribute.sub('', prm_clean)
511
512 # ignore array size in a parameter string;
513 # however, the original param string may contain
514 # spaces, e.g.: addr[6 + 2]
515 # and this appears in @prms as "addr[6" since the
516 # parameter list is split at spaces;
517 # hence just ignore "[..." for the sections check;
518 prm_clean = KernRe(r'\[.*').sub('', prm_clean)
519
520 if prm_clean == sects[sx]:
521 err = False
522 break
523
524 if err:
525 if decl_type == 'function':
526 dname = f"{decl_type} parameter"
527 else:
528 dname = f"{decl_type} member"
529
530 self.emit_msg(ln,
531 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'")
532
533 def check_return_section(self, ln, declaration_name, return_type):
534 """
535 If the function doesn't return void, warns about the lack of a
536 return description.
537 """
538
539 if not self.config.wreturn:
540 return
541
542 # Ignore an empty return type (It's a macro)
543 # Ignore functions with a "void" return type (but not "void *")
544 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
545 return
546
547 if not self.entry.sections.get("Return", None):
548 self.emit_msg(ln,
549 f"No description found for return value of '{declaration_name}'")
550
551 def dump_struct(self, ln, proto):
552 """
553 Store an entry for an struct or union
554 """
555
556 type_pattern = r'(struct|union)'
557
558 qualifiers = [
559 "__attribute__",
560 "__packed",
561 "__aligned",
562 "____cacheline_aligned_in_smp",
563 "____cacheline_aligned",
564 ]
565
566 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
567 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')
568
569 # Extract struct/union definition
570 members = None
571 declaration_name = None
572 decl_type = None
573
574 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
575 if r.search(proto):
576 decl_type = r.group(1)
577 declaration_name = r.group(2)
578 members = r.group(3)
579 else:
580 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
581
582 if r.search(proto):
583 decl_type = r.group(1)
584 declaration_name = r.group(3)
585 members = r.group(2)
586
587 if not members:
588 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
589 return
590
591 if self.entry.identifier != declaration_name:
592 self.emit_msg(ln,
593 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
594 return
595
596 args_pattern = r'([^,)]+)'
597
598 sub_prefixes = [
599 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
600 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),
601
602 # Strip comments
603 (KernRe(r'\/\*.*?\*\/', re.S), ''),
604
605 # Strip attributes
606 (attribute, ' '),
607 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
608 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
609 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
610 (KernRe(r'\s*__packed\s*', re.S), ' '),
611 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
612 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
613 (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
614
615 # Unwrap struct_group macros based on this definition:
616 # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
617 # which has variants like: struct_group(NAME, MEMBERS...)
618 # Only MEMBERS arguments require documentation.
619 #
620 # Parsing them happens on two steps:
621 #
622 # 1. drop struct group arguments that aren't at MEMBERS,
623 # storing them as STRUCT_GROUP(MEMBERS)
624 #
625 # 2. remove STRUCT_GROUP() ancillary macro.
626 #
627 # The original logic used to remove STRUCT_GROUP() using an
628 # advanced regex:
629 #
630 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
631 #
632 # with two patterns that are incompatible with
633 # Python re module, as it has:
634 #
635 # - a recursive pattern: (?1)
636 # - an atomic grouping: (?>...)
637 #
638 # I tried a simpler version: but it didn't work either:
639 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
640 #
641 # As it doesn't properly match the end parenthesis on some cases.
642 #
643 # So, a better solution was crafted: there's now a NestedMatch
644 # class that ensures that delimiters after a search are properly
645 # matched. So, the implementation to drop STRUCT_GROUP() will be
646 # handled in separate.
647
648 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
649 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
650 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
651 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
652
653 # Replace macros
654 #
655 # TODO: use NestedMatch for FOO($1, $2, ...) matches
656 #
657 # it is better to also move those to the NestedMatch logic,
658 # to ensure that parenthesis will be properly matched.
659
660 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
661 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
662 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
663 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
664 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
665 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
666 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
667 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
668 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
669 ]
670
671 # Regexes here are guaranteed to have the end limiter matching
672 # the start delimiter. Yet, right now, only one replace group
673 # is allowed.
674
675 sub_nested_prefixes = [
676 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
677 ]
678
679 for search, sub in sub_prefixes:
680 members = search.sub(sub, members)
681
682 nested = NestedMatch()
683
684 for search, sub in sub_nested_prefixes:
685 members = nested.sub(search, sub, members)
686
687 # Keeps the original declaration as-is
688 declaration = members
689
690 # Split nested struct/union elements
691 #
692 # This loop was simpler at the original kernel-doc perl version, as
693 # while ($members =~ m/$struct_members/) { ... }
694 # reads 'members' string on each interaction.
695 #
696 # Python behavior is different: it parses 'members' only once,
697 # creating a list of tuples from the first interaction.
698 #
699 # On other words, this won't get nested structs.
700 #
701 # So, we need to have an extra loop on Python to override such
702 # re limitation.
703
704 while True:
705 tuples = struct_members.findall(members)
706 if not tuples:
707 break
708
709 for t in tuples:
710 newmember = ""
711 maintype = t[0]
712 s_ids = t[5]
713 content = t[3]
714
715 oldmember = "".join(t)
716
717 for s_id in s_ids.split(','):
718 s_id = s_id.strip()
719
720 newmember += f"{maintype} {s_id}; "
721 s_id = KernRe(r'[:\[].*').sub('', s_id)
722 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
723
724 for arg in content.split(';'):
725 arg = arg.strip()
726
727 if not arg:
728 continue
729
730 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
731 if r.match(arg):
732 # Pointer-to-function
733 dtype = r.group(1)
734 name = r.group(2)
735 extra = r.group(3)
736
737 if not name:
738 continue
739
740 if not s_id:
741 # Anonymous struct/union
742 newmember += f"{dtype}{name}{extra}; "
743 else:
744 newmember += f"{dtype}{s_id}.{name}{extra}; "
745
746 else:
747 arg = arg.strip()
748 # Handle bitmaps
749 arg = KernRe(r':\s*\d+\s*').sub('', arg)
750
751 # Handle arrays
752 arg = KernRe(r'\[.*\]').sub('', arg)
753
754 # Handle multiple IDs
755 arg = KernRe(r'\s*,\s*').sub(',', arg)
756
757 r = KernRe(r'(.*)\s+([\S+,]+)')
758
759 if r.search(arg):
760 dtype = r.group(1)
761 names = r.group(2)
762 else:
763 newmember += f"{arg}; "
764 continue
765
766 for name in names.split(','):
767 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()
768
769 if not name:
770 continue
771
772 if not s_id:
773 # Anonymous struct/union
774 newmember += f"{dtype} {name}; "
775 else:
776 newmember += f"{dtype} {s_id}.{name}; "
777
778 members = members.replace(oldmember, newmember)
779
780 # Ignore other nested elements, like enums
781 members = re.sub(r'(\{[^\{\}]*\})', '', members)
782
783 self.create_parameter_list(ln, decl_type, members, ';',
784 declaration_name)
785 self.check_sections(ln, declaration_name, decl_type,
786 self.entry.sectcheck, self.entry.struct_actual)
787
788 # Adjust declaration for better display
789 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
790 declaration = KernRe(r'\}\s+;').sub('};', declaration)
791
792 # Better handle inlined enums
793 while True:
794 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
795 if not r.search(declaration):
796 break
797
798 declaration = r.sub(r'\1,\n\2', declaration)
799
800 def_args = declaration.split('\n')
801 level = 1
802 declaration = ""
803 for clause in def_args:
804
805 clause = clause.strip()
806 clause = KernRe(r'\s+').sub(' ', clause, count=1)
807
808 if not clause:
809 continue
810
811 if '}' in clause and level > 1:
812 level -= 1
813
814 if not KernRe(r'^\s*#').match(clause):
815 declaration += "\t" * level
816
817 declaration += "\t" + clause + "\n"
818 if "{" in clause and "}" not in clause:
819 level += 1
820
821 self.output_declaration(decl_type, declaration_name,
822 struct=declaration_name,
823 definition=declaration,
824 parameterlist=self.entry.parameterlist,
825 parameterdescs=self.entry.parameterdescs,
826 parametertypes=self.entry.parametertypes,
827 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
828 sectionlist=self.entry.sectionlist,
829 sections=self.entry.sections,
830 section_start_lines=self.entry.section_start_lines,
831 purpose=self.entry.declaration_purpose)
832
833 def dump_enum(self, ln, proto):
834 """
835 Stores an enum inside self.entries array.
836 """
837
838 # Ignore members marked private
839 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto)
840 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto)
841
842 # Strip comments
843 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto)
844
845 # Strip #define macros inside enums
846 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
847
848 members = None
849 declaration_name = None
850
851 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
852 if r.search(proto):
853 declaration_name = r.group(2)
854 members = r.group(1).rstrip()
855 else:
856 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
857 if r.match(proto):
858 declaration_name = r.group(1)
859 members = r.group(2).rstrip()
860
861 if not members:
862 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
863 return
864
865 if self.entry.identifier != declaration_name:
866 if self.entry.identifier == "":
867 self.emit_msg(ln,
868 f"{proto}: wrong kernel-doc identifier on prototype")
869 else:
870 self.emit_msg(ln,
871 f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead")
872 return
873
874 if not declaration_name:
875 declaration_name = "(anonymous)"
876
877 member_set = set()
878
879 members = KernRe(r'\([^;]*?[\)]').sub('', members)
880
881 for arg in members.split(','):
882 if not arg:
883 continue
884 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
885 self.entry.parameterlist.append(arg)
886 if arg not in self.entry.parameterdescs:
887 self.entry.parameterdescs[arg] = self.undescribed
888 self.emit_msg(ln,
889 f"Enum value '{arg}' not described in enum '{declaration_name}'")
890 member_set.add(arg)
891
892 for k in self.entry.parameterdescs:
893 if k not in member_set:
894 self.emit_msg(ln,
895 f"Excess enum value '%{k}' description in '{declaration_name}'")
896
897 self.output_declaration('enum', declaration_name,
898 enum=declaration_name,
899 parameterlist=self.entry.parameterlist,
900 parameterdescs=self.entry.parameterdescs,
901 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
902 sectionlist=self.entry.sectionlist,
903 sections=self.entry.sections,
904 section_start_lines=self.entry.section_start_lines,
905 purpose=self.entry.declaration_purpose)
906
907 def dump_declaration(self, ln, prototype):
908 """
909 Stores a data declaration inside self.entries array.
910 """
911
912 if self.entry.decl_type == "enum":
913 self.dump_enum(ln, prototype)
914 return
915
916 if self.entry.decl_type == "typedef":
917 self.dump_typedef(ln, prototype)
918 return
919
920 if self.entry.decl_type in ["union", "struct"]:
921 self.dump_struct(ln, prototype)
922 return
923
924 self.output_declaration(self.entry.decl_type, prototype,
925 entry=self.entry)
926
927 def dump_function(self, ln, prototype):
928 """
929 Stores a function of function macro inside self.entries array.
930 """
931
932 func_macro = False
933 return_type = ''
934 decl_type = 'function'
935
936 # Prefixes that would be removed
937 sub_prefixes = [
938 (r"^static +", "", 0),
939 (r"^extern +", "", 0),
940 (r"^asmlinkage +", "", 0),
941 (r"^inline +", "", 0),
942 (r"^__inline__ +", "", 0),
943 (r"^__inline +", "", 0),
944 (r"^__always_inline +", "", 0),
945 (r"^noinline +", "", 0),
946 (r"^__FORTIFY_INLINE +", "", 0),
947 (r"__init +", "", 0),
948 (r"__init_or_module +", "", 0),
949 (r"__deprecated +", "", 0),
950 (r"__flatten +", "", 0),
951 (r"__meminit +", "", 0),
952 (r"__must_check +", "", 0),
953 (r"__weak +", "", 0),
954 (r"__sched +", "", 0),
955 (r"_noprof", "", 0),
956 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
957 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
958 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
959 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
960 (r"__attribute_const__ +", "", 0),
961
962 # It seems that Python support for re.X is broken:
963 # At least for me (Python 3.13), this didn't work
964# (r"""
965# __attribute__\s*\(\(
966# (?:
967# [\w\s]+ # attribute name
968# (?:\([^)]*\))? # attribute arguments
969# \s*,? # optional comma at the end
970# )+
971# \)\)\s+
972# """, "", re.X),
973
974 # So, remove whitespaces and comments from it
975 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
976 ]
977
978 for search, sub, flags in sub_prefixes:
979 prototype = KernRe(search, flags).sub(sub, prototype)
980
981 # Macros are a special case, as they change the prototype format
982 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
983 if new_proto != prototype:
984 is_define_proto = True
985 prototype = new_proto
986 else:
987 is_define_proto = False
988
989 # Yes, this truly is vile. We are looking for:
990 # 1. Return type (may be nothing if we're looking at a macro)
991 # 2. Function name
992 # 3. Function parameters.
993 #
994 # All the while we have to watch out for function pointer parameters
995 # (which IIRC is what the two sections are for), C types (these
996 # regexps don't even start to express all the possibilities), and
997 # so on.
998 #
999 # If you mess with these regexps, it's a good idea to check that
1000 # the following functions' documentation still comes out right:
1001 # - parport_register_device (function pointer parameters)
1002 # - atomic_set (macro)
1003 # - pci_match_device, __copy_to_user (long return type)
1004
1005 name = r'[a-zA-Z0-9_~:]+'
1006 prototype_end1 = r'[^\(]*'
1007 prototype_end2 = r'[^\{]*'
1008 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'
1009
1010 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
1011 # So, this needs to be mapped in Python with (?:...)? or (?:...)+
1012
1013 type1 = r'(?:[\w\s]+)?'
1014 type2 = r'(?:[\w\s]+\*+)+'
1015
1016 found = False
1017
1018 if is_define_proto:
1019 r = KernRe(r'^()(' + name + r')\s+')
1020
1021 if r.search(prototype):
1022 return_type = ''
1023 declaration_name = r.group(2)
1024 func_macro = True
1025
1026 found = True
1027
1028 if not found:
1029 patterns = [
1030 rf'^()({name})\s*{prototype_end}',
1031 rf'^({type1})\s+({name})\s*{prototype_end}',
1032 rf'^({type2})\s*({name})\s*{prototype_end}',
1033 ]
1034
1035 for p in patterns:
1036 r = KernRe(p)
1037
1038 if r.match(prototype):
1039
1040 return_type = r.group(1)
1041 declaration_name = r.group(2)
1042 args = r.group(3)
1043
1044 self.create_parameter_list(ln, decl_type, args, ',',
1045 declaration_name)
1046
1047 found = True
1048 break
1049 if not found:
1050 self.emit_msg(ln,
1051 f"cannot understand function prototype: '{prototype}'")
1052 return
1053
1054 if self.entry.identifier != declaration_name:
1055 self.emit_msg(ln,
1056 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
1057 return
1058
1059 prms = " ".join(self.entry.parameterlist)
1060 self.check_sections(ln, declaration_name, "function",
1061 self.entry.sectcheck, prms)
1062
1063 self.check_return_section(ln, declaration_name, return_type)
1064
1065 if 'typedef' in return_type:
1066 self.output_declaration(decl_type, declaration_name,
1067 function=declaration_name,
1068 typedef=True,
1069 functiontype=return_type,
1070 parameterlist=self.entry.parameterlist,
1071 parameterdescs=self.entry.parameterdescs,
1072 parametertypes=self.entry.parametertypes,
1073 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
1074 sectionlist=self.entry.sectionlist,
1075 sections=self.entry.sections,
1076 section_start_lines=self.entry.section_start_lines,
1077 purpose=self.entry.declaration_purpose,
1078 func_macro=func_macro)
1079 else:
1080 self.output_declaration(decl_type, declaration_name,
1081 function=declaration_name,
1082 typedef=False,
1083 functiontype=return_type,
1084 parameterlist=self.entry.parameterlist,
1085 parameterdescs=self.entry.parameterdescs,
1086 parametertypes=self.entry.parametertypes,
1087 parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
1088 sectionlist=self.entry.sectionlist,
1089 sections=self.entry.sections,
1090 section_start_lines=self.entry.section_start_lines,
1091 purpose=self.entry.declaration_purpose,
1092 func_macro=func_macro)
1093
def dump_typedef(self, ln, proto):
    """
    Stores a typedef inside self.entries array.

    Two shapes are recognized, in this order:

    - function typedefs, written either as ``typedef ret (*name)(args);``
      or ``typedef ret name(args);``. They are output as a 'function'
      declaration with ``typedef=True``, after building the parameter
      list from ``args``;
    - simple typedefs (``typedef type name;``), possibly followed by a
      bracketed or parenthesized suffix. They are output as a 'typedef'
      declaration.

    A message is emitted, and nothing is output, when the typedef name
    differs from the documented identifier or when the prototype matches
    neither shape.

    :param ln: line number reported on emitted messages
    :param proto: text of the typedef prototype
    """

    typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
    typedef_ident = r'\*?\s*(\w\S+)\s*'
    typedef_args = r'\s*\((.*)\);'

    typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
    typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args)

    # Strip comments
    proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto)

    # Parse function typedef prototypes
    for r in [typedef1, typedef2]:
        if not r.match(proto):
            continue

        return_type = r.group(1).strip()
        declaration_name = r.group(2)
        args = r.group(3)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        decl_type = 'function'
        self.create_parameter_list(ln, decl_type, args, ',', declaration_name)

        self.output_declaration(decl_type, declaration_name,
                                function=declaration_name,
                                typedef=True,
                                functiontype=return_type,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    # Handle nested parentheses or brackets.
    #
    # The pattern also consumes the terminating ';'. Put it back on each
    # pass: the simple typedef regex below only matches when the ';' is
    # still there, so dropping it would make every typedef with such a
    # suffix unparseable. Each pass shortens proto, so the loop ends.
    r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$')
    while r.search(proto):
        proto = r.sub(';', proto)

    # Parse simple typedefs
    r = KernRe(r'typedef.*\s+(\w+)\s*;')
    if r.match(proto):
        declaration_name = r.group(1)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        self.output_declaration('typedef', declaration_name,
                                typedef=declaration_name,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    self.emit_msg(ln, "error: Cannot parse typedef!")
1164
@staticmethod
def process_export(function_set, line):
    """
    Collect the symbol named by an EXPORT_SYMBOL* tag found on a line.

    The symbol taken from the first matching export pattern is added to
    function_set; a line matching neither pattern leaves it untouched.

    No class state is needed here, hence the staticmethod decorator.
    """

    # Only one EXPORT_SYMBOL* is accepted per line: placing several of
    # them on the same line would violate Kernel coding style.
    for export_re in (export_symbol, export_symbol_ns):
        if export_re.search(line):
            function_set.add(export_re.group(2))
            return
1185
def process_normal(self, ln, line):
    """
    STATE_NORMAL: wait for the /** marker that opens a comment block.
    """

    if doc_start.match(line):
        # A new entry begins here
        self.reset_state(ln)
        self.entry.in_doc_sect = False

        # The function name is always on the following line
        self.state = state.NAME
1200
def process_name(self, ln, line):
    """
    STATE_NAME: Looking for the "name - description" line

    A "DOC:" line switches the parser to STATE_DOCBLOCK, using the block
    title (or self.section_intro when the title is empty) both as the
    section and as the identifier.

    Otherwise, when the line carries a declaration name, the identifier,
    the declaration type and the optional purpose that follows '-' or ':'
    are stored on self.entry, and the parser moves to STATE_BODY
    (STATE_BODY_MAYBE when a purpose was found). It falls back to
    STATE_NORMAL when the line doesn't look like a kernel-doc declaration
    or when the identifier ends up empty for something other than an enum.

    A line with no identifier at all only produces a message.

    :param ln: line number reported on emitted messages
    :param line: the comment line following the '/**' marker
    """

    if doc_block.search(line):
        self.entry.new_start_line = ln

        # An untitled DOC: block gets the default introduction section
        if not doc_block.group(1):
            self.entry.section = self.section_intro
        else:
            self.entry.section = doc_block.group(1)

        self.entry.identifier = self.entry.section
        self.state = state.DOCBLOCK
        return

    if doc_decl.search(line):
        # First guess: the first word after the comment asterisk. The
        # probes below may replace it with something more precise.
        self.entry.identifier = doc_decl.group(1)
        self.entry.is_kernel_comment = False

        # Building blocks for the regular expressions used below
        decl_start = str(doc_com)       # comment block asterisk
        fn_type = r"(?:\w+\s*\*\s*)?"  # type (for non-functions)
        parenthesis = r"(?:\(\w*\))?"   # optional parenthesis on function
        decl_end = r"(?:[-:].*)"         # end of the name part

        # test for pointer declaration type, foo * bar() - desc
        r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$")
        if r.search(line):
            self.entry.identifier = r.group(1)

        # Test for data declaration
        r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)")
        if r.search(line):
            self.entry.decl_type = r.group(1)
            self.entry.identifier = r.group(2)
            self.entry.is_kernel_comment = True
        else:
            # Look for foo() or static void foo() - description;
            # or misspelt identifier

            r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$")
            r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$")

            # The first pattern that matches wins
            for r in [r1, r2]:
                if r.search(line):
                    self.entry.identifier = r.group(1)
                    self.entry.decl_type = "function"

                    # Drop a "define " keyword captured with the name.
                    # Rebinding r is harmless: the loop ends right after.
                    r = KernRe(r"define\s+")
                    self.entry.identifier = r.sub("", self.entry.identifier)
                    self.entry.is_kernel_comment = True
                    break

        self.entry.identifier = self.entry.identifier.strip(" ")

        self.state = state.BODY

        # if there's no @param blocks need to set up default section here
        self.entry.section = SECTION_DEFAULT
        self.entry.new_start_line = ln + 1

        # Whatever follows the first '-' or ':' is the short description
        r = KernRe("[-:](.*)")
        if r.search(line):
            # strip leading/trailing/multiple spaces
            self.entry.descr = r.group(1).strip(" ")

            r = KernRe(r"\s+")
            self.entry.descr = r.sub(" ", self.entry.descr)
            self.entry.declaration_purpose = self.entry.descr

            # process_body() keeps appending to the purpose while the
            # parser stays in BODY_MAYBE
            self.state = state.BODY_MAYBE
        else:
            self.entry.declaration_purpose = ""

        # None of the declaration probes above matched: give up on this
        # comment and go back to scanning for the next '/**'
        if not self.entry.is_kernel_comment:
            self.emit_msg(ln,
                          f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}")
            self.state = state.NORMAL

        if not self.entry.declaration_purpose and self.config.wshort_desc:
            self.emit_msg(ln,
                          f"missing initial short description on line:\n{line}")

        # An empty identifier is tolerated only for enums
        if not self.entry.identifier and self.entry.decl_type != "enum":
            self.emit_msg(ln,
                          f"wrong kernel-doc identifier on line:\n{line}")
            self.state = state.NORMAL

        if self.config.verbose:
            self.emit_msg(ln,
                          f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                          warning=False)

        return

    # Failed to find an identifier. Emit a warning
    self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
1298
def process_body(self, ln, line):
    """
    STATE_BODY, STATE_BODY_MAYBE and STATE_BODY_WITH_BLANK_LINE: the bulk
    of a kerneldoc comment.

    The line is checked, in this order, against:

    - a section header (``@param:``, ``Return:``, ...): the pending
      section is dumped when it has contents and a new one starts;
    - the end of the comment: the pending section is dumped and the
      parser moves to STATE_PROTO;
    - ordinary comment content: it is appended either to the declaration
      purpose (STATE_BODY_MAYBE) or to the current section contents.

    Anything else is reported as a bad line.

    :param ln: line number reported on emitted messages
    :param line: the comment line to process
    """

    if self.state == state.BODY_WITH_BLANK_LINE:
        # Text showing up after a blank line closes the previous section
        # and goes to the default one
        r = KernRe(r"\s*\*\s?\S")
        if r.match(line):
            self.dump_section()
            self.entry.section = SECTION_DEFAULT
            self.entry.new_start_line = ln
            self.entry.contents = ""

    if doc_sect.search(line):
        self.entry.in_doc_sect = True
        newsection = doc_sect.group(1)

        if newsection.lower() in ["description", "context"]:
            newsection = newsection.title()

        # Special case: @return is a section, not a param description
        if newsection.lower() in ["@return", "@returns",
                                  "return", "returns"]:
            newsection = "Return"

        # Perl kernel-doc has a check here for contents before sections.
        # the logic there is always false, as in_doc_sect variable is
        # always true. So, just don't implement Wcontents_before_sections

        newcontents = doc_sect.group(2)
        if not newcontents:
            newcontents = ""

        if self.entry.contents.strip("\n"):
            self.dump_section()

        self.entry.new_start_line = ln
        self.entry.section = newsection
        self.entry.leading_space = None

        self.entry.contents = newcontents.lstrip()
        if self.entry.contents:
            self.entry.contents += "\n"

        self.state = state.BODY
        return

    if doc_end.search(line):
        self.dump_section()

        # Look for doc_com + <text> + doc_end:
        r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/')
        if r.match(line):
            self.emit_msg(ln, f"suspicious ending line: {line}")

        self.entry.prototype = ""
        self.entry.new_start_line = ln + 1

        self.state = state.PROTO
        return

    if doc_content.search(line):
        cont = doc_content.group(1)

        if cont == "":
            if self.entry.section == self.section_context:
                # A blank line ends the context section
                self.dump_section()

                self.entry.new_start_line = ln
                self.state = state.BODY
            else:
                if self.entry.section != SECTION_DEFAULT:
                    self.state = state.BODY_WITH_BLANK_LINE
                else:
                    self.state = state.BODY

                self.entry.contents += "\n"

        elif self.state == state.BODY_MAYBE:

            # Continued declaration purpose
            self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip()
            self.entry.declaration_purpose += " " + cont

            r = KernRe(r"\s+")
            self.entry.declaration_purpose = r.sub(' ',
                                                   self.entry.declaration_purpose)

        else:
            if self.entry.section.startswith('@') or        \
               self.entry.section == self.section_context:
                # The first content line of the section defines how much
                # indentation gets removed from it
                if self.entry.leading_space is None:
                    r = KernRe(r'^(\s+)')
                    if r.match(cont):
                        self.entry.leading_space = len(r.group(1))
                    else:
                        self.entry.leading_space = 0

                # Double-check if leading spaces are really spaces.
                #
                # The line can be shorter than the recorded indentation
                # (for instance, when it holds nothing but a couple of
                # trailing blanks), so scan a slice instead of indexing:
                # indexing past the end would raise IndexError.
                pos = 0
                for char in cont[:self.entry.leading_space]:
                    if char != " ":
                        break
                    pos += 1

                cont = cont[pos:]

                # NEW LOGIC:
                # In case it is different, update it
                if self.entry.leading_space != pos:
                    self.entry.leading_space = pos

            self.entry.contents += cont + "\n"

        return

    # Unknown line, ignore
    self.emit_msg(ln, f"bad line: {line}")
1417
def process_inline(self, ln, line):
    """
    STATE_INLINE: docbook comments within a prototype.

    Handles, in this order: the ``@name:`` line that opens an inline
    section, the end of the inline comment block and the text lines
    in between.
    """

    expecting_name = self.inline_doc_state == state.INLINE_NAME

    if expecting_name and doc_inline_sect.search(line):
        self.entry.section = doc_inline_sect.group(1)
        self.entry.new_start_line = ln

        first_text = doc_inline_sect.group(2).lstrip()
        self.entry.contents = first_text
        if first_text != "":
            self.entry.contents += "\n"

        self.inline_doc_state = state.INLINE_TEXT
        return

    # Documentation block end */
    if doc_inline_end.search(line):
        if self.entry.contents not in ["", "\n"]:
            self.dump_section()

        self.state = state.PROTO
        self.inline_doc_state = state.INLINE_NA
        return

    if not doc_content.search(line):
        return

    if self.inline_doc_state == state.INLINE_TEXT:
        self.entry.contents += doc_content.group(1) + "\n"

        # Contents made only of blanks and newlines count as empty
        if not self.entry.contents.strip(" ").rstrip("\n"):
            self.entry.contents = ""

    elif expecting_name:
        # Text showed up where the "@name:" line was expected
        self.emit_msg(ln,
                      f"Incorrect use of kernel-doc format: {line}")

        self.inline_doc_state = state.INLINE_ERROR
1453
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Convert a SYSCALL_DEFINE* macro into a plain "long sys_name(...)"
    prototype and return it.
    """

    # Strip newlines/CR's
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 declares a syscall without arguments
    no_args = 'SYSCALL_DEFINE0' in proto

    # Replace SYSCALL_DEFINE with correct return type & function name
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    if KernRe(r'long\s+(sys_.*?),').search(proto):
        # The first comma separates the name from the arguments
        proto = KernRe(',').sub('(', proto, count=1)
    elif no_args:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    # Nothing else to do when there are no arguments
    if no_args:
        return proto

    # Now blank all of the odd-numbered commas in the proto
    # so that argument types & names don't have a comma between them
    seen = 0
    chars = []
    for char in proto:
        if char == ',':
            seen += 1
            if seen % 2 == 1:
                char = ' '
        chars.append(char)

    return "".join(chars)
1492
def tracepoint_munge(self, ln, proto):
    """
    Convert a tracepoint definition into a "trace_<name>(<args>)"
    prototype and return it.

    When the name or the TP_PROTO() arguments can't be found, a message
    is emitted and the prototype is returned unchanged.
    """

    # Pairs of (pattern, group holding the tracepoint name). All of them
    # are tried, in this order; the last one that matches wins.
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )

    name = None
    for pattern, group in name_patterns:
        name_re = KernRe(pattern)
        if name_re.search(proto):
            name = name_re.group(group)

    if name:
        name = name.lstrip()

    args_re = KernRe(r'TP_PROTO\((.*?)\)')
    args = args_re.group(1) if args_re.search(proto) else None

    if name and args:
        proto = f"static inline void trace_{name}({args})"
        self.entry.identifier = f"trace_{self.entry.identifier}"
    else:
        self.emit_msg(ln,
                      f"Unrecognized tracepoint format:\n{proto}\n")

    return proto
1529
def process_proto_function(self, ln, line):
    """
    Ancillary routine to process a function prototype.

    The text before any '{' is accumulated on self.entry.prototype. Once
    the line has a '{' or a ';', or is a #define, the prototype is
    cleaned up and handed to self.dump_function(), and the parser state
    is reset.

    :param ln: line number passed on to the dump and reset routines
    :param line: the source line to process
    """

    # strip C99-style comments to end of line
    #
    # Regex flags are passed by keyword, like everywhere else in this
    # file, so they can't end up bound to another positional parameter.
    line = KernRe(r"\/\/.*$", flags=re.S).sub('', line)

    # Evaluated once: line doesn't change anymore from here on
    is_define = bool(KernRe(r'\s*#\s*define').match(line))

    if is_define:
        # A macro definition replaces whatever was accumulated so far
        self.entry.prototype = line
    elif not line.startswith('#'):
        # Other directives like #ifdef/#ifndef/#endif/... are dropped;
        # anything else contributes the text placed before a '{'
        r = KernRe(r'([^\{]*)')
        if r.match(line):
            self.entry.prototype += r.group(1) + " "

    # The prototype is complete only at a '{', a ';' or a #define
    if not ('{' in line or ';' in line or is_define):
        return

    # strip comments
    r = KernRe(r'/\*.*?\*/')
    self.entry.prototype = r.sub('', self.entry.prototype)

    # strip newlines/cr's
    r = KernRe(r'[\r\n]+')
    self.entry.prototype = r.sub(' ', self.entry.prototype)

    # strip leading spaces
    r = KernRe(r'^\s+')
    self.entry.prototype = r.sub('', self.entry.prototype)

    # Handle self.entry.prototypes for function pointers like:
    #       int (*pcs_config)(struct foo)

    r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
    self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)

    if 'SYSCALL_DEFINE' in self.entry.prototype:
        self.entry.prototype = self.syscall_munge(ln,
                                                  self.entry.prototype)

    r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
    if r.search(self.entry.prototype):
        self.entry.prototype = self.tracepoint_munge(ln,
                                                     self.entry.prototype)

    self.dump_function(ln, self.entry.prototype)
    self.reset_state(ln)
1577
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process a type.

    The line is consumed in pieces ending at '{', '}' or ';'. Each piece
    is appended to self.entry.prototype while the nesting depth of the
    braces is tracked on self.entry.brcount. When a ';' shows up outside
    any brace, the prototype is handed to self.dump_declaration() and
    the parser state is reset.

    :param ln: line number passed on to the dump and reset routines
    :param line: the source line to process
    """

    # Regex flags are passed by keyword, like everywhere else in this
    # file, so they can't end up bound to another positional parameter.

    # Strip newlines/cr's.
    line = KernRe(r'[\r\n]+', flags=re.S).sub(' ', line)

    # Strip leading spaces
    line = KernRe(r'^\s+', flags=re.S).sub('', line)

    # Strip trailing spaces
    line = KernRe(r'\s+$', flags=re.S).sub('', line)

    # Strip C99-style comments to the end of the line
    line = KernRe(r"\/\/.*$", flags=re.S).sub('', line)

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"

    # group 1: text before the delimiter; group 2: the delimiter itself;
    # group 3: whatever is left on the line
    r = KernRe(r'([^\{\};]*)([\{\};])(.*)')
    while True:
        if not r.search(line):
            # No delimiter left: keep the remaining text and wait for
            # the next line
            self.entry.prototype += line
            break

        if self.entry.prototype:
            self.entry.prototype += " "
        self.entry.prototype += r.group(1) + r.group(2)

        self.entry.brcount += r.group(2).count('{')
        self.entry.brcount -= r.group(2).count('}')

        # Unbalanced closing braces never make the depth negative
        self.entry.brcount = max(self.entry.brcount, 0)

        if r.group(2) == ';' and self.entry.brcount == 0:
            self.dump_declaration(ln, self.entry.prototype)
            self.reset_state(ln)
            break

        line = r.group(3)
1618
def process_proto(self, ln, line):
    """
    STATE_PROTO: reading a function/whatever prototype.

    One-line inline comments are stored right away, the start of a
    multi-line inline comment moves the parser to STATE_INLINE, and any
    other line is handed to the function or type prototype routines.
    """

    if doc_inline_oneline.search(line):
        self.entry.section = doc_inline_oneline.group(1)
        self.entry.contents = doc_inline_oneline.group(2)

        # An empty description is not worth a section
        if self.entry.contents != "":
            self.entry.contents += "\n"
            self.dump_section(start_new=False)
        return

    if doc_inline_start.search(line):
        self.state = state.INLINE
        self.inline_doc_state = state.INLINE_NAME
        return

    if self.entry.decl_type == 'function':
        self.process_proto_function(ln, line)
        return

    self.process_proto_type(ln, line)
1639
def process_docblock(self, ln, line):
    """
    STATE_DOCBLOCK: within a DOC: block.

    Content lines are accumulated until the end of the comment, when
    the block is output as a "doc" declaration and the state is reset.
    """

    if doc_end.search(line):
        self.dump_section()

        entry = self.entry
        self.output_declaration("doc", entry.identifier,
                                sectionlist=entry.sectionlist,
                                sections=entry.sections,
                                section_start_lines=entry.section_start_lines)
        self.reset_state(ln)
        return

    if doc_content.search(line):
        self.entry.contents += doc_content.group(1) + "\n"
1653
def parse_export(self):
    """
    Scan a single Kernel source file for EXPORT_SYMBOL* macros.

    Returns the set filled by self.process_export(), or None when the
    file can't be read.
    """

    symbols = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as source:
            for text in source:
                self.process_export(symbols, text)
    except OSError:
        # IOError is just another name for OSError
        return None

    return symbols
1672
def parse_kdoc(self):
    """
    Open a C source file and process it line by line.

    A state machine drives the parsing: each line goes to the process
    function that matches the current state, and that function may
    change the state as needed.

    Export symbols are collected during the same pass.

    Returns a tuple with the set of exported symbols and self.entries.
    """

    joining = False
    pending = ""
    pending_ln = None
    export_table = set()

    # State handlers, checked in this order
    handlers = (
        ((state.NORMAL,), self.process_normal),
        ((state.NAME,), self.process_name),
        ((state.BODY, state.BODY_MAYBE, state.BODY_WITH_BLANK_LINE),
         self.process_body),
        ((state.INLINE,), self.process_inline),   # scanning for inline parameters
        ((state.PROTO,), self.process_proto),
        ((state.DOCBLOCK,), self.process_docblock),
    )

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, line in enumerate(fp):

                line = line.expandtabs().strip("\n")

                # Group continuation lines on prototypes
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        pending += line.rstrip("\\")
                        joining = True

                        # Remember where the continued line started
                        if not pending_ln:
                            pending_ln = ln

                        continue

                    if joining:
                        ln, line = pending_ln, pending + line
                        joining, pending, pending_ln = False, "", None

                self.config.log.debug("%d %s%s: %s",
                                      ln, state.name[self.state],
                                      state.inline_name[self.inline_doc_state],
                                      line)

                # This is an optimization over the original script.
                # There, when export_file was used for the same file,
                # it was read twice. Here, we use the already-existing
                # loop to parse exported symbols as well.
                #
                # TODO: It should be noticed that not all states are
                # needed here. On a future cleanup, process export only
                # at the states that aren't handling comment markups.
                self.process_export(export_table, line)

                # Hand this line to the appropriate state handler
                for states, handler in handlers:
                    if self.state in states:
                        handler(ln, line)
                        break
    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries