Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# xxpylint: disable=R0903
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Convert ABI what into regular expressions
8"""
9
10import re
11import sys
12
13from pprint import pformat
14
15from abi.abi_parser import AbiParser
16from abi.helpers import AbiDebug
17
class AbiRegex(AbiParser):
    """
    Extend AbiParser to search ABI nodes with regular expressions.

    parse_abi() converts every "What" entry starting with /sys into a
    regular expression, stores the resulting strings under the "regex" key
    of the corresponding self.data entry, and files the compiled
    expressions into self.regex_group so that get_regexes() only has to
    return a small candidate list for a given devnode.
    """

    # Escape only ASCII visible characters
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    # Name of the catch-all subgroup used when no better key can be derived
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # They are applied in order, so the sequence below matters: several
    # rules park characters in \xf4-\xf7/\xff placeholders so that later
    # rules (wildcards, symbol escaping) do not touch them, and the
    # placeholders are recovered further down.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        # NOTE(review): this yields the character class [\d+] and ignores
        # the captured number - confirm this is the intended result.
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The group references must be spelled \\1 and \\2 here: inside a
        # non-raw string, "\1" and "\2" are octal escapes for chr(1) and
        # chr(2), which would replace the range limits by control
        # characters instead of copying the matched digits.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        # (only the first and the last alternatives are kept)
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        #   What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols, then undo the escaping for the
        # characters that were deliberately produced by the rules above
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        # NOTE(review): the pattern is itself a regex, so everything that
        # follows "sqrt" is replaced by the literal text "(.*)".
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        # NOTE(review): the run is replaced by an empty string rather than
        # by a single ".*" - confirm this is intended.
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]
    re_has_num = re.compile(r"\\d")

    # Symbol name after escape_chars that are considered a devnode basename
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    # List of popular group names to be skipped to minimize regex group size
    # Use AbiDebug.SUBGROUP_SIZE to detect those
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Store a regular expression inside a search subgroup.

        As ABI may have thousands of symbols, testing a devnode against
        every regular expression is at least O(n^2), and wildcards make it
        substantially worse. To keep searches confined to a small number
        of expressions, each regex is filed under a key that can be
        obtained both from the regex and from a real sysfs symbol: the
        last path component that is a plain symbol name (no wildcards)
        and is not listed in self.skip_names.

        If no such component exists, the regex goes to the catch-all
        group (self.leave_others).

        Arguments:
            what: original "What" string (used for messages only)
            new:  regular expression string derived from ``what``

        Invalid regular expressions are reported with a warning and
        ignored.
        """

        for component in reversed(new.split("/")):
            if not component or component in self.skip_names:
                continue
            if self.re_symbol_name.match(component):
                # get_regexes() looks groups up using the components of a
                # real devnode name, which carry no escape characters.
                # A matching component only has backslashes as escapes, so
                # dropping them gives the name as seen on sysfs.
                search_group = component.replace("\\", "")
                break
        else:
            # No usable component: never use a leftover loop value as key
            search_group = self.leave_others

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # Compile first, so a bad regex does not leave an empty group behind.
        # re.error is used instead of re.PatternError, as the latter only
        # exists on Python >= 3.13 (where both names are the same class).
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        Groups are collected from the last path component to the first
        one, followed by the catch-all group (self.leave_others).
        """

        candidates = list(reversed(what.split("/")))
        candidates.append(self.leave_others)

        re_list = []
        for search_group in candidates:
            if search_group in self.regex_group:
                re_list.extend(self.regex_group[search_group])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to handle the optional ``search_string``
        keyword argument.

        ``search_string`` is removed from kwargs before the remaining
        arguments are forwarded to AbiParser. When it is non-empty, it is
        also compiled into self.re_string.

        Raises:
            ValueError: if ``search_string`` is not a valid regular
                        expression.
        """

        self.regex_group = None
        self.search_string = kwargs.pop("search_string", None)
        self.re_string = None

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def _what_to_regex(self, what):
        """Apply every self.re_whats rule, in order, to a What string."""

        new = what
        for r, s in self.re_whats:
            try:
                new = r.sub(s, new)
            except re.error as e:
                # Help debugging troubles with new regexes
                raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

        return new

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI via AbiParser.parse_abi() and then convert the What
        fields that start with /sys into regular expressions.

        For each self.data entry whose type is not "File", the converted
        strings are stored under its "regex" key, and the compiled
        versions are grouped inside self.regex_group by regex_append().
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda item: item[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = self._what_to_regex(what)
                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            # Largest subgroups first: those are candidates for skip_names
            biggest_keys = sorted(self.regex_group,
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)