Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0

"""
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit that the translation commit is based on
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be resolved.

The usage is as follows:
- ./scripts/checktransupdate.py -l zh_CN
This will print all the files that need to be updated or translated in the zh_CN locale.
- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
This will only print the status of the specified file.

The output is something like:
Documentation/dev-tools/kfence.rst
No translation in the locale of zh_CN

Documentation/translations/zh_CN/dev-tools/testing-overview.rst
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
1 commits needs resolving in total
"""
25
import logging
import os
import re
import shlex
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
32
33
def get_origin_path(file_path):
    """Get the origin path from the translation path.

    The ``translations`` component and the component right after it (the
    locale directory) are dropped, e.g.
    ``Documentation/translations/zh_CN/dev-tools/kfence.rst`` becomes
    ``Documentation/dev-tools/kfence.rst``.

    Raises ValueError if ``file_path`` has no ``translations`` component.
    """
    paths = file_path.split("/")
    tidx = paths.index("translations")
    # keep everything before "translations" and everything after the locale
    opaths = paths[:tidx] + paths[tidx + 2 :]
    return "/".join(opaths)
41
42
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` starting at ``commit`` and restricted to ``file_path``.

    Returns a dict with the keys ``hash``, ``author_date`` and ``commit_date``
    (timezone-aware datetimes) and ``message`` (list of message lines), or
    None when git printed no usable output (e.g. no such commit or file).
    """
    # quote both arguments so unusual paths or revisions cannot be
    # re-interpreted by the shell that os.popen() spawns
    command = (
        "git log --pretty=format:%H%n%aD%n%cD%n%n%B "
        f"{shlex.quote(commit)} -1 -- {shlex.quote(file_path)}"
    )
    logging.debug(command)
    # the context manager closes the pipe and reaps the child process
    with os.popen(command) as pipe:
        result = pipe.read().split("\n")
    # hash, author date and commit date are needed at the very least
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        # result[3] is the blank separator line emitted by the format
        "message": result[4:],
    }
61
62
def get_origin_from_trans(origin_path, t_from_head):
    """Get the latest origin commit from the translation commit.

    Starts from the newest commit of ``origin_path`` reachable from the
    translation commit and walks back through the history of that file
    until a commit is found whose author date is not later than the author
    date of the translation commit.

    Returns the commit dict, or None if no such commit exists.
    """
    o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
    while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
        # too new: continue the search from the parent of this commit
        o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t
71
72
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Get the latest origin commit from the formatted translation commit:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    The first message line matching either form wins. Returns the commit
    dict for that hash, or None when no line matches or the hash cannot be
    resolved for ``origin_path``.
    """
    # capture group for a 12-character abbreviated commit hash (hex digits)
    commit_hash = r'([0-9a-f]{12})'
    # pattern 1: contains "update to commit HASH"
    pat_update_to = re.compile(rf'update to commit {commit_hash}')
    # pattern 2: contains "Update the translation through commit HASH"
    pat_update_translation = re.compile(
        rf'Update the translation through commit {commit_hash}'
    )

    origin_commit_hash = None
    for line in t_from_head["message"]:
        match = pat_update_to.search(line) or pat_update_translation.search(line)
        if match:
            origin_commit_hash = match.group(1)
            break
    if origin_commit_hash is None:
        return None
    o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t
103
104
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Returns the list of commit hashes printed by ``git log`` for the
    symmetric range ``commit1...commit2`` restricted to ``opath``; the list
    is empty when git prints nothing.
    """
    # quote the arguments so they reach git unchanged through the shell
    revision_range = shlex.quote(f"{commit1}...{commit2}")
    command = f"git log --pretty=format:%H {revision_range} -- {shlex.quote(opath)}"
    logging.debug(command)
    # the context manager closes the pipe and reaps the child process
    with os.popen(command) as pipe:
        output = pipe.read()
    # filter out empty lines
    return [line for line in output.split("\n") if line != ""]
114
115
def pretty_output(commit):
    """Return ``commit`` formatted as ``<short hash> ("<subject>")``.

    The string is whatever ``git log -1`` prints for the commit; it is empty
    when git prints nothing (e.g. unknown commit).
    """
    # quote the revision so it reaches git unchanged through the shell
    command = f"git log --pretty='format:%h (\"%s\")' -1 {shlex.quote(commit)}"
    logging.debug(command)
    # the context manager closes the pipe and reaps the child process
    with os.popen(command) as pipe:
        return pipe.read()
122
123
def valid_commit(commit):
    """Check if the commit is valid or not.

    A commit is treated as invalid when its pretty-printed form contains
    "Merge tag"; such commits are not counted by the caller.
    """
    msg = pretty_output(commit)
    return "Merge tag" not in msg
128
def check_per_file(file_path):
    """Check the translation status for the specified file.

    ``file_path`` is the path of a translated document. The function logs
    the origin commits that are newer than the origin commit the translation
    is based on, or an error when the required commits cannot be determined.
    """
    opath = get_origin_path(file_path)

    if not os.path.isfile(opath):
        # pass the path as a logging argument so it actually appears
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
166
167
def valid_locales(locale):
    """Check if the locale is valid or not.

    A locale is valid when ``Documentation/translations/<locale>`` exists as
    a directory under the parent of this script's directory.

    Returns the locale unchanged; raises ArgumentTypeError otherwise.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string so the offending locale is named in the error
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
175
176
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """List all files with the specified suffix in the folder and its subfolders.

    Directories whose base name is in ``exclude_folders`` are not entered.
    A file is included when its path ends with ``include_suffix``.

    Returns the list of matching paths, each built by joining onto ``folder``.
    """
    files = []
    stack = [folder]

    while stack:
        pwd = stack.pop()
        # filter out the exclude folders
        if os.path.basename(pwd) in exclude_folders:
            continue
        # list all files and folders
        for item in os.listdir(pwd):
            ab_item = os.path.join(pwd, item)
            if os.path.isdir(ab_item):
                # visit sub-directories later
                stack.append(ab_item)
            elif ab_item.endswith(include_suffix):
                files.append(ab_item)

    return files
197
198
class DmesgFormatter(logging.Formatter):
    """Custom dmesg logging formatter.

    Renders each record as ``[<seconds>.<microseconds>] <message>``.
    """

    def format(self, record):
        # use the record's creation time so that every handler prints the
        # same timestamp for the same record
        timestamp = record.created
        formatted_time = f"[{timestamp:>10.6f}]"
        log_message = f"{formatted_time} {record.getMessage()}"
        return log_message
206
207
def config_logging(log_level, log_file="checktransupdate.log"):
    """Configure logging based on the log level.

    Sets the level of the root logger and attaches two handlers to it, a
    console handler and a file handler writing to ``log_file``; both use
    DmesgFormatter and the same level.
    """
    # set up the root logger
    logger = logging.getLogger()
    logger.setLevel(log_level)

    # Create console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)

    # Create file handler; commit subjects may contain non-ASCII text, so
    # do not depend on the locale's default encoding
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(log_level)

    # Create formatter and add it to the handlers
    formatter = DmesgFormatter()
    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    # Add the handlers to the logger
    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
230
231
def main():
    """Main function of the script.

    Parses the command line, configures logging, collects the translated
    files to check (all translations of the chosen locale when no file is
    given) and reports the status of each of them.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_path = os.path.join(linux_path, "Documentation")
        official_files = list_files_with_excluding_folders(
            doc_path, ["translations", "output"], ".rst"
        )

        for file in official_files:
            # Build the translation path relative to the Documentation
            # directory itself, so an ancestor directory that happens to be
            # named "Documentation" cannot be mistaken for it.
            rel_path = os.path.relpath(file, doc_path)
            new_file = os.path.join(doc_path, "translations", args.locale, rel_path)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # make every path relative to the linux root directory
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
304
305
# run the checker only when executed as a script, not when imported
if __name__ == "__main__":
    main()