this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3"""
4Generate a report of C-API completeness for Pyro.
5"""
6
7import argparse
8import functools
9import itertools
10import os
11import pickle
12import re
13import subprocess
14import sys
15from pathlib import Path
16
17from capi_util import cmd_output
18
19
# Symbols that are never reported as C-API uses. process_modules() extends
# this set at runtime with everything defined by cpython's Modules directory.
CAPI_BLACKLIST = {
    # Non-opaque PyTypeObject is unsupported.
    "PyStructSequence_InitType2",
    # Constants.
    "_PyLong_One",
    "_PyLong_Zero",
    # Expansions of Py_False, Py_None, Py_NotImplemented and Py_True.
    "_Py_FalseStruct",
    "_Py_NoneStruct",
    "_Py_NotImplementedStruct",
    "_Py_TrueStruct",
}

# Name of the fbcode platform directory used when matching tp2 paths.
FBCODE_PLATFORM = "platform009"
31
32
# Walk dirname recursively and yield the path of every .o file that lives in
# a directory whose path contains "Modules", i.e. that looks like it was
# built from a cpython Module.
def module_object_files(dirname):
    for folder, _subdirs, entries in os.walk(dirname):
        # The directory test does not depend on the file, so do it once.
        if "Modules" not in folder:
            continue
        yield from (
            os.path.join(folder, entry) for entry in entries if entry.endswith(".o")
        )
40
41
# Matches an nm output line for a symbol of type "T" whose name starts with
# Py or _Py; group 1 is the symbol name.
CAPI_SYMBOL_RE = re.compile(r"\w+\sT (_?Py.+)$")


# Run `nm --defined-only` over the given object files and yield the name of
# every externally-visible text-section symbol beginning with Py or _Py.
def find_defined(obj_files):
    nm_lines = cmd_output("nm", "--defined-only", *obj_files).split("\n")
    for found in map(CAPI_SYMBOL_RE.match, nm_lines):
        if found:
            yield found[1]
54
55
# Decide whether name counts as a C-API function: it must be one of the
# symbols recorded in CAPI_DEFS and must not be excluded by CAPI_BLACKLIST
# (which includes everything defined by a C module).
def is_capi(name):
    if name not in CAPI_DEFS:
        return False
    return name not in CAPI_BLACKLIST
60
61
# One use of a C-API function, along with the filename it's used by (usually a
# .o or .so file).
class FunctionUse:
    """A (file, C-API function name) pair.

    Two instances are equal, and hash alike, when both their filename and
    their name are equal, so a set of FunctionUse objects de-duplicates
    repeated uses of one function by one file.

    Args:
        filename: Path of the object file using the function. Unless
            raw_filename is true it is shortened by _process_filename().
        name: Name of the C-API function.
        raw_filename: Store filename exactly as given.

    Raises:
        ValueError: raw_filename is false and filename matches none of the
            known path layouts.
    """

    # Each pattern recognises one known path layout; see _process_filename()
    # for how the captured groups are combined.
    MODULES_RE = re.compile(r"Modules/(.+\.o)$")
    TP2_RE = re.compile(fr"/([^/]+)/(?:[^/]+)/{FBCODE_PLATFORM}/.+/([^/]+\.so)$")
    INSTAGRAM_RE = re.compile(r"/((?:distillery|site-packages|lib-dynload)/.+\.so)$")
    UWSGI_RE = re.compile(r"uWSGI/master/src/build-.+/([^/]+\.o)")

    def __init__(self, filename, name, raw_filename=False):
        self.filename = filename if raw_filename else self._process_filename(filename)
        self.name = name

    @classmethod
    def _process_filename(cls, filename):
        """Return the short label for filename.

        The patterns are tried in a fixed order and the first match wins:
        cpython Modules objects, tp2 shared objects, Instagram shared
        objects, then uWSGI objects.
        """
        match = cls.MODULES_RE.search(filename)
        if match:
            return f"cpython/{match[1]}"
        match = cls.TP2_RE.search(filename)
        if match:
            # Directory two levels above the platform directory, plus the
            # .so file name.
            return f"{match[1]}/{match[2]}"
        match = cls.INSTAGRAM_RE.search(filename)
        if match:
            return match[1]
        match = cls.UWSGI_RE.search(filename)
        if match:
            return f"uwsgi/{match[1]}"
        raise ValueError(f"Unknown path format: '{filename}'")

    def __repr__(self):
        # Only attributes that __init__ actually sets may be used here; a
        # previous version read a non-existent `status` attribute and raised
        # AttributeError.
        return f"{self.filename}: {self.name}"

    def __hash__(self):
        return hash((self.filename, self.name))

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(other, FunctionUse):
            return NotImplemented
        return self.filename == other.filename and self.name == other.name
98
99
# Matches an nm output line for an undefined ("U") symbol whose name starts
# with Py or _Py; group 1 is the symbol name.
UNDEF_RE = re.compile(r"\s*U (_?Py.+)$")


# Run `nm --undefined-only` on obj_file and yield a FunctionUse for every
# symbol that:
# - is marked as undefined by nm,
# - starts with _Py or Py, and
# - is accepted by is_capi() (in CAPI_DEFS and not in CAPI_BLACKLIST).
def find_used_capi(obj_file):
    nm_output = cmd_output("nm", "--undefined-only", obj_file)
    undefined_symbols = (
        found[1] for found in map(UNDEF_RE.match, nm_output.split("\n")) if found
    )
    for symbol in undefined_symbols:
        if is_capi(symbol):
            yield FunctionUse(obj_file, symbol)
119
120
# Read a precomputed list of C-API functions used by Instagram from the given
# fbsource checkout and yield one FunctionUse per name accepted by is_capi().
# Every use is attributed to the placeholder module "instagram.so".
def read_insta_used_capi(fbsource_path):
    pkl_path = os.path.join(
        fbsource_path,
        "fbcode/experimental/pyro/cpython/tools/instagram_cpython_symbols.pkl",
    )
    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # acceptable because the file comes from the caller's own checkout.
    with open(pkl_path, "rb") as pkl_file:
        names = pickle.load(pkl_file)
    for name in filter(is_capi, names):
        # "instagram.so" is a label, not a real path: it matches none of the
        # layouts FunctionUse knows how to shorten, so it must be stored raw
        # or the constructor raises ValueError.
        yield FunctionUse("instagram.so", name, raw_filename=True)
133
134
# Yield C-API functions used by any .so or .o file recursively found in
# root. If platform is given, restrict the search to object files whose
# directory path contains "/<platform>/", i.e. that look like they're from the
# given fbcode platform.
#
# Raises RuntimeError (on first iteration) if root does not exist.
def find_obj_used_capi(root, platform=None):
    if not Path(root).exists():
        raise RuntimeError(f"Path '{root}' does not exist")
    # Honour the caller's platform rather than a hard-coded global.
    platform_marker = None if platform is None else f"/{platform}/"
    for path, _dirs, files in os.walk(root):
        if platform_marker is not None and platform_marker not in path:
            continue
        for file in files:
            if file.endswith((".so", ".o")):
                yield from find_used_capi(os.path.join(path, file))
147
148
def read_raw_csv(filename):
    """Load function uses from a raw.csv file.

    The first line is treated as a header and skipped. Every other line must
    have exactly five comma-separated fields; the first is used as the
    filename and the fourth as the function name. Exits the program with
    "Malformed raw.csv." on any line with a different field count.
    """
    uses = []
    with open(filename, "r") as csv_file:
        rows = iter(csv_file)
        next(rows)  # header
        for row in rows:
            fields = row.strip().split(",")
            if len(fields) != 5:
                sys.exit("Malformed raw.csv.")
            module, _module_dir, _group, function, _todo = fields
            uses.append(FunctionUse(module, function, raw_filename=True))
    return uses
160
161
def read_func_list(filename):
    """Load function uses from a file with one function name per line.

    Surrounding whitespace is stripped and blank lines are ignored, so a
    trailing newline does not produce a function with an empty name. Every
    function is attributed to the placeholder module "ext.so".
    """
    # Read inside a context manager so the file is closed deterministically.
    with open(filename, "r") as func_file:
        names = [line.strip() for line in func_file]
    return [FunctionUse("ext.so", name, raw_filename=True) for name in names if name]
167
168
def process_modules(args):
    """Collect C-API uses by inspecting object files.

    Extends the global CAPI_BLACKLIST with the symbols defined by cpython's
    Modules objects, sets the global CAPI_DEFS from the built python binary,
    then scans whichever of the cpython modules, the tp2 checkout and the
    --objs directories the arguments select.

    Returns a list of unique FunctionUse objects. Exits the program if
    args.cpython_path is missing or invalid, or if no uses are found.
    """
    global CAPI_BLACKLIST, CAPI_DEFS
    cpython_root = args.cpython_path
    if not (cpython_root and Path(cpython_root).exists()):
        sys.exit("Please provide a valid cpython path.")

    # Remove functions defined in the Modules directory from the search
    # space. This includes things like hash table implementations, etc.
    module_objs = list(module_object_files(cpython_root))
    CAPI_BLACKLIST |= set(find_defined(module_objs))
    if "_Py_hashtable_clear" not in CAPI_BLACKLIST:
        raise RuntimeError("CAPI_BLACKLIST failed sanity check and is probably broken.")

    python_binary = os.path.join(cpython_root, "python")
    CAPI_DEFS = set(find_defined([python_binary]))
    if not CAPI_DEFS:
        raise RuntimeError("Found no defined symbols in python binary.")

    found = set()
    if args.scan_modules:
        found.update(
            itertools.chain.from_iterable(find_used_capi(obj) for obj in module_objs)
        )
    if args.tp2:
        found.update(find_obj_used_capi(args.tp2, platform=FBCODE_PLATFORM))
    for so_dir in args.objs:
        found.update(find_obj_used_capi(so_dir))
    if not found:
        sys.exit("No C-API functions found.")
    return list(found)
196
197
# Captures the run of letters between the (optionally underscore-prefixed)
# "Py" and the following "_" or end of the name.
GROUP_RE = re.compile(r"_?Py([A-Za-z]+)(_|$)")


# Return the group of the given function use: the letters between 'Py' and
# '_' in PyFoo_Mumble, or "<nogroup>" when its name has no such part.
def group_of(function):
    matched = GROUP_RE.match(function.name)
    if matched is None:
        return "<nogroup>"
    return matched.group(1)
206
207
# Return true if the named function still needs to be implemented in Pyro.
# Results are memoized per name.
#
# If PYRO_IMPLEMENTED is non-empty, it is the authoritative source of
# implemented functions. Otherwise the Pyro sources in PYRO_CPP_FILES are
# grepped, and the function is a todo when at least one of these holds:
# - An UNIMPLEMENTED line with the given name was found in the source.
# - The function's name, followed by "(", does not appear in the source.
@functools.lru_cache(maxsize=None, typed=False)
def is_todo(name):
    if PYRO_IMPLEMENTED:
        return name not in PYRO_IMPLEMENTED

    def grep_finds(flags, needle):
        # grep exits with status 0 exactly when it found a match.
        finished = subprocess.run(
            ["grep", flags, needle, *PYRO_CPP_FILES], stdout=subprocess.DEVNULL
        )
        return finished.returncode == 0

    marked_unimplemented = grep_finds("-rqF", f'UNIMPLEMENTED("{name}")')
    # TODO: Use a better regular expression to actually check for signature
    mentioned = grep_finds("-qF", f"{name}(")
    return marked_unimplemented or not mentioned
235
236
def write_grouped_csv(filename, funcs, group_key, summary=False):
    """Write per-group completion statistics to a CSV file.

    Args:
        filename: Path of the CSV file to (over)write.
        funcs: Iterable of objects with a .name attribute.
        group_key: Callable mapping each element of funcs to its group name.
        summary: If true, write a SUMMARY row with the totals right after
            the header.

    Within a group each distinct function name is counted once; a name that
    occurs in several groups is counted in each of them, and therefore
    several times in the SUMMARY totals. Group rows are written in ascending
    group-name order. Percentages are rounded down, and are 0 when there are
    no functions at all, so an empty funcs no longer raises
    ZeroDivisionError.
    """
    names_by_group = {}
    for func in funcs:
        names_by_group.setdefault(group_key(func), set()).add(func.name)

    def percent(done, total):
        return 100 * done // total if total else 0

    total_completed = 0
    total_functions = 0

    # group_name -> (num_completed, num_functions, percent_complete)
    group_completion = {}

    for group_name in sorted(names_by_group):
        names = names_by_group[group_name]
        num_completed = sum(1 for name in names if not is_todo(name))
        num_functions = len(names)
        group_completion[group_name] = (
            num_completed,
            num_functions,
            percent(num_completed, num_functions),
        )
        total_completed += num_completed
        total_functions += num_functions

    total_percent_complete = percent(total_completed, total_functions)

    with open(filename, "w") as out_file:
        print("name,num_completed,total_functions,percent_complete", file=out_file)
        if summary:
            print(
                f"SUMMARY,{total_completed},{total_functions},{total_percent_complete}",
                file=out_file,
            )

        for group_name, stats in group_completion.items():
            print(f"{group_name}," + ",".join(map(str, stats)), file=out_file)
271
272
def write_raw_csv(filename, funcs):
    """Write one CSV row per function use, after a header row.

    funcs is sorted in place by filename and then by function name. Each row
    holds the filename, the filename with its last "/"-separated component
    removed, the function's group, the function name, and "1" or "0"
    depending on whether the function is still a todo.
    """
    funcs.sort(key=lambda use: (use.filename, use.name))
    with open(filename, "w") as out_file:
        out_file.write("module,module_dir,group,function,todo\n")
        for use in funcs:
            directory = use.filename.rpartition("/")[0]
            if is_todo(use.name):
                flag = "1"
            else:
                flag = "0"
            row = f"{use.filename},{directory},{group_of(use)},{use.name},{flag}"
            out_file.write(row + "\n")
285
286
def parse_args():
    """Define the command-line interface and parse sys.argv.

    Returns:
        The argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="""
Analyze the completeness of Pyro's C-API implementation. By default, print a
summary line showing how many C-API functions are implemented out of the total
needed.
"""
    )

    # Hidden debugging flag: the help text is suppressed on purpose.
    parser.add_argument("--show-args", action="store_true", help=argparse.SUPPRESS)

    libs = parser.add_argument_group("Source/library arguments")
    libs.add_argument(
        "cpython_path",
        nargs="?",
        help="Root of a built cpython tree. Must be provided unless "
        "--read-csv or --read-func-list is also given.",
    )
    libs.add_argument(
        "--pyro",
        help="Pyro source tree. Defaults to the checkout containing this script.",
    )
    libs.add_argument(
        "--pyro-build",
        help="Pyro build tree. If given, use the python binary in PYRO_BUILD "
        "to determine which functions are implemented by Pyro.",
    )
    libs.add_argument(
        "--use-pyro-binary",
        action="store_true",
        help="Use the final python binary from PYRO_BUILD rather than the"
        " intermediate object files.",
    )
    libs.add_argument(
        "--no-modules",
        action="store_false",
        dest="scan_modules",
        help="Don't look for C-API uses in cpython/Modules.",
    )
    libs.add_argument(
        "--tp2",
        # The f prefix must sit on the literal that contains the placeholder;
        # adjacent literals are formatted independently.
        help="Root of a tp2 checkout to scan for .so files. Only versions "
        f"from platform {FBCODE_PLATFORM} will be considered.",
    )
    libs.add_argument(
        "--objs",
        action="append",
        default=[],
        help="Root of any directory to scan for .so or .o files. May be given "
        "multiple times.",
    )
    libs.add_argument(
        "--read-csv",
        help="Read function uses from raw.csv from a previous run, rather "
        "than inspecting object files. Proceed as usual after, including "
        "writing out new .csv files if --reports is given.",
    )
    libs.add_argument(
        "--read-func-list",
        help="Like --read-csv, but read a newline-separated list of functions,"
        " rather than a full raw csv file. All functions will appear to be "
        "used by the same module.",
    )

    actions = parser.add_argument_group("Actions")
    actions.add_argument(
        "--reports",
        help="""
In addition to printing the summary line, write out a number of files: 1)
groups.csv: One line per function group, with completion stats. 2) modules.csv:
Like groups.csv, but grouped by module. 3) raw.csv: Raw data, one line per
function use. 4) used_funcs.txt: Names of all required C-API functions. 5)
implemented_funcs.txt: Names of C-API functions implemented by Pyro. 6)
todo_funcs.txt: Names of C-API functions not implemented by Pyro. 7)
summary.txt: The same summary line printed to stdout.
""",
        action="store_true",
    )
    actions.add_argument(
        "--output",
        "-o",
        help="Set the output directory for the --reports option. Defaults to cwd.",
        default=".",
    )
    return parser.parse_args()
372
373
def get_pyro_root(args):
    """Return the resolved path of the Pyro source tree.

    Uses args.pyro when it is set, otherwise the parent of the directory
    containing this script. Raises RuntimeError if the resulting directory
    has no ext/Objects/longobject.cpp, which is taken as the sign of a Pyro
    tree.
    """
    candidate = args.pyro or os.path.join(
        os.path.dirname(os.path.realpath(__file__)), ".."
    )
    root = os.path.realpath(candidate)
    marker = Path(root, "ext", "Objects", "longobject.cpp")
    if marker.exists():
        return root
    raise RuntimeError(f"{root} doesn't look like the root of a Pyro tree")
383
384
def find_pyro_impl_files(pyro_root):
    """Return the sorted paths of all .cpp files below <pyro_root>/ext.

    The tree is walked in-process rather than by splitting the output of
    `find` on whitespace, so paths containing spaces stay intact. Returns an
    empty list when the ext directory does not exist or holds no .cpp files.
    """
    ext_root = os.path.join(pyro_root, "ext")
    return sorted(
        os.path.join(dirpath, name)
        for dirpath, _dirs, names in os.walk(ext_root)
        for name in names
        if name.endswith(".cpp")
    )
387
388
def find_pyro_implemented(args):
    """Return the set of C-API symbols that the Pyro build implements.

    The symbols defined by the build are read either from the final python
    binary in args.pyro_build (when args.use_pyro_binary is set) or from
    every .o file below args.pyro_build. Names that the Pyro sources in
    PYRO_CPP_FILES mark as UNIMPLEMENTED are then removed.
    """
    build_root = args.pyro_build

    if args.use_pyro_binary:
        objects = [os.path.join(build_root, "python")]
    else:
        # Walk the tree in-process: splitting the output of `find` on
        # whitespace would break object paths that contain spaces.
        objects = [
            os.path.join(dirpath, name)
            for dirpath, _dirs, names in os.walk(build_root)
            for name in names
            if name.endswith(".o")
        ]

    pyro_exports = set(find_defined(objects))
    pyro_unimplemented = find_pyro_unimplemented(PYRO_CPP_FILES)
    return pyro_exports - pyro_unimplemented
402
403
def find_pyro_unimplemented(pyro_files):
    """Return the set of names passed to UNIMPLEMENTED("...") in pyro_files.

    The names are extracted with sed; only names starting with Py or _Py are
    picked up.
    """
    # Print just the quoted name from every line with an UNIMPLEMENTED marker.
    extract_names = r's,^.*UNIMPLEMENTED("\(_\?Py\w\+\)").*$,\1,p'
    sed_output = cmd_output("sed", "-ne", extract_names, *pyro_files)
    return set(sed_output.split())
410
411
def main():
    """Run the report: gather C-API uses, classify them, print a summary.

    Sets the globals PYRO_CPP_FILES and PYRO_IMPLEMENTED that is_todo()
    consults. With --reports, also writes the CSV and text reports into the
    output directory. Returns 0 on success; exits with a message when no
    C-API functions were found.
    """
    global PYRO_CPP_FILES, PYRO_IMPLEMENTED

    args = parse_args()
    if args.show_args:
        # Hidden debugging aid: dump the parsed arguments and stop.
        sys.exit(args)

    pyro_root = get_pyro_root(args)
    PYRO_CPP_FILES = find_pyro_impl_files(pyro_root)
    if not PYRO_CPP_FILES:
        # Tolerate running on revs with no cpp files in ext/
        PYRO_CPP_FILES = ["/dev/null"]

    PYRO_IMPLEMENTED = find_pyro_implemented(args) if args.pyro_build else set()

    if args.read_csv:
        used_capi = read_raw_csv(args.read_csv)
    elif args.read_func_list:
        used_capi = read_func_list(args.read_func_list)
    else:
        used_capi = process_modules(args)

    used_capi_names = sorted({use.name for use in used_capi})
    if not used_capi_names:
        # A header-only raw.csv or an empty function list would otherwise
        # crash with ZeroDivisionError in the percentage below.
        sys.exit("No C-API functions found.")

    unimplemented = [name for name in used_capi_names if is_todo(name)]
    implemented = [name for name in used_capi_names if not is_todo(name)]

    percent_done = len(implemented) * 100 / len(used_capi_names)
    summary = (
        f"{len(implemented)} / {len(used_capi_names)} complete ({percent_done:.1f}%)"
    )

    if args.reports:
        output_dir = Path(args.output)
        output_dir.mkdir(parents=True, exist_ok=True)

        def outfile(f):
            path = os.path.join(output_dir, f)
            print(f"Writing {path}", file=sys.stderr)
            return path

        write_grouped_csv(outfile("groups.csv"), used_capi, group_of)
        write_grouped_csv(outfile("modules.csv"), used_capi, lambda f: f.filename)
        write_raw_csv(outfile("raw.csv"), used_capi)
        with open(outfile("used_funcs.txt"), "w") as file:
            print("\n".join(used_capi_names), file=file)
        with open(outfile("implemented_funcs.txt"), "w") as file:
            print("\n".join(implemented), file=file)
        with open(outfile("todo_funcs.txt"), "w") as file:
            print("\n".join(unimplemented), file=file)
        with open(outfile("summary.txt"), "w") as file:
            print(summary, file=file)

    print(summary)
    return 0
465
466
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)