this repo has no description
at trunk 468 lines 16 kB view raw
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
"""
Generate a report of C-API completeness for Pyro.
"""

import argparse
import functools
import itertools
import os
import pickle
import re
import subprocess
import sys
from pathlib import Path

from capi_util import cmd_output


# Symbols that match the _Py/Py naming pattern but must not be counted as
# C-API functions. process_modules() adds every symbol defined by cpython's
# Modules/ object files to this set at runtime.
CAPI_BLACKLIST = {
    "PyStructSequence_InitType2",  # Non-opaque PyTypeObject is unsupported
    "_PyLong_One",  # a const
    "_PyLong_Zero",  # a const
    "_Py_FalseStruct",  # expansion of Py_False
    "_Py_NoneStruct",  # expansion of Py_None
    "_Py_NotImplementedStruct",  # expansion of Py_NotImplemented
    "_Py_TrueStruct",  # expansion of Py_True
}

FBCODE_PLATFORM = "platform009"


# Yield all .o files in the given cpython directory that look like they're from
# a Module, i.e. whose containing directory path mentions "Modules".
def module_object_files(dirname):
    for path, _dirs, filenames in os.walk(dirname):
        if "Modules" not in path:
            continue
        for filename in filenames:
            if filename.endswith(".o"):
                yield os.path.join(path, filename)


# Matches an nm line for a symbol of type "T" (text section, external) whose
# name starts with Py or _Py; group 1 is the symbol name.
CAPI_SYMBOL_RE = re.compile(r"\w+\sT (_?Py.+)$")


# Yield all externally-visible symbols beginning with Py or _Py that are
# defined in the text section of the given object files. Yields nothing when
# no object files are given.
def find_defined(obj_files):
    obj_files = list(obj_files)
    if not obj_files:
        # With no file operands nm inspects ./a.out instead, which is never
        # what a caller of this function means.
        return
    output = cmd_output("nm", "--defined-only", *obj_files)
    for line in output.splitlines():
        match = CAPI_SYMBOL_RE.match(line)
        if match:
            yield match[1]


# Return true if and only if the given function is exported by cpython and not
# defined by a C module. CAPI_DEFS is populated by process_modules(), so this
# must not be called before it.
def is_capi(name):
    return name in CAPI_DEFS and name not in CAPI_BLACKLIST


# One use of a C-API function, along with the filename it's used by (usually a
# .o or .so file).
class FunctionUse:
    """A (filename, function name) pair recording one C-API use.

    Unless raw_filename is true, the filename is normalized by
    _process_filename() into a short, stable module identifier. Instances
    hash and compare by (filename, name) so they can be deduplicated in sets.
    """

    # Each pattern recognizes one known layout of object-file paths.
    MODULES_RE = re.compile(r"Modules/(.+\.o)$")
    TP2_RE = re.compile(fr"/([^/]+)/(?:[^/]+)/{FBCODE_PLATFORM}/.+/([^/]+\.so)$")
    INSTAGRAM_RE = re.compile(r"/((?:distillery|site-packages|lib-dynload)/.+\.so)$")
    UWSGI_RE = re.compile(r"uWSGI/master/src/build-.+/([^/]+\.o)")

    def __init__(self, filename, name, raw_filename=False):
        self.filename = filename if raw_filename else self._process_filename(filename)
        self.name = name

    @classmethod
    def _process_filename(cls, filename):
        """Map a full object-file path to a short module identifier.

        The patterns are tried in a fixed order; the first match wins.
        Raises ValueError if the path matches none of the known layouts.
        """
        match = cls.MODULES_RE.search(filename)
        if match:
            return f"cpython/{match[1]}"
        match = cls.TP2_RE.search(filename)
        if match:
            return f"{match[1]}/{match[2]}"
        match = cls.INSTAGRAM_RE.search(filename)
        if match:
            return match[1]
        match = cls.UWSGI_RE.search(filename)
        if match:
            return f"uwsgi/{match[1]}"
        raise ValueError(f"Unknown path format: '{filename}'")

    def __repr__(self):
        # Only attributes that __init__ actually sets are used here; the
        # previous version also read a `status` attribute that is never
        # assigned, so repr() raised AttributeError.
        return f"{self.filename}: {self.name}"

    def __hash__(self):
        return hash((self.filename, self.name))

    def __eq__(self, other):
        if not isinstance(other, FunctionUse):
            # Let Python fall back to its default comparison instead of
            # failing on a missing attribute.
            return NotImplemented
        return self.filename == other.filename and self.name == other.name


# Matches an nm line for an undefined ("U") symbol whose name starts with Py
# or _Py; group 1 is the symbol name.
UNDEF_RE = re.compile(r"\s*U (_?Py.+)$")


# Yield all C-API functions used by the given object file. This includes
# symbols that meet all of the following:
# - Marked as undefined by nm
# - Starts with _Py or Py
# - In CAPI_DEFS
# - Not in CAPI_BLACKLIST
def find_used_capi(obj_file):
    output = cmd_output("nm", "--undefined-only", obj_file)
    for line in output.splitlines():
        match = UNDEF_RE.match(line)
        if match and is_capi(match[1]):
            yield FunctionUse(obj_file, match[1])


# Read a precomputed list of C-API functions used by Instagram from the given
# fbsource checkout.
def read_insta_used_capi(fbsource_path):
    """Yield a FunctionUse for every C-API name in the Instagram symbol pickle.

    All uses are attributed to the synthetic module "instagram.so".
    """
    pkl_path = os.path.join(
        fbsource_path,
        "fbcode/experimental/pyro/cpython/tools/instagram_cpython_symbols.pkl",
    )
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # a checkout you trust.
    with open(pkl_path, "rb") as pkl_file:
        names = pickle.load(pkl_file)
    for name in names:
        if is_capi(name):
            # "instagram.so" is a label, not a real path, so it must bypass
            # filename normalization (which would raise ValueError).
            yield FunctionUse("instagram.so", name, raw_filename=True)


# Yield C-API functions used by any .so or .o file recursively found in
# root. If platform is given, restrict the search to object files whose
# directory path contains "/<platform>/".
def find_obj_used_capi(root, platform=None):
    if not Path(root).exists():
        raise RuntimeError(f"Path '{root}' does not exist")
    platform_marker = None if platform is None else f"/{platform}/"
    for path, _dirs, files in os.walk(root):
        if platform_marker is not None and platform_marker not in path:
            continue
        for file in files:
            if file.endswith((".so", ".o")):
                yield from find_used_capi(os.path.join(path, file))


# Read function uses back from a raw.csv written by a previous run. The first
# line is treated as a header and skipped; every other line must have exactly
# five comma-separated fields, of which the first (module) and fourth
# (function) are used. Exits the process on a malformed line.
def read_raw_csv(filename):
    result = []
    with open(filename, "r") as file:
        lines = iter(file)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(lines, None)
        for line in lines:
            parts = line.strip().split(",")
            if len(parts) != 5:
                sys.exit("Malformed raw.csv.")
            result.append(FunctionUse(parts[0], parts[3], raw_filename=True))
    return result


# Read a newline-separated list of function names. Every function is
# attributed to the synthetic module "ext.so"; blank lines are ignored.
def read_func_list(filename):
    with open(filename, "r") as file:
        names = [line.strip() for line in file]
    return [FunctionUse("ext.so", name, raw_filename=True) for name in names if name]


# Scan the object files selected by args and return the list of distinct
# C-API function uses found. As a side effect this extends CAPI_BLACKLIST and
# sets CAPI_DEFS, both of which is_capi() depends on. Exits the process if
# the cpython path is missing or no uses are found.
def process_modules(args):
    global CAPI_BLACKLIST, CAPI_DEFS
    cpython_path = args.cpython_path
    if not cpython_path or not Path(cpython_path).exists():
        sys.exit("Please provide a valid cpython path.")

    # Remove functions defined in the Modules directory from the search
    # space. This includes things like hash table implementations, etc.
    module_objs = list(module_object_files(cpython_path))
    CAPI_BLACKLIST |= set(find_defined(module_objs))
    if "_Py_hashtable_clear" not in CAPI_BLACKLIST:
        raise RuntimeError("CAPI_BLACKLIST failed sanity check and is probably broken.")

    CAPI_DEFS = set(find_defined([os.path.join(cpython_path, "python")]))
    if not CAPI_DEFS:
        raise RuntimeError("Found no defined symbols in python binary.")

    used_capi = set()
    if args.scan_modules:
        for obj in module_objs:
            used_capi.update(find_used_capi(obj))
    if args.tp2:
        used_capi.update(find_obj_used_capi(args.tp2, platform=FBCODE_PLATFORM))
    for so_dir in args.objs:
        used_capi.update(find_obj_used_capi(so_dir))
    if not used_capi:
        sys.exit("No C-API functions found.")
    return list(used_capi)


GROUP_RE = re.compile(r"_?Py([A-Za-z]+)(_|$)")


# Return the group of the given function, which we've defined as the characters
# between 'Py' and '_' in PyFoo_Mumble. Names that don't have that shape (for
# example Py_Initialize) are placed in "<nogroup>".
def group_of(function):
    found = GROUP_RE.match(function.name)
    return found.group(1) if found else "<nogroup>"


# Return true if a function needs to be implemented in Pyro.
#
# If PYRO_IMPLEMENTED is non-empty, use it as the authoritative source of
# implemented functions. Otherwise, check that at least one of the following is
# true:
# - An UNIMPLEMENTED line with the given name was found in the source.
# - The function's name does not appear in the source.
@functools.lru_cache(maxsize=None, typed=False)
def is_todo(name):
    """Return true if the named C-API function still needs implementing.

    A non-empty PYRO_IMPLEMENTED is authoritative. Otherwise the Pyro sources
    in PYRO_CPP_FILES are grepped: the function is todo if an
    UNIMPLEMENTED("<name>") marker exists or if "<name>(" appears nowhere.
    Results are cached per name because each uncached call may spawn grep.
    """
    if PYRO_IMPLEMENTED:
        return name not in PYRO_IMPLEMENTED

    def grep_finds(flags, needle):
        # grep exits with status 0 when at least one match was found.
        result = subprocess.run(
            ["grep", flags, needle, *PYRO_CPP_FILES], stdout=subprocess.DEVNULL
        )
        return result.returncode == 0

    has_unimplemented = grep_finds("-rqF", f'UNIMPLEMENTED("{name}")')
    # TODO: Use a better regular expression to actually check for signature
    definition_found = grep_finds("-qF", f"{name}(")
    return has_unimplemented or not definition_found


def write_grouped_csv(filename, funcs, group_key, summary=False):
    """Write per-group completion statistics for funcs to a CSV file.

    funcs is an iterable of objects with a `name` attribute; group_key maps
    each one to a sortable group label. One row is written per group, in
    sorted label order, with the number of implemented functions, the number
    of distinct functions and the integer percentage complete. If summary is
    true, a SUMMARY row with the totals across all groups comes first; a
    function used by several groups is counted once per group. With no funcs
    at all the totals are reported as 0% rather than dividing by zero.
    """
    names_by_group = {}
    for func in funcs:
        names_by_group.setdefault(group_key(func), set()).add(func.name)

    total_completed = 0
    total_functions = 0

    # group_name -> (num_completed, num_functions, percent_complete)
    group_completion = {}

    for group_name in sorted(names_by_group):
        names = names_by_group[group_name]
        num_functions = len(names)
        num_completed = sum(1 for name in names if not is_todo(name))
        percent_complete = 100 * num_completed // num_functions
        group_completion[group_name] = (num_completed, num_functions, percent_complete)

        total_completed += num_completed
        total_functions += num_functions

    total_percent_complete = (
        100 * total_completed // total_functions if total_functions else 0
    )

    with open(filename, "w") as out_file:
        print("name,num_completed,total_functions,percent_complete", file=out_file)
        if summary:
            print(
                f"SUMMARY,{total_completed},{total_functions},{total_percent_complete}",
                file=out_file,
            )

        for group_name, stats in group_completion.items():
            print(f"{group_name}," + ",".join(map(str, stats)), file=out_file)
def write_raw_csv(filename, funcs):
    """Write one CSV row per function use, ordered by module then function.

    Columns are: module, module_dir (the module path without its last
    component), group, function, and todo ("1" if not yet implemented, else
    "0"). The caller's collection is not modified.
    """
    ordered = sorted(funcs, key=lambda f: (f.filename, f.name))
    with open(filename, "w") as out_file:
        print("module,module_dir,group,function,todo", file=out_file)
        for func in ordered:
            todo = "1" if is_todo(func.name) else "0"
            module_dir = func.filename.rpartition("/")[0]
            print(
                f"{func.filename},{module_dir},{group_of(func)},{func.name},{todo}",
                file=out_file,
            )


def parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description="""
Analyze the completeness of Pyro's C-API implementation. By default, print a
summary line showing how many C-API functions are implemented out of the total
needed.
"""
    )

    parser.add_argument("--show-args", action="store_true", help=argparse.SUPPRESS)

    libs = parser.add_argument_group("Source/library arguments")
    libs.add_argument(
        "cpython_path",
        nargs="?",
        help="Root of a built cpython tree. Must be provided unless "
        "--read-csv or --read-func-list is also given.",
    )
    libs.add_argument(
        "--pyro",
        help="Pyro source tree. Defaults to the checkout containing this script.",
    )
    libs.add_argument(
        "--pyro-build",
        help="Pyro build tree. If given, use the python binary in PYRO_BUILD "
        "to determine which functions are implemented by Pyro.",
    )
    libs.add_argument(
        "--use-pyro-binary",
        action="store_true",
        help="Use the final python binary from PYRO_BUILD rather than the"
        " intermediate object files.",
    )
    libs.add_argument(
        "--no-modules",
        action="store_false",
        dest="scan_modules",
        help="Don't look for C-API uses in cpython/Modules.",
    )
    libs.add_argument(
        "--tp2",
        # The f prefix must sit on the fragment that contains the placeholder;
        # adjacent string literals are formatted independently.
        help="Root of a tp2 checkout to scan for .so files. Only versions "
        f"from platform {FBCODE_PLATFORM} will be considered.",
    )
    libs.add_argument(
        "--objs",
        action="append",
        default=[],
        help="Root of any directory to scan for .so or .o files. May be given "
        "multiple times.",
    )
    libs.add_argument(
        "--read-csv",
        help="Read function uses from raw.csv from a previous run, rather "
        "than inspecting object files. Proceed as usual after, including "
        "writing out new .csv files if --reports is given.",
    )
    libs.add_argument(
        "--read-func-list",
        help="Like --read-csv, but read a newline-separated list of functions,"
        " rather than a full raw csv file. All functions will appear to be "
        "used by the same module.",
    )

    actions = parser.add_argument_group("Actions")
    actions.add_argument(
        "--reports",
        help="""
In addition to printing the summary line, write out a number of files: 1)
groups.csv: One line per function group, with completion stats. 2) modules.csv:
Like groups.csv, but grouped by module. 3) raw.csv: Raw data, one line per
function use. 4) used_funcs.txt: Names of all required C-API functions. 5)
implemented_funcs.txt: Names of C-API functions implemented by Pyro. 6)
todo_funcs.txt: Names of C-API functions not implemented by Pyro. 7)
summary.txt: The same summary line printed to stdout.
""",
        action="store_true",
    )
    actions.add_argument(
        "--output",
        "-o",
        help="Set the output directory for the --reports option. Defaults to cwd.",
        default=".",
    )
    return parser.parse_args()


def get_pyro_root(args):
    """Return the resolved Pyro source root.

    Uses args.pyro when given, otherwise the parent of this script's
    directory. Raises RuntimeError if ext/Objects/longobject.cpp is not
    present under the resulting directory.
    """
    if args.pyro:
        candidate = args.pyro
    else:
        candidate = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
    pyro_root = os.path.realpath(candidate)
    if not Path(os.path.join(pyro_root, "ext", "Objects", "longobject.cpp")).exists():
        raise RuntimeError(f"{pyro_root} doesn't look like the root of a Pyro tree")
    return pyro_root


def find_pyro_impl_files(pyro_root):
    """Return the .cpp files found under <pyro_root>/ext.

    The output of find is split on whitespace, so paths containing spaces are
    not supported.
    """
    return cmd_output("find", os.path.join(pyro_root, "ext"), "-name", "*.cpp").split()


def find_pyro_implemented(args):
    """Return the set of C-API symbols that Pyro exports and implements.

    Exported symbols are read either from the python binary in
    args.pyro_build (with --use-pyro-binary) or from every .o file under it.
    Names marked UNIMPLEMENTED in PYRO_CPP_FILES are then removed.
    """
    build_root = args.pyro_build

    if args.use_pyro_binary:
        obj_files = [os.path.join(build_root, "python")]
    else:
        obj_files = cmd_output("find", build_root, "-name", "*.o").split()
    pyro_exports = set(find_defined(obj_files))

    return pyro_exports - find_pyro_unimplemented(PYRO_CPP_FILES)


def find_pyro_unimplemented(pyro_files):
    """Return the names that appear in UNIMPLEMENTED("...") markers.

    Only names starting with Py or _Py are extracted. Returns an empty set
    when no files are given.
    """
    pyro_files = list(pyro_files)
    if not pyro_files:
        # sed would otherwise read from stdin and block.
        return set()
    return set(
        cmd_output(
            "sed", "-ne", r's,^.*UNIMPLEMENTED("\(_\?Py\w\+\)").*$,\1,p', *pyro_files
        ).split()
    )


def main():
    """Collect C-API uses, print the completion summary, and write reports.

    Sets the module globals PYRO_CPP_FILES and PYRO_IMPLEMENTED that is_todo()
    reads. Returns 0 on success; exits with a message if no C-API functions
    were found.
    """
    args = parse_args()
    if args.show_args:
        # Hidden debugging aid: sys.exit prints the parsed namespace to
        # stderr and exits with a non-zero status.
        sys.exit(args)

    pyro_root = get_pyro_root(args)
    global PYRO_CPP_FILES, PYRO_IMPLEMENTED
    PYRO_CPP_FILES = find_pyro_impl_files(pyro_root)
    if not PYRO_CPP_FILES:
        # Tolerate running on revs with no cpp files in ext/
        PYRO_CPP_FILES = ["/dev/null"]

    PYRO_IMPLEMENTED = find_pyro_implemented(args) if args.pyro_build else set()

    if args.read_csv:
        used_capi = read_raw_csv(args.read_csv)
    elif args.read_func_list:
        used_capi = read_func_list(args.read_func_list)
    else:
        used_capi = process_modules(args)

    used_capi_names = sorted({func.name for func in used_capi})
    if not used_capi_names:
        # Same outcome as process_modules(); also avoids dividing by zero when
        # an empty csv or function list was read.
        sys.exit("No C-API functions found.")

    unimplemented = [name for name in used_capi_names if is_todo(name)]
    implemented = [name for name in used_capi_names if not is_todo(name)]

    percent_done = len(implemented) * 100 / len(used_capi_names)
    summary = (
        f"{len(implemented)} / {len(used_capi_names)} complete ({percent_done:.1f}%)"
    )

    if args.reports:
        output_dir = Path(args.output)
        output_dir.mkdir(parents=True, exist_ok=True)

        def outfile(f):
            path = os.path.join(output_dir, f)
            print(f"Writing {path}", file=sys.stderr)
            return path

        write_grouped_csv(outfile("groups.csv"), used_capi, group_of)
        write_grouped_csv(outfile("modules.csv"), used_capi, lambda f: f.filename)
        write_raw_csv(outfile("raw.csv"), used_capi)
        with open(outfile("used_funcs.txt"), "w") as file:
            print("\n".join(used_capi_names), file=file)
        with open(outfile("implemented_funcs.txt"), "w") as file:
            print("\n".join(implemented), file=file)
        with open(outfile("todo_funcs.txt"), "w") as file:
            print("\n".join(unimplemented), file=file)
        with open(outfile("summary.txt"), "w") as file:
            print(summary, file=file)

    print(summary)
    return 0


if __name__ == "__main__":
    sys.exit(main())