this repo has no description
at trunk 472 lines 16 kB view raw
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
"""Trace a script's function calls and imports and render an HTML report.

The script is executed under ``sys.setprofile`` to record call/return events,
statically scanned with ``modulefinder`` to discover its top-level imports, and
the results are rendered as a collapsible HTML document.
"""
import _frozen_importlib
import argparse
import html
import importlib
import inspect
import modulefinder
import sys


class FunctionEvent:
    """A single profiler event (call or return) for one function.

    Attributes are plain strings so the event log does not keep the traced
    function objects alive.
    """

    BUILTIN = "builtin/extension"
    INTERPRETED = "interpreted"

    def __init__(self, kind, function):
        """Record the module and qualified name of `function`.

        `kind` is one of `BUILTIN` / `INTERPRETED`. `function` may be None
        (lookup failed) or any object; missing or empty name attributes fall
        back to the "[unknown ...]" placeholders.
        """
        self.module_name = "[unknown module]"
        self.function_name = "[unknown function]"
        self.kind = kind
        if function is None:
            return
        # getattr with a default: not every callable handed to the profiler is
        # guaranteed to carry both attributes.
        module_name = getattr(function, "__module__", None)
        if module_name is not None and module_name != "":
            self.module_name = module_name
        qualname = getattr(function, "__qualname__", None)
        if qualname is not None:
            self.function_name = qualname


class Call(FunctionEvent):
    """Event recorded when a function is entered."""


class Return(FunctionEvent):
    """Event recorded when a function is left (normally or by exception)."""


def _has_code(func, code):
    """Return `func` (or a function it wraps) whose code object is `code`.

    Follows the `__wrapped__` chain so decorated functions are found. Returns
    None when no link in the chain owns `code`.
    """
    while func is not None:
        func_code = getattr(func, "__code__", None)
        if func_code is code:
            return func
        # Attempt to find the decorated function
        func = getattr(func, "__wrapped__", None)
    return None


def get_func_in_mro(obj, code):
    """Find a function with the supplied code object by looking in the mro of obj.

    The attribute named `code.co_name` is looked up statically so that no
    descriptor or `__getattr__` hook runs: a dynamic `getattr` on an instance
    would execute property getters a second time from inside the profiler.
    Returns the matching function, or None.
    """
    val = inspect.getattr_static(obj, code.co_name, None)
    if val is None:
        return None
    if isinstance(val, (classmethod, staticmethod)):
        return _has_code(val.__func__, code)
    if isinstance(val, property):
        # Any of the three accessors may be the function being executed.
        for accessor in (val.fget, val.fset, val.fdel):
            func = _has_code(accessor, code)
            if func is not None:
                return func
        return None
    return _has_code(val, code)


def get_func(frame):
    """Return the function whose code object corresponds to the supplied stack frame."""
    code = frame.f_code
    if code.co_name is None:
        return None
    # First, try to find the function in globals
    cand = frame.f_globals.get(code.co_name, None)
    func = _has_code(cand, code)
    # If that failed, as will be the case with class and instance methods, try
    # to look up the function from the first argument. In the case of class/instance
    # methods, this should be the class (or an instance of the class) on which our
    # method is defined.
    if func is None and code.co_argcount >= 1:
        first_arg = frame.f_locals.get(code.co_varnames[0])
        func = get_func_in_mro(first_arg, code)
    # If we still can't find the function, as will be the case with static methods,
    # try looking at classes in global scope.
    if func is None:
        for v in frame.f_globals.values():
            if not isinstance(v, type):
                continue
            func = get_func_in_mro(v, code)
            if func is not None:
                break
    return func


class CallRecorder:
    """Profile function (for `sys.setprofile`) that appends events to a list."""

    # For these events the profiler passes the C function object as `arg`.
    C_EVENTS = {"c_call", "c_exception", "c_return"}

    EVENT_CLASS = {
        "c_call": Call,
        "c_exception": Return,
        "c_return": Return,
        "call": Call,
        "return": Return,
    }

    EVENT_TYP = {
        "c_call": FunctionEvent.BUILTIN,
        "c_exception": FunctionEvent.BUILTIN,
        "c_return": FunctionEvent.BUILTIN,
        "call": FunctionEvent.INTERPRETED,
        "return": FunctionEvent.INTERPRETED,
    }

    def __init__(self, event_log):
        """`event_log` is a list that receives one FunctionEvent per profiler event."""
        self.event_log = event_log

    def get_func(self, frame, event, arg):
        """Return the function object the event refers to, or None if unknown."""
        if event in self.C_EVENTS:
            return arg
        try:
            func = get_func(frame)
        except Exception as exc:
            # Lookup is best-effort; never let it break the traced program.
            print(f"ERROR: Exception while looking up function: {exc}")
            func = None
        return func

    def __call__(self, frame, event, arg):
        """Profiler entry point: record `event` in the event log."""
        func = self.get_func(frame, event, arg)
        self.event_log.append(self.EVENT_CLASS[event](self.EVENT_TYP[event], func))


def _escape_attr(value):
    """Escape `value` for use inside a double-quoted HTML attribute."""
    return html.escape(str(value), quote=False).replace('"', "&quot;")


class Node:
    """An HTML element with attributes and child nodes."""

    def __init__(self, name, attrs=None, children=None):
        """Create element `name`; `attrs` is a dict, `children` a list of Node/Text."""
        self.name = name
        self.attrs = attrs or {}
        self.children = children or []
        self.opening_tag = f"<{self.name}>"
        if self.attrs:
            attrstr = " ".join(
                f'{key}="{_escape_attr(val)}"' for key, val in self.attrs.items()
            )
            self.opening_tag = f"<{self.name} {attrstr}>"
        self.closing_tag = f"</{self.name}>"

    def append(self, node):
        """Add `node` as the last child."""
        self.children.append(node)

    def pop(self):
        """Remove and return the last child."""
        return self.children.pop()

    def __repr__(self):
        return f'Node("{self.name}", {repr(self.attrs)}, {repr(self.children)})'


class Text:
    """A run of plain (unescaped) text; escaping happens at render time."""

    def __init__(self, content):
        self.content = content

    def __repr__(self):
        return f'Text("{self.content}")'


class Document:
    """Serializes a Node/Text tree into an indented HTML document."""

    HEADER = """
<html>
  <head>
    <link href="https://thomasf.github.io/solarized-css/solarized-light.min.css" rel="stylesheet"></link>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
    <style>
      .fn {
        font-weight: bolder;
      }
      .mn {
        font-weight: bolder;
      }
      .fk {
        font-weight: bolder;
      }
      .hidden {
        display: none;
      }
      .indent {
        padding-left: 16px;
      }
    </style>
  </head>
"""

    def __init__(self):
        self.level = 0
        self.lines = []

    def append(self, s):
        """Append the already-serialized line `s` at the current indent level."""
        self.lines.append(" " * self.level + s)

    def process(self, node):
        """Serialize `node` (recursively) into `self.lines` and return self.

        Text content is HTML-escaped here so names such as ``<lambda>`` and the
        traced source code are displayed literally instead of being parsed as
        markup. Raises TypeError for anything that is not a Node or Text.
        """
        if isinstance(node, Node):
            if len(node.children) == 1 and isinstance(node.children[0], Text):
                # Keep a lone text child on the same line as its tags.
                content = html.escape(str(node.children[0].content), quote=False)
                self.append(node.opening_tag + content + node.closing_tag)
            else:
                self.append(node.opening_tag)
                self.level += 1
                for child in node.children:
                    self.process(child)
                self.level -= 1
                self.append(node.closing_tag)
        elif isinstance(node, Text):
            self.append(html.escape(str(node.content), quote=False))
        else:
            raise TypeError(f"Cannot process node: {node}")
        return self

    def render(self):
        """Return the complete HTML document as a string."""
        return self.HEADER + "\n".join(self.lines) + "</html>"


def toggle(node):
    """Return a "+/-" link that toggles the visibility of `node` (by its id attr)."""
    node_id = node.attrs["id"]
    return Node(
        "a",
        attrs={"href": "#", "onClick": f"$('#{node_id}').toggle(); return false;"},
        children=[Text("+/-")],
    )


def toggle_bar(node, title):
    """Return a div holding the toggle link for `node` followed by `title`."""
    return Node("div", children=[toggle(node), title])


# Monotonic counter used to give every collapsible container a unique id.
CONTAINER_ID_CTR = 0


def toggle_bar_and_container(title):
    """Return `(bar, container)`: a toggle bar and the hidden div it controls."""
    global CONTAINER_ID_CTR
    node_id = f"c{CONTAINER_ID_CTR}"
    CONTAINER_ID_CTR += 1
    container = Node("div", attrs={"id": node_id, "class": "hidden indent"})
    return toggle_bar(container, title), container


def compute_call_map(event_log, kind):
    """Map module name -> set of function names for Call events of `kind`."""
    funcs = {}
    for event in event_log:
        if isinstance(event, Call) and event.kind == kind:
            funcs.setdefault(event.module_name, set()).add(event.function_name)
    return funcs


def called_function_section(called_funcs, kind):
    """Build the collapsible "Called N <kind> functions" section.

    Modules are listed by descending number of distinct functions called.
    """
    total_names = sum(len(names) for names in called_funcs.values())
    all_bar, all_funcs = toggle_bar_and_container(
        Text(f"Called {total_names} {kind} functions")
    )
    for mod, funcs in sorted(
        called_funcs.items(), key=lambda kv: len(kv[1]), reverse=True
    ):
        mod_bar, mod_funcs = toggle_bar_and_container(Text(f"{mod} ({len(funcs)})"))
        for func in sorted(funcs):
            mod_funcs.append(Node("div", children=[Text(func)]))
        all_funcs.append(mod_bar)
        all_funcs.append(mod_funcs)
    return Node("div", children=[all_bar, all_funcs])


def call_node(event):
    """Return the "<function> in <module>" label for a Call event."""
    node = Node(
        "span",
        children=[
            Node("span", attrs={"class": "fn"}, children=[Text(event.function_name)]),
            Text(" in "),
            Node("span", attrs={"class": "mn"}, children=[Text(event.module_name)]),
        ],
    )
    if event.kind == FunctionEvent.BUILTIN:
        node.append(Node("span", attrs={"class": "fk"}, children=[Text(event.kind)]))
    return node


def execution_trace_section(event_log):
    """Build the nested call-trace tree from the recorded events.

    Each Call opens a collapsible container; the matching Return closes it.
    Calls that made no nested calls are flattened to a plain div.
    """
    section = Node("div")
    stack = [section]
    for event in event_log:
        if isinstance(event, Call):
            bar, callees = toggle_bar_and_container(call_node(event))
            stack[-1].append(bar)
            stack[-1].append(callees)
            stack.append(callees)
        elif isinstance(event, Return):
            if len(stack) == 1:
                # Return without a recorded Call (the profiler was installed
                # mid-call): there is nothing to close, and popping the root
                # would crash on the next access.
                continue
            callees = stack.pop()
            if not callees.children:
                # If there are no callees, remove the callee container div and
                # toggle bar. Replace it with a simple text node.
                stack[-1].children.pop()
                bar = stack[-1].children.pop()
                stack[-1].append(Node("div", children=[bar.children[-1]]))
    return section


class ImportTrace:
    """One module in the import tree together with the modules it imported."""

    def __init__(self, module):
        self.module = module
        self.imports = []


class ImportTracer(modulefinder.ModuleFinder):
    """ModuleFinder that also records which module first imported which."""

    def __init__(self, *args, **kwargs):
        self.seen = set()
        self.traces = []
        self.stack = []
        super().__init__(*args, **kwargs)

    def import_module(self, partname, fqname, parent):
        """Import `fqname`, recording it under the module currently being imported.

        Already-known modules are returned without adding a trace entry, so
        each module appears only the first time it is imported.
        """
        if fqname in self.modules:
            return self.modules[fqname]
        trace = ImportTrace(fqname)
        if self.stack:
            self.stack[-1].imports.append(trace)
        self.stack.append(trace)
        result = super().import_module(partname, fqname, parent)
        self.stack.pop()
        if fqname not in self.modules and self.stack:
            # The import failed; drop the entry added above.
            self.stack[-1].imports.pop()
        if not self.stack:
            self.traces.append(trace)
        return result

    def compute_imports(self, path):
        """Scan the script at `path`; return `(module names, top-level traces)`."""
        self.run_script(path)
        return self.modules.keys(), self.traces

    def scan_code(self, co, m):
        """Process the import/store opcodes of `co` on behalf of module `m`.

        Unlike the base class, nested code objects are not scanned, so imports
        inside function or class bodies are not followed.
        """
        scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                (name,) = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)


def imported_modules_section(imports):
    """Build the collapsible, alphabetically sorted list of imported modules."""
    bar, mod_list = toggle_bar_and_container(Text(f"Imported {len(imports)} modules"))
    for name in sorted(imports):
        mod_list.append(Node("div", children=[Text(name)]))
    return Node("div", children=[bar, mod_list])


def build_import_trace(trace, called_builtins, indent=False):
    """Return the div for `trace` and, recursively, everything it imported.

    The module is actually imported in order to tell whether it is a builtin.
    `called_builtins` maps module name -> set of builtin function names called.
    """
    attrs = {}
    if indent:
        attrs = {"class": "indent"}
    msg = trace.module
    try:
        module = importlib.import_module(trace.module)
        if module.__loader__ is _frozen_importlib.BuiltinImporter:
            msg += " (builtin)"
    except (Exception, SystemExit):
        # Importing runs arbitrary module code. Report any failure, including
        # a module that calls sys.exit(), but let KeyboardInterrupt through.
        msg += " (failed importing module)"
    if trace.module in called_builtins:
        msg += f" ({len(called_builtins[trace.module])} builtin/extension functions called)"
    container = Node("div", children=[Text(msg)], attrs=attrs)
    if trace.imports:
        for child in trace.imports:
            container.append(build_import_trace(child, called_builtins, True))
    return container


def import_trace_section(traces, called_builtins):
    """Build the collapsible import-trace section from the top-level traces."""
    bar, container = toggle_bar_and_container(Text("Import Trace:"))
    for trace in traces:
        container.append(build_import_trace(trace, called_builtins))
    return Node("div", children=[bar, container])


def render_summary(path, source, event_log):
    """Return the full HTML report for the script at `path`.

    `source` is the script text shown in the report; `event_log` is the list
    of FunctionEvent objects recorded while the script ran.
    """
    imported_modules, traces = ImportTracer().compute_imports(path)
    called_interpreted = compute_call_map(event_log, FunctionEvent.INTERPRETED)
    called_builtins = compute_call_map(event_log, FunctionEvent.BUILTIN)
    body = Node(
        "body",
        children=[
            Node("h1", children=[Text(f"Tracing summary for {path}")]),
            Node("h2", children=[Text("Source:")]),
            Node("pre", children=[Text(source)]),
            Node("h2", children=[Text("Summary stats:")]),
            imported_modules_section(imported_modules),
            called_function_section(called_builtins, FunctionEvent.BUILTIN),
            called_function_section(called_interpreted, FunctionEvent.INTERPRETED),
            Node("h2", children=[Text("Import Trace:")]),
            import_trace_section(traces, called_builtins),
            Node("h2", children=[Text("Call Trace:")]),
            execution_trace_section(event_log),
        ],
    )
    doc = Document()
    doc.process(body)
    return doc.render()


def main(argv=None):
    """Command-line entry point: trace a script and write the HTML report."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="""
Determine what modules are imported and what functions are called by a script.

This script does the following:

  1. Executes the script at [script_path] and records what functions are called.
  2. Statically analyzes the script to determine what modules are imported. Note
     that this analysis does not consider inline imports and therefore may underestimate
     the set of modules that are imported during execution.

It then writes a report to [output_path] with the following information:

  - A list of the modules that are imported unconditionally.
  - A list of all the functions that were called, grouped by whether or not they
    are interpreted or native functions.
  - A call trace.
  - An import trace. Each imported module will appear exactly once in the trace,
    the first time it is executed.
""",
    )
    parser.add_argument("script_path", help="The path to the script to execute")
    parser.add_argument("output_path", help="Where to write the report")
    args = parser.parse_args(argv)

    # Compile the script
    with open(args.script_path, "r") as f:
        source = f.read()
    code = compile(source, args.script_path, "exec")

    # Run the script in its own namespace so that names it defines (for
    # example `args`, `source` or `event_log`) cannot clobber this tool's
    # state. NOTE: exec runs the user-supplied script by design.
    script_globals = {"__name__": "__main__", "__file__": args.script_path}

    event_log = []
    sys.setprofile(CallRecorder(event_log))
    try:
        exec(code, script_globals)
    finally:
        sys.setprofile(None)

    with open(args.output_path, "w+") as f:
        f.write(render_summary(args.script_path, source, event_log))


if __name__ == "__main__":
    main()