this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
import _frozen_importlib
import argparse
import html
import importlib
import inspect
import modulefinder
import sys
9
10
class FunctionEvent:
    """Base record for a single profiling event tied to a function.

    Attributes:
        module_name: The function's ``__module__``, or "[unknown module]" when
            it is missing, None, or empty.
        function_name: The function's ``__qualname__``, or "[unknown function]"
            when it is missing or None.
        kind: One of ``BUILTIN`` or ``INTERPRETED``.
    """

    BUILTIN = "builtin/extension"
    INTERPRETED = "interpreted"

    def __init__(self, kind, function):
        """Capture the names of ``function`` (which may be None) for ``kind``."""
        self.module_name = "[unknown module]"
        self.function_name = "[unknown function]"
        self.kind = kind
        if function is None:
            return
        # Use getattr so that an object lacking one of these attributes falls
        # back to the placeholder instead of raising AttributeError.
        module_name = getattr(function, "__module__", None)
        if module_name is not None and module_name != "":
            self.module_name = module_name
        function_name = getattr(function, "__qualname__", None)
        if function_name is not None:
            self.function_name = function_name
24
25
class Call(FunctionEvent):
    """Event recorded when a function is entered."""
28
29
class Return(FunctionEvent):
    """Event recorded when a function is exited."""
32
33
34def _has_code(func, code):
35 while func is not None:
36 func_code = getattr(func, "__code__", None)
37 if func_code is code:
38 return func
39 # Attempt to find the decorated function
40 func = getattr(func, "__wrapped__", None)
41 return None
42
43
def get_func_in_mro(obj, code):
    """Look up ``code.co_name`` on ``obj`` and return the function owning ``code``.

    The attribute is fetched with ``getattr``; classmethod/staticmethod objects
    are unwrapped to their ``__func__`` and getter-only properties to their
    ``fget`` before being checked. Returns None when the attribute is missing
    or does not lead to ``code``.
    """
    # FunctionType is incompatible with Callable
    # https://github.com/python/typeshed/issues/1378
    attr = getattr(obj, code.co_name, None)
    if attr is None:
        return None
    if isinstance(attr, (classmethod, staticmethod)):
        return _has_code(attr.__func__, code)
    read_only_property = (
        isinstance(attr, property) and attr.fset is None and attr.fdel is None
    )
    if read_only_property:
        return _has_code(attr.fget, code)
    return _has_code(attr, code)
58
59
def get_func(frame):
    """Return the function whose code object is running in ``frame``, or None.

    Three lookups are attempted in order, stopping at the first hit:
      1. the frame's globals, by the code object's name;
      2. the frame's first argument (covers class and instance methods, where
         that argument is the class or an instance of it);
      3. every class found in the frame's globals (covers static methods).
    """
    code = frame.f_code
    name = code.co_name
    if name is None:
        return None

    # 1. Plain module-level function.
    found = _has_code(frame.f_globals.get(name, None), code)
    if found is not None:
        return found

    # 2. Method: resolve through the first positional argument.
    if code.co_argcount >= 1:
        receiver = frame.f_locals.get(code.co_varnames[0])
        found = get_func_in_mro(receiver, code)
        if found is not None:
            return found

    # 3. Static method: probe each class visible in global scope.
    for value in frame.f_globals.values():
        if isinstance(value, type):
            found = get_func_in_mro(value, code)
            if found is not None:
                return found
    return None
85
86
class CallRecorder:
    """Profile callback that appends a Call/Return event to a log per event.

    Instances are callable with the ``(frame, event, arg)`` signature and are
    installed via ``sys.setprofile`` by this script.
    """

    # Events for which ``arg`` is the C function object itself. "c_exception"
    # belongs here too: without it the event would be attributed to whatever
    # Python function owns the calling frame rather than to the C function.
    C_EVENTS = {"c_call", "c_exception", "c_return"}

    # Which event class to instantiate for each profiler event name.
    EVENT_CLASS = {
        "c_call": Call,
        "c_exception": Return,
        "c_return": Return,
        "call": Call,
        "return": Return,
    }

    # Which FunctionEvent kind each profiler event name maps to.
    EVENT_TYP = {
        "c_call": FunctionEvent.BUILTIN,
        "c_exception": FunctionEvent.BUILTIN,
        "c_return": FunctionEvent.BUILTIN,
        "call": FunctionEvent.INTERPRETED,
        "return": FunctionEvent.INTERPRETED,
    }

    def __init__(self, event_log):
        """Store ``event_log``, the list that recorded events are appended to."""
        self.event_log = event_log

    def get_func(self, frame, event, arg):
        """Return the function object for this event, or None if unknown.

        For C events the function is ``arg``; otherwise it is looked up from
        ``frame``. Lookup failures are reported on stdout and yield None so
        that the profiler callback itself never raises from here.
        """
        if event in self.C_EVENTS:
            return arg
        try:
            func = get_func(frame)
        except Exception as exc:
            print(f"ERROR: Exception while looking up function: {exc}")
            func = None
        return func

    def __call__(self, frame, event, arg):
        """Record one profiler event as a Call or Return in the event log."""
        func = self.get_func(frame, event, arg)
        self.event_log.append(self.EVENT_CLASS[event](self.EVENT_TYP[event], func))
122
123
class Node:
    """An HTML element with a tag name, attributes, and child nodes.

    The opening and closing tag strings are computed once, at construction
    time, from ``name`` and ``attrs``.
    """

    def __init__(self, name, attrs=None, children=None):
        self.name = name
        self.attrs = attrs if attrs else {}
        self.children = children if children else []
        if attrs:
            rendered = " ".join(
                f'{key}="{value}"' for key, value in self.attrs.items()
            )
            self.opening_tag = f"<{self.name} {rendered}>"
        else:
            self.opening_tag = f"<{self.name}>"
        self.closing_tag = f"</{self.name}>"

    def append(self, node):
        """Add ``node`` as the last child."""
        self.children.append(node)

    def pop(self):
        """Remove and return the last child."""
        return self.children.pop()

    def __repr__(self):
        return f'Node("{self.name}", {self.attrs!r}, {self.children!r})'
143
144
class Text:
    """A leaf node holding a piece of text content."""

    def __init__(self, content):
        self.content = content

    def __repr__(self):
        return 'Text("{}")'.format(self.content)
151
152
class Document:
    """Accumulates indented HTML lines for a Node/Text tree and renders a page.

    Text content is HTML-escaped when it is written, so characters such as
    ``<``, ``>`` and ``&`` (common in source code and in names like
    ``<lambda>``) show up literally instead of being parsed as markup.
    """

    HEADER = """
<html>
 <head>
 <link href="https://thomasf.github.io/solarized-css/solarized-light.min.css" rel="stylesheet"></link>
 <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
 <style>
 .fn {
 font-weight: bolder;
 }
 .mn {
 font-weight: bolder;
 }
 .fk {
 font-weight: bolder;
 }
 .hidden {
 display: none;
 }
 .indent {
 padding-left: 16px;
 }
 </style>
 </head>
"""

    def __init__(self):
        self.level = 0  # current nesting depth, used for indentation
        self.lines = []  # rendered output lines, in document order

    def append(self, s):
        """Append ``s`` as a new line, indented by the current level."""
        self.lines.append(" " * self.level + s)

    def process(self, node):
        """Render ``node`` (a Node or Text) and its descendants into lines.

        A Node whose only child is a Text is written on a single line; any
        other Node gets its tags on separate lines with the children indented
        one level deeper. Returns ``self`` so calls can be chained.

        Raises:
            TypeError: if ``node`` is neither a Node nor a Text.
        """
        if isinstance(node, Node):
            if len(node.children) == 1 and isinstance(node.children[0], Text):
                # quote=False: this is element content, not an attribute value.
                content = html.escape(node.children[0].content, quote=False)
                self.append(node.opening_tag + content + node.closing_tag)
            else:
                self.append(node.opening_tag)
                self.level += 1
                for child in node.children:
                    self.process(child)
                self.level -= 1
                self.append(node.closing_tag)
        elif isinstance(node, Text):
            self.append(html.escape(node.content, quote=False))
        else:
            raise TypeError(f"Cannot process node: {node}")
        return self

    def render(self):
        """Return the full HTML page: header, accumulated lines, closing tag."""
        return self.HEADER + "\n".join(self.lines) + "</html>"
207
208
def toggle(node):
    """Build a "+/-" link whose click handler toggles the element with ``node``'s id.

    ``node.attrs`` must contain an "id" entry.
    """
    target = node.attrs["id"]
    link_attrs = {
        "href": "#",
        "onClick": f"$('#{target}').toggle(); return false;",
    }
    label = Text("+/-")
    return Node("a", attrs=link_attrs, children=[label])
216
217
def toggle_bar(node, title):
    """Return a div holding a toggle link for ``node`` followed by ``title``."""
    link = toggle(node)
    return Node("div", children=[link, title])
220
221
# Counter used to give every container created below a distinct element id.
CONTAINER_ID_CTR = 0


def toggle_bar_and_container(title):
    """Create a hidden, indented container div and the toggle bar controlling it.

    Returns a ``(bar, container)`` pair. Each call consumes one value of the
    module-level ``CONTAINER_ID_CTR`` to form the container's id ("c0", "c1", ...).
    """
    global CONTAINER_ID_CTR
    container_attrs = {"id": f"c{CONTAINER_ID_CTR}", "class": "hidden indent"}
    CONTAINER_ID_CTR += 1
    container = Node("div", attrs=container_attrs)
    bar = toggle_bar(container, title)
    return bar, container
231
232
def compute_call_map(event_log, kind):
    """Group the names of called functions of the given ``kind`` by module.

    Only Call events whose ``kind`` matches are considered. Returns a dict
    mapping each module name to the set of function names called in it.
    """
    by_module = {}
    for entry in event_log:
        if not isinstance(entry, Call) or entry.kind != kind:
            continue
        by_module.setdefault(entry.module_name, set()).add(entry.function_name)
    return by_module
241
242
def called_function_section(called_funcs, kind):
    """Build the collapsible "Called N <kind> functions" section.

    ``called_funcs`` maps module names to collections of function names.
    Modules are listed from most to fewest called functions, and the function
    names within each module are sorted.
    """
    count = sum(len(names) for names in called_funcs.values())
    summary_bar, summary_body = toggle_bar_and_container(
        Text(f"Called {count} {kind} functions")
    )

    def number_of_functions(item):
        return len(item[1])

    ranked = sorted(called_funcs.items(), key=number_of_functions, reverse=True)
    for module_name, names in ranked:
        module_bar, module_body = toggle_bar_and_container(
            Text(f"{module_name} ({len(names)})")
        )
        for function_name in sorted(names):
            module_body.append(Node("div", children=[Text(function_name)]))
        summary_body.append(module_bar)
        summary_body.append(module_body)
    return Node("div", children=[summary_bar, summary_body])
259
260
def call_node(event):
    """Build the "<function> in <module>" span describing ``event``.

    For builtin/extension events a trailing span with the event kind is added.
    """
    parts = [
        Node("span", attrs={"class": "fn"}, children=[Text(event.function_name)]),
        Text(" in "),
        Node("span", attrs={"class": "mn"}, children=[Text(event.module_name)]),
    ]
    if event.kind == FunctionEvent.BUILTIN:
        parts.append(Node("span", attrs={"class": "fk"}, children=[Text(event.kind)]))
    return Node("span", children=parts)
273
274
def execution_trace_section(event_log):
    """Turn the flat event log into a nested, collapsible call tree.

    Every Call opens a toggle bar plus a container for its callees; the
    matching Return closes it. Calls that made no further calls are collapsed
    to a plain div holding just their title. Return events that have no
    matching Call in the log are ignored.
    """
    section = Node("div")
    # stack[0] is the root section; stack[-1] is the container of the call
    # currently being executed.
    stack = [section]
    for event in event_log:
        if isinstance(event, Call):
            bar, callees = toggle_bar_and_container(call_node(event))
            stack[-1].append(bar)
            stack[-1].append(callees)
            stack.append(callees)
        elif isinstance(event, Return):
            if len(stack) == 1:
                # Unmatched return (its call was never recorded). Popping here
                # would remove the root and make stack[-1] raise IndexError.
                continue
            callees = stack.pop()
            if not callees.children:
                # If there are no callees, remove the callee container div and
                # toggle bar. Replace it with a simple text node.
                parent = stack[-1]
                parent.children.pop()
                bar = parent.children.pop()
                # bar's last child is the title passed to the toggle bar.
                parent.append(Node("div", children=[bar.children[-1]]))
    return section
293
294
class ImportTrace:
    """A node in the import tree: a module name plus the traces of its imports."""

    def __init__(self, module):
        self.imports = []
        self.module = module
299
300
class ImportTracer(modulefinder.ModuleFinder):
    """A ModuleFinder that also records which module triggered each import.

    Attributes:
        seen: An initially empty set (not used by the methods in this class).
        traces: Top-level ImportTrace objects, in the order they completed.
        stack: ImportTrace objects for the imports currently in progress.
    """

    def __init__(self, *args, **kwargs):
        self.seen = set()
        self.traces = []
        self.stack = []
        super().__init__(*args, **kwargs)

    def import_module(self, partname, fqname, parent):
        """Import ``fqname`` via the base class while recording the import tree.

        Modules already present in ``self.modules`` are returned immediately
        and produce no trace entry, so each module appears at most once.
        """
        if fqname in self.modules:
            return self.modules[fqname]
        trace = ImportTrace(fqname)
        if self.stack:
            self.stack[-1].imports.append(trace)
        self.stack.append(trace)
        try:
            result = super().import_module(partname, fqname, parent)
        finally:
            # Always unwind, even if the base class raised: a stale entry left
            # on the stack would make every later import look nested under it
            # and would stop further top-level traces from being recorded.
            self.stack.pop()
            if fqname not in self.modules and self.stack:
                # The module was not registered; drop it from the parent.
                self.stack[-1].imports.pop()
        if not self.stack:
            self.traces.append(trace)
        return result

    def compute_imports(self, path):
        """Analyze the script at ``path``; return (module names, import traces)."""
        self.run_script(path)
        return self.modules.keys(), self.traces

    def scan_code(self, co, m):
        """Process the store/import operations reported for code object ``co``.

        Only the operations yielded by ``self.scan_opcodes(co)`` are handled;
        this method does not descend into code objects nested inside ``co``.
        NOTE(review): this looks adapted from ModuleFinder.scan_code with that
        recursion left out, consistent with the script's help text saying
        inline imports are not considered -- confirm.

        Raises:
            RuntimeError: if the scanner yields an unknown operation.
        """
        scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                (name,) = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)
371
372
def imported_modules_section(imports):
    """Build the collapsible "Imported N modules" section, names sorted."""
    heading = Text(f"Imported {len(imports)} modules")
    bar, listing = toggle_bar_and_container(heading)
    for module_name in sorted(imports):
        listing.append(Node("div", children=[Text(module_name)]))
    return Node("div", children=[bar, listing])
378
379
def build_import_trace(trace, called_builtins, indent=False):
    """Render ``trace`` and, recursively, its imports as nested divs.

    Args:
        trace: An object with ``module`` (a module name) and ``imports`` (child
            traces) attributes.
        called_builtins: Mapping of module name to the collection of
            builtin/extension function names called in that module.
        indent: When true, the div gets the "indent" CSS class.

    The label is the module name, annotated with "(builtin)" when the module's
    loader is the builtin importer, "(failed importing module)" when it cannot
    be imported, and a count of called builtin/extension functions if any.
    """
    attrs = {"class": "indent"} if indent else {}
    msg = trace.module
    try:
        module = importlib.import_module(trace.module)
        # getattr: a module without __loader__ is just "not builtin"; it must
        # not be reported as a failed import.
        is_builtin = (
            getattr(module, "__loader__", None) is _frozen_importlib.BuiltinImporter
        )
    except (Exception, SystemExit):
        # Best effort: importing runs arbitrary module code, which may raise
        # anything or call sys.exit(). KeyboardInterrupt is deliberately not
        # caught so the user can still abort report generation.
        msg += " (failed importing module)"
    else:
        if is_builtin:
            msg += " (builtin)"
    if trace.module in called_builtins:
        msg += f" ({len(called_builtins[trace.module])} builtin/extension functions called)"
    container = Node("div", children=[Text(msg)], attrs=attrs)
    for child in trace.imports:
        container.append(build_import_trace(child, called_builtins, True))
    return container
398
399
def import_trace_section(traces, called_builtins):
    """Build the collapsible section holding one import tree per top-level trace."""
    bar, body = toggle_bar_and_container(Text("Import Trace:"))
    for top_level in traces:
        rendered = build_import_trace(top_level, called_builtins)
        body.append(rendered)
    return Node("div", children=[bar, body])
405
406
def render_summary(path, source, event_log):
    """Return the complete HTML report for the script at ``path``.

    Args:
        path: Path of the traced script; also analyzed for its imports.
        source: The script's source text, shown in the report.
        event_log: The recorded Call/Return events.
    """
    imported_modules, traces = ImportTracer().compute_imports(path)
    called_interpreted = compute_call_map(event_log, FunctionEvent.INTERPRETED)
    called_builtins = compute_call_map(event_log, FunctionEvent.BUILTIN)

    def titled(tag, text):
        return Node(tag, children=[Text(text)])

    # The list is built in document order, so the sections allocate their
    # container ids in the order they appear on the page.
    sections = [
        titled("h1", f"Tracing summary for {path}"),
        titled("h2", "Source:"),
        titled("pre", source),
        titled("h2", "Summary stats:"),
        imported_modules_section(imported_modules),
        called_function_section(called_builtins, FunctionEvent.BUILTIN),
        called_function_section(called_interpreted, FunctionEvent.INTERPRETED),
        titled("h2", "Import Trace:"),
        import_trace_section(traces, called_builtins),
        titled("h2", "Call Trace:"),
        execution_trace_section(event_log),
    ]
    page = Node("body", children=sections)
    return Document().process(page).render()
430
431
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="""
Determine what modules are imported and what functions are called by a script.

This script does the following:

 1. Executes the script at [script_path] and records what functions are called.
 2. Statically analyzes the script to determine what modules are imported. Note
 that this analysis does not consider inline imports and therefore may underestimate
 the set of modules that are imported during execution.

It then writes a report to [output_path] with the following information:

 - A list of the modules that are imported unconditionally.
 - A list of all the functions that were called, grouped by whether or not they
 are interpreted or native functions.
 - A call trace.
 - An import trace. Each imported module will appear exactly once in the trace,
 the first time it is executed.
""",
    )
    parser.add_argument("script_path", help="The path to the script to execute")
    parser.add_argument("output_path", help="Where to write the report")
    args = parser.parse_args()

    # Compile the script
    with open(args.script_path, "r") as f:
        source = f.read()
    code = compile(source, args.script_path, "exec")

    # Run the script in its own namespace rather than in this module's
    # globals. Otherwise any top-level name the script assigns (args, source,
    # f, Node, sys, ...) would overwrite the tracer's own and break the
    # report generation below.
    script_globals = {"__name__": "__main__", "__file__": args.script_path}

    event_log = []
    sys.setprofile(CallRecorder(event_log))
    try:
        exec(code, script_globals)
    finally:
        # Stop recording even if the script raised.
        sys.setprofile(None)

    with open(args.output_path, "w") as f:
        f.write(render_summary(args.script_path, source, event_log))
472 f.write(render_summary(args.script_path, source, event_log))