this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3"""
4Utility script that provides default arguments for executing a command
5with various performance measurement tools.
6"""
7import logging
8import os
9import re
10import subprocess
11import tempfile
12from abc import ABC, abstractmethod
13from multiprocessing.pool import ThreadPool
14
15
16log = logging.getLogger(__name__)
17SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
18
19
def run(cmd, **kwargs):
    """Execute *cmd* as a subprocess with a deterministic hash seed.

    PYTHONHASHSEED is forced to 0 so repeated benchmark runs are comparable.
    Returns the CompletedProcess; raises CalledProcessError on failure
    (check=True).
    """
    env = {**os.environ, "PYTHONHASHSEED": "0"}
    log.info(f">>> {' '.join(cmd)}")
    return subprocess.run(cmd, encoding="UTF-8", env=env, check=True, **kwargs)
25
26
def create_taskset_command(isolated_cpus):
    """Build a ``taskset`` command prefix from an isolated-CPU list string.

    Returns ``[]`` when *isolated_cpus* is the empty string; otherwise a
    command pinning execution to the first CPU number found in the string.
    """
    if isolated_cpus != "":
        # If it ever matters in the future, this only pulls out the first
        # integer encountered in the list of isolated cpus.
        first_cpu = re.findall(r"\d+", isolated_cpus)[0]
        return ["taskset", "--cpu-list", first_cpu]
    return []
34
35
def pin_to_cpus():
    """Return a ``taskset`` command prefix pinning runs to isolated CPUs.

    Reads the kernel's isolated-CPU list from sysfs. Returns an empty list
    when the system exposes no CPU-isolation information.
    """
    isolated_path = "/sys/devices/system/cpu/isolated"
    if not os.path.exists(isolated_path):
        return []
    # Read the sysfs file directly rather than spawning `cat` in a
    # subprocess; the contents are plain text either way.
    with open(isolated_path) as fd:
        isolated_cpus = fd.read().strip()
    return create_taskset_command(isolated_cpus)
44
45
class PerformanceTool(ABC):
    """Abstract interface implemented by every performance measurement tool."""

    def __init__(self, args):
        """Read optional command line arguments (a dict) to set defaults."""
        pass

    @staticmethod
    @abstractmethod
    def add_tool():
        """Return the name of the tool along with a description."""
        return ""

    @staticmethod
    def add_optional_arguments(parser):
        """Register any optional command line arguments that tune the tool."""
        return parser
62
63
class SequentialPerformanceTool(PerformanceTool):
    """A performance tool that measures one interpreter/benchmark at a time."""

    @abstractmethod
    def execute(self, interpreter, benchmark):
        """Execute the tool for one run.Interpreter / run.Benchmark pair.

        Returns a dictionary with the values to be reported.
        """
        pass
71
72
class ParallelPerformanceTool(PerformanceTool):
    """A performance tool that measures many runs concurrently."""

    @abstractmethod
    def execute_parallel(self, interpreters, benchmarks):
        """Execute the tool for every interpreter/benchmark combination.

        Input: list<run.Interpreter>, list<run.Benchmark>
        Output: a list of dictionaries with the values to be reported; each
        dictionary must report both 'benchmark' and 'interpreter'.
        """
        pass
82
83
class TimeTool(SequentialPerformanceTool):
    """Measure execution time via the `_time_tool.py` helper script."""

    NAME = "time"

    def execute(self, interpreter, benchmark):
        """Time *benchmark* under *interpreter* and return the parsed results.

        Values reported under 'time_sec', 'time_sec_mean' and
        'time_sec_stdev' are converted to floats.
        """
        command = [
            *pin_to_cpus(),
            *interpreter.interpreter_cmd,
            f"{SCRIPT_DIR}/_time_tool.py",
            # The time tool imports the module, which will use the bytecode
            # cache. Pass the source file instead of the bytecode file.
            benchmark.filepath(),
            *interpreter.benchmark_args,
        ]
        time_output = run(command, stdout=subprocess.PIPE).stdout.strip()
        # Each output line is "<name> , <value>".
        result = {}
        for line in time_output.split("\n"):
            fields = line.split(" , ")
            result[fields[0]] = fields[1]
        if "time_sec" in result:
            result["time_sec"] = float(result["time_sec"])
        if "time_sec_mean" in result:
            result["time_sec_mean"] = float(result["time_sec_mean"])
            result["time_sec_stdev"] = float(result["time_sec_stdev"])
        return result

    @staticmethod
    def add_tool():
        """Describe this tool for the command line help text."""
        return f"""
'{TimeTool.NAME}': Use the 'time' command to measure execution time
"""
115
116
class PerfStat(SequentialPerformanceTool):
    """Measure hardware/software counters with `perf stat`."""

    NAME = "perfstat"
    # Events measured when no --event arguments are given.
    DEFAULT_EVENTS = ["task-clock", "instructions"]
    # How many times `perf stat` repeats each measurement. Referenced by both
    # execute() and the help text so the two cannot drift apart (the help
    # previously claimed 10 repeats while the code used 5).
    REPEAT = 5

    def __init__(self, args):
        """Read the optional 'events' command line argument (a list of str)."""
        self.events = args["events"] or PerfStat.DEFAULT_EVENTS

    def parse_perfstat(self, output):
        """Parse `perf stat` field-separated (';') output.

        Returns a {event_name: value} dict; values are ints unless they
        contain a decimal point. Uncountable/unsupported events are skipped.
        Logs an error and returns {} when the output has no separator at all.
        """
        if ";" not in output:
            log.error(f"perf stat returned an error: {output}")
            return {}
        events = [e.split(";") for e in output.split("\n") if ";" in e]
        results = {}
        for event in events:
            # Field layout: value;unit;event-name;...
            name = event[2]
            value = event[0]
            if value in ("<not counted>", "<not supported>", ""):
                continue
            results[name] = float(value) if "." in value else int(value)
        return results

    def execute(self, interpreter, benchmark):
        """Run *benchmark* under `perf stat` and return the counter values."""
        command = pin_to_cpus()
        command += ["perf", "stat"]
        command += ["--field-separator", ";"]
        command += ["--repeat", str(PerfStat.REPEAT)]

        # To avoid event multiplexing, we only run two events at a time
        results = {}
        events = list(self.events)
        bytecode_path = compile_bytecode(interpreter, benchmark)
        while events:
            full_command = command + ["--event", events.pop(0)]
            if events:
                full_command += ["--event", events.pop(0)]
            full_command += [
                *interpreter.interpreter_cmd,
                bytecode_path,
                *interpreter.benchmark_args,
            ]
            # perf stat writes its report to stderr.
            completed_process = run(full_command, stderr=subprocess.PIPE)
            perfstat_output = completed_process.stderr.strip()
            results.update(self.parse_perfstat(perfstat_output))
        return results

    @staticmethod
    def add_tool():
        """Describe this tool for the command line help text."""
        return f"""
'{PerfStat.NAME}': Use `perf stat` to measure the execution time of
a benchmark. This repeats the run {PerfStat.REPEAT} times to find a significant result
"""

    # Add any optional command line arguments to tune the tool
    @staticmethod
    def add_optional_arguments(parser):
        perfstat_event_help = f"""
Specify the perf stat event to run. Please note, only two are run at the
same time to avoid event multiplexing. For a full list of perf stat events,
run: `perf list`.

Examples: 'instructions', 'branch-misses', 'L1-icache-load-misses'

Default: {PerfStat.DEFAULT_EVENTS}
"""
        parser.add_argument(
            "--event",
            metavar="EVENT",
            dest="events",
            type=str,
            action="append",
            default=[],
            help=perfstat_event_help,
        )
        return parser
192
193
class Callgrind(ParallelPerformanceTool):
    """Count executed instructions with `valgrind --tool=callgrind`."""

    NAME = "callgrind"

    def __init__(self, args):
        """Read the optional 'callgrind_out_dir' argument (may be absent)."""
        self.callgrind_out_dir = args.get("callgrind_out_dir")

    def _worker(self, interpreter, benchmark):
        """Run one interpreter/benchmark pair under callgrind.

        Returns a result dict reporting 'benchmark', 'interpreter' and
        'cg_instructions'. The callgrind output file is kept only when an
        output directory was requested on the command line.
        """
        delete = True
        callgrind_out_dir = self.callgrind_out_dir
        if callgrind_out_dir is not None:
            callgrind_out_dir = os.path.abspath(callgrind_out_dir)
            os.makedirs(callgrind_out_dir, exist_ok=True)
            delete = False
        with tempfile.NamedTemporaryFile(
            dir=callgrind_out_dir,
            prefix=f"{benchmark.name}_",
            suffix=".cg",
            delete=delete,
        ) as temp_file:
            bytecode_path = compile_bytecode(interpreter, benchmark)
            run(
                [
                    "valgrind",
                    "--quiet",
                    "--tool=callgrind",
                    "--trace-children=yes",
                    f"--callgrind-out-file={temp_file.name}",
                    *interpreter.interpreter_cmd,
                    bytecode_path,
                    *interpreter.benchmark_args,
                ]
            )

            # Take the instruction count from the (last) "summary:" line of
            # the callgrind output; falls back to 1 if none is found.
            instructions = 1
            with open(temp_file.name) as fd:
                summary_re = re.compile(r"summary:\s*(.*)")
                for line in fd:
                    m = summary_re.match(line)
                    if m:
                        instructions = int(m.group(1))
            return {
                "benchmark": benchmark.name,
                "interpreter": interpreter.name,
                "cg_instructions": instructions,
            }

    def execute_parallel(self, interpreters, benchmarks):
        """Run every interpreter/benchmark combination concurrently.

        Returns a list of the per-run result dictionaries.
        """
        # Use the pool as a context manager so its worker threads are always
        # cleaned up; the original pool was never closed or joined.
        with ThreadPool() as pool:
            async_results = [
                pool.apply_async(self._worker, (interpreter, benchmark))
                for interpreter in interpreters
                for benchmark in benchmarks
            ]
            # Collect inside the with-block: get() must finish before the
            # pool is terminated on exit.
            return [ar.get() for ar in async_results]

    @classmethod
    def add_tool(cls):
        """Describe this tool for the command line help text."""
        return f"""
'{cls.NAME}': Measure executed instructions with `valgrind`/`callgrind`.
"""

    @staticmethod
    def add_optional_arguments(parser):
        """Register the optional callgrind output directory argument."""
        parser.add_argument("--callgrind-out-dir", metavar="DIRECTORY")
        return parser
263
264
class Size(SequentialPerformanceTool):
    """Report the size of the interpreter binary's text segment."""

    NAME = "size"

    def __init__(self, args):
        """This tool has no command line arguments."""
        pass

    def execute(self, interpreter, benchmark):
        """Measure *interpreter*'s binary with `size --format=sysv`.

        The benchmark argument is unused; the result depends only on the
        interpreter binary. Returns {'size_text': bytes} or {} on failure.
        """
        completed_process = run(
            ["size", "--format=sysv", interpreter.binary], stdout=subprocess.PIPE
        )
        # sysv format lines look like: "<section> <size> <addr>".
        section_re = re.compile(r"([a-zA-Z0-9_.]+)\s+([0-9]+)\s+[0-9a-fA-F]+$")
        size = 0
        for line in completed_process.stdout.strip().splitlines():
            match = section_re.match(line)
            if match is None:
                continue
            # Sum the code sections; the section name differs across object
            # file formats.
            if match.group(1) in (".text", "__text"):
                size += int(match.group(2))
        if size == 0:
            log.error(f"Could not determine text segment size of {interpreter.binary}")
            return {}
        return {"size_text": size}

    @classmethod
    def add_tool(cls):
        """Describe this tool for the command line help text."""
        return f"""
'{cls.NAME}': Use `size` to measure the size of the interpreters text segment.
"""
295
296
def add_tools_arguments(parser):
    """Register the --tool selector and each tool's own arguments on *parser*.

    Returns the parser so calls can be chained.
    """
    descriptions = "".join(tool.add_tool() for tool in TOOLS)
    measure_tools_help = (
        "The measurement tool to use. Available Tools: \n" + descriptions
    )
    parser.add_argument(
        "--tool",
        "-t",
        metavar="TOOL",
        dest="tools",
        type=str,
        action="append",
        default=[],
        choices=[tool.NAME for tool in TOOLS],
        help=measure_tools_help,
    )
    for tool in TOOLS:
        parser = tool.add_optional_arguments(parser)
    return parser
319
320
def compile_bytecode(interpreter, benchmark):
    """Precompile *benchmark* to bytecode using *interpreter*.

    Delegates to the `_compile_tool.py` helper, which prints the path of the
    compiled bytecode file; that path is returned with surrounding
    whitespace (including the trailing newline) removed.
    """
    log.info(f"Compiling benchmark for {interpreter.name}: {benchmark.name}")
    command = [
        *interpreter.interpreter_cmd,
        f"{SCRIPT_DIR}/_compile_tool.py",
        benchmark.filepath(),
        *interpreter.benchmark_args,
    ]
    result = run(command, stdout=subprocess.PIPE)
    # .strip() replaces the redundant .lstrip().rstrip() chain.
    return result.stdout.strip()
331
332
# Tool registry: add any new measurement tool to the appropriate list below.
SEQUENTIAL_TOOLS = [TimeTool, PerfStat, Size]
PARALLEL_TOOLS = [Callgrind]
TOOLS = [*SEQUENTIAL_TOOLS, *PARALLEL_TOOLS]