this repo has no description
at trunk 388 lines 14 kB view raw
#!/usr/bin/env python3
"""Pyro benchmark suite: run a set of benchmarks under one or more Python
interpreters with a configurable set of measurement tools, and report the
results as a table or JSON."""
import argparse
import json
import logging
import os
import shlex
import subprocess
import sys
from typing import Optional

from _display_results import build_table
from _tools import add_tools_arguments, PARALLEL_TOOLS, SEQUENTIAL_TOOLS


log = logging.getLogger(__name__)


class Benchmark:
    """A single benchmark: either a standalone ``.py`` file or a directory.

    Identity (equality, ordering, hashing) is keyed on ``name`` only, so two
    Benchmark objects discovered by different interpreters compare equal when
    they refer to the same benchmark name.
    """

    def __init__(self, path, name, ext):
        self.name = name
        self.path = path
        # A benchmark is either a plain Python file (".py") or a directory
        # (empty extension; see Interpreter.create_benchmark_from_dir).
        # NOTE: the previous `assert ext == ".py"` rejected directory
        # benchmarks even though discover_benchmarks() creates them.
        assert ext in (".py", ""), f"unexpected benchmark extension: {ext!r}"
        self.ext = ext
        self.bytecode = {}  # mapping of Interpreter to bytecode location

    def __lt__(self, other):
        return self.name < other.name

    def __eq__(self, other):
        return type(other) is Benchmark and self.name == other.name

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ suppresses the default __hash__; restore
        # hashability keyed on the same field used for equality.
        return hash(self.name)

    def __repr__(self):
        return self.name

    def filepath(self):
        """Full path of the benchmark entry (file or directory)."""
        return f"{self.path}/{self.name}{self.ext}"


class BenchmarkRunner:
    """Executes the selected measurement tools for every
    (interpreter, benchmark) pair and merges their results."""

    def __init__(self, args, interpreters):
        # `args` is the vars() dict of the parsed command-line namespace.
        self.interpreters = interpreters
        self.path = sys.argv[0].rsplit("/", 1)[0]
        self._register_measurement_tools(args["tools"], args)

    def _register_measurement_tools(self, tool_list, args):
        """Instantiate every known tool whose NAME was requested.

        Sequential tools run once per (interpreter, benchmark) pair;
        parallel tools run once over all interpreters together.
        """
        # (The previous sys.path.append(...)/sys.path.pop() pair was a
        # no-op and has been removed.)
        self.tools = [
            tool(args) for tool in SEQUENTIAL_TOOLS if tool.NAME in tool_list
        ]
        self.parallel_tools = [
            tool(args) for tool in PARALLEL_TOOLS if tool.NAME in tool_list
        ]

    @staticmethod
    def merge_parallel_results(results, parallel_results):
        """Merge parallel-tool rows into the sequential-tool rows.

        Both lists are sorted on the same (benchmark, interpreter) key so
        zip() pairs up the matching rows; each parallel row's fields are
        folded into the corresponding sequential row in place.
        """
        def row_key(row):
            return (row["benchmark"], row["interpreter"])

        results = sorted(results, key=row_key)
        parallel_results = sorted(parallel_results, key=row_key)
        for seq_result, parallel_result in zip(results, parallel_results):
            seq_result.update(parallel_result)
        return results

    def run_benchmarks(self):
        """Run all tools; return one result dict per (benchmark, interpreter).

        Each dict carries at least "benchmark" and "interpreter" keys plus
        whatever fields the tools contribute.
        """
        results = []
        for interpreter in self.interpreters:
            log.info("Running interpreter: %s", interpreter.name)
            for benchmark in interpreter.benchmarks_to_run:
                log.info("Running benchmark: %s", benchmark.name)
                result = {
                    "benchmark": benchmark.name,
                    "interpreter": interpreter.name,
                }
                for tool in self.tools:
                    result.update(tool.execute(interpreter, benchmark))
                results.append(result)
        for tool in self.parallel_tools:
            # start_benchmarks() guarantees all interpreters share the same
            # benchmarks_to_run, so the first interpreter's list is
            # representative.
            parallel_results = tool.execute_parallel(
                self.interpreters, self.interpreters[0].benchmarks_to_run
            )
            results = self.merge_parallel_results(results, parallel_results)
        return results


class Interpreter:
    """A Python binary plus the benchmark directory it should run against."""

    CPYTHON = "fbcode-python"
    CPYTHON_PATH = "/usr/bin/python3.8"

    def __init__(
        self,
        binary_path,
        benchmarks_path,
        interpreter_args: Optional[str] = None,
        interpreter_name: Optional[str] = None,
        benchmark_args: Optional[str] = None,
    ):
        if binary_path == Interpreter.CPYTHON:
            # Running locally (probably from a unit test)
            # This could stand to be refactored
            self.name = Interpreter.CPYTHON
            self.binary_path = Interpreter.CPYTHON_PATH
            self.benchmarks_path = benchmarks_path
        else:
            # Display name is the third-from-last path component,
            # e.g. ".../<name>/bin/python" -> "<name>".
            self.name = binary_path.rsplit("/", 2)[-3]
            self.binary_path = binary_path
            directory = binary_path.rsplit("/", 1)[0]
            if not os.path.isabs(benchmarks_path):
                # Relative benchmark paths are resolved against the
                # directory containing the interpreter binary.
                benchmarks_path = os.path.normpath(
                    f"{directory}/../{benchmarks_path}"
                )
            self.benchmarks_path = benchmarks_path

        if interpreter_name is not None:
            self.name = interpreter_name

        self.available_benchmarks = self.discover_benchmarks()
        self.interpreter_cmd = [self.binary_path]
        self.interpreter_args = []
        if interpreter_args is not None:
            self.interpreter_args.extend(shlex.split(interpreter_args))
        self.interpreter_cmd.extend(self.interpreter_args)
        if benchmark_args is not None:
            self.benchmark_args = shlex.split(benchmark_args)
        else:
            self.benchmark_args = []

    def __repr__(self):
        return f"<Interpreter {self.binary_path!r}>"

    def create_benchmark_from_file(self, benchmark_file):
        """Build a Benchmark from a path to a .py file."""
        benchmark_path, _, name_and_ext = benchmark_file.rpartition("/")
        name, _, ext = name_and_ext.rpartition(".")
        return Benchmark(benchmark_path, name, f".{ext}")

    def create_benchmark_from_dir(self, benchmark_dir):
        """Build a Benchmark from a path to a benchmark directory."""
        benchmark_path, _, name = benchmark_dir.rpartition("/")
        return Benchmark(benchmark_path, name, "")

    def discover_benchmarks(self):
        """Find benchmarks under self.benchmarks_path.

        A benchmark is a top-level .py file, or a directory whose path does
        not contain "__pycache__", "data", or "django" (support material).
        """
        discovered_benchmarks = []
        for entry in os.listdir(self.benchmarks_path):
            path = f"{self.benchmarks_path}/{entry}"
            if os.path.isfile(path) and path.endswith(".py"):
                discovered_benchmarks.append(
                    self.create_benchmark_from_file(path)
                )
            elif os.path.isdir(path) and not any(
                skip in path for skip in ("__pycache__", "data", "django")
            ):
                discovered_benchmarks.append(
                    self.create_benchmark_from_dir(path)
                )
        return discovered_benchmarks


class PyroBenchmarkSuite:
    """Command-line front end: parses arguments and drives BenchmarkRunner."""

    def arg_parser(self):
        """Build the argparse parser for the suite."""
        parser = argparse.ArgumentParser(
            description="Pyro benchmark suite",
            formatter_class=argparse.RawTextHelpFormatter,
        )
        parser.add_argument("--verbose", "-v", action="store_true")
        parser.add_argument(
            "--json", action="store_true", help="Print the data in a json format"
        )
        interpreter_help = """
Specify interpreter(s) to use:

-i /path/to/python
    """
        parser.add_argument(
            "--interpreter",
            "-i",
            metavar="INTERPRETER",
            dest="interpreters",
            type=str,
            action="append",
            default=[],
            help=interpreter_help,
        )
        interpreter_args_help = """
Specify command-line arguments to pass to an interpreter. Arguments must be
supplied as a single string. Arguments apply to the corresponding interpreter.
For example, if you supplied "-i foo -i bar -a '-X debug'" then the arguments
'-X debug' would apply to interpreter 'foo', while interpreter 'bar' would
have no additional arguments.
"""
        parser.add_argument(
            "--interpreter-args",
            "-a",
            metavar="INTERPRETER_ARGS",
            dest="interpreter_args",
            type=str,
            action="append",
            default=[],
            help=interpreter_args_help,
        )
        interpreter_names_help = """
Specify the name that should be used when reporting benchmark results for an
interpreter. Each name applies to the corresponding interpreter, in order.
The value for interpreter is used if no display name is provided. For example,
if you supplied "-i foo -i bar -n 'Foo interp'" then the results for interpreter
'foo' would be displayed as 'Foo interp', while the results for interpreter 'bar'
would be displayed as 'bar'. This is useful if you want to use the same interpreter
with different arguments.
"""
        parser.add_argument(
            "--interpreter-name",
            "-n",
            metavar="INTERPRETER_NAME",
            dest="interpreter_names",
            type=str,
            action="append",
            default=[],
            help=interpreter_names_help,
        )
        benchmark_args_help = """
Specify command-line arguments to pass to the benchmarks. Arguments must be
supplied as a single string. Arguments apply to all the benchmarks.
For example, if you supplied "-i lmao -i xyz -b foo -b bar --benchmark-args=
--benchmark-args='-X debug'" then the arguments '-X debug' would apply to all
the benchmarks run under interpreter 'xyz' and none for interpreter 'lmao'.
"""
        parser.add_argument(
            "--benchmark-args",
            metavar="BENCHMARK_ARGS",
            dest="benchmark_args",
            type=str,
            action="append",
            default=[],
            help=benchmark_args_help,
        )
        benchmarks_path_help = """
Specify benchmarks_path(s) to use. This must match with the interpreters used

-p /path/to/benchmarks

    """
        parser.add_argument(
            "--path",
            "-p",
            metavar="BENCHMARK_PATH",
            dest="benchmarks_path",
            type=str,
            action="append",
            default=[],
            help=benchmarks_path_help,
        )
        benchmark_help = """
The benchmark that you wish to run. Use repeatedly
to select more than one benchmark:

-b richards

Default: all

    """
        parser.add_argument(
            "--benchmark",
            "-b",
            metavar="BENCHMARK",
            dest="benchmarks",
            type=str,
            action="append",
            default=[],
            help=benchmark_help,
        )
        parser = add_tools_arguments(parser)
        return parser

    def _validate_args(self, args):
        """Raise if the parsed arguments are inconsistent."""
        # Check that at least one tool was selected
        if not args.tools:
            raise Exception("At least one `--tool` should be specified")

        # Check that at least one interpreter was selected
        if not args.interpreters:
            raise Exception("At least one `--interpreter` should be specified")

        # Check that benchmarks path matches the number of interpreters
        if len(args.benchmarks_path) != len(args.interpreters):
            raise Exception("The number of --interpreter and --path should match")

        if len(args.interpreter_args) > len(args.interpreters):
            raise Exception(
                "The number of interpreter arguments cannot exceed the number"
                " of interpreters"
            )

    def _build_interpreters(self, args):
        """Construct an Interpreter per --interpreter, pairing it positionally
        with the corresponding --path / -a / -n / --benchmark-args values
        (missing positions fall back to None)."""

        def positional(values, i):
            # i-th value if supplied, else None (lists may be shorter than
            # the interpreter list).
            return values[i] if i < len(values) else None

        interpreters = []
        for i, interp in enumerate(args.interpreters):
            interpreters.append(
                Interpreter(
                    interp,
                    args.benchmarks_path[i],
                    positional(args.interpreter_args, i),
                    positional(args.interpreter_names, i),
                    positional(args.benchmark_args, i),
                )
            )
        return interpreters

    def _select_benchmarks_to_run(self, args, interpreters):
        """Assign benchmarks_to_run to each interpreter and return the common
        set every interpreter will run.

        Raises if the interpreters end up with different benchmark lists
        (parallel tools need identical lists).
        """
        # If no benchmark is defined, add all of them
        for interpreter in interpreters:
            if not args.benchmarks:
                selected = list(interpreter.available_benchmarks)
            else:
                selected = [
                    b
                    for b in interpreter.available_benchmarks
                    if b.name in args.benchmarks
                ]
            interpreter.benchmarks_to_run = sorted(selected)

        # Try to run the benchmarks of the interpreter with the least
        # benchmarks.
        benchmarks_to_run = min(
            (i.benchmarks_to_run for i in interpreters), key=len
        )
        for interpreter in interpreters:
            # BUG FIX: the previous code removed entries from the very list
            # it was iterating, which silently skips the element following
            # each removal. Build a filtered copy instead.
            kept = []
            for benchmark in interpreter.benchmarks_to_run:
                if benchmark in benchmarks_to_run:
                    kept.append(benchmark)
                else:
                    log.info("Removing: %s", benchmark)
            interpreter.benchmarks_to_run = kept

        # Only run if all interpreters have the same benchmarks to run
        if not benchmarks_to_run:
            # Runtime validation; previously an assert, which is stripped
            # under python -O.
            raise Exception("No benchmarks selected to run")
        log.info("Will run the following benchmarks: %s", benchmarks_to_run)
        for interpreter in interpreters:
            if benchmarks_to_run != interpreter.benchmarks_to_run:
                raise Exception(
                    "Can't run parallel tools. The interpreters "
                    "have different available benchmarks: "
                    f"{benchmarks_to_run} vs {interpreter.benchmarks_to_run}"
                )
        return benchmarks_to_run

    def start_benchmarks(self, args):
        """Validate args, build interpreters, run the suite.

        Returns the JSON string when --json was given, otherwise the raw
        list of result dicts. Prints only when executed as a script.
        """
        log.info("Verifying benchmark arguments")

        self._validate_args(args)
        interpreters = self._build_interpreters(args)
        self._select_benchmarks_to_run(args, interpreters)

        print_json = args.json

        log.info("Running benchmarks with args: %s", args)
        runner = BenchmarkRunner(vars(args), interpreters)
        try:
            benchmark_results = runner.run_benchmarks()
        except subprocess.CalledProcessError as cpe:
            # Chain the cause so the original CalledProcessError traceback
            # is preserved.
            raise RuntimeError(
                f"{cpe}\n\nstdout:\n{cpe.stdout}\n\nstderr:\n{cpe.stderr}"
            ) from cpe
        log.info(benchmark_results)

        if print_json:
            json_output = json.dumps(benchmark_results)
            if __name__ == "__main__":
                print(json_output)
            return json_output

        if __name__ == "__main__":
            print(build_table(benchmark_results))
        return benchmark_results


def main(argv):
    """Parse argv, configure logging, and run the suite."""
    suite = PyroBenchmarkSuite()
    parser = suite.arg_parser()
    args = parser.parse_args(argv)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARN,
        format="%(message)s",
    )
    return suite.start_benchmarks(args)


if __name__ == "__main__":
    main(sys.argv[1:])