this repo has no description
at trunk 450 lines 13 kB view raw
#!/usr/bin/env python3
"""Unit tests for the benchmark runner (``run``) and its helpers (``_tools``)."""
import json
import os
import shutil
import sys
import unittest

import _tools
import run


def tool_exists(name):
    """Return True if an executable called *name* is available on PATH.

    Uses ``shutil.which``; the previous ``distutils.spawn.find_executable``
    was deprecated by PEP 632 and removed entirely in Python 3.12.
    """
    return shutil.which(name) is not None


# Directory of the default benchmark scripts, located next to this file.
BENCHMARKS_PATH = f"{os.path.dirname(os.path.realpath(__file__))}/benchmarks"


class TestBenchmark(unittest.TestCase):
    def _run_single_benchmark(self, benchmark_name):
        """Run one named benchmark under the ``time`` tool with ``--json``
        output and assert exactly one result, for that benchmark, comes back.

        Shared by all the ``test_choose_*_benchmark`` tests below, which were
        previously copy-pasted variants of the same body.
        """
        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-b",
            benchmark_name,
            "-t",
            "time",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        self.assertEqual(len(json_output), 1)
        single_result = json_output[0]
        self.assertEqual(single_result["benchmark"], benchmark_name)

    def test_benchmark_pins_to_single_cpu_from_list(self):
        available_cpus = "1,2,3,4"
        result = _tools.create_taskset_command(available_cpus)
        self.assertEqual(result, ["taskset", "--cpu-list", "1"])

    def test_benchmark_pins_to_single_cpu_from_range(self):
        available_cpus = "7-13"
        result = _tools.create_taskset_command(available_cpus)
        self.assertEqual(result, ["taskset", "--cpu-list", "7"])

    def test_benchmark_json(self):
        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-t",
            "time",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        # There's at least 4 default benchmarks
        self.assertGreater(len(json_output), 3)
        single_result = json_output[0]

        # Test the basic json output results
        self.assertIn("benchmark", single_result)
        self.assertIn("interpreter", single_result)
        self.assertIn("time_sec", single_result)

    def test_benchmark_table(self):
        arguments = ["-i", "fbcode-python", "-p", BENCHMARKS_PATH, "-t", "time"]
        list_output = run.main(arguments)
        # There's at least 4 default benchmarks
        self.assertGreater(len(list_output), 3)
        single_result = list_output[0]

        # Test the basic table output results
        self.assertIn("benchmark", single_result)
        self.assertIn("interpreter", single_result)
        self.assertIn("time_sec", single_result)

    def test_choose_single_benchmark(self):
        self._run_single_benchmark("richards")

    def test_choose_go_benchmark(self):
        self._run_single_benchmark("go")

    def test_choose_nqueens_benchmark(self):
        self._run_single_benchmark("nqueens")

    def test_choose_nbody_benchmark(self):
        self._run_single_benchmark("nbody")

    def test_choose_base64_benchmark(self):
        self._run_single_benchmark("bench_base64")

    def test_choose_pyflate_benchmark(self):
        self._run_single_benchmark("pyflate")

    def test_choose_2to3_benchmark(self):
        self._run_single_benchmark("2to3")

    def test_choose_bench_pickle_benchmark(self):
        self._run_single_benchmark("bench_pickle")

    def test_choose_loadproperty_benchmark(self):
        self._run_single_benchmark("loadproperty")

    def test_choose_multiple_benchmarks(self):
        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-b",
            "richards",
            "-b",
            "deltablue",
            "-t",
            "time",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        self.assertEqual(len(json_output), 2)
        expected_results = ["deltablue", "richards"]
        result1 = json_output[0]
        result2 = json_output[1]
        results = sorted([result1["benchmark"], result2["benchmark"]])
        self.assertListEqual(expected_results, results)

    def test_callgrind(self):
        # valgrind is not available on all machines; report the skip instead
        # of silently passing via an early return.
        if not tool_exists("valgrind"):
            self.skipTest("valgrind is not available on this machine")

        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-b",
            "deltablue",
            "-b",
            "richards",
            "-t",
            "callgrind",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        single_result = json_output[0]

        self.assertIn("benchmark", single_result)
        self.assertIn("interpreter", single_result)
        self.assertIn("cg_instructions", single_result)

    def test_perfstat(self):
        # perf stat can't run on MacOS; report the skip instead of silently
        # passing via an early return.
        if not tool_exists("perf"):
            self.skipTest("perf is not available on this machine")

        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-b",
            "deltablue",
            "-t",
            "perfstat",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        single_result = json_output[0]

        self.assertIn("benchmark", single_result)
        self.assertIn("interpreter", single_result)
        self.assertIn("task-clock", single_result)
        self.assertIn("instructions", single_result)

    def test_perfstat_custom_events(self):
        # perf stat can't run on MacOS
        if not tool_exists("perf"):
            self.skipTest("perf is not available on this machine")

        # Uses the long-form flags to exercise the alternate spellings of the
        # same CLI options covered elsewhere with -i/-p/-b/-t.
        arguments = [
            "--interpreter",
            "fbcode-python",
            "--path",
            BENCHMARKS_PATH,
            "--benchmark",
            "deltablue",
            "--tool",
            "perfstat",
            "--event",
            "cycles",
            "--event",
            "branches",
            "--event",
            "branch-misses",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        single_result = json_output[0]
        self.assertIn("benchmark", single_result)
        self.assertIn("interpreter", single_result)
        self.assertIn("cycles", single_result)
        self.assertIn("branches", single_result)
        self.assertIn("branch-misses", single_result)

    def test_multiple_tools(self):
        # perf stat can't run on MacOS, and valgrind is not available on all
        # machines; this test needs both.
        if not tool_exists("perf"):
            self.skipTest("perf is not available on this machine")
        if not tool_exists("valgrind"):
            self.skipTest("valgrind is not available on this machine")

        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "-b",
            "deltablue",
            "-b",
            "richards",
            "-t",
            "time",
            "-t",
            "perfstat",
            "-t",
            "callgrind",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        single_result = json_output[0]

        # Both time_sec from time and task-clock from perfstat are present
        self.assertIn("time_sec", single_result)
        self.assertIn("task-clock", single_result)
        self.assertIn("cg_instructions", single_result)

    def test_benchmark_args(self):
        # --benchmark-args forwards extra flags (here --jit) to the benchmark
        # process; the run should still produce a normal single result.
        arguments = [
            "-i",
            "fbcode-python",
            "-p",
            BENCHMARKS_PATH,
            "--benchmark-args=--jit",
            "-b",
            "richards",
            "-t",
            "time",
            "--json",
        ]
        json_output = json.loads(run.main(arguments))
        self.assertEqual(len(json_output), 1)
        single_result = json_output[0]
        self.assertEqual(single_result["benchmark"], "richards")

    def test_benchmark_runner_merge_parallel_results(self):
        # Sequential (perfstat) and parallel (callgrind) result rows for the
        # same (benchmark, interpreter) pairs, deliberately in different
        # orders, must merge into one sorted row per pair.
        seq_data = [
            {
                "benchmark": "richards",
                "interpreter": "python_base",
                "task-clock": 228.8,
                "instructions": 1478343960,
            },
            {
                "benchmark": "deltablue",
                "interpreter": "python_base",
                "task-clock": 1414.33,
                "instructions": 9713750376,
            },
            {
                "benchmark": "richards",
                "interpreter": "fbcode-python",
                "task-clock": 867.86,
                "instructions": 5875444144,
            },
            {
                "benchmark": "deltablue",
                "interpreter": "fbcode-python",
                "task-clock": 813.83,
                "instructions": 5776832979,
            },
        ]
        parallel_data = [
            {
                "benchmark": "deltablue",
                "interpreter": "python_base",
                "cg_instructions": 9976758111,
            },
            {
                "benchmark": "deltablue",
                "interpreter": "fbcode-python",
                "cg_instructions": 5779752171,
            },
            {
                "benchmark": "richards",
                "interpreter": "fbcode-python",
                "cg_instructions": 5792328343,
            },
            {
                "benchmark": "richards",
                "interpreter": "python_base",
                "cg_instructions": 1501239987,
            },
        ]
        expected_results = [
            {
                "benchmark": "deltablue",
                "interpreter": "fbcode-python",
                "task-clock": 813.83,
                "instructions": 5776832979,
                "cg_instructions": 5779752171,
            },
            {
                "benchmark": "deltablue",
                "interpreter": "python_base",
                "task-clock": 1414.33,
                "instructions": 9713750376,
                "cg_instructions": 9976758111,
            },
            {
                "benchmark": "richards",
                "interpreter": "fbcode-python",
                "task-clock": 867.86,
                "instructions": 5875444144,
                "cg_instructions": 5792328343,
            },
            {
                "benchmark": "richards",
                "interpreter": "python_base",
                "task-clock": 228.8,
                "instructions": 1478343960,
                "cg_instructions": 1501239987,
            },
        ]
        results = run.BenchmarkRunner.merge_parallel_results(seq_data, parallel_data)
        self.assertEqual(results, expected_results)


if __name__ == "__main__":
    unittest.main()