this repo has no description
at trunk 234 lines 6.7 kB view raw
#!/usr/bin/env python3
"""Render benchmark comparison results from a JSON file as markdown tables.

Usage: pass a single argument, the path to a JSON file containing a list of
rows; each row is a dict with at least an "interpreter" key (one of VARIANTS),
a "benchmark" key, and one value per metric in METRICS.
"""

import json
import logging
import sys
from os import path


logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Markdown table column-alignment separators.
LEFT_ALIGN = " :----- "
RIGHT_ALIGN = " -----: "
METRICS_HEADER = (
    ("benchmark", LEFT_ALIGN),
    ("cg_instructions", RIGHT_ALIGN),
)
METRICS = [metric_name for metric_name, align in METRICS_HEADER]
SUMMARY_HEADER = (
    ("Metric", LEFT_ALIGN),
    ("Average", LEFT_ALIGN),
    ("Best", LEFT_ALIGN),
    ("Worst", LEFT_ALIGN),
    ("Notes", LEFT_ALIGN),
)
# Per-metric free-text notes shown in the summary table.
SUMMARY_METRIC_NOISE_MAP = {
    "cg_instructions": "typically < 0.2% noise",
}

# Some hard-coded strings (JSON row keys).
BENCHMARK = "benchmark"
INTERPRETER = "interpreter"

# variants: the values the "interpreter" key may take in the input JSON.
CPYTHON_VARIANT = "fbcode-cpython"
BASE_VARIANT = "python_base"
NEW_VARIANT = "python_new"
VARIANTS = (
    CPYTHON_VARIANT,
    BASE_VARIANT,
    NEW_VARIANT,
)


def read_filename_from_arg():
    """
    Read filename from argument, and ensure that the only argument provided is the filename.
    """
    if len(sys.argv) != 2:
        sys.exit("Invalid argument. Expect one argument (filename)")
    return sys.argv[1]


def ensure_file_exists(filename):
    """
    Ensures that the file provided in the arguments exists.
    """
    if not path.isfile(filename):
        # BUGFIX: the message previously had no placeholder and never named
        # the missing file.
        sys.exit(f"The provided file does not exist: {filename}")


def read_json(filename):
    """
    Loads and ensures that the json file is valid.
    """
    with open(filename) as f:
        try:
            return json.load(f)
        except json.decoder.JSONDecodeError:
            # BUGFIX: the message previously had no placeholder and never
            # named the corrupted file.
            sys.exit(f"The provided json file is corrupted: {filename}")


def split_json_by_variants(data):
    """
    From the json input, split the data into the different variants.

    Raises KeyError if a row's interpreter is not one of VARIANTS.
    """
    variants = {variant: [] for variant in VARIANTS}
    for row in data:
        interpreter = row[INTERPRETER]
        variants[interpreter].append(row)
    return variants


def generate_markdown_row_partition(data):
    """
    Convert a list into a string separated by `|`.

    eg.
    ['apple', 'boy', 'car'] -> "| apple | boy | car |".
    """
    return "|".join([""] + data + [""])


def generate_markdown_table_header(title, cols):
    """
    Generate the markdown table's header

    eg.
    title: Summary, cols: [('apple', RIGHT_ALIGN), ('boy', LEFT_ALIGN), ('car', LEFT_ALIGN)]
    -> ["# Summary", "| apple | boy | car |", "| :----- | -----: | -----: |"]
    """
    table = [f"# {title}", ""]
    column_header = [f" {col_name} " for col_name, _ in cols]
    table.append(generate_markdown_row_partition(column_header))
    table.append(generate_markdown_row_partition([col_align for _, col_align in cols]))
    return table


def generate_markdown_table(title, cols, rows, is_percentage=False):
    """
    Generate the markdown table from the data.

    `rows` is a list of dicts keyed by the column names in `cols`. Numeric
    values are rendered either as percentages (is_percentage=True) or with
    thousands separators; everything else is rendered verbatim.
    """

    def format_value(value):
        if isinstance(value, (int, float)):
            if is_percentage:
                return " {:.1%} ".format(value)
            else:
                return f" {value:,} "
        else:
            return f" {value} "

    table = generate_markdown_table_header(title, cols)
    col_names = [col_name for col_name, _ in cols]
    for row in rows:
        row_data = [format_value(row[col]) for col in col_names]
        table.append(generate_markdown_row_partition(row_data))
    return "\n".join(table)


def compare_metric(old_data, new_data, title):
    """
    From the old and new data, compute percentage change for each benchmark.

    Returns a list of rows {BENCHMARK: name, metric: fractional_change}.
    If ANY benchmark cannot be matched between old and new, a warning is
    logged and ALL results are discarded (an empty list is returned).
    """
    benchmark_to_data = {}
    unmatched = []
    for row in old_data:
        benchmark = row[BENCHMARK]
        benchmark_to_data[benchmark] = [row]
    for row in new_data:
        benchmark = row[BENCHMARK]
        if benchmark not in benchmark_to_data:
            unmatched.append(benchmark)
            continue
        benchmark_to_data[benchmark].append(row)

    result = []
    for benchmark, data in benchmark_to_data.items():
        if len(data) != 2:
            unmatched.append(benchmark)
            continue
        old_metric_data, new_metric_data = data
        metric_data0 = [old_metric_data[metric] for metric in METRICS]
        metric_data1 = [new_metric_data[metric] for metric in METRICS]
        row = {BENCHMARK: benchmark}
        for index, metric in enumerate(METRICS):
            if metric == BENCHMARK:
                continue
            # Fractional change: new/old - 1 (e.g. 0.1 == +10%).
            row[metric] = metric_data1[index] / metric_data0[index] - 1
        result.append(row)
    if unmatched:
        # BUGFIX: logging.Logger.warn is a deprecated alias of warning().
        log.warning(
            f"The following benchmarks cannot be matched (so results are discarded) when comparing {title}: {unmatched}"
        )
        return []
    return result


def generate_summary(data):
    """
    Build summary rows (average/best/worst per metric) from compare_metric output.

    An empty `data` (e.g. when compare_metric discarded everything) yields
    placeholder rows instead of raising.
    """
    result = []
    for metric, notes in SUMMARY_METRIC_NOISE_MAP.items():
        if not data:
            # BUGFIX: min()/max() on an empty list raised ValueError even
            # though the average was already guarded.
            result.append(
                {
                    "Metric": metric,
                    "Average": "n/a",
                    "Best": "n/a",
                    "Worst": "n/a",
                    "Notes": notes,
                }
            )
            continue
        avg = sum(d[metric] for d in data) / len(data)
        best = min(data, key=lambda x: x[metric])  # smaller is better
        worst = max(data, key=lambda x: x[metric])  # larger is worse

        row = {
            "Metric": metric,
            "Average": f"**{avg:.1%}**",
            "Best": f"{best[BENCHMARK]} {best[metric]:.1%}",
            "Worst": f"{worst[BENCHMARK]} {worst[metric]:.1%}",
            "Notes": notes,
        }
        result.append(row)
    return result


if __name__ == "__main__":
    filename = read_filename_from_arg()
    ensure_file_exists(filename)
    data = read_json(filename)
    variants = split_json_by_variants(data)

    print(
        generate_markdown_table(
            "Summary",
            SUMMARY_HEADER,
            generate_summary(
                compare_metric(variants[BASE_VARIANT], variants[NEW_VARIANT], "summary")
            ),
        )
    )
    print()
    print("<details><summary>Benchmark details</summary>")
    print()  # A newline after </summary> is required for rendering
    print(
        generate_markdown_table(
            "Base vs. New",
            METRICS_HEADER,
            compare_metric(
                variants[BASE_VARIANT], variants[NEW_VARIANT], "base vs new"
            ),
            is_percentage=True,
        )
    )
    print()
    print(
        generate_markdown_table(
            "CPython vs New",
            METRICS_HEADER,
            compare_metric(
                variants[CPYTHON_VARIANT], variants[NEW_VARIANT], "cpython vs new"
            ),
            is_percentage=True,
        )
    )
    print()
    print(generate_markdown_table("Base", METRICS_HEADER, variants[BASE_VARIANT]))
    print()
    print(generate_markdown_table("New", METRICS_HEADER, variants[NEW_VARIANT]))
    print()
    print(generate_markdown_table("CPython", METRICS_HEADER, variants[CPYTHON_VARIANT]))
    print()
    print("</details>")