#!/usr/bin/env python3
"""
Parse Swift Benchmark markdown (produced by
`swift package benchmark ... --format markdown`) and produce compact summary
markdown tables for Decoding and Encoding using the Time (total CPU) p50 values.

Usage:
  - Read from stdin:
      swift package benchmark baseline compare swiftcbor --format markdown --no-progress | python3 bench_compare.py
  - Or read from file:
      python3 bench_compare.py benchmark.md

The script prints two markdown tables (Decoding and Encoding) to stdout.
"""

import argparse
import re
import sys
from pathlib import Path

# "### <benchmark name> metrics" heading; matched at the very start of a line.
_SECTION_RE = re.compile(r"###\s+(.+?)\s+metrics")
# "Time (total CPU)" optionally followed by a parenthesised unit, e.g. "(ns)".
_TIME_RE = re.compile(r"Time\s*\(total CPU\)\s*(?:\(([^)]+)\))?")
# First unsigned integer or decimal token inside a table cell.
_NUMBER_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
# Maximum number of lines scanned when looking for a table header or its rows.
_LOOKAHEAD = 60


def _is_heading(line):
    """Return True when *line* is a '## ' or '###' markdown heading."""
    stripped = line.strip()
    return stripped.startswith('###') or stripped.startswith('## ')


def _split_row(line):
    """Split a '| a | b |' table row into its stripped cell texts."""
    return [cell.strip() for cell in line.split('|')][1:-1]


def _find_p50_header(lines, start):
    """Locate the table header row that carries the p50 column.

    The line at *start* (the one mentioning "Time (total CPU)") is checked
    first, because it may itself be the table header. After that, blank lines
    are skipped and up to ``_LOOKAHEAD`` following lines are examined. The
    search never crosses into the next markdown heading.

    Returns:
        ``(line_index, column_index)``. ``line_index`` is None when no line
        containing "p50" was found; ``column_index`` is None when such a line
        was found but none of its cells starts with "p50".
    """
    first = start + 1
    while first < len(lines) and lines[first].strip() == '':
        first += 1
    candidates = [start, *range(first, min(first + _LOOKAHEAD, len(lines)))]
    for idx in candidates:
        if _is_heading(lines[idx]):
            break
        if 'p50' not in lines[idx]:
            continue
        cells = _split_row(lines[idx])
        column = next(
            (pos for pos, cell in enumerate(cells) if cell.startswith('p50')),
            None,
        )
        return idx, column
    return None, None


def _read_p50_values(lines, header_idx, p50_col):
    """Read the p50 cells of the 'swiftcbor' and 'Current_run' rows.

    Only the contiguous run of table rows directly below the header is read,
    so values belonging to a later table can never be picked up.

    Returns:
        ``(swiftcbor_cell, current_run_cell)``; either is None when its row
        is absent from the table.
    """
    baseline = current = None
    for line in lines[header_idx + 1:header_idx + _LOOKAHEAD]:
        if not line.strip().startswith('|'):
            break  # first non-table line marks the end of this table
        cells = _split_row(line)
        if not cells or p50_col >= len(cells):
            continue
        row_name = cells[0]
        if 'swiftcbor' in row_name:
            baseline = cells[p50_col]
        if 'Current_run' in row_name:
            current = cells[p50_col]
        if baseline and current:
            break
    return baseline, current


def parse_markdown(text):
    """Extract Time (total CPU) p50 values from benchmark markdown.

    A line starting with '## Decoding' or '## Encoding' selects the current
    section; the selection stays in effect until the next such line. Within a
    section, every '### <name> metrics' heading is followed up to the next
    heading in search of the first line mentioning "Time (total CPU)".

    Args:
        text: The complete markdown document as one string.

    Returns:
        ``{"Decoding": {...}, "Encoding": {...}}`` where each inner dict maps
        a benchmark name to ``(swiftcbor_p50, current_run_p50, unit)``. The
        two values are the raw cell strings (None when not found) and *unit*
        is the parenthesised unit text, or '' when none was present.
        Benchmarks without a "Time (total CPU)" line are omitted.
    """
    lines = text.splitlines()
    results = {"Decoding": {}, "Encoding": {}}
    mode = None
    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith('## Decoding'):
            mode = 'Decoding'
            continue
        if stripped.startswith('## Encoding'):
            mode = 'Encoding'
            continue
        section = _SECTION_RE.match(line)
        if not (section and mode):
            continue
        bench = section.group(1).strip()
        for j in range(i + 1, len(lines)):
            if _is_heading(lines[j]):
                break
            time_match = _TIME_RE.search(lines[j])
            if not time_match:
                continue
            unit = time_match.group(1) or ''
            header_idx, p50_col = _find_p50_header(lines, j)
            baseline = current = None
            if header_idx is not None and p50_col is not None:
                if not unit:
                    # The first match may be a summary line without a unit;
                    # the table header row can still carry one.
                    header_match = _TIME_RE.search(lines[header_idx])
                    if header_match and header_match.group(1):
                        unit = header_match.group(1)
                baseline, current = _read_p50_values(lines, header_idx, p50_col)
            results[mode][bench] = (baseline, current, unit)
            break
    return results


def clean_num(s):
    """Return the first number found in *s* as a float.

    Thousands separators (',') are removed before searching. Returns None
    when *s* is None or contains no digits.
    """
    if s is None:
        return None
    token = _NUMBER_RE.search(s.strip().replace(',', ''))
    if token is None:
        return None
    try:
        return float(token.group(1))
    except ValueError:
        return None


def fmt(n):
    """Format *n* as a whole number for display.

    None becomes '', values of 1000 and above get thousands separators, and
    everything else is rendered without decimals.
    """
    if n is None:
        return ''
    if n >= 1000:
        return f"{int(round(n)):,}"
    if n == int(n):
        return str(int(n))
    return f"{n:.0f}"


def render_table_section(title, rows, preferred_order=None):
    """Print one markdown comparison table to stdout.

    Args:
        title: Section title placed in the '###' heading.
        rows: Mapping of benchmark name to ``(swiftcbor_p50, current_p50,
            unit)`` as produced by ``parse_markdown``.
        preferred_order: Optional sequence of benchmark names listed first,
            in the given order. Names missing from *rows* are skipped; the
            remaining benchmarks follow alphabetically.
    """
    print(f"### {title} (cpu time)\n")
    print("| Benchmark | SwiftCBOR (p50) | CBOR (p50) | % Improvement |")
    print("|---|---:|---:|---:|")

    ordered = [name for name in (preferred_order or ()) if name in rows]
    already_listed = set(ordered)
    ordered.extend(name for name in sorted(rows) if name not in already_listed)

    for name in ordered:
        raw_base, raw_curr, unit = rows.get(name, (None, None, ''))
        base = clean_num(raw_base)
        curr = clean_num(raw_curr)
        suffix = f" {unit}" if unit else ''
        # Fall back to the raw cell text when no number could be extracted.
        base_str = fmt(base) + suffix if base is not None else (raw_base or '')
        curr_str = fmt(curr) + suffix if curr is not None else (raw_curr or '')
        improvement = ''
        if base is not None and curr is not None and base != 0:
            improvement = f"**{round((base - curr) / base * 100)}%**"
        print(f"| {name} | {base_str} | {curr_str} | {improvement} |")
    print("\n")


def main(argv):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name. It holds an optional
            file path; '-' or no argument reads the markdown from stdin.
    """
    parser = argparse.ArgumentParser(description='Parse Swift benchmark markdown and print compact p50 tables')
    parser.add_argument('file', nargs='?', default='-', help='Path to markdown file, or - for stdin (default)')
    args = parser.parse_args(argv)

    if args.file == '-':
        text = sys.stdin.read()
    else:
        # Read as UTF-8 explicitly instead of relying on the locale default;
        # unit labels in the tables may be non-ASCII.
        text = Path(args.file).read_text(encoding='utf-8')

    results = parse_markdown(text)

    # Best-effort preferred row order; anything not listed is appended
    # alphabetically by render_table_section.
    dec_order = ["Array", "Complex Object", "Date", "Dictionary", "Double", "Float", "Indeterminate String", "Int", "Int Small", "Simple Object", "String", "String Small"]
    enc_order = ["Array", "Array Small", "Bool", "Complex Codable Object", "Data", "Data Small", "Dictionary", "Dictionary Small", "Int", "Int Small", "Simple Codable Object", "String", "String Small"]

    render_table_section('Decoding', results.get('Decoding', {}), preferred_order=dec_order)
    render_table_section('Encoding', results.get('Encoding', {}), preferred_order=enc_order)


if __name__ == '__main__':
    main(sys.argv[1:])