ci/compare: nix stats comparison

Displays a stats comparison table in the step summary if there are no added/removed packages.

+201 -4
+153
ci/eval/compare/cmp-stats.py
```python
import json
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

# Define metrics of interest (can be expanded as needed)
METRIC_PREFIXES = ("nr", "gc")


def flatten_data(json_data: dict) -> dict:
    """
    Extracts and flattens metrics from JSON data.
    This is needed because the JSON data can be nested.
    For example, a JSON data entry might look like this:

        "gc": {"cycles": 13, "heapSize": 5404549120, "totalBytes": 9545876464}

    Flattened:

        "gc.cycles": 13
        "gc.heapSize": 5404549120
        ...

    Args:
        json_data (dict): JSON data containing metrics.
    Returns:
        dict: Flattened metrics with keys as metric names.
    """
    flat_metrics = {}
    for k, v in json_data.items():
        if isinstance(v, (int, float)):
            flat_metrics[k] = v
        elif isinstance(v, dict):
            for sub_k, sub_v in v.items():
                flat_metrics[f"{k}.{sub_k}"] = sub_v
    return flat_metrics


def load_all_metrics(directory: Path) -> dict:
    """
    Loads all stats JSON files in the specified directory and extracts metrics.

    Args:
        directory (Path): Directory containing per-system subdirectories of JSON files.
    Returns:
        dict: Dictionary with "<system>/<chunk>" as keys and extracted metrics as values.
    """
    metrics = {}
    for system_dir in directory.iterdir():
        assert system_dir.is_dir()

        for chunk_output in system_dir.iterdir():
            with chunk_output.open() as f:
                data = json.load(f)
            metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)

    return metrics


def dataframe_to_markdown(df: pd.DataFrame) -> str:
    markdown_lines = []

    # Header (get column names and format them)
    header = '\n| ' + ' | '.join(df.columns) + ' |'
    markdown_lines.append(header)
    markdown_lines.append("| - " * len(df.columns) + "|")  # Separator line

    # Iterate over rows to build Markdown rows
    for _, row in df.iterrows():
        # TODO: define threshold for highlighting
        highlight = False

        fmt = lambda x: f"**{x}**" if highlight else f"{x}"

        # Check for no change and NaN in p_value/t_stat
        row_values = []
        for val in row:
            if isinstance(val, float) and np.isnan(val):  # NaN values in p-value or t-stat
                row_values.append("-")  # Custom symbol for NaN
            elif isinstance(val, float) and val == 0:  # No change (mean_diff == 0)
                row_values.append("-")  # Custom symbol for no change
            else:
                row_values.append(fmt(f"{val:.4f}" if isinstance(val, float) else str(val)))

        markdown_lines.append('| ' + ' | '.join(row_values) + ' |')

    return '\n'.join(markdown_lines)


def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
    common_files = sorted(set(before_metrics) & set(after_metrics))
    all_keys = sorted({key for file_metrics in before_metrics.values() for key in file_metrics})

    results = []

    for key in all_keys:
        before_vals, after_vals = [], []

        for fname in common_files:
            if key in before_metrics[fname] and key in after_metrics[fname]:
                before_vals.append(before_metrics[fname][key])
                after_vals.append(after_metrics[fname][key])

        if len(before_vals) >= 2:
            before_arr = np.array(before_vals)
            after_arr = np.array(after_vals)

            diff = after_arr - before_arr
            pct_change = 100 * diff / before_arr
            t_stat, p_val = ttest_rel(after_arr, before_arr)

            results.append({
                "metric": key,
                "mean_before": np.mean(before_arr),
                "mean_after": np.mean(after_arr),
                "mean_diff": np.mean(diff),
                "mean_%_change": np.mean(pct_change),
                "p_value": p_val,
                "t_stat": t_stat,
            })

    df = pd.DataFrame(results).sort_values("p_value")
    return df


if __name__ == "__main__":
    before_dir = os.environ.get("BEFORE_DIR")
    after_dir = os.environ.get("AFTER_DIR")

    if not before_dir or not after_dir:
        print("Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.")
        sys.exit(1)

    before_stats = Path(before_dir) / "stats"
    after_stats = Path(after_dir) / "stats"

    # This may happen if the pull request target does not include PR#399720 yet.
    if not before_stats.exists():
        print("⚠️ Skipping comparison: stats directory is missing in the target commit.")
        sys.exit(0)

    # This should never happen, but we exit gracefully anyway.
    if not after_stats.exists():
        print("⚠️ Skipping comparison: stats directory is missing in the current PR evaluation.")
        sys.exit(0)

    before_metrics = load_all_metrics(before_stats)
    after_metrics = load_all_metrics(after_stats)
    df1 = perform_pairwise_tests(before_metrics, after_metrics)
    markdown_table = dataframe_to_markdown(df1)
    print(markdown_table)
```
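For context, the heart of the script is `scipy.stats.ttest_rel`: metric values are paired by `<system>/<chunk>` file name across the two evaluations and a paired t-test is run on those pairs. A minimal sketch of that step with made-up numbers (the metric name and values are purely illustrative, following the `gc.heapSize` example from the docstring above):

```python
import numpy as np
from scipy.stats import ttest_rel

# Hypothetical per-chunk values of one flattened metric ("gc.heapSize"),
# paired by position: index i is the same <system>/<chunk> in both evaluations.
before = np.array([5404549120, 5398124032, 5412030464, 5401100288], dtype=float)
after = np.array([5410241536, 5396008960, 5418924032, 5403219968], dtype=float)

diff = after - before
pct_change = 100 * diff / before
t_stat, p_val = ttest_rel(after, before)

print(f"mean_diff={diff.mean():.0f}")
print(f"mean_%_change={pct_change.mean():.4f}")
print(f"t_stat={t_stat:.4f}, p_value={p_val:.4f}")
```

Pairing by chunk lets each chunk act as its own control, so the test measures the per-chunk change rather than the variation between chunks; this mirrors how `perform_pairwise_tests` collects `before_vals`/`after_vals` over `common_files`.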
+46 -4
ci/eval/compare/default.nix
```diff
···
   jq,
   runCommand,
   writeText,
+  python3,
   ...
 }:
 {
···
 in
 runCommand "compare"
   {
-    nativeBuildInputs = [ jq ];
+    nativeBuildInputs = [
+      jq
+      (python3.withPackages (
+        ps: with ps; [
+          numpy
+          pandas
+          scipy
+        ]
+      ))
+    ];
     maintainers = builtins.toJSON maintainers;
     passAsFile = [ "maintainers" ];
+    env = {
+      BEFORE_DIR = "${beforeResultDir}";
+      AFTER_DIR = "${afterResultDir}";
+    };
   }
   ''
     mkdir $out

     cp ${changed-paths} $out/changed-paths.json

-    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} > $out/step-summary.md
+
+    if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then
+      # No packages were added or removed, so the chunks match between the
+      # two revisions and we can generate a performance comparison.
+      {
+        echo
+        echo "# Performance comparison"
+        echo
+        echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')"
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+        echo
+      } >> $out/step-summary.md

-    cp "$maintainersPath" "$out/maintainers.json"
+      python3 ${./cmp-stats.py} >> $out/step-summary.md

-    # TODO: Compare eval stats
+    else
+      # Packages were added or removed, so the chunks differ between the two
+      # revisions and comparing performance stats would be misleading.
+      {
+        echo
+        echo "# Performance comparison"
+        echo
+        echo "Performance stats were skipped because the package sets differ between the two revisions."
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+      } >> $out/step-summary.md
+    fi
+
+    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} >> $out/step-summary.md
+
+    cp "$maintainersPath" "$out/maintainers.json"
   ''
```
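The wrapper only runs `cmp-stats.py` when the jq gate above holds, i.e. when `changed-paths.json` reports no added and no removed attributes. A rough Python equivalent of that gate, shown only to spell out the condition (the helper name is hypothetical; only the `attrdiff.added`/`attrdiff.removed` fields visible in the diff above are assumed):

```python
import json
from pathlib import Path


def chunks_are_comparable(changed_paths: Path) -> bool:
    """Same condition as:
    jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)'
    """
    attrdiff = json.loads(changed_paths.read_text())["attrdiff"]
    return len(attrdiff["added"]) == 0 and len(attrdiff["removed"]) == 0
```

When the gate fails, the step summary still gets a "Performance comparison" heading, but with a note that the stats were skipped because the package sets differ.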
+2
ci/eval/default.nix
```diff
···
   nixVersions,
   jq,
   sta,
+  python3,
 }:

 let
···
     runCommand
     writeText
     supportedSystems
+    python3
     ;
 };
```
··· 9 9 nixVersions, 10 10 jq, 11 11 sta, 12 + python3, 12 13 }: 13 14 14 15 let ··· 270 271 runCommand 271 272 writeText 272 273 supportedSystems 274 + python3 273 275 ; 274 276 }; 275 277