Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
#!/usr/bin/env python3
"""gclient2nix: evaluate a gclient (DEPS) dependency tree and emit data for Nix fetchers."""
import base64
import json
import logging
import os
import random
import re
import subprocess
import sys
import tempfile
from typing import Optional
from urllib.request import urlopen

import click
import click_log
from joblib import Memory, Parallel, delayed
from platformdirs import user_cache_dir

# Substituted at build time; makes depot_tools' DEPS parser importable.
sys.path.append("@depot_tools_checkout@")
import gclient_eval
import gclient_utils


logger = logging.getLogger(__name__)
click_log.basic_config(logger)

nixpkgs_path = "<nixpkgs>"

# Persistent on-disk cache so repeated runs don't re-prefetch unchanged repos.
memory: Memory = Memory(user_cache_dir("gclient2nix"), verbose=0)


def cache(mem, **mem_kwargs):
    """Wrap joblib's Memory.cache, pinning __module__/__qualname__ so the
    cache directory layout stays stable no matter how this script is run."""
    def cache_(f):
        f.__module__ = "gclient2nix"
        f.__qualname__ = f.__name__
        return mem.cache(f, **mem_kwargs)
    return cache_


@cache(memory)
def get_repo_hash(fetcher: str, args: dict) -> str:
    """Prefetch one repository via `nurl` and return its Nix store hash.

    Builds a Nix expression calling the given gclient2nix fetcher with
    `args` and runs `nurl -H`, which prints only the hash.
    """
    expr = f"(import {nixpkgs_path} {{}}).gclient2nix.fetchers.{fetcher}{{"
    for key, val in args.items():
        expr += f'{key}="{val}";'
    expr += "}"
    cmd = ["nurl", "-H", "--expr", expr]
    print(" ".join(cmd), file=sys.stderr)
    out = subprocess.check_output(cmd)
    return out.decode("utf-8").strip()


class Repo:
    """A node in the gclient dependency tree.

    Subclasses set `fetcher` (the Nix fetcher name) and `args` (its
    arguments) and implement `get_file` for fetching files at the pinned
    revision.
    """

    fetcher: str
    args: dict
    hash: str  # set by prefetch()

    def __init__(self) -> None:
        # path -> Repo for the dependencies declared in this repo's DEPS file
        self.deps: dict = {}

    def get_deps(self, repo_vars: dict, path: str) -> None:
        """Parse this repo's DEPS file and populate self.deps, recursing
        into dependencies listed in `recursedeps`.

        `repo_vars` are gclient variable overrides; `path` is this repo's
        checkout path (used as a prefix when DEPS uses relative paths).
        """
        print(
            "evaluating " + json.dumps(self, default=vars, sort_keys=True),
            file=sys.stderr,
        )

        deps_file = self.get_file("DEPS")
        evaluated = gclient_eval.Parse(deps_file, vars_override=repo_vars, filename="DEPS")

        # Caller-supplied overrides win over the DEPS-declared vars.
        repo_vars = dict(evaluated.get("vars", {})) | repo_vars

        prefix = f"{path}/" if evaluated.get("use_relative_paths", False) else ""

        # Keep only git dependencies whose gclient condition holds.  The
        # walrus binds each Repo once, so repo_from_dep runs a single time
        # per dep (the original called it twice); `and` short-circuits, so
        # it is not called at all when the condition is false.
        self.deps = {
            prefix + dep_name: repo
            for dep_name, dep in evaluated.get("deps", {}).items()
            if (
                "condition" not in dep
                or gclient_eval.EvaluateCondition(dep["condition"], repo_vars)
            )
            and (repo := repo_from_dep(dep)) is not None
        }

        # Only deps named in `recursedeps` have their own DEPS evaluated.
        for key in evaluated.get("recursedeps", []):
            dep_path = prefix + key
            if dep_path in self.deps:
                self.deps[dep_path].get_deps(repo_vars, dep_path)

    def eval(self) -> None:
        """Evaluate the full dependency tree for a 64-bit Linux checkout."""
        self.get_deps(
            {
                **{
                    f"checkout_{platform}": platform == "linux"
                    for platform in ["ios", "chromeos", "android", "mac", "win", "linux"]
                },
                **{
                    f"checkout_{arch}": True
                    for arch in ["x64", "arm64", "arm", "x86", "mips", "mips64", "ppc"]
                },
            },
            "",
        )

    def prefetch(self) -> None:
        """Compute and store this repo's Nix hash (runs `nurl`; cached on disk)."""
        self.hash = get_repo_hash(self.fetcher, self.args)

    def prefetch_all(self) -> list:
        """Return joblib `delayed` tasks that prefetch this repo and every
        transitive dependency.

        Note: this returns a list of tasks for the caller to run through
        Parallel — the original `-> int` annotation was wrong.
        """
        return sum(
            [dep.prefetch_all() for dep in self.deps.values()],
            [delayed(self.prefetch)()],
        )

    def flatten_repr(self) -> dict:
        """Serializable description of this repo: fetcher name and its args,
        including the hash when it has been prefetched."""
        return {
            "fetcher": self.fetcher,
            "args": {**({"hash": self.hash} if hasattr(self, "hash") else {}), **self.args},
        }

    def flatten(self, path: str) -> dict:
        """Map of checkout path -> flatten_repr() for this repo and all deps."""
        out = {path: self.flatten_repr()}
        for dep_path, dep in self.deps.items():
            out |= dep.flatten(dep_path)
        return out

    def get_file(self, filepath: str) -> str:
        """Fetch a file's contents from the repository at the pinned revision."""
        raise NotImplementedError


def _rev_key(rev: str) -> str:
    """Return the fetcher argument name for `rev`: "rev" for a full 40-char
    commit hash, otherwise "tag".

    Uses fullmatch so a tag that merely *starts* with 40 hex characters is
    not misclassified (the original re.match would have been).
    """
    return "rev" if re.fullmatch(r"[0-9a-f]{40}", rev) else "tag"


class GitRepo(Repo):
    """A plain git repository, fetched with fetchgit."""

    def __init__(self, url: str, rev: str) -> None:
        super().__init__()
        self.fetcher = "fetchgit"
        self.args = {
            "url": url,
            _rev_key(rev): rev,
        }


class GitHubRepo(Repo):
    """A repository hosted on GitHub, fetched with fetchFromGitHub."""

    def __init__(self, owner: str, repo: str, rev: str) -> None:
        super().__init__()
        self.fetcher = "fetchFromGitHub"
        self.args = {
            "owner": owner,
            "repo": repo,
            _rev_key(rev): rev,
        }

    def get_file(self, filepath: str) -> str:
        # raw.githubusercontent.com accepts either a commit hash or a
        # fully-qualified tag ref in the path.
        rev_or_tag = self.args['rev'] if 'rev' in self.args else f"refs/tags/{self.args['tag']}"
        return (
            urlopen(
                f"https://raw.githubusercontent.com/{self.args['owner']}/{self.args['repo']}/{rev_or_tag}/{filepath}"
            )
            .read()
            .decode("utf-8")
        )


class GitilesRepo(Repo):
    """A repository served by Gitiles (*.googlesource.com), fetched with
    fetchFromGitiles."""

    def __init__(self, url: str, rev: str) -> None:
        super().__init__()
        self.fetcher = "fetchFromGitiles"
        self.args = {
            "url": url,
            _rev_key(rev): rev,
        }

        # Quirk: Chromium source code exceeds the Hydra output limit
        # We prefer deleting test data over recompressing the sources into a
        # tarball, because the NAR will be compressed after the size check
        # anyways, so recompressing is more like bypassing the size limit
        # (making it count the compressed instead of uncompressed size)
        # rather than complying with it.
        if url == "https://chromium.googlesource.com/chromium/src.git":
            self.args["postFetch"] = "rm -r $out/third_party/blink/web_tests; "
            self.args["postFetch"] += "rm -r $out/content/test/data; "
            self.args["postFetch"] += "rm -rf $out/courgette/testdata; "
            self.args["postFetch"] += "rm -r $out/extensions/test/data; "
            self.args["postFetch"] += "rm -r $out/media/test/data; "

    def get_file(self, filepath: str) -> str:
        # Gitiles serves raw file contents base64-encoded with ?format=TEXT.
        rev_or_tag = self.args['rev'] if 'rev' in self.args else f"refs/tags/{self.args['tag']}"
        return base64.b64decode(
            urlopen(
                f"{self.args['url']}/+/{rev_or_tag}/{filepath}?format=TEXT"
            ).read()
        ).decode("utf-8")


def repo_from_dep(dep: dict) -> Optional[Repo]:
    """Build a Repo for a gclient dep entry, or None for non-git deps
    (e.g. cipd packages, which have no "url")."""
    if "url" not in dep:
        # Not a git dependency; skip
        return None

    url, rev = gclient_utils.SplitUrlRevision(dep["url"])

    github_match = re.search(r"https://github.com/(.+)/(.+?)(\.git)?$", url)
    if github_match:
        return GitHubRepo(github_match.group(1), github_match.group(2), rev)

    if re.match(r"https://.+\.googlesource.com", url):
        return GitilesRepo(url, rev)

    return GitRepo(url, rev)


@click.group()
def cli() -> None:
    """gclient2nix"""
    pass


@cli.command("eval", help="Evaluate and print the dependency tree of a gclient project")
@click.argument("url", required=True, type=str)
@click.option("--root", default="src", help="Root path, where the given url is placed", type=str)
def eval(url: str, root: str) -> None:
    """Print the dependency tree rooted at URL as JSON (no prefetching)."""
    repo = repo_from_dep({"url": url})
    assert repo is not None  # a dep with a "url" key always yields a Repo
    repo.eval()
    print(json.dumps(repo.flatten(root), sort_keys=True, indent=4))


@cli.command("generate", help="Generate a dependencies description for a gclient project")
@click.argument("url", required=True, type=str)
@click.option("--root", default="src", help="Root path, where the given url is placed", type=str)
def generate(url: str, root: str) -> None:
    """Evaluate URL's dependency tree, prefetch every repo in parallel, and
    print the tree (with hashes) as JSON."""
    repo = repo_from_dep({"url": url})
    assert repo is not None  # a dep with a "url" key always yields a Repo
    repo.eval()
    tasks = repo.prefetch_all()
    # Shuffle so large repositories are spread across the worker pool.
    random.shuffle(tasks)
    # Drain the generator so every prefetch task actually runs.  prefetch()
    # returns None and stores the hash on the Repo objects themselves (hence
    # require="sharedmem"), so the results are discarded.  The original built
    # an unused dict by indexing into the (always-None) results, which would
    # have crashed on any non-None result.
    for _ in Parallel(n_jobs=20, require="sharedmem", return_as="generator")(tasks):
        pass
    print(json.dumps(repo.flatten(root), sort_keys=True, indent=4))


if __name__ == "__main__":
    cli()