nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 163 lines 6.3 kB view raw
"""Compress a Gradle MITM-cache URL list into a nixpkgs Gradle lockfile.

Usage: <script> INPUT.json OUTPUT.json

This compresses MITM URL lists with Gradle-specific optimizations.
Specifically, it splits each URL into up to 3 parts - they will be
concatenated like part1/part2.part3 or part1.part2.
part3 is simply always the file extension, but part1 and part2 are
optimized using special heuristics.
Additionally, if part2 ends with /a/b/{a}-{b}, then all occurrences of
/{a}/{b}/ are replaced with #.
Finally, anything that ends with = is considered SHA256, anything that
starts with http is considered a redirect URL, anything else is
considered text.
"""

import json
import sys

from typing import Any, Dict, Iterable, Set, Tuple

# Extensions that get the Maven-specific "base#name/ver" path compression.
_MAVEN_EXTENSIONS = ("jar", "pom", "module")

_LOCKFILE_COMMENT = "This is a nixpkgs Gradle dependency lockfile. For more details, refer to the Gradle section in the nixpkgs manual."


def _split_url(url: str) -> Tuple[str, str]:
    """Split *url* at its last dot into ``(base, ext)``.

    A trailing ``.tar`` on the base is moved into the extension, so
    ``a.tar.gz`` yields ``("a", "tar.gz")``.

    Raises:
        ValueError: if *url* contains no dot at all.
    """
    base, ext = url.rsplit(".", 1)
    if base.endswith(".tar"):
        base = base[:-4]
        ext = "tar." + ext
    return base, ext


def _compress_maven_base(base: str) -> str:
    """Convert ``base/name/ver/name-ver[-rest]`` into ``base#name/ver[/rest]``.

    *base* is an extension-less URL. If it does not follow the Maven layout
    (the filename does not start with ``{name}-{ver}``, or what follows the
    version is not introduced by ``-``) it is returned unchanged.
    """
    comps = base.split("/")
    filename = comps[-1]
    # Paths with fewer than three components cannot have the name/ver/file
    # layout; leave them alone instead of failing on comps[-3].
    if "-" not in filename or len(comps) < 3:
        return base

    name = comps[-3]
    basever = comps[-2]
    ver = basever
    is_snapshot = ver.endswith("-SNAPSHOT")
    if is_snapshot:
        ver = ver.removesuffix("-SNAPSHOT")

    prefix = f"{name}-{ver}"
    if not filename.startswith(prefix):
        return base

    if is_snapshot:
        if filename.startswith(f"{prefix}-SNAPSHOT"):
            ver += "-SNAPSHOT"
        else:
            # Otherwise keep up to two more dash-separated fields after the
            # version (the leading empty field accounts for the first dash),
            # e.g. "-20200101.123456-1".
            ver += "-".join(filename.removeprefix(prefix).split("-")[:3])

    comp_end = filename.removeprefix(f"{name}-{ver}")
    if not name or not ver:
        return base
    if comp_end and not comp_end.startswith("-"):
        return base

    compressed = "/".join(comps[:-1]) + "/"
    # Every occurrence of /name/basever/ is replaced, not only the last one.
    compressed = compressed.replace(f"/{name}/{basever}/", "#")
    compressed += f"{name}/{ver}"
    if is_snapshot:
        compressed += "/SNAPSHOT"
    if comp_end:
        # Drop the leading dash of the trailing part (e.g. "-sources").
        compressed += "/" + comp_end[1:]
    return compressed


def _tag_text(xml: str, tag: str) -> str:
    """Return the text between the first ``<tag>`` and the next ``</tag>``.

    Raises:
        IndexError: if ``<tag>`` does not occur in *xml*.
    """
    return xml.split(f"<{tag}>")[1].split(f"</{tag}>")[0]


def _parse_maven_metadata(text: str) -> Dict[str, str]:
    """Extract the fields of a Maven metadata XML needed to regenerate it.

    Nix code in fetch-deps.nix will autogenerate metadata xml files. groupId
    is part of the URL, but it can be tricky to parse as we don't know the
    exact repo base, so take it from the xml and pass it to nix.

    All whitespace is stripped from *text* before matching. ``latest`` is
    only recorded when it differs from ``release``.

    Raises:
        IndexError: if *text* has no ``<groupId>`` element.
    """
    xml = "".join(text.split())
    meta = {"groupId": _tag_text(xml, "groupId")}
    if "<release>" in xml:
        meta["release"] = _tag_text(xml, "release")
    if "<latest>" in xml:
        latest = _tag_text(xml, "latest")
        if latest != meta.get("release"):
            meta["latest"] = latest
    if "<lastUpdated>" in xml:
        meta["lastUpdated"] = _tag_text(xml, "lastUpdated")
    return meta


def _group_urls(data: Dict[str, Any]) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """Build the initial ``part1 -> part2 -> extension -> value`` mapping.

    part1 is everything before the first slash of the (possibly
    Maven-compressed) base, part2 is the remainder. The ``"!version"`` key
    of *data* is skipped.

    Raises:
        Exception: if an entry has neither a ``"hash"`` key nor a ``"text"``
            key on an ``xml`` URL.
    """
    grouped: Dict[str, Dict[str, Dict[str, Any]]] = {}
    for url, info in data.items():
        if url == "!version":
            continue
        base, ext = _split_url(url)
        # special logic for Maven repos
        if ext in _MAVEN_EXTENSIONS:
            base = _compress_maven_base(base)
        scheme, rest = base.split("/", 1)
        entry = grouped.setdefault(scheme, {}).setdefault(rest, {})
        if "hash" in info:
            entry[ext] = info["hash"]
        elif "text" in info and ext == "xml":
            entry[ext] = _parse_maven_metadata(info["text"])
        else:
            raise Exception("Unsupported key: " + repr(info))
    return grouped


def _should_push(part1: str, part2_keys: Iterable[str]) -> bool:
    """Decide whether the first segment of every part2 should move to part1."""
    starts: Set[str] = set()
    # by how many bytes the file size will be increased (roughly)
    lose = 0
    # by how many bytes the file size will be reduced (roughly)
    win = 0
    for part2 in part2_keys:
        if "/" not in part2:
            # can't push a segment from part2 into part1
            return False
        st = part2.split("/", 1)[0]
        if st not in starts:
            lose += len(st) + 1
            starts.add(st)
        win += len(st) + 1
    # how many different initial part2 segments there are
    count = len(starts)
    if count == 0:
        return False
    # only allow pushing part2 segments into part1 if *either*:
    # - the domain isn't yet part of part1
    # - the initial part2 segment is always the same
    if count != 1 and "." in part1:
        return False
    # some heuristics that may or may not work well (originally this was
    # used when the above if wasn't here, but perhaps it's useless now)
    lose += (count - 1) * max(0, len(part1) - 4)
    return win > lose or ("." not in part1 and win >= lose)


def _push_segments(
    data: Dict[str, Dict[str, Dict[str, Any]]],
) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """Repeatedly move leading part2 segments into part1 until nothing changes.

    Example:
        https            # part1
          domain1/b      # part2
          domain1/c
          domain2/a
          domain2/c
        ->
        https/domain1    # part1
          b              # part2
          c
        https/domain2    # part1
          a              # part2
          c

    This helps reduce the lockfile size because a Gradle project will usually
    use lots of files from a single Maven repo.
    """
    changed = True
    while changed:
        changed = False
        new_data: Dict[str, Dict[str, Dict[str, Any]]] = {}
        for part1, info1 in data.items():
            if not _should_push(part1, info1):
                new_data[part1] = info1
                continue
            changed = True
            for part2, info2 in info1.items():
                st, part3 = part2.split("/", 1)
                new_data.setdefault(part1 + "/" + st, {})[part3] = info2
        data = new_data
    return data


def compress_deps(data: Dict[str, Any]) -> Dict[str, Any]:
    """Compress a URL -> info mapping into the nested lockfile structure.

    Args:
        data: mapping from URL to ``{"hash": ...}`` or, for ``.xml`` URLs,
            ``{"text": ...}``. A ``"!version"`` key is ignored.

    Returns:
        A new ``part1 -> part2 -> extension -> value`` mapping, without the
        ``"!comment"`` / ``"!version"`` header keys.
    """
    return _push_segments(_group_urls(data))


def main() -> None:
    """Read the URL list from argv[1] and write the lockfile to argv[2]."""
    # JSON is UTF-8 by specification; don't depend on the locale encoding.
    with open(sys.argv[1], "rt", encoding="utf-8") as f:
        data: dict = json.load(f)

    new_data = compress_deps(data)
    new_data["!comment"] = _LOCKFILE_COMMENT
    new_data["!version"] = 1

    with open(sys.argv[2], "wt", encoding="utf-8") as f:
        json.dump(new_data, f, sort_keys=True, indent=1)
        f.write("\n")


if __name__ == "__main__":
    main()