nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
1import json
2import sys
3
4from typing import Dict, Set
5
# this compresses MITM URL lists with Gradle-specific optimizations
# specifically, it splits each url into up to 3 parts - they will be
# concatenated like part1/part2.part3 or part1.part2
# part3 is simply always the file extension, but part1 and part2 are
# optimized using special heuristics
# additionally, if part2 ends with /a/b/{a}-{b}, then all occurrences of
# /{a}/{b}/ are replaced with #
# finally, anything that ends with = is considered SHA256, anything that
# starts with http is considered a redirect URL, anything else is
# considered text
16
# Extensions that get the Maven-specific "#" path compaction.
_MAVEN_EXTENSIONS = ("jar", "pom", "module")

_LOCKFILE_COMMENT = "This is a nixpkgs Gradle dependency lockfile. For more details, refer to the Gradle section in the nixpkgs manual."


def _split_extension(url: str) -> tuple:
    """Split ``url`` at its last "." into ``(base, ext)``.

    A base ending in ".tar" gives that suffix to the extension, so
    "x.tar.gz" becomes ``("x", "tar.gz")``.

    Raises:
        ValueError: if ``url`` contains no "." at all.
    """
    base, ext = url.rsplit(".", 1)
    if base.endswith(".tar"):
        base = base[:-4]
        ext = "tar." + ext
    return base, ext


def _compact_maven_path(base: str) -> str:
    """Convert ``repo/name/ver/name-ver[-rest]`` into ``repo#name/ver[/rest]``.

    ``base`` is returned unchanged when its last path component does not
    follow the ``{name}-{ver}`` pattern implied by the two directories
    above it.
    """
    comps = base.split("/")
    filename = comps[-1]
    if "-" not in filename:
        return base

    name = comps[-3]
    basever = comps[-2]
    is_snapshot = basever.endswith("-SNAPSHOT")
    ver = basever.removesuffix("-SNAPSHOT")

    prefix = f"{name}-{ver}"
    if not filename.startswith(prefix):
        return base

    if is_snapshot:
        if filename.startswith(f"{prefix}-SNAPSHOT"):
            ver += "-SNAPSHOT"
        else:
            # The directory says -SNAPSHOT but the file name does not: keep
            # the first two dash-separated fields that follow the version
            # (the leading empty field supplies the joining "-").
            ver += "-".join(filename.removeprefix(prefix).split("-")[:3])

    # Whatever follows name-ver in the file name, e.g. "-sources".
    comp_end = filename.removeprefix(f"{name}-{ver}")
    if not (name and ver and (not comp_end or comp_end.startswith("-"))):
        return base

    compact = "/".join(comps[:-1]) + "/"
    # Every occurrence of /name/ver/ is replaced, not only the trailing one.
    compact = compact.replace(f"/{name}/{basever}/", "#")
    compact += f"{name}/{ver}"
    if is_snapshot:
        compact += "/SNAPSHOT"
    if comp_end:
        compact += "/" + comp_end[1:]
    return compact


def _tag_text(xml: str, tag: str) -> str:
    """Return the text between the first ``<tag>`` and the next ``</tag>``."""
    return xml.split(f"<{tag}>")[1].split(f"</{tag}>")[0]


def _parse_metadata(text: str) -> dict:
    """Extract the fields kept from a metadata XML document.

    The groupId is taken from the XML itself because it is hard to recover
    from the URL without knowing the exact repository base. ``latest`` is
    stored only when it differs from ``release``.

    Raises:
        IndexError: if the document has no ``<groupId>`` element.
    """
    xml = "".join(text.split())  # drop all whitespace before matching tags
    meta = {"groupId": _tag_text(xml, "groupId")}
    if "<release>" in xml:
        meta["release"] = _tag_text(xml, "release")
    if "<latest>" in xml:
        latest = _tag_text(xml, "latest")
        if latest != meta.get("release"):
            meta["latest"] = latest
    if "<lastUpdated>" in xml:
        meta["lastUpdated"] = _tag_text(xml, "lastUpdated")
    return meta


def _index_urls(data: dict) -> Dict[str, Dict[str, dict]]:
    """Build the initial ``part1 -> part2 -> extension -> value`` map.

    part1 starts out as the text before the first "/" of the (possibly
    compacted) URL base, part2 is everything after it. The value is the
    entry's "hash", or the parsed metadata dict for "text" entries whose
    extension is "xml". The "!version" key of ``data`` is skipped.

    Raises:
        ValueError: if an entry has neither a "hash" nor an xml "text".
    """
    index: Dict[str, Dict[str, dict]] = {}
    for url, info in data.items():
        if url == "!version":
            continue
        base, ext = _split_extension(url)
        # special logic for Maven repos
        if ext in _MAVEN_EXTENSIONS:
            base = _compact_maven_path(base)
        scheme, rest = base.split("/", 1)
        entry = index.setdefault(scheme, {}).setdefault(rest, {})
        if "hash" in info:
            entry[ext] = info["hash"]
        elif "text" in info and ext == "xml":
            entry[ext] = _parse_metadata(info["text"])
        else:
            raise ValueError("Unsupported key: " + repr(info))
    return index


def _should_hoist(part1: str, info1: dict) -> bool:
    """Decide whether the first segment of every part2 key moves into part1."""
    starts: Set[str] = set()
    # by how many bytes the file size will be increased (roughly)
    lose = 0
    # by how many bytes the file size will be reduced (roughly)
    win = 0
    for part2 in info1:
        if "/" not in part2:
            # can't push a segment from part2 into part1
            return False
        st = part2.split("/", 1)[0]
        if st not in starts:
            starts.add(st)
            lose += len(st) + 1
        win += len(st) + 1
    # how many different initial part2 segments there are
    count = len(starts)
    if count == 0:
        return False
    # only allow pushing part2 segments into part1 if *either*:
    # - the domain isn't yet part of part1
    # - the initial part2 segment is always the same
    if count != 1 and "." in part1:
        return False
    # every extra part1 key created by the split repeats the old part1 text
    lose += (count - 1) * max(0, len(part1) - 4)
    return win > lose or ("." not in part1 and win >= lose)


def _hoist_common_prefixes(data: Dict[str, Dict[str, dict]]) -> Dict[str, Dict[str, dict]]:
    """Repeatedly push leading part2 segments into part1 while it pays off.

    Example:
        https              # part1
          domain1/b        # part2
          domain1/c
          domain2/a
          domain2/c
        ->
        https/domain1      # part1
          b                # part2
          c
        https/domain2      # part1
          a                # part2
          c

    This helps reduce the lockfile size because a Gradle project will
    usually use lots of files from a single Maven repo.
    """
    changed = True
    while changed:
        changed = False
        hoisted: Dict[str, Dict[str, dict]] = {}
        for part1, info1 in data.items():
            if not _should_hoist(part1, info1):
                hoisted[part1] = info1
                continue
            changed = True
            for part2, info2 in info1.items():
                st, remainder = part2.split("/", 1)
                hoisted.setdefault(part1 + "/" + st, {})[remainder] = info2
        data = hoisted
    return data


def compress(data: dict) -> dict:
    """Compress a URL -> info map into the lockfile structure.

    Args:
        data: maps each URL to a dict holding either a "hash" or, for xml
            files, a "text"; a "!version" key is ignored.

    Returns:
        The ``part1 -> part2 -> extension -> value`` map, plus the
        "!comment" and "!version" entries.

    Raises:
        ValueError: if an entry has neither a "hash" nor an xml "text".
    """
    lockfile: dict = _hoist_common_prefixes(_index_urls(data))
    lockfile["!comment"] = _LOCKFILE_COMMENT
    lockfile["!version"] = 1
    return lockfile


def main() -> None:
    """Read the JSON file named by argv[1] and write the result to argv[2]."""
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT.json OUTPUT.json")
    # Explicit encoding so reading does not depend on the locale.
    with open(sys.argv[1], "rt", encoding="utf-8") as f:
        data: dict = json.load(f)
    lockfile = compress(data)
    with open(sys.argv[2], "wt", encoding="utf-8") as f:
        json.dump(lockfile, f, sort_keys=True, indent=1)
        f.write("\n")


if __name__ == "__main__":
    main()