fork
Configure Feed
Select the types of activity you want to include in your feed.
nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
fork
Configure Feed
Select the types of activity you want to include in your feed.
1"""
2This script generates a Docker image from a set of store paths. Uses
3Docker Image Specification v1.2 as reference [1].
4
5It expects a JSON file with the following properties and writes the
6image as an uncompressed tarball to stdout:
7
8* "architecture", "config", "os", "created", "repo_tag" correspond to
9 the fields with the same name on the image spec [2].
10* "created" can be "now".
11* "created" is also used as mtime for files added to the image.
12* "store_layers" is a list of layers in ascending order, where each
13 layer is the list of store paths to include in that layer.
14
The main challenge for this script is to create the final image in a
streaming fashion, without dumping any intermediate data to disk,
for performance.
18
19A docker image has each layer contents archived as separate tarballs,
20and they later all get enveloped into a single big tarball in a
content addressed fashion. However, because of how the "tar" format works,
22we have to know about the name (which includes the checksum in our
23case) and the size of the tarball before we can start adding it to the
24outer tarball. We achieve that by creating the layer tarballs twice;
25on the first iteration we calculate the file size and the checksum,
26and on the second one we actually stream the contents. 'add_layer_dir'
27function does all this.
28
29[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
30[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
31""" # noqa: E501
32
33
34import io
35import os
36import re
37import sys
38import json
39import hashlib
40import pathlib
41import tarfile
42import itertools
43import threading
44from datetime import datetime, timezone
45from collections import namedtuple
46
47
def archive_paths_to(obj, paths, mtime):
    """
    Writes the given store paths as a tar file to the given stream.

    obj: Stream to write to. Should have a 'write' method.
    paths: List of store paths.
    mtime: 'mtime' applied to every entry in the archive.
           Should be an integer representing a POSIX time.
    """

    # gettarinfo makes the paths relative, this makes them
    # absolute again
    def append_root(ti):
        ti.name = "/" + ti.name
        return ti

    # Normalize ownership and timestamps for reproducible output.
    def apply_filters(ti):
        ti.mtime = mtime
        ti.uid = 0
        ti.gid = 0
        ti.uname = "root"
        ti.gname = "root"
        return ti

    def nix_root(ti):
        ti.mode = 0o0555  # r-xr-xr-x
        return ti

    # Named 'directory' rather than 'dir' to avoid shadowing the builtin.
    def directory(path):
        ti = tarfile.TarInfo(path)
        ti.type = tarfile.DIRTYPE
        return ti

    with tarfile.open(fileobj=obj, mode="w|") as tar:
        # To be consistent with the docker utilities, we need to have
        # these directories first when building layer tarballs.
        tar.addfile(apply_filters(nix_root(directory("/nix"))))
        tar.addfile(apply_filters(nix_root(directory("/nix/store"))))

        for path in paths:
            path = pathlib.Path(path)
            if path.is_symlink():
                files = [path]
            else:
                files = itertools.chain([path], path.rglob("*"))

            for filename in sorted(files):
                ti = append_root(tar.gettarinfo(filename))

                # copy hardlinks as regular files
                if ti.islnk():
                    ti.type = tarfile.REGTYPE
                    ti.linkname = ""
                    ti.size = filename.stat().st_size

                ti = apply_filters(ti)
                if ti.isfile():
                    with open(filename, "rb") as f:
                        tar.addfile(ti, f)
                else:
                    tar.addfile(ti)
107
108
class ExtractChecksum:
    """
    File-like sink that records only the sha256 digest and the total
    number of bytes written to it, discarding the data itself.
    """

    def __init__(self):
        self._digest = hashlib.sha256()
        self._size = 0

    def write(self, data):
        # Track size and digest; the payload is never stored.
        self._size += len(data)
        self._digest.update(data)

    def extract(self):
        """
        Returns: Hex-encoded sha256sum and size as a tuple.
        """
        return self._digest.hexdigest(), self._size
128
129
# References to a loaded base image: the open tar archive plus its parsed
# 'manifest.json' and image configuration JSON.
FromImage = namedtuple("FromImage", ["tar", "manifest_json", "image_json"])
# Some metadata for a layer
LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
133
134
def load_from_image(from_image_str):
    """
    Loads the given base image, if any.

    from_image_str: Path to the base image archive.

    Returns: A 'FromImage' object with references to the loaded base image,
             or 'None' if no base image was provided.
    """
    if from_image_str is None:
        return None

    base_tar = tarfile.open(from_image_str)

    def read_json_member(name):
        # Parse a JSON member of the archive without extracting it to disk.
        with base_tar.extractfile(base_tar.getmember(name)) as f:
            return json.load(f)

    manifest_json = read_json_member("manifest.json")
    image_json = read_json_member(manifest_json[0]["Config"])

    return FromImage(base_tar, manifest_json, image_json)
158
159
def add_base_layers(tar, from_image):
    """
    Adds the layers from the given base image to the final image.

    tar: 'tarfile.TarFile' object for new layers to be added to.
    from_image: 'FromImage' object with references to the loaded base image.
    """
    if from_image is None:
        print("No 'fromImage' provided", file=sys.stderr)
        return []

    layer_names = from_image.manifest_json[0]["Layers"]
    diff_ids = from_image.image_json["rootfs"]["diff_ids"]

    for num, (layer, diff_id) in enumerate(zip(layer_names, diff_ids), start=1):
        tarinfo = from_image.tar.getmember(layer)
        # diff_ids are prefixed with the algorithm; strip it down to hex.
        checksum = re.sub(r"^sha256:", "", diff_id)

        # Copy the layer tarball verbatim into the output archive.
        tar.addfile(tarinfo, from_image.tar.extractfile(tarinfo))

        print("Adding base layer", num, "from", tarinfo.path, file=sys.stderr)
        yield LayerInfo(
            size=tarinfo.size,
            checksum=checksum,
            path=tarinfo.path,
            paths=[tarinfo.path],
        )

    from_image.tar.close()
187
188
def overlay_base_config(from_image, final_config):
    """
    Overlays the final image 'config' JSON on top of selected defaults from the
    base image 'config' JSON.

    from_image: 'FromImage' object with references to the loaded base image.
    final_config: 'dict' object of the final image 'config' JSON.
    """
    if from_image is None:
        return final_config

    base_config = from_image.image_json["config"]

    # Preserve environment from base image
    combined_env = base_config.get("Env", []) + final_config.get("Env", [])
    if combined_env:
        # Resolve duplicates by variable name (last one wins), keeping
        # first-seen insertion order, then format back as a list.
        by_name = {}
        for entry in combined_env:
            by_name[entry.split("=", 1)[0]] = entry
        final_config["Env"] = list(by_name.values())
    return final_config
209
210
def add_layer_dir(tar, paths, store_dir, mtime):
    """
    Appends given store paths to a TarFile object as a new layer.

    The layer tarball is generated twice: once through 'ExtractChecksum'
    to learn its size and sha256 (both must be known before the entry
    header can be written into the outer tarball), and once more to
    stream the actual contents.

    tar: 'tarfile.TarFile' object for the new layer to be added to.
    paths: List of store paths.
    store_dir: the root directory of the nix store
    mtime: 'mtime' of the added files and the layer tarball.
           Should be an integer representing a POSIX time.

    Returns: A 'LayerInfo' object containing some metadata of
             the layer added.
    """

    # Reject any path that does not live under the nix store root.
    invalid_paths = [i for i in paths if not i.startswith(store_dir)]
    assert len(invalid_paths) == 0, \
        f"Expecting absolute paths from {store_dir}, but got: {invalid_paths}"

    # First, calculate the tarball checksum and the size.
    extract_checksum = ExtractChecksum()
    archive_paths_to(
        extract_checksum,
        paths,
        mtime=mtime,
    )
    (checksum, size) = extract_checksum.extract()

    # The layer is stored in the image as '<sha256>/layer.tar'.
    path = f"{checksum}/layer.tar"
    layer_tarinfo = tarfile.TarInfo(path)
    layer_tarinfo.size = size
    layer_tarinfo.mtime = mtime

    # Then actually stream the contents to the outer tarball.
    # A pipe plus a producer thread lets 'addfile' consume the layer
    # while it is being generated, so nothing is buffered to disk.
    read_fd, write_fd = os.pipe()
    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
        def producer():
            archive_paths_to(
                write,
                paths,
                mtime=mtime,
            )
            write.close()

        # Closing the write end of the fifo also closes the read end,
        # so we don't need to wait until this thread is finished.
        #
        # Any exception from the thread will get printed by the default
        # exception handler, and the 'addfile' call will fail since it
        # won't be able to read required amount of bytes.
        threading.Thread(target=producer).start()
        tar.addfile(layer_tarinfo, read)

    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
264
265
def add_customisation_layer(target_tar, customisation_layer, mtime):
    """
    Adds the customisation layer as a new layer. This layer is structured
    differently; the given store path has the 'layer.tar' and corresponding
    sha256sum ready.

    target_tar: 'tarfile.TarFile' object for the new layer to be added to.
    customisation_layer: Path containing the layer archive.
    mtime: 'mtime' of the added layer tarball.
    """

    checksum_path = os.path.join(customisation_layer, "checksum")
    with open(checksum_path) as f:
        checksum = f.read().strip()
    # A sha256 hex digest is always 64 characters long.
    # NOTE: previous message used '${checksum_path}', leaking a literal '$'.
    assert len(checksum) == 64, f"Invalid sha256 at {checksum_path}."

    layer_path = os.path.join(customisation_layer, "layer.tar")

    path = f"{checksum}/layer.tar"
    tarinfo = target_tar.gettarinfo(layer_path)
    tarinfo.name = path
    tarinfo.mtime = mtime

    with open(layer_path, "rb") as f:
        target_tar.addfile(tarinfo, f)

    return LayerInfo(
        size=None,
        checksum=checksum,
        path=path,
        paths=[customisation_layer]
    )
298
299
def add_bytes(tar, path, content, mtime):
    """
    Adds a file to the tarball with given path and contents.

    tar: 'tarfile.TarFile' object.
    path: Path of the file as a string.
    content: Contents of the file.
    mtime: 'mtime' of the file. Should be an integer representing a POSIX time.
    """
    # Exact-type check: only plain 'bytes' payloads are accepted.
    assert type(content) is bytes

    info = tarfile.TarInfo(path)
    info.size = len(content)
    info.mtime = mtime
    tar.addfile(info, io.BytesIO(content))
315
316
def main():
    """
    Reads the JSON configuration named by 'sys.argv[1]' and streams the
    assembled image tarball to stdout, logging progress to stderr.
    """
    with open(sys.argv[1], "r") as f:
        conf = json.load(f)

    if conf["created"] == "now":
        created = datetime.now(tz=timezone.utc)
    else:
        created = datetime.fromisoformat(conf["created"])
    mtime = int(created.timestamp())
    store_dir = conf["store_dir"]

    from_image = load_from_image(conf["from_image"])

    # The image is written as an uncompressed tar stream to stdout.
    with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
        # Base image layers come first, then the store layers.
        layers = list(add_base_layers(tar, from_image))

        start = len(layers) + 1
        for num, store_layer in enumerate(conf["store_layers"], start=start):
            print("Creating layer", num, "from paths:", store_layer,
                  file=sys.stderr)
            layers.append(
                add_layer_dir(tar, store_layer, store_dir, mtime=mtime)
            )

        print("Creating layer", len(layers) + 1, "with customisation...",
              file=sys.stderr)
        layers.append(
            add_customisation_layer(
                tar,
                conf["customisation_layer"],
                mtime=mtime
            )
        )

        print("Adding manifests...", file=sys.stderr)

        created_iso = created.isoformat()
        image_json = {
            "created": created_iso,
            "architecture": conf["architecture"],
            "os": "linux",
            "config": overlay_base_config(from_image, conf["config"]),
            "rootfs": {
                "diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
                "type": "layers",
            },
            "history": [
                {
                    "created": created_iso,
                    "comment": f"store paths: {layer.paths}"
                }
                for layer in layers
            ],
        }

        # The image config file is named after its own checksum.
        image_json_bytes = json.dumps(image_json, indent=4).encode("utf-8")
        image_json_checksum = hashlib.sha256(image_json_bytes).hexdigest()
        image_json_path = f"{image_json_checksum}.json"
        add_bytes(tar, image_json_path, image_json_bytes, mtime=mtime)

        manifest_json_bytes = json.dumps(
            [
                {
                    "Config": image_json_path,
                    "RepoTags": [conf["repo_tag"]],
                    "Layers": [layer.path for layer in layers],
                }
            ],
            indent=4,
        ).encode("utf-8")
        add_bytes(tar, "manifest.json", manifest_json_bytes, mtime=mtime)

        print("Done.", file=sys.stderr)


if __name__ == "__main__":
    main()