nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
fork

Configure Feed

Select the types of activity you want to include in your feed.

at netboot-syslinux-multiplatform 391 lines 13 kB view raw
"""
This script generates a Docker image from a set of store paths. Uses
Docker Image Specification v1.2 as reference [1].

It expects a JSON file with the following properties and writes the
image as an uncompressed tarball to stdout:

* "architecture", "config", "os", "created", "repo_tag" correspond to
  the fields with the same name on the image spec [2].
  Note that "os" is currently always written as "linux".
* "created" can be "now".
* "created" is also used as mtime for files added to the image.
* "store_dir" is the root directory of the nix store; every store path
  in "store_layers" has to start with it.
* "store_layers" is a list of layers in ascending order, where each
  layer is the list of store paths to include in that layer.
* "customisation_layer" is a directory containing a ready made
  'layer.tar' and its 'checksum'; it is added as the last layer.
* "from_image" is the path of an optional base image archive (or null).

The main challenge for this script is to create the final image in a
streaming fashion, without dumping any intermediate data to disk
for performance.

A docker image has each layer contents archived as separate tarballs,
and they later all get enveloped into a single big tarball in a
content addressed fashion. However, because of how the "tar" format
works, we have to know about the name (which includes the checksum in
our case) and the size of the tarball before we can start adding it to
the outer tarball. We achieve that by creating the layer tarballs twice;
on the first iteration we calculate the file size and the checksum,
and on the second one we actually stream the contents. 'add_layer_dir'
function does all this.

[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
"""  # noqa: E501


import io
import os
import re
import sys
import json
import hashlib
import pathlib
import tarfile
import itertools
import threading
from datetime import datetime, timezone
from collections import namedtuple


def archive_paths_to(obj, paths, mtime):
    """
    Writes the given store paths as a tar file to the given stream.

    obj: Stream to write to. Should have a 'write' method.
    paths: List of store paths.
    mtime: 'mtime' to set on every entry of the archive.
    """

    # gettarinfo makes the paths relative, this makes them
    # absolute again
    def append_root(ti):
        ti.name = "/" + ti.name
        return ti

    # Normalise the metadata so the archive does not depend on who
    # built it or when.
    def apply_filters(ti):
        ti.mtime = mtime
        ti.uid = 0
        ti.gid = 0
        ti.uname = "root"
        ti.gname = "root"
        return ti

    def nix_root(ti):
        ti.mode = 0o0555  # r-xr-xr-x
        return ti

    def directory(path):
        ti = tarfile.TarInfo(path)
        ti.type = tarfile.DIRTYPE
        return ti

    with tarfile.open(fileobj=obj, mode="w|") as tar:
        # To be consistent with the docker utilities, we need to have
        # these directories first when building layer tarballs.
        tar.addfile(apply_filters(nix_root(directory("/nix"))))
        tar.addfile(apply_filters(nix_root(directory("/nix/store"))))

        for path in paths:
            path = pathlib.Path(path)
            if path.is_symlink():
                # Archive the link itself, never what it points to.
                files = [path]
            else:
                files = itertools.chain([path], path.rglob("*"))

            # Sorted, so both passes over the same paths produce the
            # same byte stream.
            for filename in sorted(files):
                ti = append_root(tar.gettarinfo(filename))

                # copy hardlinks as regular files
                if ti.islnk():
                    ti.type = tarfile.REGTYPE
                    ti.linkname = ""
                    ti.size = filename.stat().st_size

                ti = apply_filters(ti)
                if ti.isfile():
                    with open(filename, "rb") as f:
                        tar.addfile(ti, f)
                else:
                    tar.addfile(ti)


class ExtractChecksum:
    """
    A writable stream which only calculates the final file size and
    sha256sum, while discarding the actual contents.
    """

    def __init__(self):
        self._digest = hashlib.sha256()
        self._size = 0

    def write(self, data):
        """
        Feeds 'data' into the checksum and the size counter.

        Returns: Number of bytes consumed, like other file-like objects.
        """
        self._digest.update(data)
        self._size += len(data)
        return len(data)

    def extract(self):
        """
        Returns: Hex-encoded sha256sum and size as a tuple.
        """
        return (self._digest.hexdigest(), self._size)


FromImage = namedtuple("FromImage", ["tar", "manifest_json", "image_json"])
# Some metadata for a layer
LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])


def load_from_image(from_image_str):
    """
    Loads the given base image, if any.

    from_image_str: Path to the base image archive.

    Returns: A 'FromImage' object with references to the loaded base image,
             or 'None' if no base image was provided.
    """
    if from_image_str is None:
        return None

    base_tar = tarfile.open(from_image_str)
    try:
        manifest_json_tarinfo = base_tar.getmember("manifest.json")
        with base_tar.extractfile(manifest_json_tarinfo) as f:
            manifest_json = json.load(f)

        image_json_tarinfo = base_tar.getmember(manifest_json[0]["Config"])
        with base_tar.extractfile(image_json_tarinfo) as f:
            image_json = json.load(f)
    except Exception:
        # The archive stays open on success (the layers are read from it
        # later), but must not leak when the metadata cannot be loaded.
        base_tar.close()
        raise

    return FromImage(base_tar, manifest_json, image_json)


def add_base_layers(tar, from_image):
    """
    Adds the layers from the given base image to the final image.

    This is a generator; the layers are only added to 'tar' as it is
    consumed. The base image archive is closed once it finishes.

    tar: 'tarfile.TarFile' object for new layers to be added to.
    from_image: 'FromImage' object with references to the loaded base image.

    Yields: A 'LayerInfo' object for each base layer added.
    """
    if from_image is None:
        print("No 'fromImage' provided", file=sys.stderr)
        return

    try:
        layers = from_image.manifest_json[0]["Layers"]
        checksums = from_image.image_json["rootfs"]["diff_ids"]
        layers_checksums = zip(layers, checksums)

        for num, (layer, checksum) in enumerate(layers_checksums, start=1):
            layer_tarinfo = from_image.tar.getmember(layer)
            checksum = re.sub(r"^sha256:", "", checksum)

            tar.addfile(
                layer_tarinfo,
                from_image.tar.extractfile(layer_tarinfo)
            )
            path = layer_tarinfo.path
            size = layer_tarinfo.size

            print("Adding base layer", num, "from", path, file=sys.stderr)
            yield LayerInfo(
                size=size,
                checksum=checksum,
                path=path,
                paths=[path]
            )
    finally:
        # Close the base image even if copying a layer failed or the
        # consumer stopped early.
        from_image.tar.close()


def overlay_base_config(from_image, final_config):
    """
    Overlays the final image 'config' JSON on top of selected defaults from the
    base image 'config' JSON.

    from_image: 'FromImage' object with references to the loaded base image.
    final_config: 'dict' object of the final image 'config' JSON.
                  It is updated in place.

    Returns: The 'final_config' object.
    """
    if from_image is None:
        return final_config

    # Both the 'config' object and its 'Env' entry may be missing or
    # 'null' in an image JSON; treat those cases as empty.
    base_config = from_image.image_json.get("config") or {}

    # Preserve environment from base image
    base_env = base_config.get("Env") or []
    final_env = base_env + (final_config.get("Env") or [])
    if final_env:
        # Resolve duplicates (last one wins) and format back as list
        resolved_env = {entry.split("=", 1)[0]: entry for entry in final_env}
        final_config["Env"] = list(resolved_env.values())
    return final_config


def add_layer_dir(tar, paths, store_dir, mtime):
    """
    Appends given store paths to a TarFile object as a new layer.

    tar: 'tarfile.TarFile' object for the new layer to be added to.
    paths: List of store paths.
    store_dir: the root directory of the nix store
    mtime: 'mtime' of the added files and the layer tarball.
           Should be an integer representing a POSIX time.

    Returns: A 'LayerInfo' object containing some metadata of
             the layer added.
    """

    invalid_paths = [i for i in paths if not i.startswith(store_dir)]
    assert len(invalid_paths) == 0, \
        f"Expecting absolute paths from {store_dir}, but got: {invalid_paths}"

    # First, calculate the tarball checksum and the size.
    extract_checksum = ExtractChecksum()
    archive_paths_to(
        extract_checksum,
        paths,
        mtime=mtime,
    )
    (checksum, size) = extract_checksum.extract()

    path = f"{checksum}/layer.tar"
    layer_tarinfo = tarfile.TarInfo(path)
    layer_tarinfo.size = size
    layer_tarinfo.mtime = mtime

    # Then actually stream the contents to the outer tarball.
    read_fd, write_fd = os.pipe()
    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
        def producer():
            try:
                archive_paths_to(
                    write,
                    paths,
                    mtime=mtime,
                )
            finally:
                # Always close the write end, also on failure. Otherwise
                # the reader below would never see the end of the stream
                # and 'addfile' would block forever instead of failing.
                write.close()

        # Once the write end of the pipe is closed the reader sees the
        # end of the stream, so we don't need to wait until this thread
        # is finished.
        #
        # Any exception from the thread will get printed by the default
        # exception handler, and the 'addfile' call will fail since it
        # won't be able to read required amount of bytes.
        threading.Thread(target=producer).start()
        tar.addfile(layer_tarinfo, read)

    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)


def add_customisation_layer(target_tar, customisation_layer, mtime):
    """
    Adds the customisation layer as a new layer. This layer is structured
    differently; given store path has the 'layer.tar' and corresponding
    sha256sum ready.

    target_tar: 'tarfile.TarFile' object for the new layer to be added to.
    customisation_layer: Path containing the layer archive.
    mtime: 'mtime' of the added layer tarball.

    Returns: A 'LayerInfo' object containing some metadata of
             the layer added. Its 'size' is not filled in.
    """

    checksum_path = os.path.join(customisation_layer, "checksum")
    with open(checksum_path) as f:
        checksum = f.read().strip()
        assert len(checksum) == 64, f"Invalid sha256 at {checksum_path}."

    layer_path = os.path.join(customisation_layer, "layer.tar")

    path = f"{checksum}/layer.tar"
    tarinfo = target_tar.gettarinfo(layer_path)
    tarinfo.name = path
    tarinfo.mtime = mtime

    with open(layer_path, "rb") as f:
        target_tar.addfile(tarinfo, f)

    return LayerInfo(
        size=None,
        checksum=checksum,
        path=path,
        paths=[customisation_layer]
    )


def add_bytes(tar, path, content, mtime):
    """
    Adds a file to the tarball with given path and contents.

    tar: 'tarfile.TarFile' object.
    path: Path of the file as a string.
    content: Contents of the file.
    mtime: 'mtime' of the file. Should be an integer representing a POSIX time.
    """
    assert isinstance(content, bytes)

    ti = tarfile.TarInfo(path)
    ti.size = len(content)
    ti.mtime = mtime
    tar.addfile(ti, io.BytesIO(content))


def main():
    """
    Reads the configuration file given as the first command line
    argument and streams the resulting image tarball to stdout.
    Progress messages go to stderr.
    """
    with open(sys.argv[1], "r") as f:
        conf = json.load(f)

    created = (
      datetime.now(tz=timezone.utc)
      if conf["created"] == "now"
      else datetime.fromisoformat(conf["created"])
    )
    mtime = int(created.timestamp())
    store_dir = conf["store_dir"]

    # A missing "from_image" key means the same as 'null': no base image.
    from_image = load_from_image(conf.get("from_image"))

    with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
        layers = []
        layers.extend(add_base_layers(tar, from_image))

        start = len(layers) + 1
        for num, store_layer in enumerate(conf["store_layers"], start=start):
            print("Creating layer", num, "from paths:", store_layer,
                  file=sys.stderr)
            info = add_layer_dir(tar, store_layer, store_dir, mtime=mtime)
            layers.append(info)

        print("Creating layer", len(layers) + 1, "with customisation...",
              file=sys.stderr)
        layers.append(
          add_customisation_layer(
            tar,
            conf["customisation_layer"],
            mtime=mtime
          )
        )

        print("Adding manifests...", file=sys.stderr)

        image_json = {
            "created": datetime.isoformat(created),
            "architecture": conf["architecture"],
            "os": "linux",
            "config": overlay_base_config(from_image, conf["config"]),
            "rootfs": {
                "diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
                "type": "layers",
            },
            "history": [
                {
                  "created": datetime.isoformat(created),
                  "comment": f"store paths: {layer.paths}"
                }
                for layer in layers
            ],
        }

        # The image JSON is content addressed: its file name is the
        # checksum of its own serialised form.
        image_json = json.dumps(image_json, indent=4).encode("utf-8")
        image_json_checksum = hashlib.sha256(image_json).hexdigest()
        image_json_path = f"{image_json_checksum}.json"
        add_bytes(tar, image_json_path, image_json, mtime=mtime)

        manifest_json = [
            {
                "Config": image_json_path,
                "RepoTags": [conf["repo_tag"]],
                "Layers": [layer.path for layer in layers],
            }
        ]
        manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
        add_bytes(tar, "manifest.json", manifest_json, mtime=mtime)

        print("Done.", file=sys.stderr)


if __name__ == "__main__":
    main()