lol

nixos/qemu-vm: use persistent block device names

This change removes the bespoke logic around identifying block devices.
Instead of trying to find the right device by iterating over
`qemu.drives` and guessing the right partition number (e.g.
/dev/vda{1,2}), devices are now identified by persistent names provided
by udev in /dev/disk/by-*.

Before this change, the root device was formatted on demand in the
initrd. However, this makes it impossible to use filesystem identifiers
to identify devices. Now, the formatting step is performed before the VM
is started. Because some tests, however, rely on this behaviour, a
utility function to replace this behaviour is added in
/nixos/tests/common/auto-format-root-device.nix.

Devices that contain neither a partition table nor a filesystem are
identified by their hardware serial number which is injected via QEMU
(and is thus persistent and predictable). PCI paths are not a reliable
way to identify devices because their availability and numbering depends
on the QEMU machine type.

This change makes the module more robust against changes in QEMU and the
kernel (non-persistent device naming) and, by decoupling abstractions
(i.e. rootDevice, bootPartition, and bootLoaderDevice), enables further
improvement down the line.

nikstur 0bdba6c9 aa337560

+130 -99
+14
nixos/doc/manual/release-notes/rl-2311.section.md
··· 45 45 - A new option was added to the virtualisation module that enables specifying explicitly named network interfaces in QEMU VMs. The existing `virtualisation.vlans` is still supported for cases where the name of the network interface is irrelevant. 46 46 47 47 - `services.nginx` gained a `defaultListen` option at server-level with support for PROXY protocol listeners, also `proxyProtocol` is now exposed in `services.nginx.virtualHosts.<name>.listen` option. It is now possible to run PROXY listeners and non-PROXY listeners at a server-level, see [#213510](https://github.com/NixOS/nixpkgs/pull/213510/) for more details. 48 + 49 + ## Nixpkgs internals {#sec-release-23.11-nixpkgs-internals} 50 + 51 + - The `qemu-vm.nix` module by default now identifies block devices via 52 + persistent names available in `/dev/disk/by-*`. Because the rootDevice is 53 + identified by its filesystem label, it needs to be formatted before the VM is 54 + started. The functionality of automatically formatting the rootDevice in the 55 + initrd is removed from the QEMU module. However, for tests that depend on 56 + this functionality, a test utility for the scripted initrd is added 57 + (`nixos/tests/common/auto-format-root-device.nix`). To use this in a NixOS 58 + test, import the module, e.g. `imports = [ 59 + ./common/auto-format-root-device.nix ];` When you use the systemd initrd, you 60 + can automatically format the root device by setting 61 + `virtualisation.fileSystems."/".autoFormat = true;`.
+1
nixos/lib/make-disk-image.nix
··· 573 573 # In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb 574 574 # Use this option to create a symlink from vda to any arbitrary device you want. 575 575 ${optionalString (config.boot.loader.grub.device != "/dev/vda") '' 576 + mkdir -p $(dirname ${config.boot.loader.grub.device}) 576 577 ln -s /dev/vda ${config.boot.loader.grub.device} 577 578 ''} 578 579
+63 -96
nixos/modules/virtualisation/qemu-vm.nix
··· 81 81 82 82 drivesCmdLine = drives: concatStringsSep "\\\n " (imap1 driveCmdline drives); 83 83 84 - 85 - # Creates a device name from a 1-based a numerical index, e.g. 86 - # * `driveDeviceName 1` -> `/dev/vda` 87 - # * `driveDeviceName 2` -> `/dev/vdb` 88 - driveDeviceName = idx: 89 - let letter = elemAt lowerChars (idx - 1); 90 - in if cfg.qemu.diskInterface == "scsi" then 91 - "/dev/sd${letter}" 92 - else 93 - "/dev/vd${letter}"; 94 - 95 - lookupDriveDeviceName = driveName: driveList: 96 - (findSingle (drive: drive.name == driveName) 97 - (throw "Drive ${driveName} not found") 98 - (throw "Multiple drives named ${driveName}") driveList).device; 99 - 100 - addDeviceNames = 101 - imap1 (idx: drive: drive // { device = driveDeviceName idx; }); 102 - 103 84 # Shell script to start the VM. 104 85 startVM = 105 86 '' ··· 109 90 110 91 set -e 111 92 93 + # Create an empty ext4 filesystem image. A filesystem image does not 94 + # contain a partition table but just a filesystem. 95 + createEmptyFilesystemImage() { 96 + local name=$1 97 + local size=$2 98 + local temp=$(mktemp) 99 + ${qemu}/bin/qemu-img create -f raw "$temp" "$size" 100 + ${pkgs.e2fsprogs}/bin/mkfs.ext4 -L ${rootFilesystemLabel} "$temp" 101 + ${qemu}/bin/qemu-img convert -f raw -O qcow2 "$temp" "$name" 102 + rm "$temp" 103 + } 104 + 112 105 NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE" 113 106 114 107 if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then 115 108 echo "Disk image do not exist, creating the virtualisation disk image..." 116 - # If we are using a bootloader and default filesystems layout. 117 - # We have to reuse the system image layout as a backing image format (CoW) 118 - # So we can write on the top of it. 119 109 120 - # If we are not using the default FS layout, potentially, we are interested into 121 - # performing operations in postDeviceCommands or at early boot on the raw device. 
122 - # We can still boot through QEMU direct kernel boot feature. 110 + ${if (cfg.useBootLoader && cfg.useDefaultFilesystems) then '' 111 + # Create a writable qcow2 image using the systemImage as a backing 112 + # image. 123 113 124 - # CoW prevent size to be attributed to an image. 125 - # FIXME: raise this issue to upstream. 126 - ${qemu}/bin/qemu-img create \ 127 - ${concatStringsSep " \\\n" ([ "-f qcow2" ] 128 - ++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2" 129 - ++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M" 130 - ++ [ ''"$NIX_DISK_IMAGE"'' ])} 114 + # CoW prevent size to be attributed to an image. 115 + # FIXME: raise this issue to upstream. 116 + ${qemu}/bin/qemu-img create \ 117 + -f qcow2 \ 118 + -b ${systemImage}/nixos.qcow2 \ 119 + -F qcow2 \ 120 + "$NIX_DISK_IMAGE" 121 + '' else if cfg.useDefaultFilesystems then '' 122 + createEmptyFilesystemImage "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M" 123 + '' else '' 124 + # Create an empty disk image without a filesystem. 125 + ${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M" 126 + '' 127 + } 131 128 echo "Virtualisation disk image created." 132 129 fi 133 130 ··· 148 145 ${pkgs.erofs-utils}/bin/mkfs.erofs \ 149 146 --force-uid=0 \ 150 147 --force-gid=0 \ 148 + -L ${nixStoreFilesystemLabel} \ 151 149 -U eb176051-bd15-49b7-9e6b-462e0b467019 \ 152 150 -T 0 \ 153 151 --exclude-regex="$( ··· 218 216 219 217 regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; }; 220 218 219 + # Use well-defined and persistent filesystem labels to identify block devices. 220 + rootFilesystemLabel = "nixos"; 221 + espFilesystemLabel = "ESP"; # Hard-coded by make-disk-image.nix 222 + nixStoreFilesystemLabel = "nix-store"; 223 + 224 + # The root drive is a raw disk which does not necessarily contain a 225 + # filesystem or partition table. 
It thus cannot be identified via the typical 226 + # persistent naming schemes (e.g. /dev/disk/by-{label, uuid, partlabel, 227 + # partuuid}. Instead, supply a well-defined and persistent serial attribute 228 + # via QEMU. Inside the running system, the disk can then be identified via 229 + # the /dev/disk/by-id scheme. 230 + rootDriveSerialAttr = "root"; 231 + 221 232 # System image is akin to a complete NixOS install with 222 233 # a boot partition and root partition. 223 234 systemImage = import ../../lib/make-disk-image.nix { ··· 225 236 additionalPaths = [ regInfo ]; 226 237 format = "qcow2"; 227 238 onlyNixStore = false; 239 + label = rootFilesystemLabel; 228 240 partitionTableType = selectPartitionTableLayout { inherit (cfg) useDefaultFilesystems useEFIBoot; }; 229 241 # Bootloader should be installed on the system image only if we are booting through bootloaders. 230 242 # Though, if a user is not using our default filesystems, it is possible to not have any ESP ··· 247 259 additionalPaths = [ regInfo ]; 248 260 format = "qcow2"; 249 261 onlyNixStore = true; 262 + label = nixStoreFilesystemLabel; 250 263 partitionTableType = "none"; 251 264 installBootLoader = false; 252 265 touchEFIVars = false; ··· 255 268 copyChannel = false; 256 269 }; 257 270 258 - bootConfiguration = 259 - if cfg.useDefaultFilesystems 260 - then 261 - if cfg.useBootLoader 262 - then 263 - if cfg.useEFIBoot then "efi_bootloading_with_default_fs" 264 - else "legacy_bootloading_with_default_fs" 265 - else 266 - if cfg.directBoot.enable then "direct_boot_with_default_fs" 267 - else "custom" 268 - else 269 - "custom"; 270 - suggestedRootDevice = { 271 - "efi_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}2"; 272 - "legacy_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}1"; 273 - "direct_boot_with_default_fs" = cfg.bootLoaderDevice; 274 - # This will enforce a NixOS module type checking error 275 - # to ask explicitly the user to set a rootDevice. 
276 - # As it will look like `rootDevice = lib.mkDefault null;` after 277 - # all "computations". 278 - "custom" = null; 279 - }.${bootConfiguration}; 280 271 in 281 272 282 273 { ··· 343 334 virtualisation.bootLoaderDevice = 344 335 mkOption { 345 336 type = types.path; 346 - default = lookupDriveDeviceName "root" cfg.qemu.drives; 347 - defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives''; 348 - example = "/dev/vda"; 337 + default = "/dev/disk/by-id/virtio-${rootDriveSerialAttr}"; 338 + defaultText = literalExpression ''/dev/disk/by-id/virtio-${rootDriveSerialAttr}''; 339 + example = "/dev/disk/by-id/virtio-boot-loader-device"; 349 340 description = 350 341 lib.mdDoc '' 351 - The disk to be used for the boot filesystem. 352 - By default, it is the same disk as the root filesystem. 342 + The path (inside th VM) to the device to boot from when legacy booting. 353 343 ''; 354 344 }; 355 345 356 346 virtualisation.bootPartition = 357 347 mkOption { 358 348 type = types.nullOr types.path; 359 - default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null; 360 - defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null''; 361 - example = "/dev/vda1"; 349 + default = if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null; 350 + defaultText = literalExpression ''if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null''; 351 + example = "/dev/disk/by-label/esp"; 362 352 description = 363 353 lib.mdDoc '' 364 - The boot partition to be used to mount /boot filesystem. 365 - In legacy boots, this should be null. 366 - By default, in EFI boot, it is the first partition of the boot device. 354 + The path (inside the VM) to the device containing the EFI System Partition (ESP). 355 + 356 + If you are *not* booting from a UEFI firmware, this value is, by 357 + default, `null`. The ESP is mounted under `/boot`. 
367 358 ''; 368 359 }; 369 360 370 361 virtualisation.rootDevice = 371 362 mkOption { 372 363 type = types.nullOr types.path; 373 - example = "/dev/vda2"; 364 + default = "/dev/disk/by-label/${rootFilesystemLabel}"; 365 + defaultText = literalExpression ''/dev/disk/by-label/${rootFilesystemLabel}''; 366 + example = "/dev/disk/by-label/nixos"; 374 367 description = 375 368 lib.mdDoc '' 376 - The disk or partition to be used for the root filesystem. 377 - By default (read the source code for more details): 378 - 379 - - under EFI with a bootloader: 2nd partition of the boot disk 380 - - in legacy boot with a bootloader: 1st partition of the boot disk 381 - - in direct boot (i.e. without a bootloader): whole disk 382 - 383 - In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device. 369 + The path (inside the VM) to the device containing the root filesystem. 384 370 ''; 385 371 }; 386 372 ··· 711 697 mkOption { 712 698 type = types.listOf (types.submodule driveOpts); 713 699 description = lib.mdDoc "Drives passed to qemu."; 714 - apply = addDeviceNames; 715 700 }; 716 701 717 702 diskInterface = ··· 975 960 # FIXME: make a sense of this mess wrt to multiple ESP present in the system, probably use boot.efiSysMountpoint? 976 961 boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice); 977 962 boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}"; 978 - virtualisation.rootDevice = mkDefault suggestedRootDevice; 979 963 980 964 boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ]; 981 965 982 966 boot.loader.supportsInitrdSecrets = mkIf (!cfg.useBootLoader) (mkVMOverride false); 983 967 984 - boot.initrd.extraUtilsCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable) 985 - '' 986 - # We need mke2fs in the initrd. 
987 - copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs 988 - ''; 989 - 990 - boot.initrd.postDeviceCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable) 991 - '' 992 - # If the disk image appears to be empty, run mke2fs to 993 - # initialise. 994 - FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true) 995 - PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true) 996 - if test -z "$FSTYPE" -a -z "$PARTTYPE"; then 997 - mke2fs -t ext4 ${cfg.rootDevice} 998 - fi 999 - ''; 1000 - 1001 968 boot.initrd.postMountCommands = lib.mkIf (!config.boot.initrd.systemd.enable) 1002 969 '' 1003 970 # Mark this as a NixOS machine. ··· 1112 1079 driveExtraOpts.cache = "writeback"; 1113 1080 driveExtraOpts.werror = "report"; 1114 1081 deviceExtraOpts.bootindex = "1"; 1082 + deviceExtraOpts.serial = rootDriveSerialAttr; 1115 1083 }]) 1116 1084 (mkIf cfg.useNixStoreImage [{ 1117 1085 name = "nix-store"; ··· 1154 1122 } else { 1155 1123 device = cfg.rootDevice; 1156 1124 fsType = "ext4"; 1157 - autoFormat = true; 1158 1125 }); 1159 1126 "/tmp" = lib.mkIf config.boot.tmp.useTmpfs { 1160 1127 device = "tmpfs"; ··· 1164 1131 options = [ "mode=1777" "strictatime" "nosuid" "nodev" "size=${toString config.boot.tmp.tmpfsSize}" ]; 1165 1132 }; 1166 1133 "/nix/${if cfg.writableStore then ".ro-store" else "store"}" = lib.mkIf cfg.useNixStoreImage { 1167 - device = "${lookupDriveDeviceName "nix-store" cfg.qemu.drives}"; 1134 + device = "/dev/disk/by-label/${nixStoreFilesystemLabel}"; 1168 1135 neededForBoot = true; 1169 1136 options = [ "ro" ]; 1170 1137 }; ··· 1174 1141 neededForBoot = true; 1175 1142 }; 1176 1143 "/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) { 1177 - device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage` 1144 + device = cfg.bootPartition; 1178 1145 fsType = "vfat"; 1179 1146 noCheck = true; # fsck fails on a r/o filesystem 1180 1147 };
+29
nixos/tests/common/auto-format-root-device.nix
··· 1 + # This is a test utility that automatically formats 2 + # `config.virtualisation.rootDevice` in the initrd. 3 + # Note that when you are using 4 + # `boot.initrd.systemd.enable = true`, you can use 5 + # `virtualisation.fileSystems."/".autoFormat = true;` 6 + # instead. 7 + 8 + { config, pkgs, ... }: 9 + 10 + let 11 + rootDevice = config.virtualisation.rootDevice; 12 + in 13 + { 14 + 15 + boot.initrd.extraUtilsCommands = '' 16 + # We need mke2fs in the initrd. 17 + copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs 18 + ''; 19 + 20 + boot.initrd.postDeviceCommands = '' 21 + # If the disk image appears to be empty, run mke2fs to 22 + # initialise. 23 + FSTYPE=$(blkid -o value -s TYPE ${rootDevice} || true) 24 + PARTTYPE=$(blkid -o value -s PTTYPE ${rootDevice} || true) 25 + if test -z "$FSTYPE" -a -z "$PARTTYPE"; then 26 + mke2fs -t ext4 ${rootDevice} 27 + fi 28 + ''; 29 + }
+7 -3
nixos/tests/fsck.nix
··· 21 21 boot.initrd.systemd.enable = systemdStage1; 22 22 }; 23 23 24 - testScript = '' 24 + testScript = { nodes, ...}: 25 + let 26 + rootDevice = nodes.machine.virtualisation.rootDevice; 27 + in 28 + '' 25 29 machine.wait_for_unit("default.target") 26 30 27 31 with subtest("root fs is fsckd"): 28 32 machine.succeed("journalctl -b | grep '${if systemdStage1 29 - then "fsck.*vda.*clean" 30 - else "fsck.ext4.*/dev/vda"}'") 33 + then "fsck.*${builtins.baseNameOf rootDevice}.*clean" 34 + else "fsck.ext4.*${rootDevice}"}'") 31 35 32 36 with subtest("mnt fs is fsckd"): 33 37 machine.succeed("journalctl -b | grep 'fsck.*vdb.*clean'")
+1
nixos/tests/hibernate.nix
··· 50 50 imports = [ 51 51 ../modules/profiles/installation-device.nix 52 52 ../modules/profiles/base.nix 53 + ./common/auto-format-root-device.nix 53 54 ]; 54 55 55 56 nix.settings = {
+3
nixos/tests/initrd-luks-empty-passphrase.nix
··· 14 14 name = "initrd-luks-empty-passphrase"; 15 15 16 16 nodes.machine = { pkgs, ... }: { 17 + imports = lib.optionals (!systemdStage1) [ ./common/auto-format-root-device.nix ]; 18 + 17 19 virtualisation = { 18 20 emptyDiskImages = [ 512 ]; 19 21 useBootLoader = true; ··· 23 25 # the new root device is /dev/vdb 24 26 # an empty 512MiB drive, containing no Nix store. 25 27 mountHostNixStore = true; 28 + fileSystems."/".autoFormat = lib.mkIf systemdStage1 true; 26 29 }; 27 30 28 31 boot.loader.systemd-boot.enable = true;
+5
nixos/tests/installer.nix
··· 298 298 ../modules/profiles/installation-device.nix 299 299 ../modules/profiles/base.nix 300 300 extraInstallerConfig 301 + ./common/auto-format-root-device.nix 301 302 ]; 303 + 304 + # In systemdStage1, also automatically format the device backing the 305 + # root filesystem. 306 + virtualisation.fileSystems."/".autoFormat = systemdStage1; 302 307 303 308 # builds stuff in the VM, needs more juice 304 309 virtualisation.diskSize = 8 * 1024;
+2
nixos/tests/luks.nix
··· 2 2 name = "luks"; 3 3 4 4 nodes.machine = { pkgs, ... }: { 5 + imports = [ ./common/auto-format-root-device.nix ]; 6 + 5 7 # Use systemd-boot 6 8 virtualisation = { 7 9 emptyDiskImages = [ 512 512 ];
+1
nixos/tests/systemd-initrd-luks-fido2.nix
··· 26 26 }; 27 27 }; 28 28 virtualisation.rootDevice = "/dev/mapper/cryptroot"; 29 + virtualisation.fileSystems."/".autoFormat = true; 29 30 }; 30 31 }; 31 32
+1
nixos/tests/systemd-initrd-luks-keyfile.nix
··· 34 34 }; 35 35 }; 36 36 virtualisation.rootDevice = "/dev/mapper/cryptroot"; 37 + virtualisation.fileSystems."/".autoFormat = true; 37 38 boot.initrd.secrets."/etc/cryptroot.key" = keyfile; 38 39 }; 39 40 };
+1
nixos/tests/systemd-initrd-luks-password.nix
··· 25 25 cryptroot2.device = "/dev/vdc"; 26 26 }; 27 27 virtualisation.rootDevice = "/dev/mapper/cryptroot"; 28 + virtualisation.fileSystems."/".autoFormat = true; 28 29 # test mounting device unlocked in initrd after switching root 29 30 virtualisation.fileSystems."/cryptroot2".device = "/dev/mapper/cryptroot2"; 30 31 };
+1
nixos/tests/systemd-initrd-luks-tpm2.nix
··· 28 28 }; 29 29 }; 30 30 virtualisation.rootDevice = "/dev/mapper/cryptroot"; 31 + virtualisation.fileSystems."/".autoFormat = true; 31 32 }; 32 33 }; 33 34
+1
nixos/tests/systemd-initrd-networkd-ssh.nix
··· 17 17 18 18 specialisation.encrypted-root.configuration = { 19 19 virtualisation.rootDevice = "/dev/mapper/root"; 20 + virtualisation.fileSystems."/".autoFormat = true; 20 21 boot.initrd.luks.devices = lib.mkVMOverride { 21 22 root.device = "/dev/vdb"; 22 23 };