Merge pull request #256226 from ElvishJerricco/systemd-stage-1-testing-backdoor

authored by Ryan Lahfa and committed by GitHub b8218af2 658414a1

+213 -140
+2
nixos/doc/manual/release-notes/rl-2311.section.md
··· 556 - `teleport` has been upgraded from major version 12 to major version 14. Please see upstream [upgrade instructions](https://goteleport.com/docs/management/operations/upgrading/) and release notes for versions [13](https://goteleport.com/docs/changelog/#1300-050823) and [14](https://goteleport.com/docs/changelog/#1400-092023). Note that Teleport does not officially support upgrades across more than one major version at a time. If you're running Teleport server components, it is recommended to first upgrade to an intermediate 13.x version by setting `services.teleport.package = pkgs.teleport_13`. Afterwards, this option can be removed to upgrade to the default version (14). 557 558 - The Linux kernel module `msr` (see [`msr(4)`](https://man7.org/linux/man-pages/man4/msr.4.html)), which provides an interface to read and write the model-specific registers (MSRs) of an x86 CPU, can now be configured via `hardware.cpu.x86.msr`.
··· 556 - `teleport` has been upgraded from major version 12 to major version 14. Please see upstream [upgrade instructions](https://goteleport.com/docs/management/operations/upgrading/) and release notes for versions [13](https://goteleport.com/docs/changelog/#1300-050823) and [14](https://goteleport.com/docs/changelog/#1400-092023). Note that Teleport does not officially support upgrades across more than one major version at a time. If you're running Teleport server components, it is recommended to first upgrade to an intermediate 13.x version by setting `services.teleport.package = pkgs.teleport_13`. Afterwards, this option can be removed to upgrade to the default version (14). 557 558 - The Linux kernel module `msr` (see [`msr(4)`](https://man7.org/linux/man-pages/man4/msr.4.html)), which provides an interface to read and write the model-specific registers (MSRs) of an x86 CPU, can now be configured via `hardware.cpu.x86.msr`. 559 + 560 + - A new option for writing NixOS tests, `testing.initrdBackdoor`, enables `backdoor.service` in initrd. It requires `boot.initrd.systemd.enable` to be enabled. Boot will pause in stage 1 at `initrd.target` and listen for commands from the `Machine` python interface, just like stage 2 normally does. This enables commands to be sent to test and debug stage 1. Use `machine.switch_root()` to leave stage 1 and proceed to stage 2.
+16
nixos/lib/test-driver/test_driver/machine.py
··· 1278 def run_callbacks(self) -> None: 1279 for callback in self.callbacks: 1280 callback()
def run_callbacks(self) -> None:
    """Call each entry of `self.callbacks` in turn, with no arguments."""
    for callback in self.callbacks:
        callback()


def switch_root(self) -> None:
    """
    Transition from stage 1 to stage 2. This requires the
    machine to be configured with `testing.initrdBackdoor = true`
    and `boot.initrd.systemd.enable = true`.

    Steps, in order: wait for `initrd.target`, ask systemd to isolate
    `initrd-switch-root.target`, wait for the "Switching root." message
    on the console, then reconnect.
    """
    # Stage 1 must have reached initrd.target before we ask to leave it.
    self.wait_for_unit("initrd.target")

    # `--no-block` makes systemctl return without waiting for the job to
    # finish. Both output streams are discarded, and neither the return
    # code nor the output of the command is checked.
    self.execute(
        "systemctl isolate --no-block initrd-switch-root.target 2>/dev/null >/dev/null",
        check_return=False,
        check_output=False,
    )

    # The console message is the signal that the switch has started.
    self.wait_for_console_text(r"systemd\[1\]:.*Switching root\.")

    # Drop the connected flag and connect again.
    # NOTE(review): presumably the stage-1 backdoor shell does not survive
    # the switch, so a fresh connection to stage 2 is needed -- confirm
    # against backdoor.service and Machine.connect().
    self.connected = False
    self.connect()
+93 -41
nixos/modules/testing/test-instrumentation.nix
··· 6 with lib; 7 8 let 9 qemu-common = import ../../lib/qemu-common.nix { inherit lib pkgs; }; 10 in 11 12 { 13 14 config = { 15 16 - systemd.services.backdoor = 17 - { wantedBy = [ "multi-user.target" ]; 18 - requires = [ "dev-hvc0.device" "dev-${qemu-common.qemuSerialDevice}.device" ]; 19 - after = [ "dev-hvc0.device" "dev-${qemu-common.qemuSerialDevice}.device" ]; 20 - script = 21 - '' 22 - export USER=root 23 - export HOME=/root 24 - export DISPLAY=:0.0 25 26 - source /etc/profile 27 28 - # Don't use a pager when executing backdoor 29 - # actions. Because we use a tty, commands like systemctl 30 - # or nix-store get confused into thinking they're running 31 - # interactively. 32 - export PAGER= 33 34 - cd /tmp 35 - exec < /dev/hvc0 > /dev/hvc0 36 - while ! exec 2> /dev/${qemu-common.qemuSerialDevice}; do sleep 0.1; done 37 - echo "connecting to host..." >&2 38 - stty -F /dev/hvc0 raw -echo # prevent nl -> cr/nl conversion 39 - # The following line is essential since it signals to 40 - # the test driver that the shell is ready. 41 - # See: the connect method in the Machine class. 42 - echo "Spawning backdoor root shell..." 43 - # Passing the terminal device makes bash run non-interactively. 44 - # Otherwise we get errors on the terminal because bash tries to 45 - # setup things like job control. 46 - # Note: calling bash explicitly here instead of sh makes sure that 47 - # we can also run non-NixOS guests during tests. 48 - PS1= exec /usr/bin/env bash --norc /dev/hvc0 49 - ''; 50 - serviceConfig.KillSignal = "SIGHUP"; 51 - }; 52 53 # Prevent agetty from being instantiated on the serial device, since it 54 # interferes with the backdoor (writes to it will randomly fail ··· 104 MaxLevelConsole=debug 105 ''; 106 107 - boot.initrd.systemd.contents."/etc/systemd/journald.conf".text = '' 108 - [Journal] 109 - ForwardToConsole=yes 110 - MaxLevelConsole=debug 111 - ''; 112 - 113 systemd.extraConfig = '' 114 # Don't clobber the console with duplicate systemd messages. 
115 ShowStatus=no ··· 122 DefaultTimeoutStartSec=300 123 DefaultDeviceTimeoutSec=300 124 ''; 125 - 126 - boot.initrd.systemd.extraConfig = config.systemd.extraConfig; 127 128 boot.consoleLogLevel = 7; 129
··· 6 with lib; 7 8 let 9 + cfg = config.testing; 10 + 11 qemu-common = import ../../lib/qemu-common.nix { inherit lib pkgs; }; 12 + 13 + backdoorService = { 14 + wantedBy = [ "sysinit.target" ]; 15 + unitConfig.DefaultDependencies = false; 16 + conflicts = [ "shutdown.target" "initrd-switch-root.target" ]; 17 + before = [ "shutdown.target" "initrd-switch-root.target" ]; 18 + requires = [ "dev-hvc0.device" "dev-${qemu-common.qemuSerialDevice}.device" ]; 19 + after = [ "dev-hvc0.device" "dev-${qemu-common.qemuSerialDevice}.device" ]; 20 + script = 21 + '' 22 + export USER=root 23 + export HOME=/root 24 + export DISPLAY=:0.0 25 + 26 + if [[ -e /etc/profile ]]; then 27 + source /etc/profile 28 + fi 29 + 30 + # Don't use a pager when executing backdoor 31 + # actions. Because we use a tty, commands like systemctl 32 + # or nix-store get confused into thinking they're running 33 + # interactively. 34 + export PAGER= 35 + 36 + cd /tmp 37 + exec < /dev/hvc0 > /dev/hvc0 38 + while ! exec 2> /dev/${qemu-common.qemuSerialDevice}; do sleep 0.1; done 39 + echo "connecting to host..." >&2 40 + stty -F /dev/hvc0 raw -echo # prevent nl -> cr/nl conversion 41 + # The following line is essential since it signals to 42 + # the test driver that the shell is ready. 43 + # See: the connect method in the Machine class. 44 + echo "Spawning backdoor root shell..." 45 + # Passing the terminal device makes bash run non-interactively. 46 + # Otherwise we get errors on the terminal because bash tries to 47 + # setup things like job control. 48 + # Note: calling bash explicitly here instead of sh makes sure that 49 + # we can also run non-NixOS guests during tests. 50 + PS1= exec /usr/bin/env bash --norc /dev/hvc0 51 + ''; 52 + serviceConfig.KillSignal = "SIGHUP"; 53 + }; 54 + 55 in 56 57 { 58 59 + options.testing = { 60 + 61 + initrdBackdoor = lib.mkEnableOption (lib.mdDoc '' 62 + enable backdoor.service in initrd. Requires 63 + boot.initrd.systemd.enable to be enabled. 
Boot will pause in 64 + stage 1 at initrd.target, and will listen for commands from the 65 + Machine python interface, just like stage 2 normally does. This 66 + enables commands to be sent to test and debug stage 1. Use 67 + machine.switch_root() to leave stage 1 and proceed to stage 2. 68 + ''); 69 + 70 + }; 71 + 72 config = { 73 74 + assertions = [ 75 + { 76 + assertion = cfg.initrdBackdoor -> config.boot.initrd.systemd.enable; 77 + message = '' 78 + testing.initrdBackdoor requires boot.initrd.systemd.enable to be enabled. 79 + ''; 80 + } 81 + ]; 82 83 + systemd.services.backdoor = backdoorService; 84 + 85 + boot.initrd.systemd = lib.mkMerge [ 86 + { 87 + contents."/etc/systemd/journald.conf".text = '' 88 + [Journal] 89 + ForwardToConsole=yes 90 + MaxLevelConsole=debug 91 + ''; 92 + 93 + extraConfig = config.systemd.extraConfig; 94 + } 95 + 96 + (lib.mkIf cfg.initrdBackdoor { 97 + # Implemented in machine.switch_root(). Suppress the unit by 98 + # making it a noop without removing it, which would break 99 + # initrd-parse-etc.service 100 + services.initrd-cleanup.serviceConfig.ExecStart = [ 101 + # Reset 102 + "" 103 + # noop 104 + "/bin/true" 105 + ]; 106 107 + services.backdoor = backdoorService; 108 109 + contents."/usr/bin/env".source = "${pkgs.coreutils}/bin/env"; 110 + }) 111 + ]; 112 113 # Prevent agetty from being instantiated on the serial device, since it 114 # interferes with the backdoor (writes to it will randomly fail ··· 164 MaxLevelConsole=debug 165 ''; 166 167 systemd.extraConfig = '' 168 # Don't clobber the console with duplicate systemd messages. 169 ShowStatus=no ··· 176 DefaultTimeoutStartSec=300 177 DefaultDeviceTimeoutSec=300 178 ''; 179 180 boot.consoleLogLevel = 7; 181
+7
nixos/tests/systemd-initrd-modprobe.nix
··· 2 name = "systemd-initrd-modprobe"; 3 4 nodes.machine = { pkgs, ... }: { 5 boot.initrd.systemd.enable = true; 6 boot.initrd.kernelModules = [ "loop" ]; # Load module in initrd. 7 boot.extraModprobeConfig = '' ··· 10 }; 11 12 testScript = '' 13 machine.wait_for_unit("multi-user.target") 14 max_loop = machine.succeed("cat /sys/module/loop/parameters/max_loop") 15 assert int(max_loop) == 42, "Parameter should be respected for initrd kernel modules"
··· 2 name = "systemd-initrd-modprobe"; 3 4 nodes.machine = { pkgs, ... }: { 5 + testing.initrdBackdoor = true; 6 boot.initrd.systemd.enable = true; 7 boot.initrd.kernelModules = [ "loop" ]; # Load module in initrd. 8 boot.extraModprobeConfig = '' ··· 11 }; 12 13 testScript = '' 14 + machine.wait_for_unit("initrd.target") 15 + max_loop = machine.succeed("cat /sys/module/loop/parameters/max_loop") 16 + assert int(max_loop) == 42, "Parameter should be respected for initrd kernel modules" 17 + 18 + # Make sure it sticks in stage 2 19 + machine.switch_root() 20 machine.wait_for_unit("multi-user.target") 21 max_loop = machine.succeed("cat /sys/module/loop/parameters/max_loop") 22 assert int(max_loop) == 42, "Parameter should be respected for initrd kernel modules"
+13 -39
nixos/tests/systemd-initrd-networkd-ssh.nix
··· 4 5 nodes = { 6 server = { config, pkgs, ... }: { 7 - environment.systemPackages = [ pkgs.cryptsetup ]; 8 - boot.loader.systemd-boot.enable = true; 9 - boot.loader.timeout = 0; 10 - virtualisation = { 11 - emptyDiskImages = [ 4096 ]; 12 - useBootLoader = true; 13 - # Booting off the encrypted disk requires an available init script from 14 - # the Nix store 15 - mountHostNixStore = true; 16 - useEFIBoot = true; 17 - }; 18 - 19 - specialisation.encrypted-root.configuration = { 20 - virtualisation.rootDevice = "/dev/mapper/root"; 21 - virtualisation.fileSystems."/".autoFormat = true; 22 - boot.initrd.luks.devices = lib.mkVMOverride { 23 - root.device = "/dev/vdb"; 24 - }; 25 - boot.initrd.systemd.enable = true; 26 - boot.initrd.network = { 27 enable = true; 28 - ssh = { 29 - enable = true; 30 - authorizedKeys = [ (lib.readFile ./initrd-network-ssh/id_ed25519.pub) ]; 31 - port = 22; 32 - # Terrible hack so it works with useBootLoader 33 - hostKeys = [ { outPath = "${./initrd-network-ssh/ssh_host_ed25519_key}"; } ]; 34 - }; 35 }; 36 }; 37 }; ··· 63 status, _ = client.execute("nc -z server 22") 64 return status == 0 65 66 - server.wait_for_unit("multi-user.target") 67 - server.succeed( 68 - "echo somepass | cryptsetup luksFormat --type=luks2 /dev/vdb", 69 - "bootctl set-default nixos-generation-1-specialisation-encrypted-root.conf", 70 - "sync", 71 - ) 72 - server.shutdown() 73 - server.start() 74 - 75 client.wait_for_unit("network.target") 76 with client.nested("waiting for SSH server to come up"): 77 retry(ssh_is_up) 78 79 - client.succeed( 80 - "echo somepass | ssh -i /etc/sshKey -o UserKnownHostsFile=/etc/knownHosts server 'systemd-tty-ask-password-agent' & exit" 81 ) 82 83 server.wait_for_unit("multi-user.target") 84 - server.succeed("mount | grep '/dev/mapper/root on /'") 85 ''; 86 })
··· 4 5 nodes = { 6 server = { config, pkgs, ... }: { 7 + testing.initrdBackdoor = true; 8 + boot.initrd.systemd.enable = true; 9 + boot.initrd.systemd.contents."/etc/msg".text = "foo"; 10 + boot.initrd.network = { 11 + enable = true; 12 + ssh = { 13 enable = true; 14 + authorizedKeys = [ (lib.readFile ./initrd-network-ssh/id_ed25519.pub) ]; 15 + port = 22; 16 + hostKeys = [ ./initrd-network-ssh/ssh_host_ed25519_key ]; 17 }; 18 }; 19 }; ··· 45 status, _ = client.execute("nc -z server 22") 46 return status == 0 47 48 client.wait_for_unit("network.target") 49 with client.nested("waiting for SSH server to come up"): 50 retry(ssh_is_up) 51 52 + msg = client.succeed( 53 + "ssh -i /etc/sshKey -o UserKnownHostsFile=/etc/knownHosts server 'cat /etc/msg'" 54 ) 55 + assert "foo" in msg 56 57 + server.switch_root() 58 server.wait_for_unit("multi-user.target") 59 ''; 60 })
+74 -56
nixos/tests/systemd-initrd-networkd.nix
··· 1 - import ./make-test-python.nix ({ pkgs, lib, ... }: { 2 - name = "systemd-initrd-network"; 3 - meta.maintainers = [ lib.maintainers.elvishjerricco ]; 4 5 - nodes = let 6 - mkFlushTest = flush: script: { ... }: { 7 - boot.initrd.systemd.enable = true; 8 - boot.initrd.network = { 9 - enable = true; 10 - flushBeforeStage2 = flush; 11 - }; 12 systemd.services.check-flush = { 13 requiredBy = ["multi-user.target"]; 14 before = ["network-pre.target" "multi-user.target"]; ··· 19 inherit script; 20 }; 21 }; 22 - in { 23 - basic = { ... }: { 24 - boot.initrd.network.enable = true; 25 26 - boot.initrd.systemd = { 27 - enable = true; 28 - # Enable network-online to fail the test in case of timeout 29 - network.wait-online.timeout = 10; 30 - network.wait-online.anyInterface = true; 31 - targets.network-online.requiredBy = [ "initrd.target" ]; 32 - services.systemd-networkd-wait-online.requiredBy = 33 - [ "network-online.target" ]; 34 - 35 - initrdBin = [ pkgs.iproute2 pkgs.iputils pkgs.gnugrep ]; 36 - services.check = { 37 - requiredBy = [ "initrd.target" ]; 38 - before = [ "initrd.target" ]; 39 - after = [ "network-online.target" ]; 40 - serviceConfig.Type = "oneshot"; 41 - path = [ pkgs.iproute2 pkgs.iputils pkgs.gnugrep ]; 42 - script = '' 43 - ip addr | grep 10.0.2.15 || exit 1 44 - ping -c1 10.0.2.2 || exit 1 45 - ''; 46 - }; 47 - }; 48 - }; 49 50 - doFlush = mkFlushTest true '' 51 - if ip addr | grep 10.0.2.15; then 52 - echo "Network configuration survived switch-root; flushBeforeStage2 failed" 53 - exit 1 54 - fi 55 ''; 56 57 - dontFlush = mkFlushTest false '' 58 - if ! 
(ip addr | grep 10.0.2.15); then 59 - echo "Network configuration didn't survive switch-root" 60 - exit 1 61 - fi 62 ''; 63 }; 64 65 - testScript = '' 66 - start_all() 67 - basic.wait_for_unit("multi-user.target") 68 - doFlush.wait_for_unit("multi-user.target") 69 - dontFlush.wait_for_unit("multi-user.target") 70 - # Make sure the systemd-network user was set correctly in initrd 71 - basic.succeed("[ $(stat -c '%U,%G' /run/systemd/netif/links) = systemd-network,systemd-network ]") 72 - basic.succeed("ip addr show >&2") 73 - basic.succeed("ip route show >&2") 74 ''; 75 - })
··· 1 + { system ? builtins.currentSystem 2 + , config ? {} 3 + , pkgs ? import ../.. { inherit system config; } 4 + , lib ? pkgs.lib 5 + }: 6 + 7 + with import ../lib/testing-python.nix { inherit system pkgs; }; 8 + 9 + let 10 + inherit (lib.maintainers) elvishjerricco; 11 + 12 + common = { 13 + boot.initrd.systemd = { 14 + enable = true; 15 + network.wait-online.timeout = 10; 16 + network.wait-online.anyInterface = true; 17 + targets.network-online.requiredBy = [ "initrd.target" ]; 18 + services.systemd-networkd-wait-online.requiredBy = 19 + [ "network-online.target" ]; 20 + initrdBin = [ pkgs.iproute2 pkgs.iputils pkgs.gnugrep ]; 21 + }; 22 + testing.initrdBackdoor = true; 23 + boot.initrd.network.enable = true; 24 + }; 25 + 26 + mkFlushTest = flush: script: makeTest { 27 + name = "systemd-initrd-network-${lib.optionalString (!flush) "no-"}flush"; 28 + meta.maintainers = [ elvishjerricco ]; 29 + 30 + nodes.machine = { 31 + imports = [ common ]; 32 33 + boot.initrd.network.flushBeforeStage2 = flush; 34 systemd.services.check-flush = { 35 requiredBy = ["multi-user.target"]; 36 before = ["network-pre.target" "multi-user.target"]; ··· 41 inherit script; 42 }; 43 }; 44 45 + testScript = '' 46 + machine.wait_for_unit("network-online.target") 47 + machine.succeed( 48 + "ip addr | grep 10.0.2.15", 49 + "ping -c1 10.0.2.2", 50 + ) 51 + machine.switch_root() 52 53 + machine.wait_for_unit("multi-user.target") 54 ''; 55 + }; 56 57 + in { 58 + basic = makeTest { 59 + name = "systemd-initrd-network"; 60 + meta.maintainers = [ elvishjerricco ]; 61 + 62 + nodes.machine = common; 63 + 64 + testScript = '' 65 + machine.wait_for_unit("network-online.target") 66 + machine.succeed( 67 + "ip addr | grep 10.0.2.15", 68 + "ping -c1 10.0.2.2", 69 + ) 70 + machine.switch_root() 71 + 72 + # Make sure the systemd-network user was set correctly in initrd 73 + machine.wait_for_unit("multi-user.target") 74 + machine.succeed("[ $(stat -c '%U,%G' /run/systemd/netif/links) = 
systemd-network,systemd-network ]") 75 + machine.succeed("ip addr show >&2") 76 + machine.succeed("ip route show >&2") 77 ''; 78 }; 79 80 + doFlush = mkFlushTest true '' 81 + if ip addr | grep 10.0.2.15; then 82 + echo "Network configuration survived switch-root; flushBeforeStage2 failed" 83 + exit 1 84 + fi 85 ''; 86 + 87 + dontFlush = mkFlushTest false '' 88 + if ! (ip addr | grep 10.0.2.15); then 89 + echo "Network configuration didn't survive switch-root" 90 + exit 1 91 + fi 92 + ''; 93 + }
+8 -4
nixos/tests/systemd-initrd-simple.nix
··· 2 name = "systemd-initrd-simple"; 3 4 nodes.machine = { pkgs, ... }: { 5 - boot.initrd.systemd = { 6 - enable = true; 7 - emergencyAccess = true; 8 - }; 9 virtualisation.fileSystems."/".autoResize = true; 10 }; 11 12 testScript = '' 13 import subprocess 14 15 with subtest("handover to stage-2 systemd works"): 16 machine.wait_for_unit("multi-user.target") ··· 37 subprocess.check_call(["qemu-img", "resize", "vm-state-machine/machine.qcow2", "+1G"]) 38 39 machine.start() 40 newAvail = machine.succeed("df --output=avail / | sed 1d") 41 42 assert int(oldAvail) < int(newAvail), "File system did not grow"
··· 2 name = "systemd-initrd-simple"; 3 4 nodes.machine = { pkgs, ... }: { 5 + testing.initrdBackdoor = true; 6 + boot.initrd.systemd.enable = true; 7 virtualisation.fileSystems."/".autoResize = true; 8 }; 9 10 testScript = '' 11 import subprocess 12 + 13 + with subtest("testing initrd backdoor"): 14 + machine.wait_for_unit("initrd.target") 15 + machine.succeed("systemctl status initrd-fs.target") 16 + machine.switch_root() 17 18 with subtest("handover to stage-2 systemd works"): 19 machine.wait_for_unit("multi-user.target") ··· 40 subprocess.check_call(["qemu-img", "resize", "vm-state-machine/machine.qcow2", "+1G"]) 41 42 machine.start() 43 + machine.switch_root() 44 newAvail = machine.succeed("df --output=avail / | sed 1d") 45 46 assert int(oldAvail) < int(newAvail), "File system did not grow"