Merge pull request #262839 from RaitoBezarius/qemu-vm/timeout

authored by

Ryan Lahfa and committed by
GitHub
92fdbd28 d51318c4

+99 -13
+5
nixos/lib/test-driver/default.nix
··· 11 11 , tesseract4 12 12 , vde2 13 13 , extraPythonPackages ? (_ : []) 14 + , nixosTests 14 15 }: 15 16 16 17 python3Packages.buildPythonApplication { ··· 30 31 ] 31 32 ++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ]) 32 33 ++ extraPythonPackages python3Packages; 34 + 35 + passthru.tests = { 36 + inherit (nixosTests.nixos-test-driver) driver-timeout; 37 + }; 33 38 34 39 doCheck = true; 35 40 nativeCheckInputs = with python3Packages; [ mypy ruff black ];
+9
nixos/lib/test-driver/test_driver/__init__.py
··· 77 77 help="vlans to span by the driver", 78 78 ) 79 79 arg_parser.add_argument( 80 + "--global-timeout", 81 + type=int, 82 + metavar="GLOBAL_TIMEOUT", 83 + action=EnvDefault, 84 + envvar="globalTimeout", 85 + help="Timeout in seconds for the whole test", 86 + ) 87 + arg_parser.add_argument( 80 88 "-o", 81 89 "--output_directory", 82 90 help="""The path to the directory where outputs copied from the VM will be placed. ··· 103 111 args.testscript.read_text(), 104 112 args.output_directory.resolve(), 105 113 args.keep_vm_state, 114 + args.global_timeout, 106 115 ) as driver: 107 116 if args.interactive: 108 117 history_dir = os.getcwd()
+25
nixos/lib/test-driver/test_driver/driver.py
··· 1 1 import os 2 2 import re 3 + import signal 3 4 import tempfile 5 + import threading 4 6 from contextlib import contextmanager 5 7 from pathlib import Path 6 8 from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union ··· 41 43 vlans: List[VLan] 42 44 machines: List[Machine] 43 45 polling_conditions: List[PollingCondition] 46 + global_timeout: int 47 + race_timer: threading.Timer 44 48 45 49 def __init__( 46 50 self, ··· 49 53 tests: str, 50 54 out_dir: Path, 51 55 keep_vm_state: bool = False, 56 + global_timeout: int = 24 * 60 * 60 * 7, 52 57 ): 53 58 self.tests = tests 54 59 self.out_dir = out_dir 60 + self.global_timeout = global_timeout 61 + self.race_timer = threading.Timer(global_timeout, self.terminate_test) 55 62 56 63 tmp_dir = get_tmp_dir() 57 64 ··· 82 89 83 90 def __exit__(self, *_: Any) -> None: 84 91 with rootlog.nested("cleanup"): 92 + self.race_timer.cancel() 85 93 for machine in self.machines: 86 94 machine.release() 87 95 ··· 144 152 145 153 def run_tests(self) -> None: 146 154 """Run the test script (for non-interactive test runs)""" 155 + rootlog.info( 156 + f"Test will time out and terminate in {self.global_timeout} seconds" 157 + ) 158 + self.race_timer.start() 147 159 self.test_script() 148 160 # TODO: Collect coverage data 149 161 for machine in self.machines: ··· 161 173 with rootlog.nested("wait for all VMs to finish"): 162 174 for machine in self.machines: 163 175 machine.wait_for_shutdown() 176 + self.race_timer.cancel() 177 + 178 + def terminate_test(self) -> None: 179 + # This will be usually running in another thread than 180 + # the thread actually executing the test script. 181 + with rootlog.nested("timeout reached; test terminating..."): 182 + for machine in self.machines: 183 + machine.release() 184 + # As we cannot `sys.exit` from another thread 185 + # We can at least force the main thread to get SIGTERM'ed. 186 + # This will prevent any user who caught all the exceptions 187 + # to swallow them and prevent itself from terminating. 188 + os.kill(os.getpid(), signal.SIGTERM) 164 189 165 190 def create_machine(self, args: Dict[str, Any]) -> Machine: 166 191 tmp_dir = get_tmp_dir()
+1
nixos/lib/testing-python.nix
··· 42 42 , nodes ? {} 43 43 , testScript 44 44 , enableOCR ? false 45 + , globalTimeout ? (60 * 60) 45 46 , name ? "unnamed" 46 47 , skipTypeCheck ? false 47 48 # Skip linting (mainly intended for faster dev cycles)
+13
nixos/lib/testing/driver.nix
··· 94 94 wrapProgram $out/bin/nixos-test-driver \ 95 95 --set startScripts "''${vmStartScripts[*]}" \ 96 96 --set testScript "$out/test-script" \ 97 + --set globalTimeout "${toString config.globalTimeout}" \ 97 98 --set vlans '${toString vlans}' \ 98 99 ${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)} 99 100 ''; ··· 121 122 type = types.package; 122 123 default = hostPkgs.qemu_test; 123 124 defaultText = "hostPkgs.qemu_test"; 125 + }; 126 + 127 + globalTimeout = mkOption { 128 + description = mdDoc '' 129 + A global timeout for the complete test, expressed in seconds. 130 + Beyond that timeout, every resource will be killed and released and the test will fail. 131 + 132 + By default, we use a 1 hour timeout. 133 + ''; 134 + type = types.int; 135 + default = 60 * 60; 136 + example = 10 * 60; 124 137 }; 125 138 126 139 enableOCR = mkOption {
+23 -13
nixos/lib/testing/run.nix
··· 16 16 ''; 17 17 }; 18 18 19 + rawTestDerivation = mkOption { 20 + type = types.package; 21 + description = mdDoc '' 22 + Unfiltered version of `test`, for troubleshooting the test framework and `testBuildFailure` in the test framework's test suite. 23 + This is not intended for general use. Use `test` instead. 24 + ''; 25 + internal = true; 26 + }; 27 + 19 28 test = mkOption { 20 29 type = types.package; 21 30 # TODO: can the interactive driver be configured to access the network? ··· 29 38 }; 30 39 31 40 config = { 32 - test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used. 33 - derivation = hostPkgs.stdenv.mkDerivation { 34 - name = "vm-test-run-${config.name}"; 41 + rawTestDerivation = hostPkgs.stdenv.mkDerivation { 42 + name = "vm-test-run-${config.name}"; 35 43 36 - requiredSystemFeatures = [ "kvm" "nixos-test" ]; 44 + requiredSystemFeatures = [ "kvm" "nixos-test" ]; 37 45 38 - buildCommand = '' 39 - mkdir -p $out 46 + buildCommand = '' 47 + mkdir -p $out 40 48 41 - # effectively mute the XMLLogger 42 - export LOGFILE=/dev/null 49 + # effectively mute the XMLLogger 50 + export LOGFILE=/dev/null 43 51 44 - ${config.driver}/bin/nixos-test-driver -o $out 45 - ''; 52 + ${config.driver}/bin/nixos-test-driver -o $out 53 + ''; 46 54 47 - passthru = config.passthru; 55 + passthru = config.passthru; 48 56 49 - meta = config.meta; 50 - }; 57 + meta = config.meta; 58 + }; 59 + test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used. 60 + derivation = config.rawTestDerivation; 51 61 inherit (config) passthru meta; 52 62 }; 53 63
+8
nixos/tests/all-tests.nix
··· 90 90 lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {}; 91 91 node-name = runTest ./nixos-test-driver/node-name.nix; 92 92 busybox = runTest ./nixos-test-driver/busybox.nix; 93 + driver-timeout = pkgs.runCommand "ensure-timeout-induced-failure" { 94 + failed = pkgs.testers.testBuildFailure ((runTest ./nixos-test-driver/timeout.nix).config.rawTestDerivation); 95 + } '' 96 + grep -F "timeout reached; test terminating" $failed/testBuildFailure.log 97 + # The program will always be terminated by SIGTERM (143) if it waits for the deadline thread. 98 + [[ 143 = $(cat $failed/testBuildFailure.exit) ]] 99 + touch $out 100 + ''; 93 101 }; 94 102 95 103 # NixOS vm tests and non-vm unit tests
+15
nixos/tests/nixos-test-driver/timeout.nix
··· 1 + { 2 + name = "Test that sleep of 6 seconds fails a timeout of 5 seconds"; 3 + globalTimeout = 5; 4 + 5 + nodes = { 6 + machine = ({ pkgs, ... }: { 7 + }); 8 + }; 9 + 10 + testScript = '' 11 + start_all() 12 + machine.wait_for_unit("multi-user.target") 13 + machine.succeed("sleep 6") 14 + ''; 15 + }