Merge pull request #262839 from RaitoBezarius/qemu-vm/timeout

authored by

Ryan Lahfa and committed by
GitHub
92fdbd28 d51318c4

+99 -13
+5
nixos/lib/test-driver/default.nix
··· 11 , tesseract4 12 , vde2 13 , extraPythonPackages ? (_ : []) 14 }: 15 16 python3Packages.buildPythonApplication { ··· 30 ] 31 ++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ]) 32 ++ extraPythonPackages python3Packages; 33 34 doCheck = true; 35 nativeCheckInputs = with python3Packages; [ mypy ruff black ];
··· 11 , tesseract4 12 , vde2 13 , extraPythonPackages ? (_ : []) 14 + , nixosTests 15 }: 16 17 python3Packages.buildPythonApplication { ··· 31 ] 32 ++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ]) 33 ++ extraPythonPackages python3Packages; 34 + 35 + passthru.tests = { 36 + inherit (nixosTests.nixos-test-driver) driver-timeout; 37 + }; 38 39 doCheck = true; 40 nativeCheckInputs = with python3Packages; [ mypy ruff black ];
+9
nixos/lib/test-driver/test_driver/__init__.py
··· 77 help="vlans to span by the driver", 78 ) 79 arg_parser.add_argument( 80 "-o", 81 "--output_directory", 82 help="""The path to the directory where outputs copied from the VM will be placed. ··· 103 args.testscript.read_text(), 104 args.output_directory.resolve(), 105 args.keep_vm_state, 106 ) as driver: 107 if args.interactive: 108 history_dir = os.getcwd()
··· 77 help="vlans to span by the driver", 78 ) 79 arg_parser.add_argument( 80 + "--global-timeout", 81 + type=int, 82 + metavar="GLOBAL_TIMEOUT", 83 + action=EnvDefault, 84 + envvar="globalTimeout", 85 + help="Timeout in seconds for the whole test", 86 + ) 87 + arg_parser.add_argument( 88 "-o", 89 "--output_directory", 90 help="""The path to the directory where outputs copied from the VM will be placed. ··· 111 args.testscript.read_text(), 112 args.output_directory.resolve(), 113 args.keep_vm_state, 114 + args.global_timeout, 115 ) as driver: 116 if args.interactive: 117 history_dir = os.getcwd()
+25
nixos/lib/test-driver/test_driver/driver.py
··· 1 import os 2 import re 3 import tempfile 4 from contextlib import contextmanager 5 from pathlib import Path 6 from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union ··· 41 vlans: List[VLan] 42 machines: List[Machine] 43 polling_conditions: List[PollingCondition] 44 45 def __init__( 46 self, ··· 49 tests: str, 50 out_dir: Path, 51 keep_vm_state: bool = False, 52 ): 53 self.tests = tests 54 self.out_dir = out_dir 55 56 tmp_dir = get_tmp_dir() 57 ··· 82 83 def __exit__(self, *_: Any) -> None: 84 with rootlog.nested("cleanup"): 85 for machine in self.machines: 86 machine.release() 87 ··· 144 145 def run_tests(self) -> None: 146 """Run the test script (for non-interactive test runs)""" 147 self.test_script() 148 # TODO: Collect coverage data 149 for machine in self.machines: ··· 161 with rootlog.nested("wait for all VMs to finish"): 162 for machine in self.machines: 163 machine.wait_for_shutdown() 164 165 def create_machine(self, args: Dict[str, Any]) -> Machine: 166 tmp_dir = get_tmp_dir()
··· 1 import os 2 import re 3 + import signal 4 import tempfile 5 + import threading 6 from contextlib import contextmanager 7 from pathlib import Path 8 from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union ··· 43 vlans: List[VLan] 44 machines: List[Machine] 45 polling_conditions: List[PollingCondition] 46 + global_timeout: int 47 + race_timer: threading.Timer 48 49 def __init__( 50 self, ··· 53 tests: str, 54 out_dir: Path, 55 keep_vm_state: bool = False, 56 + global_timeout: int = 24 * 60 * 60 * 7, 57 ): 58 self.tests = tests 59 self.out_dir = out_dir 60 + self.global_timeout = global_timeout 61 + self.race_timer = threading.Timer(global_timeout, self.terminate_test) 62 63 tmp_dir = get_tmp_dir() 64 ··· 89 90 def __exit__(self, *_: Any) -> None: 91 with rootlog.nested("cleanup"): 92 + self.race_timer.cancel() 93 for machine in self.machines: 94 machine.release() 95 ··· 152 153 def run_tests(self) -> None: 154 """Run the test script (for non-interactive test runs)""" 155 + rootlog.info( 156 + f"Test will time out and terminate in {self.global_timeout} seconds" 157 + ) 158 + self.race_timer.start() 159 self.test_script() 160 # TODO: Collect coverage data 161 for machine in self.machines: ··· 173 with rootlog.nested("wait for all VMs to finish"): 174 for machine in self.machines: 175 machine.wait_for_shutdown() 176 + self.race_timer.cancel() 177 + 178 + def terminate_test(self) -> None: 179 + # This will be usually running in another thread than 180 + # the thread actually executing the test script. 181 + with rootlog.nested("timeout reached; test terminating..."): 182 + for machine in self.machines: 183 + machine.release() 184 + # As we cannot `sys.exit` from another thread 185 + # We can at least force the main thread to get SIGTERM'ed. 186 + # This will prevent any user who caught all the exceptions 187 + # to swallow them and prevent itself from terminating. 188 + os.kill(os.getpid(), signal.SIGTERM) 189 190 def create_machine(self, args: Dict[str, Any]) -> Machine: 191 tmp_dir = get_tmp_dir()
+1
nixos/lib/testing-python.nix
··· 42 , nodes ? {} 43 , testScript 44 , enableOCR ? false 45 , name ? "unnamed" 46 , skipTypeCheck ? false 47 # Skip linting (mainly intended for faster dev cycles)
··· 42 , nodes ? {} 43 , testScript 44 , enableOCR ? false 45 + , globalTimeout ? (60 * 60) 46 , name ? "unnamed" 47 , skipTypeCheck ? false 48 # Skip linting (mainly intended for faster dev cycles)
+13
nixos/lib/testing/driver.nix
··· 94 wrapProgram $out/bin/nixos-test-driver \ 95 --set startScripts "''${vmStartScripts[*]}" \ 96 --set testScript "$out/test-script" \ 97 --set vlans '${toString vlans}' \ 98 ${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)} 99 ''; ··· 121 type = types.package; 122 default = hostPkgs.qemu_test; 123 defaultText = "hostPkgs.qemu_test"; 124 }; 125 126 enableOCR = mkOption {
··· 94 wrapProgram $out/bin/nixos-test-driver \ 95 --set startScripts "''${vmStartScripts[*]}" \ 96 --set testScript "$out/test-script" \ 97 + --set globalTimeout "${toString config.globalTimeout}" \ 98 --set vlans '${toString vlans}' \ 99 ${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)} 100 ''; ··· 122 type = types.package; 123 default = hostPkgs.qemu_test; 124 defaultText = "hostPkgs.qemu_test"; 125 + }; 126 + 127 + globalTimeout = mkOption { 128 + description = mdDoc '' 129 + A global timeout for the complete test, expressed in seconds. 130 + Beyond that timeout, every resource will be killed and released and the test will fail. 131 + 132 + By default, we use a 1 hour timeout. 133 + ''; 134 + type = types.int; 135 + default = 60 * 60; 136 + example = 10 * 60; 137 }; 138 139 enableOCR = mkOption {
+23 -13
nixos/lib/testing/run.nix
··· 16 ''; 17 }; 18 19 test = mkOption { 20 type = types.package; 21 # TODO: can the interactive driver be configured to access the network? ··· 29 }; 30 31 config = { 32 - test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used. 33 - derivation = hostPkgs.stdenv.mkDerivation { 34 - name = "vm-test-run-${config.name}"; 35 36 - requiredSystemFeatures = [ "kvm" "nixos-test" ]; 37 38 - buildCommand = '' 39 - mkdir -p $out 40 41 - # effectively mute the XMLLogger 42 - export LOGFILE=/dev/null 43 44 - ${config.driver}/bin/nixos-test-driver -o $out 45 - ''; 46 47 - passthru = config.passthru; 48 49 - meta = config.meta; 50 - }; 51 inherit (config) passthru meta; 52 }; 53
··· 16 ''; 17 }; 18 19 + rawTestDerivation = mkOption { 20 + type = types.package; 21 + description = mdDoc '' 22 + Unfiltered version of `test`, for troubleshooting the test framework and `testBuildFailure` in the test framework's test suite. 23 + This is not intended for general use. Use `test` instead. 24 + ''; 25 + internal = true; 26 + }; 27 + 28 test = mkOption { 29 type = types.package; 30 # TODO: can the interactive driver be configured to access the network? ··· 38 }; 39 40 config = { 41 + rawTestDerivation = hostPkgs.stdenv.mkDerivation { 42 + name = "vm-test-run-${config.name}"; 43 44 + requiredSystemFeatures = [ "kvm" "nixos-test" ]; 45 46 + buildCommand = '' 47 + mkdir -p $out 48 49 + # effectively mute the XMLLogger 50 + export LOGFILE=/dev/null 51 52 + ${config.driver}/bin/nixos-test-driver -o $out 53 + ''; 54 55 + passthru = config.passthru; 56 57 + meta = config.meta; 58 + }; 59 + test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used. 60 + derivation = config.rawTestDerivation; 61 inherit (config) passthru meta; 62 }; 63
+8
nixos/tests/all-tests.nix
··· 90 lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {}; 91 node-name = runTest ./nixos-test-driver/node-name.nix; 92 busybox = runTest ./nixos-test-driver/busybox.nix; 93 }; 94 95 # NixOS vm tests and non-vm unit tests
··· 90 lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {}; 91 node-name = runTest ./nixos-test-driver/node-name.nix; 92 busybox = runTest ./nixos-test-driver/busybox.nix; 93 + driver-timeout = pkgs.runCommand "ensure-timeout-induced-failure" { 94 + failed = pkgs.testers.testBuildFailure ((runTest ./nixos-test-driver/timeout.nix).config.rawTestDerivation); 95 + } '' 96 + grep -F "timeout reached; test terminating" $failed/testBuildFailure.log 97 + # The program will always be terminated by SIGTERM (143) if it waits for the deadline thread. 98 + [[ 143 = $(cat $failed/testBuildFailure.exit) ]] 99 + touch $out 100 + ''; 101 }; 102 103 # NixOS vm tests and non-vm unit tests
+15
nixos/tests/nixos-test-driver/timeout.nix
···
··· 1 + { 2 + name = "Test that sleep of 6 seconds fails a timeout of 5 seconds"; 3 + globalTimeout = 5; 4 + 5 + nodes = { 6 + machine = ({ pkgs, ... }: { 7 + }); 8 + }; 9 + 10 + testScript = '' 11 + start_all() 12 + machine.wait_for_unit("multi-user.target") 13 + machine.succeed("sleep 6") 14 + ''; 15 + }