···7777 help="vlans to span by the driver",
7878 )
7979 arg_parser.add_argument(
8080+ "--global-timeout",
8181+ type=int,
8282+ metavar="GLOBAL_TIMEOUT",
8383+ action=EnvDefault,
8484+ envvar="globalTimeout",
8585+ help="Timeout in seconds for the whole test",
8686+ )
8787+ arg_parser.add_argument(
8088 "-o",
8189 "--output_directory",
8290 help="""The path to the directory where outputs copied from the VM will be placed.
···103111 args.testscript.read_text(),
104112 args.output_directory.resolve(),
105113 args.keep_vm_state,
114114+ args.global_timeout,
106115 ) as driver:
107116 if args.interactive:
108117 history_dir = os.getcwd()
+25
nixos/lib/test-driver/test_driver/driver.py
···11import os
22import re
33+import signal
34import tempfile
55+import threading
46from contextlib import contextmanager
57from pathlib import Path
68from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union
···4143 vlans: List[VLan]
4244 machines: List[Machine]
4345 polling_conditions: List[PollingCondition]
4646+ global_timeout: int
4747+ race_timer: threading.Timer
44484549 def __init__(
4650 self,
···4953 tests: str,
5054 out_dir: Path,
5155 keep_vm_state: bool = False,
5656+ global_timeout: int = 24 * 60 * 60 * 7,
5257 ):
5358 self.tests = tests
5459 self.out_dir = out_dir
6060+ self.global_timeout = global_timeout
6161+ self.race_timer = threading.Timer(global_timeout, self.terminate_test)
55625663 tmp_dir = get_tmp_dir()
5764···82898390 def __exit__(self, *_: Any) -> None:
8491 with rootlog.nested("cleanup"):
9292+ self.race_timer.cancel()
8593 for machine in self.machines:
8694 machine.release()
8795···144152145153 def run_tests(self) -> None:
146154 """Run the test script (for non-interactive test runs)"""
155155+ rootlog.info(
156156+ f"Test will time out and terminate in {self.global_timeout} seconds"
157157+ )
158158+ self.race_timer.start()
147159 self.test_script()
148160 # TODO: Collect coverage data
149161 for machine in self.machines:
···161173 with rootlog.nested("wait for all VMs to finish"):
162174 for machine in self.machines:
163175 machine.wait_for_shutdown()
176176+ self.race_timer.cancel()
def terminate_test(self) -> None:
    """Abort the test run once the global timeout has expired.

    This is the callback of ``self.race_timer`` (a ``threading.Timer``),
    so it usually runs in a different thread than the one executing the
    test script. It releases every machine and then sends SIGTERM to the
    current process.

    The signal is sent from a ``finally`` clause: if releasing a machine
    raises, the exception would otherwise end the timer thread before the
    signal is sent, and the timeout would never stop the test.
    """
    with rootlog.nested("timeout reached; test terminating..."):
        try:
            for machine in self.machines:
                machine.release()
        finally:
            # We cannot `sys.exit` from another thread, but we can at
            # least force the process to receive SIGTERM. This also
            # prevents a test script that catches every exception from
            # swallowing the termination and running on regardless.
            os.kill(os.getpid(), signal.SIGTERM)
164189165190 def create_machine(self, args: Dict[str, Any]) -> Machine:
166191 tmp_dir = get_tmp_dir()