nixos-rebuild-ng: kill underlying remote process (#403436)

authored by Thiago Kenji Okada and committed by GitHub b59a398f c2e815c7

+113 -16
+2 -2
pkgs/by-name/ni/nixos-rebuild-ng/src/nixos_rebuild/__init__.py
··· 5 5 import sys 6 6 from pathlib import Path 7 7 from subprocess import CalledProcessError, run 8 - from typing import assert_never 8 + from typing import Final, assert_never 9 9 10 10 from . import nix, tmpdir 11 11 from .constants import EXECUTABLE, WITH_NIX_2_18, WITH_REEXEC, WITH_SHELL_FILES ··· 13 13 from .process import Remote, cleanup_ssh 14 14 from .utils import Args, LogFormatter, tabulate 15 15 16 - logger = logging.getLogger() 16 + logger: Final = logging.getLogger() 17 17 logger.setLevel(logging.INFO) 18 18 19 19
+6 -4
pkgs/by-name/ni/nixos-rebuild-ng/src/nixos_rebuild/constants.py
··· 1 + from typing import Final 2 + 1 3 # Build-time flags 2 4 # Use strings to avoid breaking standalone (e.g.: `python -m nixos_rebuild`) 3 5 # usage 4 - EXECUTABLE = "@executable@" 6 + EXECUTABLE: Final[str] = "@executable@" 5 7 # Use either `== "true"` if the default (e.g.: `python -m nixos_rebuild`) is 6 8 # `False` or `!= "false"` if the default is `True` 7 - WITH_NIX_2_18 = "@withNix218@" != "false" # type: ignore 8 - WITH_REEXEC = "@withReexec@" == "true" # type: ignore 9 - WITH_SHELL_FILES = "@withShellFiles@" == "true" # type: ignore 9 + WITH_NIX_2_18: Final[bool] = "@withNix218@" != "false" 10 + WITH_REEXEC: Final[bool] = "@withReexec@" == "true" 11 + WITH_SHELL_FILES: Final[bool] = "@withShellFiles@" == "true"
+1 -1
pkgs/by-name/ni/nixos-rebuild-ng/src/nixos_rebuild/nix.py
··· 45 45 "--service-type=exec", 46 46 "--unit=nixos-rebuild-switch-to-configuration", 47 47 ] 48 - logger = logging.getLogger(__name__) 48 + logger: Final = logging.getLogger(__name__) 49 49 50 50 51 51 def build(
+73 -9
pkgs/by-name/ni/nixos-rebuild-ng/src/nixos_rebuild/process.py
··· 1 1 import atexit 2 2 import logging 3 3 import os 4 + import re 4 5 import shlex 5 6 import subprocess 6 7 from collections.abc import Sequence ··· 10 11 11 12 from . import tmpdir 12 13 13 - logger = logging.getLogger(__name__) 14 + logger: Final = logging.getLogger(__name__) 14 15 15 16 SSH_DEFAULT_OPTS: Final = [ 16 17 "-o", ··· 20 21 "-o", 21 22 "ControlPersist=60", 22 23 ] 24 + 25 + type Args = Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes]] 23 26 24 27 25 28 @dataclass(frozen=True) ··· 82 85 83 86 84 87 def run_wrapper( 85 - args: Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes]], 88 + args: Args, 86 89 *, 87 90 check: bool = True, 88 91 extra_env: dict[str, str] | None = None, ··· 93 96 "Wrapper around `subprocess.run` that supports extra functionality." 94 97 env = None 95 98 process_input = None 99 + run_args = args 100 + 96 101 if remote: 97 102 if extra_env: 98 103 extra_env_args = [f"{env}={value}" for env, value in extra_env.items()] ··· 103 108 process_input = remote.sudo_password + "\n" 104 109 else: 105 110 args = ["sudo", *args] 106 - args = [ 111 + run_args = [ 107 112 "ssh", 108 113 *remote.opts, 109 114 *SSH_DEFAULT_OPTS, ··· 119 124 if extra_env: 120 125 env = os.environ | extra_env 121 126 if sudo: 122 - args = ["sudo", *args] 127 + run_args = ["sudo", *run_args] 123 128 124 129 logger.debug( 125 130 "calling run with args=%r, kwargs=%r, extra_env=%r", 126 - args, 131 + run_args, 127 132 kwargs, 128 133 extra_env, 129 134 ) 130 135 131 136 try: 132 137 r = subprocess.run( 133 - args, 138 + run_args, 134 139 check=check, 135 140 env=env, 136 141 input=process_input, 137 - # Hope nobody is using NixOS with non-UTF8 encodings, but "surrogateescape" 138 - # should still work in those systems. 142 + # Hope nobody is using NixOS with non-UTF8 encodings, but 143 + # "surrogateescape" should still work in those systems. 
139 144 text=True, 140 145 errors="surrogateescape", 141 146 **kwargs, 142 147 ) 143 148 144 149 if kwargs.get("capture_output") or kwargs.get("stderr") or kwargs.get("stdout"): 145 - logger.debug("captured output stdout=%r, stderr=%r", r.stdout, r.stderr) 150 + logger.debug( 151 + "captured output with stdout=%r, stderr=%r", r.stdout, r.stderr 152 + ) 146 153 147 154 return r 155 + except KeyboardInterrupt: 156 + # sudo commands are activation only and unlikely to be long running 157 + if remote and not sudo: 158 + _kill_long_running_ssh_process(args, remote) 159 + raise 148 160 except subprocess.CalledProcessError: 149 161 if sudo and remote and remote.sudo_password is None: 150 162 logger.error( ··· 152 164 + "--ask-sudo-password?" 153 165 ) 154 166 raise 167 + 168 + 169 + # SSH does not send the signals to the process when running without usage of 170 + # pseudo-TTY (that causes a whole other can of worms), so if the process is 171 + # long running (e.g.: a build) this will result in the underlying process 172 + # staying alive. 
# See: https://stackoverflow.com/a/44354466
# Issue: https://github.com/NixOS/nixpkgs/issues/403269
def _kill_long_running_ssh_process(args: Args, remote: Remote) -> None:
    """Send SIGINT to the remote process that was started with `args`.

    Runs `pkill --signal SIGINT --full -- <pattern>` on `remote.host` through
    `ssh`, where `<pattern>` is the shell-joined and regex-escaped command
    line built from `args`. A warning is logged whenever the clean-up could
    not be confirmed: `pkill` exited with a non-zero status, the clean-up
    itself was interrupted, or the local `ssh` invocation failed.

    Args:
        args: The command (without the `ssh` prefix) that was executed on the
            remote host.
        remote: The remote where the command was executed.

    Raises:
        KeyboardInterrupt: If the user interrupts the clean-up itself.
        Exception: Any error raised by `subprocess.run` (e.g. `OSError`) is
            propagated unchanged after the warning is logged.
    """
    logger.info("cleaning-up remote process, please wait...")

    # We need to escape both the shell and regex here (since pkill interprets
    # its arguments as regex).
    # `os.fsdecode` is used instead of `str` so `bytes` arguments are decoded
    # to their real text instead of their "b'...'" representation, which would
    # never match the remote command line.
    quoted_args = re.escape(shlex.join(os.fsdecode(a) for a in args))
    logger.debug("killing remote process using pkill with args=%r", quoted_args)

    # Assume failure until pkill reports success. This way the `finally`
    # clause never needs to touch the result of `subprocess.run`, which is
    # unbound if the call raises (KeyboardInterrupt, OSError, ...).
    cleanup_failed = True

    try:
        r = subprocess.run(
            [
                "ssh",
                *remote.opts,
                *SSH_DEFAULT_OPTS,
                remote.host,
                "--",
                "pkill",
                "--signal",
                "SIGINT",
                "--full",
                "--",
                quoted_args,
            ],
            check=False,
            capture_output=True,
            text=True,
        )
        logger.debug(
            "remote pkill captured output with stdout=%r, stderr=%r, returncode=%s",
            r.stdout,
            r.stderr,
            r.returncode,
        )
        cleanup_failed = r.returncode != 0
    finally:
        # Runs on both the normal and the exceptional path; any in-flight
        # exception keeps propagating after the warning is emitted.
        if cleanup_failed:
            logger.warning(
                "could not clean-up remote process, the command %s may still be running in host '%s'",
                args,
                remote.host,
            )
+31
pkgs/by-name/ni/nixos-rebuild-ng/src/tests/test_process.py
··· 96 96 ) 97 97 98 98 99 + @patch(get_qualified_name(p.subprocess.run), autospec=True) 100 + def test__kill_long_running_ssh_process(mock_run: Any) -> None: 101 + p._kill_long_running_ssh_process( 102 + [ 103 + "nix", 104 + "--extra-experimental-features", 105 + "nix-command flakes", 106 + "build", 107 + "/nix/store/la0c8nmpr9xfclla0n4f3qq9iwgdrq4g-nixos-system-sankyuu-nixos-25.05.20250424.f771eb4.drv^*", 108 + ], 109 + m.Remote("user@localhost", opts=[], sudo_password=None), 110 + ) 111 + mock_run.assert_called_with( 112 + [ 113 + "ssh", 114 + *p.SSH_DEFAULT_OPTS, 115 + "user@localhost", 116 + "--", 117 + "pkill", 118 + "--signal", 119 + "SIGINT", 120 + "--full", 121 + "--", 122 + r"nix\ \-\-extra\-experimental\-features\ 'nix\-command\ flakes'\ build\ '/nix/store/la0c8nmpr9xfclla0n4f3qq9iwgdrq4g\-nixos\-system\-sankyuu\-nixos\-25\.05\.20250424\.f771eb4\.drv\^\*'", 123 + ], 124 + check=False, 125 + capture_output=True, 126 + text=True, 127 + ) 128 + 129 + 99 130 def test_remote_from_name(monkeypatch: MonkeyPatch) -> None: 100 131 monkeypatch.setenv("NIX_SSHOPTS", "") 101 132 assert m.Remote.from_arg("user@localhost", None, False) == m.Remote(