diff --git a/dk-installer.py b/dk-installer.py index 9200142..73ac16c 100755 --- a/dk-installer.py +++ b/dk-installer.py @@ -20,6 +20,7 @@ import re import secrets import shutil +import signal import socket import ssl import stat @@ -1179,6 +1180,15 @@ def run(self, parent=None): ("The Docker engine is not running.", "Start the Docker engine and try again."), label="Docker engine running", ) +REQ_DOCKER_COMPOSE = Requirement( + "DOCKER_COMPOSE", + ("docker", "compose", "version"), + ( + "The Docker Compose plugin is not available.", + "Install the Docker Compose plugin and try again.", + ), + label="Docker Compose installed", +) REQ_TESTGEN_IMAGE = Requirement( "TESTGEN_IMAGE", ("docker", "manifest", "inspect", "{image}"), @@ -1412,7 +1422,7 @@ def delete_compose_volumes(self, args): class ComposeDeleteAction(Action, ComposeActionMixin): args_cmd = "delete" - requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON] + requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] def execute(self, args): if self.get_compose_file_path(args).exists(): @@ -1675,7 +1685,7 @@ class ObsUpgradeAction(MultiStepAction, ComposeActionMixin): label = "Upgrade" title = "Upgrade Observability" args_cmd = "upgrade" - requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON] + requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] steps = [ ObsFetchCurrentVersionStep, @@ -1918,7 +1928,7 @@ class ObsInstallAction(AnalyticsMultiStepAction, ComposeActionMixin): intro_text = ["This process may take 5~15 minutes depending on your system resources and network speed."] args_cmd = "install" - requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON] + requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] def get_parser(self, sub_parsers): parser = super().get_parser(sub_parsers) @@ -2464,6 +2474,40 @@ def read_testgen_config_env() -> dict[str, str]: return config +def stop_app_tree(proc: subprocess.Popen, timeout: int = 10) -> None: + """Terminate ``proc`` and all of its descendants. + + Plain ``proc.terminate()`` only kills the parent — pixeltable-pgserver + spawns ``postgres`` children that get orphaned otherwise. Cross-platform: + on Windows we shell out to ``taskkill /F /T``; on POSIX we send SIGTERM + to the whole process group (the parent was started with + ``start_new_session=True``). + """ + if proc.poll() is not None: + return + if platform.system() == "Windows": + with contextlib.suppress(Exception): + subprocess.run( + ["taskkill", "/F", "/T", "/PID", str(proc.pid)], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0), + check=False, + ) + else: + with contextlib.suppress(Exception): + os.killpg(os.getpgid(proc.pid), signal.SIGTERM) + try: + proc.wait(timeout=timeout) + except subprocess.TimeoutExpired: + if platform.system() != "Windows": + with contextlib.suppress(Exception): + os.killpg(os.getpgid(proc.pid), signal.SIGKILL) + proc.kill() + with contextlib.suppress(subprocess.TimeoutExpired): + proc.wait(timeout=5) + + def start_testgen_app(action, args) -> None: """Start ``testgen run-app`` and block until the user interrupts. @@ -2496,15 +2540,17 @@ def start_testgen_app(action, args) -> None: stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, + # POSIX-only: put the parent in its own process group so we can + # signal the whole tree (postgres included) on shutdown. Windows + # gets the same effect via ``taskkill /T`` in ``stop_app_tree``. + start_new_session=(platform.system() != "Windows"), ) except FileNotFoundError as e: raise InstallerError(f"Could not start TestGen: {e}") from e try: if not wait_for_tcp_port(port, timeout=TESTGEN_APP_READY_TIMEOUT): - proc.terminate() - with contextlib.suppress(subprocess.TimeoutExpired): - proc.wait(timeout=5) + stop_app_tree(proc, timeout=5) raise InstallerError( f"TestGen did not start within {TESTGEN_APP_READY_TIMEOUT} seconds. " f"See {simplify_path(TESTGEN_LOG_FILE_PATH)} for details." @@ -2526,19 +2572,11 @@ def start_testgen_app(action, args) -> None: # Reset the cursor to column 0 — the terminal echoed `^C` mid-line. print("") CONSOLE.msg("Stopping TestGen...") - proc.terminate() - try: - proc.wait(timeout=10) - except subprocess.TimeoutExpired: - proc.kill() - proc.wait() + stop_app_tree(proc, timeout=10) CONSOLE.msg("TestGen stopped.") CONSOLE.msg(f"To start it again, {command_hint(args.prod, 'start', 'Start TestGen')}.") finally: - if proc.poll() is None: - proc.terminate() - with contextlib.suppress(subprocess.TimeoutExpired): - proc.wait(timeout=5) + stop_app_tree(proc, timeout=5) class UvToolUpgradeStep(Step): @@ -2668,7 +2706,7 @@ class TestgenInstallAction(ComposeActionMixin, AnalyticsMultiStepAction): "Installing TestGen with Docker Compose.", "This process may take 5~10 minutes depending on your system resources and network speed.", ] - docker_requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_TESTGEN_IMAGE] + docker_requirements = [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE, REQ_TESTGEN_IMAGE] args_cmd = "install" label = "Installation" @@ -2791,7 +2829,12 @@ def _auto_select_mode(self, args): CONSOLE.msg("[d] Docker Compose (Recommended)") CONSOLE.msg(" The most stable TestGen experience for persistent use.") CONSOLE.msg(" Provides a fully managed environment with an isolated PostgreSQL container.") - prereq_status = " ".join(f"{'(✓)' if ok else '(X)'} {req.label or req.key}" for req, ok in prereq_results) + # Hide REQ_DOCKER from the picker — REQ_DOCKER_COMPOSE failure implies + # the same fix, so showing both bloats the prereq line. The actual + # check below (and the per-prereq fail messages later) still uses all four. + prereq_status = " ".join( + f"{'(✓)' if ok else '(X)'} {req.label or req.key}" for req, ok in prereq_results if req is not REQ_DOCKER + ) CONSOLE.msg(f" Prerequisites: {prereq_status}") CONSOLE.space() CONSOLE.msg("[p] Pip + embedded PostgreSQL") @@ -2896,6 +2939,7 @@ def get_requirements(self, args): return [ REQ_DOCKER, REQ_DOCKER_DAEMON, + REQ_DOCKER_COMPOSE, Requirement( "TG_COMPOSE_FILE", ( @@ -2949,7 +2993,7 @@ def check_requirements(self, args): def get_requirements(self, args): if self._resolved_mode == INSTALL_MODE_DOCKER: - return [REQ_DOCKER, REQ_DOCKER_DAEMON] + return [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] return [] def _resolve_install_mode(self, args): @@ -3023,7 +3067,7 @@ def check_requirements(self, args): def get_requirements(self, args): if self._resolved_mode == INSTALL_MODE_DOCKER: - return [REQ_DOCKER, REQ_DOCKER_DAEMON] + return [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] return [] def _resolve_install_mode(self, args): @@ -3116,10 +3160,14 @@ def check_requirements(self, args): super().check_requirements(args) def get_requirements(self, args): - # Docker mode requires Docker. For pip mode, Docker is only needed when - # the user asked to export to Observability (the dk-demo container - # generates the export payload). - if self._resolved_mode == INSTALL_MODE_DOCKER or getattr(args, "obs_export", False): + # Docker mode requires Docker + Compose (we shell into the engine + # container via ``docker compose exec``). For pip mode, Docker is only + # needed when the user asked to export to Observability — the dk-demo + # container that generates the export payload runs via ``docker run``, + # so Compose isn't required there. + if self._resolved_mode == INSTALL_MODE_DOCKER: + return [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] + if getattr(args, "obs_export", False): return [REQ_DOCKER, REQ_DOCKER_DAEMON] return [] @@ -3190,9 +3238,13 @@ def check_requirements(self, args): super().check_requirements(args) def get_requirements(self, args): - # Docker mode requires Docker. For pip mode, the dk-demo container - # call below is wrapped in try/except so Docker absence is non-fatal. - return [REQ_DOCKER, REQ_DOCKER_DAEMON] if self._resolved_mode == INSTALL_MODE_DOCKER else [] + # Docker mode requires Docker + Compose (we shell into the engine + # container via ``docker compose exec``). For pip mode, the dk-demo + # container call below is wrapped in try/except so Docker absence is + # non-fatal. + if self._resolved_mode == INSTALL_MODE_DOCKER: + return [REQ_DOCKER, REQ_DOCKER_DAEMON, REQ_DOCKER_COMPOSE] + return [] def _resolve_install_mode(self, args): # Like delete: idempotent, so "no install" returns rather than aborts. diff --git a/tests/conftest.py b/tests/conftest.py index c57139c..3d57a0d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,6 +17,22 @@ def _no_real_browser_launch(): yield mock +@pytest.fixture(autouse=True) +def _no_real_process_group_signals(): + """Belt-and-suspenders: stop ``stop_app_tree`` from accidentally signalling + a real process group when a Popen-mocked test exercises it. ``MagicMock``'s + auto ``__index__`` returns 1, so ``os.getpgid(mock_proc.pid)`` resolves to + pgid 1 (init) and ``os.killpg(1, SIGTERM)`` from a root container actually + signals init → CI runner shutdown. Tests that need to assert on these + explicitly override the patches inside their own ``with patch(...)``. + """ + with ( + patch("tests.installer.os.killpg") as killpg_mock, + patch("tests.installer.os.getpgid", return_value=99999), + ): + yield killpg_mock + + @pytest.fixture def stdout_mock(): return Mock(return_value=[]) diff --git a/tests/test_tg_pip_install.py b/tests/test_tg_pip_install.py index 399026f..36d6350 100644 --- a/tests/test_tg_pip_install.py +++ b/tests/test_tg_pip_install.py @@ -204,9 +204,9 @@ def test_auto_mode_picks_pip_when_docker_unavailable(install_action, args_mock, @pytest.mark.integration def test_auto_mode_displays_prereq_status_when_docker_unavailable(install_action, args_mock, console_msg_mock): """Docker probe fails → the prereq display lists each requirement with a marker and (for failures) a fix hint.""" - # Only the first prereq passes — exercises the mixed pass/fail rendering. + # Only the Compose plugin probe passes — exercises the mixed pass/fail rendering. def selective_check(req_self, *_, **__): - return req_self.key == "DOCKER" + return req_self.key == "DOCKER_COMPOSE" with ( patch("tests.installer.Requirement.check_availability", autospec=True, side_effect=selective_check), @@ -216,8 +216,12 @@ def selective_check(req_self, *_, **__): console_msg_mock.assert_any_msg_contains("two installation modes") console_msg_mock.assert_any_msg_contains("Prerequisites:") - console_msg_mock.assert_any_msg_contains("(✓) Docker installed") + console_msg_mock.assert_any_msg_contains("(✓) Docker Compose installed") console_msg_mock.assert_any_msg_contains("(X) Docker engine running") + # REQ_DOCKER is checked but not displayed — REQ_DOCKER_COMPOSE failure + # implies the same fix, so the picker hides it to keep the line short. + rendered = " ".join(call.args[0] for call in console_msg_mock.call_args_list if call.args) + assert "Docker installed" not in rendered @pytest.mark.integration diff --git a/tests/test_tg_start.py b/tests/test_tg_start.py index f6d2315..ecd86ba 100644 --- a/tests/test_tg_start.py +++ b/tests/test_tg_start.py @@ -10,6 +10,7 @@ InstallerError, TestgenStartAction, start_testgen_app, + stop_app_tree, InstallMarker, ) @@ -93,41 +94,114 @@ def test_start_testgen_app_aborts_on_port_timeout(app_action, args_mock, proc_ru patch("tests.installer.resolve_testgen_path", return_value="/bin/testgen"), patch("tests.installer.subprocess.Popen", return_value=proc_running_then_stops), patch("tests.installer.wait_for_tcp_port", return_value=False), + patch("tests.installer.stop_app_tree") as stop_mock, pytest.raises(InstallerError, match="did not start within"), ): start_testgen_app(app_action, args_mock) - proc_running_then_stops.terminate.assert_called() + stop_mock.assert_called_with(proc_running_then_stops, timeout=5) @pytest.mark.unit def test_start_testgen_app_handles_keyboard_interrupt(app_action, args_mock, console_msg_mock, empty_tg_config): - """User Ctrl+C during run is the expected stop signal — terminate cleanly, - don't propagate the exception, and hint at the start command for next time.""" + """User Ctrl+C during run is the expected stop signal — kill the whole + process tree (postgres + parent), don't propagate the exception, and hint + at the start command for next time.""" args_mock.prod = "tg" proc = MagicMock() - # poll() returns None while alive; after terminate() it transitions to 0. proc.poll.return_value = None - - def _on_terminate(): - proc.poll.return_value = 0 - - proc.terminate.side_effect = _on_terminate proc.wait.side_effect = [KeyboardInterrupt(), 0] with ( patch("tests.installer.resolve_testgen_path", return_value="/bin/testgen"), patch("tests.installer.subprocess.Popen", return_value=proc), patch("tests.installer.wait_for_tcp_port", return_value=True), + patch("tests.installer.stop_app_tree") as stop_mock, ): start_testgen_app(app_action, args_mock) - proc.terminate.assert_called() + # Called once for the keyboard-interrupt branch (timeout=10) and again in + # the ``finally`` cleanup (timeout=5; no-op since proc already stopped). + assert stop_mock.call_args_list[0].args[0] is proc + assert stop_mock.call_args_list[0].kwargs == {"timeout": 10} console_msg_mock.assert_any_msg_contains("TestGen stopped") console_msg_mock.assert_any_msg_contains("tg start") +# --- stop_app_tree ------------------------------------------------------------ + + +@pytest.mark.unit +def test_stop_app_tree_no_op_when_proc_already_exited(): + proc = MagicMock() + proc.poll.return_value = 0 # already exited + + with patch("tests.installer.subprocess.run") as run_mock: + stop_app_tree(proc) + + run_mock.assert_not_called() + proc.wait.assert_not_called() + + +@pytest.mark.unit +def test_stop_app_tree_windows_uses_taskkill_tree(): + proc = MagicMock() + proc.poll.return_value = None + proc.pid = 4242 + proc.wait.return_value = 0 + + with ( + patch("tests.installer.platform.system", return_value="Windows"), + patch("tests.installer.subprocess.run") as run_mock, + ): + stop_app_tree(proc, timeout=3) + + cmd = run_mock.call_args.args[0] + assert cmd[:4] == ["taskkill", "/F", "/T", "/PID"] + assert cmd[4] == "4242" + proc.wait.assert_called_with(timeout=3) + + +@pytest.mark.unit +def test_stop_app_tree_posix_signals_process_group(): + proc = MagicMock() + proc.poll.return_value = None + proc.pid = 4242 + proc.wait.return_value = 0 + + with ( + patch("tests.installer.platform.system", return_value="Linux"), + patch("tests.installer.os.killpg") as killpg_mock, + patch("tests.installer.os.getpgid", return_value=4242), + ): + stop_app_tree(proc, timeout=3) + + killpg_mock.assert_called_once() + assert killpg_mock.call_args.args[0] == 4242 # pgid + proc.wait.assert_called_with(timeout=3) + + +@pytest.mark.unit +def test_stop_app_tree_falls_through_to_kill_on_timeout(): + """If SIGTERM doesn't take, escalate to SIGKILL / proc.kill().""" + import subprocess as sp + + proc = MagicMock() + proc.poll.return_value = None + proc.pid = 4242 + proc.wait.side_effect = [sp.TimeoutExpired(cmd="x", timeout=1), 0] + + with ( + patch("tests.installer.platform.system", return_value="Linux"), + patch("tests.installer.os.killpg"), + patch("tests.installer.os.getpgid", return_value=4242), + ): + stop_app_tree(proc, timeout=1) + + proc.kill.assert_called_once() + + # --- TestgenStartAction ------------------------------------------------------- diff --git a/tests/test_tg_upgrade.py b/tests/test_tg_upgrade.py index 67cae08..dfa5f31 100644 --- a/tests/test_tg_upgrade.py +++ b/tests/test_tg_upgrade.py @@ -77,7 +77,7 @@ def set_version_check_mock(version_check_mock, latest_version): @pytest.mark.integration def test_tg_upgrade_compose_missing(tg_upgrade_action, args_mock, start_cmd_mock, console_msg_mock): - start_cmd_mock.__exit__.side_effect = [None, None, CommandFailed] + start_cmd_mock.__exit__.side_effect = [None, None, None, CommandFailed] with pytest.raises(AbortAction, match=""): tg_upgrade_action.check_requirements(args_mock)