From 76738f18e5ea44c467a0c659b2fa31e2ca283f1a Mon Sep 17 00:00:00 2001
From: Julien Cristau
Date: Thu, 9 Oct 2025 14:47:28 +0200
Subject: [PATCH] run-task: wait on child processes when we're pid 1.

Avoid leaving zombie processes around in the container while the task is
running.

Ported from gecko (bug 1967974)
---
 src/taskgraph/run-task/run-task | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/taskgraph/run-task/run-task b/src/taskgraph/run-task/run-task
index c446df247..98f9882fe 100755
--- a/src/taskgraph/run-task/run-task
+++ b/src/taskgraph/run-task/run-task
@@ -28,6 +28,7 @@ import socket
 import stat
 import subprocess
 import sys
+import threading
 import time
 import urllib.error
 import urllib.request
@@ -121,6 +122,17 @@ def print_line(prefix, m):
     sys.stdout.buffer.flush()
 
 
+def reap_zombies(main_subprocess):
+    """Block until main_subprocess exits, reaping any other child that exits meanwhile."""
+    while main_subprocess.poll() is None:
+        with main_subprocess._waitpid_lock:
+            if main_subprocess.returncode is not None:
+                return
+            pid, status = os.wait()
+            if pid == main_subprocess.pid:
+                main_subprocess._handle_exitstatus(status)
+
+
 def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
     """
     It's possible to see spurious errors on Windows due to various things
@@ -278,6 +290,13 @@ def run_command(prefix, args, *, extra_env=None, cwd=None):
 
     stdout = io.TextIOWrapper(p.stdout, encoding="latin1")
 
+    if os.getpid() == 1:
+        # as pid 1 (init inside docker) we adopt orphaned children, so reap them
+        reaper_thread = threading.Thread(target=reap_zombies, args=(p,))
+        reaper_thread.start()
+    else:
+        reaper_thread = None
+
     while True:
         data = stdout.readline().encode("latin1")
 
@@ -286,6 +305,8 @@ def run_command(prefix, args, *, extra_env=None, cwd=None):
 
         print_line(prefix, data)
 
+    if reaper_thread:
+        reaper_thread.join()
     return p.wait()
 
 