From c9247d6695fb310e325ef2b8585709f0274a40b0 Mon Sep 17 00:00:00 2001 From: Wen Date: Sun, 31 May 2026 04:38:21 -0700 Subject: [PATCH 1/4] test(evm): cap child_process.exec in lib.js to surface stalled commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Autobahn EVM integration matrix has hit multiple 30-minute job timeouts where hardhat went silent between test files with no error. The orphan-process listing at cleanup consistently shows a docker exec (running `seid tx evm send` or similar) still alive — `child_process.exec` has no timeout by default, so a stalled CLI invocation eats the entire job budget instead of surfacing as a clear error. Add a default 60s timeout to execCommand with a SIGKILL kill signal. On expiry, throw an error containing the offending command so the next occurrence is a 60s actionable error instead of a 30-minute mystery. Override via EXEC_TIMEOUT_MS for tests that legitimately need longer. Co-Authored-By: Claude Opus 4.7 (1M context) --- contracts/test/lib.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/contracts/test/lib.js b/contracts/test/lib.js index 5fc378cd7e..0d8c769708 100644 --- a/contracts/test/lib.js +++ b/contracts/test/lib.js @@ -1032,9 +1032,22 @@ async function execute(command, interaction=`printf "12345678\\n"`){ } function execCommand(command) { + // Cap shelled-out child processes (typically `docker exec ... seid ...`) + // with a timeout so an indefinite stall surfaces as an error with the + // command text, instead of consuming the entire job's wall-clock budget. + // The Autobahn EVM matrix has hit multiple 30-minute job timeouts where + // hardhat went silent between test files; suspect path is a stalled + // child here. Override via EXEC_TIMEOUT_MS for tests that legitimately + // need longer. + const timeoutMs = Number(process.env.EXEC_TIMEOUT_MS || 60000) return new Promise((resolve, reject) => { - exec(command, (error, stdout, stderr) => { + exec(command, { timeout: timeoutMs, killSignal: "SIGKILL" }, (error, stdout, stderr) => { if (error) { + if (error.killed || error.signal) { + const summary = command.length > 200 ? command.slice(0, 197) + "..." : command + reject(new Error(`execCommand timed out after ${timeoutMs}ms: ${summary}`)) + return + } reject(error); return; } From c1920c4bb2722c93e82e947b7f676b14a57b7edb Mon Sep 17 00:00:00 2001 From: Wen Date: Sun, 31 May 2026 20:27:09 -0400 Subject: [PATCH 2/4] test(evm): narrow timeout attribution to error.killed only Per cursor bugbot: error.signal is set on any signal-terminated process (external OOM kill, runner cleanup, etc.), not just timeout kills. error.killed is the precise Node-set-the-killer signal. Narrowing the condition prevents non-timeout signal deaths from being mis-reported as "execCommand timed out". Co-Authored-By: Claude Opus 4.7 (1M context) --- contracts/test/lib.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/contracts/test/lib.js b/contracts/test/lib.js index 0d8c769708..fc05139860 100644 --- a/contracts/test/lib.js +++ b/contracts/test/lib.js @@ -1043,7 +1043,11 @@ function execCommand(command) { return new Promise((resolve, reject) => { exec(command, { timeout: timeoutMs, killSignal: "SIGKILL" }, (error, stdout, stderr) => { if (error) { - if (error.killed || error.signal) { + // error.killed is set only when Node killed the child via the + // timeout option; signal-without-killed (external SIGKILL, + // OOM, runner cleanup) is a different failure and shouldn't + // be mis-attributed as a timeout. + if (error.killed) { const summary = command.length > 200 ? command.slice(0, 197) + "..." : command reject(new Error(`execCommand timed out after ${timeoutMs}ms: ${summary}`)) return From 74a7c746e95e4803b36b33c9874e7f5a874778bb Mon Sep 17 00:00:00 2001 From: Wen Date: Mon, 1 Jun 2026 13:39:12 -0400 Subject: [PATCH 3/4] test(evm): exclude maxBuffer overflow from timeout attribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per cursor bugbot: Node sets error.killed=true for two cases — the timeout kill we want to attribute, and a maxBuffer overflow which also has error.code = 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER'. Gate the timeout branch on !error.code so a buffer overflow falls through to its original error instead of being mis-reported as a timeout. Co-Authored-By: Claude Opus 4.7 (1M context) --- contracts/test/lib.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/contracts/test/lib.js b/contracts/test/lib.js index fc05139860..40cb585dfc 100644 --- a/contracts/test/lib.js +++ b/contracts/test/lib.js @@ -1043,11 +1043,15 @@ function execCommand(command) { return new Promise((resolve, reject) => { exec(command, { timeout: timeoutMs, killSignal: "SIGKILL" }, (error, stdout, stderr) => { if (error) { - // error.killed is set only when Node killed the child via the - // timeout option; signal-without-killed (external SIGKILL, - // OOM, runner cleanup) is a different failure and shouldn't - // be mis-attributed as a timeout. - if (error.killed) { + // Node sets error.killed=true for the two kinds of kill it + // initiates: the timeout we configured, and a maxBuffer + // overflow. Only the former leaves error.code unset; the + // latter sets it to 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER'. + // Gate on !error.code so a buffer overflow isn't mis- + // attributed as a timeout, and so external signal deaths + // (OOM, runner cleanup — error.killed=false) keep their + // original error. + if (error.killed && !error.code) { const summary = command.length > 200 ? command.slice(0, 197) + "..." : command reject(new Error(`execCommand timed out after ${timeoutMs}ms: ${summary}`)) return From 8ec7aa0fcd8f386492d2aca20264bf38d23c1919 Mon Sep 17 00:00:00 2001 From: Wen Date: Tue, 2 Jun 2026 16:13:55 -0400 Subject: [PATCH 4/4] test(evm): include the full command text in the timeout error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 200-char cap was an arbitrary defensive choice in the initial commit, not a bot-driven change. Realistic docker-exec-seid invocations in this codebase typically run 250-350 chars before any encoded calldata, so the cap truncated exactly the args needed to identify which command stalled — defeating the diagnostic the timeout exists to provide. Test code, no persistent storage, no risk surface to a long error string. Co-Authored-By: Claude Opus 4.7 (1M context) --- contracts/test/lib.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contracts/test/lib.js b/contracts/test/lib.js index 40cb585dfc..d86c6eddb0 100644 --- a/contracts/test/lib.js +++ b/contracts/test/lib.js @@ -1052,8 +1052,7 @@ function execCommand(command) { // (OOM, runner cleanup — error.killed=false) keep their // original error. if (error.killed && !error.code) { - const summary = command.length > 200 ? command.slice(0, 197) + "..." : command - reject(new Error(`execCommand timed out after ${timeoutMs}ms: ${summary}`)) + reject(new Error(`execCommand timed out after ${timeoutMs}ms: ${command}`)) return } reject(error);