From 82a99927ded5ba4823c391d4b83c386537d3b892 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 12:46:43 +0100 Subject: [PATCH 01/16] use systemd --- template/code-interpreter.service | 14 ++++++++++++++ template/jupyter.service | 13 +++++++++++++ template/start-up.sh | 21 ++------------------- template/template.py | 10 ++++++++++ 4 files changed, 39 insertions(+), 19 deletions(-) create mode 100644 template/code-interpreter.service create mode 100644 template/jupyter.service diff --git a/template/code-interpreter.service b/template/code-interpreter.service new file mode 100644 index 00000000..26100d9a --- /dev/null +++ b/template/code-interpreter.service @@ -0,0 +1,14 @@ +[Unit] +Description=Code Interpreter Server +After=jupyter.service +Requires=jupyter.service + +[Service] +Type=simple +WorkingDirectory=/root/.server +ExecStartPre=/bin/bash -c 'until curl -sf http://localhost:8888/api/status > /dev/null 2>&1; do sleep 0.1; done' +ExecStart=/root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640 +Restart=on-failure + +[Install] +WantedBy=multi-user.target diff --git a/template/jupyter.service b/template/jupyter.service new file mode 100644 index 00000000..896b5d0c --- /dev/null +++ b/template/jupyter.service @@ -0,0 +1,13 @@ +[Unit] +Description=Jupyter Server + +[Service] +Type=simple +Environment=MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc +ExecStart=/usr/local/bin/jupyter server --IdentityProvider.token="" +Restart=on-failure +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/template/start-up.sh b/template/start-up.sh index d30cfc9e..de3922be 100755 --- a/template/start-up.sh +++ b/template/start-up.sh @@ -1,22 +1,5 @@ #!/bin/bash -function start_jupyter_server() { - counter=0 - response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status") - while [[ ${response} -ne 200 ]]; do - let counter++ - if ((counter % 20 == 0)); then - echo "Waiting for Jupyter Server to start..." - sleep 0.1 - fi - - response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status") - done - - cd /root/.server/ - .venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640 -} - echo "Starting Code Interpreter server..." -start_jupyter_server & -MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc jupyter server --IdentityProvider.token="" >/dev/null 2>&1 +systemctl daemon-reload +systemctl start jupyter code-interpreter diff --git a/template/template.py b/template/template.py index 7fb2f1e0..f5e24882 100644 --- a/template/template.py +++ b/template/template.py @@ -38,6 +38,7 @@ def make_template( "sudo", "fonts-noto-cjk", "ca-certificates", + "systemd", ] ) .run_cmd("curl -fsSL https://deb.nodesource.com/setup_20.x | bash -") @@ -126,6 +127,15 @@ def make_template( .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") .copy("startup_scripts", ".ipython/profile_default/startup") + # Install systemd service units + .copy("jupyter.service", ".jupyter/jupyter.service") + .copy("code-interpreter.service", ".jupyter/code-interpreter.service") + .run_cmd( + [ + "cp /root/.jupyter/jupyter.service /etc/systemd/system/", + "cp /root/.jupyter/code-interpreter.service /etc/systemd/system/", + ] + ) ) if is_docker: From c36ff2959f5237a91fd427dd151888a80deebf40 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 13:07:49 +0100 Subject: [PATCH 02/16] switched to supervisord --- template/code-interpreter.service | 14 -------------- template/jupyter.service | 13 ------------- template/start-code-interpreter.sh | 9 +++++++++ template/start-up.sh | 3 +-- template/supervisord.conf | 24 ++++++++++++++++++++++++ template/template.py | 14 ++++---------- 6 files changed, 38 insertions(+), 39 deletions(-) delete mode 100644 template/code-interpreter.service delete mode 100644 template/jupyter.service create mode 100755 template/start-code-interpreter.sh create mode 100644 template/supervisord.conf diff --git a/template/code-interpreter.service b/template/code-interpreter.service deleted file mode 100644 index 26100d9a..00000000 --- a/template/code-interpreter.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=Code Interpreter Server -After=jupyter.service -Requires=jupyter.service - -[Service] -Type=simple -WorkingDirectory=/root/.server -ExecStartPre=/bin/bash -c 'until curl -sf http://localhost:8888/api/status > /dev/null 2>&1; do sleep 0.1; done' -ExecStart=/root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640 -Restart=on-failure - -[Install] -WantedBy=multi-user.target diff --git a/template/jupyter.service b/template/jupyter.service deleted file mode 100644 index 896b5d0c..00000000 --- a/template/jupyter.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=Jupyter Server - -[Service] -Type=simple -Environment=MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc -ExecStart=/usr/local/bin/jupyter server --IdentityProvider.token="" -Restart=on-failure -StandardOutput=journal -StandardError=journal - -[Install] -WantedBy=multi-user.target diff --git a/template/start-code-interpreter.sh b/template/start-code-interpreter.sh new file mode 100755 index 00000000..5d46a439 --- /dev/null +++ b/template/start-code-interpreter.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "Waiting for Jupyter server to be ready..." +until curl -s -o /dev/null -w '%{http_code}' http://localhost:8888/api/status | grep -q '200'; do + sleep 0.5 +done +echo "Jupyter server is ready, starting Code Interpreter..." + +exec /root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640 diff --git a/template/start-up.sh b/template/start-up.sh index de3922be..b0c9c691 100755 --- a/template/start-up.sh +++ b/template/start-up.sh @@ -1,5 +1,4 @@ #!/bin/bash echo "Starting Code Interpreter server..." -systemctl daemon-reload -systemctl start jupyter code-interpreter +supervisord -c /etc/supervisord.conf diff --git a/template/supervisord.conf b/template/supervisord.conf new file mode 100644 index 00000000..4c42fcdf --- /dev/null +++ b/template/supervisord.conf @@ -0,0 +1,24 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisord.log +pidfile=/var/run/supervisord.pid + +[program:jupyter] +command=/usr/local/bin/jupyter server --IdentityProvider.token="" +environment=MATPLOTLIBRC="/root/.config/matplotlib/.matplotlibrc" +stdout_logfile=/dev/null +stderr_logfile=/dev/fd/1 +stderr_logfile_maxbytes=0 +autorestart=true +priority=10 + +[program:code-interpreter] +command=/root/.jupyter/start-code-interpreter.sh +directory=/root/.server +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/fd/1 +stderr_logfile_maxbytes=0 +autorestart=true +priority=20 +startsecs=0 diff --git a/template/template.py b/template/template.py index f5e24882..faef6f0f 100644 --- a/template/template.py +++ b/template/template.py @@ -38,7 +38,7 @@ def make_template( "sudo", "fonts-noto-cjk", "ca-certificates", - "systemd", + "supervisor", ] ) .run_cmd("curl -fsSL https://deb.nodesource.com/setup_20.x | bash -") @@ -122,20 +122,14 @@ def make_template( template = ( template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc") .copy("start-up.sh", ".jupyter/start-up.sh") + .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") .run_cmd("chmod +x .jupyter/start-up.sh") .copy("jupyter_server_config.py", ".jupyter/") .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") .copy("startup_scripts", ".ipython/profile_default/startup") - # Install systemd service units - .copy("jupyter.service", ".jupyter/jupyter.service") - .copy("code-interpreter.service", ".jupyter/code-interpreter.service") - .run_cmd( - [ - "cp /root/.jupyter/jupyter.service /etc/systemd/system/", - "cp /root/.jupyter/code-interpreter.service /etc/systemd/system/", - ] - ) + # Install supervisord config + .copy("supervisord.conf", "/etc/supervisord.conf") ) if is_docker: From 1914c4a12fc2183f8ddca79f3d084d32f03efc10 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:13:38 +0100 Subject: [PATCH 03/16] kill uvicorn on jupyter death --- template/start-jupyter.sh | 7 +++++++ template/supervisord.conf | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100755 template/start-jupyter.sh diff --git a/template/start-jupyter.sh b/template/start-jupyter.sh new file mode 100755 index 00000000..bfb2dd41 --- /dev/null +++ b/template/start-jupyter.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +/usr/local/bin/jupyter server --IdentityProvider.token="" + +# Jupyter exited — kill code-interpreter so supervisord restarts both +echo "Jupyter exited, killing code-interpreter..." +pkill -f "uvicorn main:app" diff --git a/template/supervisord.conf b/template/supervisord.conf index 4c42fcdf..b2feb08f 100644 --- a/template/supervisord.conf +++ b/template/supervisord.conf @@ -4,7 +4,7 @@ logfile=/var/log/supervisord.log pidfile=/var/run/supervisord.pid [program:jupyter] -command=/usr/local/bin/jupyter server --IdentityProvider.token="" +command=/root/.jupyter/start-jupyter.sh environment=MATPLOTLIBRC="/root/.config/matplotlib/.matplotlibrc" stdout_logfile=/dev/null stderr_logfile=/dev/fd/1 From 3910e65e068e6df4b3ad70e885b5b9f4158c016c Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:15:12 +0100 Subject: [PATCH 04/16] return chmod --- template/template.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/template/template.py b/template/template.py index faef6f0f..0e437a1e 100644 --- a/template/template.py +++ b/template/template.py @@ -122,8 +122,9 @@ def make_template( template = ( template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc") .copy("start-up.sh", ".jupyter/start-up.sh") - .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") .run_cmd("chmod +x .jupyter/start-up.sh") + .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") + .run_cmd("chmod +x .jupyter/start-code-interpreter.sh") .copy("jupyter_server_config.py", ".jupyter/") .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") From 12dc7f4a2a446d1b579c3bb874f9b0a6e2771629 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:15:25 +0100 Subject: [PATCH 05/16] update --- template/template.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/template/template.py b/template/template.py index 0e437a1e..91750535 100644 --- a/template/template.py +++ b/template/template.py @@ -122,9 +122,8 @@ def make_template( template = ( template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc") .copy("start-up.sh", ".jupyter/start-up.sh") - .run_cmd("chmod +x .jupyter/start-up.sh") .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") - .run_cmd("chmod +x .jupyter/start-code-interpreter.sh") + .run_cmd("chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh") .copy("jupyter_server_config.py", ".jupyter/") .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") From 27ae39dce9a340fd62e06783cd3c50cf10898980 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:44:09 +0100 Subject: [PATCH 06/16] copy start jupyter --- template/template.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/template/template.py b/template/template.py index 91750535..8d946a11 100644 --- a/template/template.py +++ b/template/template.py @@ -123,7 +123,8 @@ def make_template( template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc") .copy("start-up.sh", ".jupyter/start-up.sh") .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") - .run_cmd("chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh") + .copy("start-jupyter.sh", ".jupyter/start-jupyter.sh") + .run_cmd("chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh .jupyter/start-jupyter.sh") .copy("jupyter_server_config.py", ".jupyter/") .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") From f86f1cabd5b802d005c847fe7ccd432dedfbc085 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:55:48 +0100 Subject: [PATCH 07/16] bugbot comment --- template/start-jupyter.sh | 6 ++---- template/template.py | 4 +++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/template/start-jupyter.sh b/template/start-jupyter.sh index bfb2dd41..2d372c64 100755 --- a/template/start-jupyter.sh +++ b/template/start-jupyter.sh @@ -1,7 +1,5 @@ #!/bin/bash -/usr/local/bin/jupyter server --IdentityProvider.token="" +trap 'echo "Jupyter exited, killing code-interpreter..."; pkill -f "uvicorn main:app"' EXIT -# Jupyter exited — kill code-interpreter so supervisord restarts both -echo "Jupyter exited, killing code-interpreter..." -pkill -f "uvicorn main:app" +exec /usr/local/bin/jupyter server --IdentityProvider.token="" diff --git a/template/template.py b/template/template.py index 8d946a11..7cbb7f72 100644 --- a/template/template.py +++ b/template/template.py @@ -124,7 +124,9 @@ def make_template( .copy("start-up.sh", ".jupyter/start-up.sh") .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh") .copy("start-jupyter.sh", ".jupyter/start-jupyter.sh") - .run_cmd("chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh .jupyter/start-jupyter.sh") + .run_cmd( + "chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh .jupyter/start-jupyter.sh" + ) .copy("jupyter_server_config.py", ".jupyter/") .make_dir(".ipython/profile_default/startup") .copy("ipython_kernel_config.py", ".ipython/profile_default/") From 730f76acac809b4c3f8dd086ad5b5cd1f56ff71e Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:03:38 +0100 Subject: [PATCH 08/16] kill as group --- template/start-jupyter.sh | 6 ++++-- template/supervisord.conf | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/template/start-jupyter.sh b/template/start-jupyter.sh index 2d372c64..bfb2dd41 100755 --- a/template/start-jupyter.sh +++ b/template/start-jupyter.sh @@ -1,5 +1,7 @@ #!/bin/bash -trap 'echo "Jupyter exited, killing code-interpreter..."; pkill -f "uvicorn main:app"' EXIT +/usr/local/bin/jupyter server --IdentityProvider.token="" -exec /usr/local/bin/jupyter server --IdentityProvider.token="" +# Jupyter exited — kill code-interpreter so supervisord restarts both +echo "Jupyter exited, killing code-interpreter..." +pkill -f "uvicorn main:app" diff --git a/template/supervisord.conf b/template/supervisord.conf index b2feb08f..a0aa89fb 100644 --- a/template/supervisord.conf +++ b/template/supervisord.conf @@ -10,6 +10,8 @@ stdout_logfile=/dev/null stderr_logfile=/dev/fd/1 stderr_logfile_maxbytes=0 autorestart=true +stopasgroup=true +killasgroup=true priority=10 [program:code-interpreter] From b2144d3939bc777860640e0f75f00f24be6941e6 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:56:46 +0100 Subject: [PATCH 09/16] duplicate conf --- template/supervisord.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/template/supervisord.conf b/template/supervisord.conf index a0aa89fb..c7e8d526 100644 --- a/template/supervisord.conf +++ b/template/supervisord.conf @@ -22,5 +22,7 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/fd/1 stderr_logfile_maxbytes=0 autorestart=true +stopasgroup=true +killasgroup=true priority=20 startsecs=0 From 535057c1b015607d19f182b80ad04abc6e345d2e Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:11:19 +0100 Subject: [PATCH 10/16] kill by pid to avoid killing similar processes --- template/start-code-interpreter.sh | 1 + template/start-jupyter.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/template/start-code-interpreter.sh b/template/start-code-interpreter.sh index 5d46a439..88ea456b 100755 --- a/template/start-code-interpreter.sh +++ b/template/start-code-interpreter.sh @@ -6,4 +6,5 @@ until curl -s -o /dev/null -w '%{http_code}' http://localhost:8888/api/status | done echo "Jupyter server is ready, starting Code Interpreter..." +echo $$ > /var/run/code-interpreter.pid exec /root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640 diff --git a/template/start-jupyter.sh b/template/start-jupyter.sh index bfb2dd41..40e62bbf 100755 --- a/template/start-jupyter.sh +++ b/template/start-jupyter.sh @@ -4,4 +4,4 @@ # Jupyter exited — kill code-interpreter so supervisord restarts both echo "Jupyter exited, killing code-interpreter..." -pkill -f "uvicorn main:app" +kill "$(cat /var/run/code-interpreter.pid)" 2>/dev/null From dd60d007dc16e83de07ef19d58a3cb814f095dc1 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:24:35 +0100 Subject: [PATCH 11/16] added tests --- js/tests/supervisord.test.ts | 49 ++++++++++++++++++++ python/tests/async/test_async_supervisord.py | 44 ++++++++++++++++++ python/tests/sync/test_supervisord.py | 44 ++++++++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 js/tests/supervisord.test.ts create mode 100644 python/tests/async/test_async_supervisord.py create mode 100644 python/tests/sync/test_supervisord.py diff --git a/js/tests/supervisord.test.ts b/js/tests/supervisord.test.ts new file mode 100644 index 00000000..0a9479f3 --- /dev/null +++ b/js/tests/supervisord.test.ts @@ -0,0 +1,49 @@ +import { expect } from 'vitest' +import { sandboxTest, wait } from './setup' + +async function waitForHealth(sandbox: any, maxRetries = 10, intervalMs = 100) { + for (let i = 0; i < maxRetries; i++) { + const result = await sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if (result.stdout.trim() === '200') { + return true + } + await wait(intervalMs) + } + return false +} + +sandboxTest('restart after jupyter kill', async ({ sandbox }) => { + // Verify health is up initially + const initialHealth = await waitForHealth(sandbox) + expect(initialHealth).toBe(true) + + // Kill the jupyter process + await sandbox.commands.run('supervisorctl stop jupyter') + + // Wait for supervisord to restart it and health to come back + const recovered = await waitForHealth(sandbox, 10, 100) + expect(recovered).toBe(true) + + // Verify code execution works after recovery + const result = await sandbox.runCode('x = 1; x') + expect(result.text).toEqual('1') +}) + +sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => { + // Verify health is up initially + const initialHealth = await waitForHealth(sandbox) + expect(initialHealth).toBe(true) + + // Kill the code-interpreter process + await sandbox.commands.run('supervisorctl stop code-interpreter') + + // Wait for supervisord to restart it and health to come back + const recovered = await waitForHealth(sandbox, 10, 100) + expect(recovered).toBe(true) + + // Verify code execution works after recovery + const result = await sandbox.runCode('x = 1; x') + expect(result.text).toEqual('1') +}) diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py new file mode 100644 index 00000000..ff9c3235 --- /dev/null +++ b/python/tests/async/test_async_supervisord.py @@ -0,0 +1,44 @@ +import asyncio + +from e2b_code_interpreter.code_interpreter_async import AsyncSandbox + + +async def wait_for_health(sandbox: AsyncSandbox, max_retries=10, interval_ms=100): + for _ in range(max_retries): + result = await sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if result.stdout.strip() == "200": + return True + await asyncio.sleep(interval_ms / 1000) + return False + + +async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): + # Verify health is up initially + assert await wait_for_health(async_sandbox) + + # Kill the jupyter process + await async_sandbox.commands.run("supervisorctl stop jupyter") + + # Wait for supervisord to restart it and health to come back + assert await wait_for_health(async_sandbox, 10, 100) + + # Verify code execution works after recovery + result = await async_sandbox.run_code("x = 1; x") + assert result.text == "1" + + +async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): + # Verify health is up initially + assert await wait_for_health(async_sandbox) + + # Kill the code-interpreter process + await async_sandbox.commands.run("supervisorctl stop code-interpreter") + + # Wait for supervisord to restart it and health to come back + assert await wait_for_health(async_sandbox, 10, 100) + + # Verify code execution works after recovery + result = await async_sandbox.run_code("x = 1; x") + assert result.text == "1" diff --git a/python/tests/sync/test_supervisord.py b/python/tests/sync/test_supervisord.py new file mode 100644 index 00000000..9a3a25ce --- /dev/null +++ b/python/tests/sync/test_supervisord.py @@ -0,0 +1,44 @@ +import time + +from e2b_code_interpreter.code_interpreter_sync import Sandbox + + +def wait_for_health(sandbox: Sandbox, max_retries=10, interval_ms=100): + for _ in range(max_retries): + result = sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if result.stdout.strip() == "200": + return True + time.sleep(interval_ms / 1000) + return False + + +def test_restart_after_jupyter_kill(sandbox: Sandbox): + # Verify health is up initially + assert wait_for_health(sandbox) + + # Kill the jupyter process + sandbox.commands.run("supervisorctl stop jupyter") + + # Wait for supervisord to restart it and health to come back + assert wait_for_health(sandbox, 10, 100) + + # Verify code execution works after recovery + result = sandbox.run_code("x = 1; x") + assert result.text == "1" + + +def test_restart_after_code_interpreter_kill(sandbox: Sandbox): + # Verify health is up initially + assert wait_for_health(sandbox) + + # Kill the code-interpreter process + sandbox.commands.run("supervisorctl stop code-interpreter") + + # Wait for supervisord to restart it and health to come back + assert wait_for_health(sandbox, 10, 100) + + # Verify code execution works after recovery + result = sandbox.run_code("x = 1; x") + assert result.text == "1" From b9eedf07197249f08bda53be5e17d705d9ef88c5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:30:28 +0100 Subject: [PATCH 12/16] update tests --- js/tests/supervisord.test.ts | 12 ++++++++---- python/tests/async/test_async_supervisord.py | 8 ++++---- python/tests/sync/test_supervisord.py | 8 ++++---- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/js/tests/supervisord.test.ts b/js/tests/supervisord.test.ts index 0a9479f3..bacace58 100644 --- a/js/tests/supervisord.test.ts +++ b/js/tests/supervisord.test.ts @@ -19,8 +19,10 @@ sandboxTest('restart after jupyter kill', async ({ sandbox }) => { const initialHealth = await waitForHealth(sandbox) expect(initialHealth).toBe(true) - // Kill the jupyter process - await sandbox.commands.run('supervisorctl stop jupyter') + // Kill the jupyter process by pid + await sandbox.commands.run( + "kill -9 $(pgrep -f 'jupyter server')" + ) // Wait for supervisord to restart it and health to come back const recovered = await waitForHealth(sandbox, 10, 100) @@ -36,8 +38,10 @@ sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => { const initialHealth = await waitForHealth(sandbox) expect(initialHealth).toBe(true) - // Kill the code-interpreter process - await sandbox.commands.run('supervisorctl stop code-interpreter') + // Kill the code-interpreter process by pid + await sandbox.commands.run( + 'kill -9 $(cat /var/run/code-interpreter.pid)' + ) // Wait for supervisord to restart it and health to come back const recovered = await waitForHealth(sandbox, 10, 100) diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py index ff9c3235..5cf795c6 100644 --- a/python/tests/async/test_async_supervisord.py +++ b/python/tests/async/test_async_supervisord.py @@ -18,8 +18,8 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): # Verify health is up initially assert await wait_for_health(async_sandbox) - # Kill the jupyter process - await async_sandbox.commands.run("supervisorctl stop jupyter") + # Kill the jupyter process by pid + await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')") # Wait for supervisord to restart it and health to come back assert await wait_for_health(async_sandbox, 10, 100) @@ -33,8 +33,8 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): # Verify health is up initially assert await wait_for_health(async_sandbox) - # Kill the code-interpreter process - await async_sandbox.commands.run("supervisorctl stop code-interpreter") + # Kill the code-interpreter process by pid + await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)") # Wait for supervisord to restart it and health to come back assert await wait_for_health(async_sandbox, 10, 100) diff --git a/python/tests/sync/test_supervisord.py b/python/tests/sync/test_supervisord.py index 9a3a25ce..8461b2e8 100644 --- a/python/tests/sync/test_supervisord.py +++ b/python/tests/sync/test_supervisord.py @@ -18,8 +18,8 @@ def test_restart_after_jupyter_kill(sandbox: Sandbox): # Verify health is up initially assert wait_for_health(sandbox) - # Kill the jupyter process - sandbox.commands.run("supervisorctl stop jupyter") + # Kill the jupyter process by pid + sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')") # Wait for supervisord to restart it and health to come back assert wait_for_health(sandbox, 10, 100) @@ -33,8 +33,8 @@ def test_restart_after_code_interpreter_kill(sandbox: Sandbox): # Verify health is up initially assert wait_for_health(sandbox) - # Kill the code-interpreter process - sandbox.commands.run("supervisorctl stop code-interpreter") + # Kill the code-interpreter process by pid + sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)") # Wait for supervisord to restart it and health to come back assert wait_for_health(sandbox, 10, 100) From 2e145cc9923265d4c563d58bf3b1ffec0c9e9392 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:38:22 +0100 Subject: [PATCH 13/16] kill as root --- js/tests/supervisord.test.ts | 16 ++++++++-------- python/tests/async/test_async_supervisord.py | 4 ++-- python/tests/sync/test_supervisord.py | 10 ++++++---- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/js/tests/supervisord.test.ts b/js/tests/supervisord.test.ts index bacace58..9d056913 100644 --- a/js/tests/supervisord.test.ts +++ b/js/tests/supervisord.test.ts @@ -19,10 +19,10 @@ sandboxTest('restart after jupyter kill', async ({ sandbox }) => { const initialHealth = await waitForHealth(sandbox) expect(initialHealth).toBe(true) - // Kill the jupyter process by pid - await sandbox.commands.run( - "kill -9 $(pgrep -f 'jupyter server')" - ) + // Kill the jupyter process as root + await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", { + user: 'root', + }) // Wait for supervisord to restart it and health to come back const recovered = await waitForHealth(sandbox, 10, 100) @@ -38,10 +38,10 @@ sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => { const initialHealth = await waitForHealth(sandbox) expect(initialHealth).toBe(true) - // Kill the code-interpreter process by pid - await sandbox.commands.run( - 'kill -9 $(cat /var/run/code-interpreter.pid)' - ) + // Kill the code-interpreter process as root + await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid)', { + user: 'root', + }) // Wait for supervisord to restart it and health to come back const recovered = await waitForHealth(sandbox, 10, 100) diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py index 5cf795c6..22bbc6a7 100644 --- a/python/tests/async/test_async_supervisord.py +++ b/python/tests/async/test_async_supervisord.py @@ -19,7 +19,7 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): assert await wait_for_health(async_sandbox) # Kill the jupyter process by pid - await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')") + await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") # Wait for supervisord to restart it and health to come back assert await wait_for_health(async_sandbox, 10, 100) @@ -34,7 +34,7 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): assert await wait_for_health(async_sandbox) # Kill the code-interpreter process by pid - await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)") + await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)", user="root") # Wait for supervisord to restart it and health to come back assert await wait_for_health(async_sandbox, 10, 100) diff --git a/python/tests/sync/test_supervisord.py b/python/tests/sync/test_supervisord.py index 8461b2e8..c5507051 100644 --- a/python/tests/sync/test_supervisord.py +++ b/python/tests/sync/test_supervisord.py @@ -18,8 +18,8 @@ def test_restart_after_jupyter_kill(sandbox: Sandbox): # Verify health is up initially assert wait_for_health(sandbox) - # Kill the jupyter process by pid - sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')") + # Kill the jupyter process as root + sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") # Wait for supervisord to restart it and health to come back assert wait_for_health(sandbox, 10, 100) @@ -33,8 +33,10 @@ def test_restart_after_code_interpreter_kill(sandbox: Sandbox): # Verify health is up initially assert wait_for_health(sandbox) - # Kill the code-interpreter process by pid - sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)") + # Kill the code-interpreter process as root + sandbox.commands.run( + "kill -9 $(cat /var/run/code-interpreter.pid)", user="root" + ) # Wait for supervisord to restart it and health to come back assert wait_for_health(sandbox, 10, 100) From a31d70284e8d7439cc83616c9e4e137bdb729a5e Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:45:12 +0100 Subject: [PATCH 14/16] added try/catch blocks and ignore non-zero code on kill --- js/tests/supervisord.test.ts | 22 ++++++++++------- python/tests/async/test_async_supervisord.py | 25 +++++++++++--------- python/tests/sync/test_supervisord.py | 21 +++++++++------- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/js/tests/supervisord.test.ts b/js/tests/supervisord.test.ts index 9d056913..e0227a94 100644 --- a/js/tests/supervisord.test.ts +++ b/js/tests/supervisord.test.ts @@ -3,11 +3,15 @@ import { sandboxTest, wait } from './setup' async function waitForHealth(sandbox: any, maxRetries = 10, intervalMs = 100) { for (let i = 0; i < maxRetries; i++) { - const result = await sandbox.commands.run( - 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' - ) - if (result.stdout.trim() === '200') { - return true + try { + const result = await sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if (result.stdout.trim() === '200') { + return true + } + } catch { + // Connection refused or other error, retry } await wait(intervalMs) } @@ -20,12 +24,12 @@ sandboxTest('restart after jupyter kill', async ({ sandbox }) => { expect(initialHealth).toBe(true) // Kill the jupyter process as root - await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", { + await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", { user: 'root', }) // Wait for supervisord to restart it and health to come back - const recovered = await waitForHealth(sandbox, 10, 100) + const recovered = await waitForHealth(sandbox, 20, 100) expect(recovered).toBe(true) // Verify code execution works after recovery @@ -39,12 +43,12 @@ sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => { expect(initialHealth).toBe(true) // Kill the code-interpreter process as root - await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid)', { + await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid) || true', { user: 'root', }) // Wait for supervisord to restart it and health to come back - const recovered = await waitForHealth(sandbox, 10, 100) + const recovered = await waitForHealth(sandbox, 20, 100) expect(recovered).toBe(true) // Verify code execution works after recovery diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py index 22bbc6a7..05398708 100644 --- a/python/tests/async/test_async_supervisord.py +++ b/python/tests/async/test_async_supervisord.py @@ -5,11 +5,14 @@ async def wait_for_health(sandbox: AsyncSandbox, max_retries=10, interval_ms=100): for _ in range(max_retries): - result = await sandbox.commands.run( - 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' - ) - if result.stdout.strip() == "200": - return True + try: + result = await sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if result.stdout.strip() == "200": + return True + except Exception: + pass await asyncio.sleep(interval_ms / 1000) return False @@ -18,11 +21,11 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): # Verify health is up initially assert await wait_for_health(async_sandbox) - # Kill the jupyter process by pid - await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") + # Kill the jupyter process as root + await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root") # Wait for supervisord to restart it and health to come back - assert await wait_for_health(async_sandbox, 10, 100) + assert await wait_for_health(async_sandbox, 20, 100) # Verify code execution works after recovery result = await async_sandbox.run_code("x = 1; x") @@ -33,11 +36,11 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): # Verify health is up initially assert await wait_for_health(async_sandbox) - # Kill the code-interpreter process by pid - await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)", user="root") + # Kill the code-interpreter process as root + await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root") # Wait for supervisord to restart it and health to come back - assert await wait_for_health(async_sandbox, 10, 100) + assert await wait_for_health(async_sandbox, 20, 100) # Verify code execution works after recovery result = await async_sandbox.run_code("x = 1; x") diff --git a/python/tests/sync/test_supervisord.py b/python/tests/sync/test_supervisord.py index c5507051..a863e497 100644 --- a/python/tests/sync/test_supervisord.py +++ b/python/tests/sync/test_supervisord.py @@ -5,11 +5,14 @@ def wait_for_health(sandbox: Sandbox, max_retries=10, interval_ms=100): for _ in range(max_retries): - result = sandbox.commands.run( - 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' - ) - if result.stdout.strip() == "200": - return True + try: + result = sandbox.commands.run( + 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health' + ) + if result.stdout.strip() == "200": + return True + except Exception: + pass time.sleep(interval_ms / 1000) return False @@ -19,10 +22,10 @@ def test_restart_after_jupyter_kill(sandbox: Sandbox): assert wait_for_health(sandbox) # Kill the jupyter process as root - sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") + sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root") # Wait for supervisord to restart it and health to come back - assert wait_for_health(sandbox, 10, 100) + assert wait_for_health(sandbox, 20, 100) # Verify code execution works after recovery result = sandbox.run_code("x = 1; x") @@ -35,11 +38,11 @@ def test_restart_after_code_interpreter_kill(sandbox: Sandbox): # Kill the code-interpreter process as root sandbox.commands.run( - "kill -9 $(cat /var/run/code-interpreter.pid)", user="root" + "kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root" ) # Wait for supervisord to restart it and health to come back - assert wait_for_health(sandbox, 10, 100) + assert wait_for_health(sandbox, 20, 100) # Verify code execution works after recovery result = sandbox.run_code("x = 1; x") From 443c5463984dcfa536a0cc729e41a6b792e9409d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:57:11 +0100 Subject: [PATCH 15/16] added try/catch blocks to avoid throwing on negative err code --- js/tests/supervisord.test.ts | 28 +++++++++++++------- python/tests/async/test_async_supervisord.py | 18 +++++++++---- python/tests/sync/test_supervisord.py | 22 ++++++++++----- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/js/tests/supervisord.test.ts b/js/tests/supervisord.test.ts index e0227a94..80c26abf 100644 --- a/js/tests/supervisord.test.ts +++ b/js/tests/supervisord.test.ts @@ -24,12 +24,18 @@ sandboxTest('restart after jupyter kill', async ({ sandbox }) => { expect(initialHealth).toBe(true) // Kill the jupyter process as root - await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", { - user: 'root', - }) + // The command handle may get killed too (since killing jupyter cascades to code-interpreter), + // so we catch the error. + try { + await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", { + user: 'root', + }) + } catch { + // Expected — the kill cascade may terminate the command handle + } - // Wait for supervisord to restart it and health to come back - const recovered = await waitForHealth(sandbox, 20, 100) + // Wait for supervisord to restart both services (jupyter startup + code-interpreter startup) + const recovered = await waitForHealth(sandbox, 60, 500) expect(recovered).toBe(true) // Verify code execution works after recovery @@ -43,12 +49,16 @@ sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => { expect(initialHealth).toBe(true) // Kill the code-interpreter process as root - await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid) || true', { - user: 'root', - }) + try { + await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid)', { + user: 'root', + }) + } catch { + // Expected — killing code-interpreter may terminate the command handle + } // Wait for supervisord to restart it and health to come back - const recovered = await waitForHealth(sandbox, 20, 100) + const recovered = await waitForHealth(sandbox, 60, 500) expect(recovered).toBe(true) // Verify code execution works after recovery diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py index 05398708..13a43b3f 100644 --- a/python/tests/async/test_async_supervisord.py +++ b/python/tests/async/test_async_supervisord.py @@ -22,10 +22,15 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): assert await wait_for_health(async_sandbox) # Kill the jupyter process as root - await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root") + # The command handle may get killed too (killing jupyter cascades to code-interpreter), + # so we catch the error. + try: + await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") + except Exception: + pass - # Wait for supervisord to restart it and health to come back - assert await wait_for_health(async_sandbox, 20, 100) + # Wait for supervisord to restart both services + assert await wait_for_health(async_sandbox, 60, 500) # Verify code execution works after recovery result = await async_sandbox.run_code("x = 1; x") @@ -37,10 +42,13 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): assert await wait_for_health(async_sandbox) # Kill the code-interpreter process as root - await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root") + try: + await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)", user="root") + except Exception: + pass # Wait for supervisord to restart it and health to come back - assert await wait_for_health(async_sandbox, 20, 100) + assert await wait_for_health(async_sandbox, 60, 500) # Verify code execution works after recovery result = await async_sandbox.run_code("x = 1; x") diff --git a/python/tests/sync/test_supervisord.py b/python/tests/sync/test_supervisord.py index a863e497..d7f67bfc 100644 --- a/python/tests/sync/test_supervisord.py +++ b/python/tests/sync/test_supervisord.py @@ -22,10 +22,15 @@ def test_restart_after_jupyter_kill(sandbox: Sandbox): assert wait_for_health(sandbox) # Kill the jupyter process as root - sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root") + # The command handle may get killed too (killing jupyter cascades to code-interpreter), + # so we catch the error. + try: + sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") + except Exception: + pass - # Wait for supervisord to restart it and health to come back - assert wait_for_health(sandbox, 20, 100) + # Wait for supervisord to restart both services + assert wait_for_health(sandbox, 60, 500) # Verify code execution works after recovery result = sandbox.run_code("x = 1; x") @@ -37,12 +42,15 @@ def test_restart_after_code_interpreter_kill(sandbox: Sandbox): assert wait_for_health(sandbox) # Kill the code-interpreter process as root - sandbox.commands.run( - "kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root" - ) + try: + sandbox.commands.run( + "kill -9 $(cat /var/run/code-interpreter.pid)", user="root" + ) + except Exception: + pass # Wait for supervisord to restart it and health to come back - assert wait_for_health(sandbox, 20, 100) + assert wait_for_health(sandbox, 60, 500) # Verify code execution works after recovery result = sandbox.run_code("x = 1; x") From 35f17c9591fe1519b0679d6b2985c41b6f628a2f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 12 Mar 2026 19:13:57 +0100 Subject: [PATCH 16/16] lint --- python/tests/async/test_async_supervisord.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/tests/async/test_async_supervisord.py b/python/tests/async/test_async_supervisord.py index 13a43b3f..763a7ae9 100644 --- a/python/tests/async/test_async_supervisord.py +++ b/python/tests/async/test_async_supervisord.py @@ -25,7 +25,9 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox): # The command handle may get killed too (killing jupyter cascades to code-interpreter), # so we catch the error. try: - await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root") + await async_sandbox.commands.run( + "kill -9 $(pgrep -f 'jupyter server')", user="root" + ) except Exception: pass @@ -43,7 +45,9 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox): # Kill the code-interpreter process as root try: - await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)", user="root") + await async_sandbox.commands.run( + "kill -9 $(cat /var/run/code-interpreter.pid)", user="root" + ) except Exception: pass