Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/ref/extensions/sandbox/openshell/sandbox.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# `Sandbox`

::: agents.extensions.sandbox.openshell.sandbox
3 changes: 3 additions & 0 deletions docs/sandbox/clients.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ For provider-specific setup notes and links for the checked-in extension example
| `DaytonaSandboxClient` | `openai-agents[daytona]` | [Daytona runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/daytona/daytona_runner.py) |
| `E2BSandboxClient` | `openai-agents[e2b]` | [E2B runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/e2b_runner.py) |
| `ModalSandboxClient` | `openai-agents[modal]` | [Modal runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/modal_runner.py) |
| `OpenShellSandboxClient` | `openai-agents[openshell]` | [OpenShell runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/openshell_runner.py) |
| `RunloopSandboxClient` | `openai-agents[runloop]` | [Runloop runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/runloop/runner.py) |
| `VercelSandboxClient` | `openai-agents[vercel]` | [Vercel runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/vercel_runner.py) |

Expand All @@ -113,6 +114,7 @@ Hosted sandbox clients expose provider-specific mount strategies. Choose the bac
| `DaytonaSandboxClient` | Supports rclone-backed cloud storage mounts with `DaytonaCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
| `E2BSandboxClient` | Supports rclone-backed cloud storage mounts with `E2BCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
| `RunloopSandboxClient` | Supports rclone-backed cloud storage mounts with `RunloopCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
| `OpenShellSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |
| `VercelSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |

</div>
Expand All @@ -130,6 +132,7 @@ The table below summarizes which remote storage entries each backend can mount d
| `DaytonaSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
| `E2BSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
| `RunloopSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
| `OpenShellSandboxClient` | - | - | - | - | - | - |
| `VercelSandboxClient` | - | - | - | - | - | - |

</div>
Expand Down
301 changes: 301 additions & 0 deletions examples/sandbox/extensions/openshell_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
"""
OpenShell sandbox integration example.

This script exercises the OpenShell sandbox extension at two levels:

1. **Session-level** (no LLM needed): Creates a sandbox, writes files, reads them
back, runs commands, and verifies workspace persistence. This validates the
extension works end-to-end with a real OpenShell gateway.

2. **Agent-level** (requires OPENAI_API_KEY): Runs a SandboxAgent with a shell
capability inside the OpenShell sandbox.

Prerequisites:
- An OpenShell gateway running (local, remote, or cloud).
- ``openshell`` Python package installed: ``uv sync --extra openshell``
- For agent mode: ``OPENAI_API_KEY`` environment variable set.

Quick start:
# Session-level only (no LLM):
uv run python examples/sandbox/extensions/openshell_runner.py --session-only

# Session-level checks followed by the agent run (needs OPENAI_API_KEY):
uv run python examples/sandbox/extensions/openshell_runner.py

# With a specific cluster:
uv run python examples/sandbox/extensions/openshell_runner.py --cluster my-gateway

# With a custom image:
uv run python examples/sandbox/extensions/openshell_runner.py --image ubuntu:24.04
"""

from __future__ import annotations

import argparse
import asyncio
import io
import os
import sys
from pathlib import Path

try:
    from agents.extensions.sandbox import (
        OpenShellSandboxClient,
        OpenShellSandboxClientOptions,
    )
except ImportError as exc:
    # Only a failed import is translated into the install hint; any other error keeps
    # its own traceback instead of being reported as a missing optional extra.
    raise SystemExit(
        "OpenShell sandbox examples require the optional openshell extra.\n"
        "Install it with: uv sync --extra openshell"
    ) from exc


async def session_level_test(
    *,
    cluster: str | None,
    endpoint: str | None,
    image: str | None,
    gpu: bool,
) -> None:
    """Exercise the sandbox extension directly without an LLM.

    Creates an OpenShell sandbox from a two-file manifest, then lists, reads, and
    writes files, runs a shell pipeline, checks the running state, and takes a
    workspace snapshot. Once the session has been created it is always closed, even
    when one of the checks fails.

    Args:
        cluster: OpenShell gateway cluster name, or ``None``.
        endpoint: Explicit gateway endpoint (``host:port``), or ``None``.
        image: Container image for the sandbox, or ``None``.
        gpu: Whether to request a GPU.

    Raises:
        AssertionError: If any verification step fails, including a command that
            exits with a non-zero status.
    """

    from agents.sandbox import Manifest
    from agents.sandbox.entries import File

    print("=== OpenShell Session-Level Test ===\n")

    # Build a manifest with test files.
    # OpenShell sandboxes default to /sandbox as the working directory.
    manifest = Manifest(
        root="/sandbox",
        entries={
            "hello.txt": File(content=b"Hello from OpenShell sandbox!\n"),
            "data/numbers.csv": File(content=b"a,b,c\n1,2,3\n4,5,6\n"),
        },
    )

    client = OpenShellSandboxClient()
    options = OpenShellSandboxClientOptions(
        cluster=cluster,
        endpoint=endpoint,
        image=image,
        gpu=gpu,
    )

    print("1. Creating sandbox...")
    session = await client.create(manifest=manifest, options=options)

    try:
        print("2. Starting session (materializing workspace)...")
        await session.start()

        print("3. Running 'ls -la' in workspace...")
        result = await session.exec("ls", "-la", shell=False)
        print(f" exit_code={result.exit_code}")
        print(f" stdout:\n{result.stdout.decode()}")
        # Checked after printing so the listing is still visible when the command fails.
        assert result.exit_code == 0, f"ls failed: {result.stderr.decode()}"

        print("4. Reading hello.txt...")
        content = await session.read(Path("hello.txt"))
        text = content.read()
        # The stream may yield bytes or str; normalize to str before comparing.
        if isinstance(text, bytes):
            text = text.decode("utf-8")
        print(f" content: {text.strip()!r}")
        assert "Hello from OpenShell sandbox!" in text, "Read verification failed."

        print("5. Writing a new file...")
        await session.write(
            Path("output.txt"),
            io.BytesIO(b"Written by the OpenAI Agents SDK via OpenShell.\n"),
        )

        print("6. Verifying the written file...")
        result = await session.exec("cat", "output.txt", shell=False)
        assert result.exit_code == 0, f"cat failed: {result.stderr.decode()}"
        print(f" content: {result.stdout.decode().strip()!r}")

        print("7. Running a multi-step shell command...")
        result = await session.exec("wc -l data/numbers.csv && echo 'done'")
        # Without this check a failing pipeline would still be reported as passed.
        assert result.exit_code == 0, f"shell command failed: {result.stderr.decode()}"
        print(f" output: {result.stdout.decode().strip()}")

        print("8. Checking sandbox is running...")
        is_running = await session.running()
        print(f" running: {is_running}")
        assert is_running, "Sandbox should be running."

        print("9. Persisting workspace (tar snapshot)...")
        snapshot = await session.persist_workspace()
        snapshot_bytes = snapshot.read()
        print(f" snapshot size: {len(snapshot_bytes)} bytes")
        assert len(snapshot_bytes) > 0, "Snapshot should not be empty."

        print("\nAll session-level checks passed.")

    finally:
        print("\n10. Shutting down sandbox...")
        await session.aclose()
        print(" Done.")


async def agent_level_test(
    *,
    model: str,
    cluster: str | None,
    endpoint: str | None,
    image: str | None,
    gpu: bool,
    question: str,
    stream: bool,
) -> None:
    """Ask an OpenShell-backed SandboxAgent a question about a seeded workspace.

    The workspace is seeded with ``README.md`` and ``status.md``. The answer is
    printed after an ``assistant> `` prefix: in one piece by default, or delta by
    delta when ``stream`` is true.

    Args:
        model: Model name handed to the agent.
        cluster: OpenShell gateway cluster name, or ``None``.
        endpoint: Explicit gateway endpoint (``host:port``), or ``None``.
        image: Container image for the sandbox, or ``None``.
        gpu: Whether to request a GPU.
        question: Prompt sent to the agent.
        stream: Whether to stream the response instead of waiting for it.
    """

    from openai.types.responses import ResponseTextDeltaEvent

    from agents import ModelSettings, Runner
    from agents.run import RunConfig
    from agents.sandbox import Manifest, SandboxAgent, SandboxRunConfig
    from agents.sandbox.entries import File

    # Run as a plain script there is no package context, so the directory three
    # levels above this file is put on sys.path to make ``examples`` importable.
    if not __package__:
        sys.path.insert(0, str(Path(__file__).resolve().parents[3]))

    from examples.sandbox.misc.workspace_shell import WorkspaceShellCapability

    print("\n=== OpenShell Agent-Level Test ===\n")

    readme_bytes = (
        b"# Project Status\n\nThis workspace contains a sample project status report.\n"
    )
    status_bytes = (
        b"# Sprint 42 Status\n\n"
        b"- Auth service: on track, shipping Tuesday.\n"
        b"- Search reindex: blocked on infra ticket INFRA-1234.\n"
        b"- Dashboard v2: 80% complete, needs UX review.\n"
    )
    manifest = Manifest(
        root="/sandbox",
        entries={
            "README.md": File(content=readme_bytes),
            "status.md": File(content=status_bytes),
        },
    )

    agent_instructions = (
        "Answer questions about the sandbox workspace. Inspect the files before answering "
        "and keep the response concise. "
        "Do not invent files or statuses that are not present in the workspace. Cite the "
        "file names you inspected."
    )
    agent = SandboxAgent(
        name="OpenShell Sandbox Assistant",
        model=model,
        instructions=agent_instructions,
        default_manifest=manifest,
        capabilities=[WorkspaceShellCapability()],
        model_settings=ModelSettings(tool_choice="required"),
    )

    sandbox_config = SandboxRunConfig(
        client=OpenShellSandboxClient(),
        options=OpenShellSandboxClientOptions(
            cluster=cluster,
            endpoint=endpoint,
            image=image,
            gpu=gpu,
        ),
    )
    run_config = RunConfig(
        sandbox=sandbox_config,
        workflow_name="OpenShell sandbox example",
    )

    if stream:
        streamed = Runner.run_streamed(agent, question, run_config=run_config)
        prefix_printed = False
        async for event in streamed.stream_events():
            # Only raw text deltas are echoed; every other event is ignored.
            if event.type != "raw_response_event":
                continue
            if not isinstance(event.data, ResponseTextDeltaEvent):
                continue
            if not prefix_printed:
                print("assistant> ", end="", flush=True)
                prefix_printed = True
            print(event.data.delta, end="", flush=True)
        # Terminate the line only if something was actually written to it.
        if prefix_printed:
            print()
        return

    outcome = await Runner.run(agent, question, run_config=run_config)
    print(f"assistant> {outcome.final_output}")


async def main(
    *,
    model: str,
    cluster: str | None,
    endpoint: str | None,
    image: str | None,
    gpu: bool,
    question: str,
    stream: bool,
    session_only: bool,
) -> None:
    """Run the session-level checks, then the agent-level run when it is enabled.

    The agent-level run is skipped when ``session_only`` is true, or when the
    ``OPENAI_API_KEY`` environment variable is unset or empty.

    Args:
        model: Model name for the agent-level run.
        cluster: OpenShell gateway cluster name, or ``None``.
        endpoint: Explicit gateway endpoint (``host:port``), or ``None``.
        image: Container image for the sandbox, or ``None``.
        gpu: Whether to request a GPU.
        question: Prompt sent to the agent.
        stream: Whether to stream the agent response.
        session_only: Stop after the session-level checks.
    """
    # Session-level test always runs (no LLM needed).
    await session_level_test(
        cluster=cluster,
        endpoint=endpoint,
        image=image,
        gpu=gpu,
    )

    if session_only:
        return

    # Agent-level test requires OPENAI_API_KEY.
    if not os.environ.get("OPENAI_API_KEY"):
        print("\nSkipping agent-level test (OPENAI_API_KEY not set).")
        # --session-only is known to be off on this path, so the only missing piece
        # is the API key; do not tell the user to remove a flag they did not pass.
        print("Set OPENAI_API_KEY to run the full test.")
        return

    await agent_level_test(
        model=model,
        cluster=cluster,
        endpoint=endpoint,
        image=image,
        gpu=gpu,
        question=question,
        stream=stream,
    )


if __name__ == "__main__":
    # Command-line entry point: parse the flags and hand them to main().
    cli = argparse.ArgumentParser(
        description="OpenShell sandbox integration example for the OpenAI Agents SDK."
    )
    cli.add_argument("--model", default="gpt-4.1-mini", help="Model name to use.")
    cli.add_argument(
        "--question",
        default="Summarize the project status from the workspace files.",
        help="Prompt to send to the agent.",
    )

    # Optional string flags; registration order is kept so --help output is unchanged.
    for flag, description in (
        ("--cluster", "OpenShell gateway cluster name."),
        ("--endpoint", "Explicit gateway endpoint (host:port)."),
        ("--image", "Container image for the sandbox."),
    ):
        cli.add_argument(flag, default=None, help=description)

    # Boolean switches; store_true already defaults to False.
    for flag, description in (
        ("--gpu", "Request GPU."),
        ("--stream", "Stream the response."),
        ("--session-only", "Run session-level test only (no LLM needed)."),
    ):
        cli.add_argument(flag, action="store_true", help=description)

    cli_args = cli.parse_args()

    entry_point = main(
        model=cli_args.model,
        cluster=cli_args.cluster,
        endpoint=cli_args.endpoint,
        image=cli_args.image,
        gpu=cli_args.gpu,
        question=cli_args.question,
        stream=cli_args.stream,
        session_only=cli_args.session_only,
    )
    asyncio.run(entry_point)
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ e2b = ["e2b==2.20.0", "e2b-code-interpreter==2.4.1"]
modal = ["modal==1.3.5"]
runloop = ["runloop_api_client>=1.16.0,<2.0.0"]
vercel = ["vercel>=0.5.6,<0.6"]
openshell = ["openshell>=0.0.0a0"]
s3 = ["boto3>=1.34"]
temporal = [
"temporalio==1.26.0",
Expand Down Expand Up @@ -164,6 +165,10 @@ ignore_missing_imports = true
module = ["vercel", "vercel.*"]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = ["openshell", "openshell.*"]
ignore_missing_imports = true

[tool.coverage.run]
source = ["src/agents"]
omit = [
Expand Down
22 changes: 22 additions & 0 deletions src/agents/extensions/sandbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@
except Exception: # pragma: no cover
_HAS_VERCEL = False

# Optional OpenShell backend: re-export its public names when the subpackage imports
# cleanly, and record the outcome in _HAS_OPENSHELL. Any exception raised by the
# import is treated as "backend unavailable" rather than propagated.
try:
    from .openshell import (
        OpenShellSandboxClient as OpenShellSandboxClient,
        OpenShellSandboxClientOptions as OpenShellSandboxClientOptions,
        OpenShellSandboxSession as OpenShellSandboxSession,
        OpenShellSandboxSessionState as OpenShellSandboxSessionState,
    )

    _HAS_OPENSHELL = True
except Exception:  # pragma: no cover
    _HAS_OPENSHELL = False

__all__: list[str] = []

if _HAS_E2B:
Expand Down Expand Up @@ -207,3 +219,13 @@
"RunloopUserParameters",
]
)

# Advertise the OpenShell names in __all__ only when _HAS_OPENSHELL is true.
if _HAS_OPENSHELL:
    __all__.extend(
        [
            "OpenShellSandboxClient",
            "OpenShellSandboxClientOptions",
            "OpenShellSandboxSession",
            "OpenShellSandboxSessionState",
        ]
    )
15 changes: 15 additions & 0 deletions src/agents/extensions/sandbox/openshell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

from .sandbox import (
OpenShellSandboxClient as OpenShellSandboxClient,
OpenShellSandboxClientOptions as OpenShellSandboxClientOptions,
OpenShellSandboxSession as OpenShellSandboxSession,
OpenShellSandboxSessionState as OpenShellSandboxSessionState,
)

__all__ = [
"OpenShellSandboxClient",
"OpenShellSandboxClientOptions",
"OpenShellSandboxSession",
"OpenShellSandboxSessionState",
]
Loading