From 1937dc8a082a0a3a4e38b37e8184f04f08cde4bd Mon Sep 17 00:00:00 2001
From: Kazuhiro Sera <seratch@openai.com>
Date: Tue, 10 Feb 2026 17:45:16 -0800
Subject: [PATCH 1/2] feat: add container shell support

---
 .../tools/container_shell_inline_skill.py     | 117 +++++++++++
 .../tools/container_shell_skill_reference.py  | 112 ++++++++++
 examples/tools/skills/csv-workbench/SKILL.md  |  20 ++
 .../tools/skills/csv-workbench/playbook.md    |  32 +++
 pyproject.toml                                |   2 +-
 src/agents/__init__.py                        |  28 +++
 src/agents/items.py                           |   2 +
 src/agents/models/openai_responses.py         |  24 ++-
 src/agents/run_internal/tool_actions.py       |   5 +-
 src/agents/run_internal/turn_resolution.py    | 134 +++++++++---
 src/agents/tool.py                            | 144 ++++++++++++-
 tests/test_local_shell_tool.py                |   1 +
 tests/test_openai_responses_converter.py      | 124 +++++++++++
 tests/test_process_model_response.py          | 192 +++++++++++++++++-
 tests/test_run_step_execution.py              |  71 +++++++
 tests/test_shell_tool.py                      | 166 ++++++++++++++-
 tests/test_tool_metadata.py                   |   4 +-
 uv.lock                                       |   8 +-
 18 files changed, 1140 insertions(+), 46 deletions(-)
 create mode 100644 examples/tools/container_shell_inline_skill.py
 create mode 100644 examples/tools/container_shell_skill_reference.py
 create mode 100644 examples/tools/skills/csv-workbench/SKILL.md
 create mode 100644 examples/tools/skills/csv-workbench/playbook.md

diff --git a/examples/tools/container_shell_inline_skill.py b/examples/tools/container_shell_inline_skill.py
new file mode 100644
index 0000000000..13ea544006
--- /dev/null
+++ b/examples/tools/container_shell_inline_skill.py
@@ -0,0 +1,117 @@
+import argparse
+import asyncio
+import base64
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from zipfile import ZIP_DEFLATED, ZipFile
+
+from openai.types.responses import ResponseFunctionShellToolCall
+from openai.types.responses.response_container_reference import ResponseContainerReference
+
+from agents import Agent, Runner, ShellTool, ShellToolInlineSkill, trace
+from agents.items import ModelResponse
+
+SKILL_NAME = "csv-workbench"
+SKILL_DIR = Path(__file__).resolve().parent / "skills" / SKILL_NAME
+
+
+def build_skill_zip_bundle() -> bytes:
+    """Zip every file under SKILL_DIR beneath a SKILL_NAME/ prefix and return the bytes."""
+    with TemporaryDirectory(prefix="agents-inline-skill-") as scratch_dir:
+        bundle_path = Path(scratch_dir) / f"{SKILL_NAME}.zip"
+        with ZipFile(bundle_path, "w", compression=ZIP_DEFLATED) as bundle:
+            for member in (p for p in sorted(SKILL_DIR.rglob("*")) if p.is_file()):
+                bundle.write(member, f"{SKILL_NAME}/{member.relative_to(SKILL_DIR)}")
+        return bundle_path.read_bytes()
+
+
+def build_inline_skill() -> ShellToolInlineSkill:
+    """Describe the zipped skill directory as a base64-encoded inline skill."""
+    return {
+        "type": "inline",
+        "name": SKILL_NAME,
+        "description": "Analyze CSV files in /mnt/data and return concise numeric summaries.",
+        "source": {
+            "type": "base64",
+            "media_type": "application/zip",
+            "data": base64.b64encode(build_skill_zip_bundle()).decode("ascii"),
+        },
+    }
+
+
+def extract_container_id(raw_responses: list[ModelResponse]) -> "str | None":
+    """Return the container_id of the first container-backed shell call, or None."""
+    # Quoted return type: a bare `str | None` raises TypeError at import on Python 3.9.
+    for item in (out for response in raw_responses for out in response.output):
+        if not isinstance(item, ResponseFunctionShellToolCall):
+            continue
+        if isinstance(item.environment, ResponseContainerReference):
+            return item.environment.container_id
+    return None
+
+
+async def main(model: str) -> None:
+    inline_skill = build_inline_skill()
+    # Run 1 gets a container_auto shell with the inline skill; run 2 reuses that container.
+    with trace("container_shell_inline_skill_example"):
+        agent1 = Agent(
+            name="Container Shell Agent (Inline Skill)",
+            model=model,
+            instructions="Use the available container skill to answer user requests.",
+            tools=[
+                ShellTool(
+                    environment={
+                        "type": "container_auto",
+                        "network_policy": {"type": "disabled"},
+                        "skills": [inline_skill],
+                    }
+                )
+            ],
+        )
+
+        result1 = await Runner.run(
+            agent1,
+            (
+                "Use the csv-workbench skill. Create /mnt/data/orders.csv with columns "
+                "id,region,amount,status and at least 6 rows. Then report total amount by "
+                "region and count failed orders."
+            ),
+        )
+        print(f"Agent: {result1.final_output}")
+        # Recover the container ID from the shell call items in the first run's raw responses.
+        container_id = extract_container_id(result1.raw_responses)
+        if not container_id:
+            raise RuntimeError("Container ID was not returned in shell call output.")
+
+        print(f"[info] Reusing container_id={container_id}")
+        # Second agent: same container, addressed through a container_reference environment.
+        agent2 = Agent(
+            name="Container Reference Shell Agent",
+            model=model,
+            instructions="Reuse the existing shell container and answer concisely.",
+            tools=[
+                ShellTool(
+                    environment={
+                        "type": "container_reference",
+                        "container_id": container_id,
+                    }
+                )
+            ],
+        )
+
+        result2 = await Runner.run(
+            agent2,
+            "Run `ls -la /mnt/data`, then summarize in one sentence.",
+        )
+        print(f"Agent (container reuse): {result2.final_output}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model",
+        default="gpt-5.2",
+        help="Model name to use.",
+    )
+    args = parser.parse_args()
+    asyncio.run(main(args.model))
diff --git a/examples/tools/container_shell_skill_reference.py b/examples/tools/container_shell_skill_reference.py
new file mode 100644
index 0000000000..b9d942cea7
--- /dev/null
+++ b/examples/tools/container_shell_skill_reference.py
@@ -0,0 +1,112 @@
+import argparse
+import asyncio
+import os
+
+from openai.types.responses import ResponseFunctionShellToolCall
+from openai.types.responses.response_container_reference import ResponseContainerReference
+
+from agents import Agent, Runner, ShellTool, ShellToolSkillReference, trace
+from agents.items import ModelResponse
+
+SHELL_SKILL_ID_ENV = "OPENAI_SHELL_SKILL_ID"
+SHELL_SKILL_VERSION_ENV = "OPENAI_SHELL_SKILL_VERSION"
+DEFAULT_SKILL_REFERENCE: ShellToolSkillReference = {
+    "type": "skill_reference",
+    "skill_id": "skill_698bbe879adc81918725cbc69dcae7960bc5613dadaed377",
+    "version": "1",
+}
+
+
+def resolve_skill_reference() -> ShellToolSkillReference:
+    """Build the skill reference from env vars, or return the default when no ID is set."""
+    env_skill_id = os.environ.get(SHELL_SKILL_ID_ENV)
+    if not env_skill_id:
+        return DEFAULT_SKILL_REFERENCE
+    resolved: ShellToolSkillReference = {"type": "skill_reference", "skill_id": env_skill_id}
+    env_version = os.environ.get(SHELL_SKILL_VERSION_ENV)
+    if env_version:
+        resolved["version"] = env_version
+    return resolved
+
+
+def extract_container_id(raw_responses: list[ModelResponse]) -> "str | None":
+    """Return the container_id of the first container-backed shell call, or None."""
+    # Quoted return type: a bare `str | None` raises TypeError at import on Python 3.9.
+    for item in (out for response in raw_responses for out in response.output):
+        if not isinstance(item, ResponseFunctionShellToolCall):
+            continue
+        if isinstance(item.environment, ResponseContainerReference):
+            return item.environment.container_id
+    return None
+
+
+async def main(model: str) -> None:
+    skill_reference = resolve_skill_reference()
+    print(
+        "[info] Using skill reference:",
+        skill_reference["skill_id"],
+        f"(version {skill_reference.get('version', 'default')})",
+    )
+    # Run 1 gets a container_auto shell with the referenced skill; run 2 reuses that container.
+    with trace("container_shell_skill_reference_example"):
+        agent1 = Agent(
+            name="Container Shell Agent (Skill Reference)",
+            model=model,
+            instructions="Use the available container skill to answer user requests.",
+            tools=[
+                ShellTool(
+                    environment={
+                        "type": "container_auto",
+                        "network_policy": {"type": "disabled"},
+                        "skills": [skill_reference],
+                    }
+                )
+            ],
+        )
+
+        result1 = await Runner.run(
+            agent1,
+            (
+                "Use the csv-workbench skill. Create /mnt/data/orders.csv with columns "
+                "id,region,amount,status and at least 6 rows. Then report total amount by "
+                "region and count failed orders."
+            ),
+        )
+        print(f"Agent: {result1.final_output}")
+        # Recover the container ID from the shell call items in the first run's raw responses.
+        container_id = extract_container_id(result1.raw_responses)
+        if not container_id:
+            raise RuntimeError("Container ID was not returned in shell call output.")
+
+        print(f"[info] Reusing container_id={container_id}")
+        # Second agent: same container, addressed through a container_reference environment.
+        agent2 = Agent(
+            name="Container Reference Shell Agent",
+            model=model,
+            instructions="Reuse the existing shell container and answer concisely.",
+            tools=[
+                ShellTool(
+                    environment={
+                        "type": "container_reference",
+                        "container_id": container_id,
+                    }
+                )
+            ],
+        )
+
+        result2 = await Runner.run(
+            agent2,
+            "Run `ls -la /mnt/data`, then summarize in one sentence.",
+        )
+        print(f"Agent (container reuse): {result2.final_output}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model",
+        default="gpt-5.2",
+        help="Model name to use.",
+    )
+    args = parser.parse_args()
+    asyncio.run(main(args.model))
diff --git a/examples/tools/skills/csv-workbench/SKILL.md b/examples/tools/skills/csv-workbench/SKILL.md
new file mode 100644
index 0000000000..a954e42fb7
--- /dev/null
+++ b/examples/tools/skills/csv-workbench/SKILL.md
@@ -0,0 +1,20 @@
+---
+name: csv-workbench
+description: Analyze CSV files in /mnt/data and return concise numeric summaries.
+---
+
+# CSV Workbench
+
+Use this skill when the user asks for quick analysis of tabular data.
+
+## Workflow
+
+1. Inspect the CSV schema first (with `head`, Python's `csv.DictReader`, or both).
+2. Compute requested aggregates with a short Python script.
+3. Return concise results with concrete numbers and units when available.
+
+## Constraints
+
+- Prefer Python stdlib for portability.
+- If data is missing or malformed, state assumptions clearly.
+- Keep the final answer short and actionable.
diff --git a/examples/tools/skills/csv-workbench/playbook.md b/examples/tools/skills/csv-workbench/playbook.md
new file mode 100644
index 0000000000..95cacedeb6
--- /dev/null
+++ b/examples/tools/skills/csv-workbench/playbook.md
@@ -0,0 +1,32 @@
+# CSV Playbook
+
+## Quick checks
+
+- Preview rows: `head -n 10 /mnt/data/your-file.csv`.
+- Count rows:
+
+```bash
+python - <<'PY'
+import csv
+
+with open('/mnt/data/your-file.csv', newline='') as f:
+    print(sum(1 for _ in csv.DictReader(f)))
+PY
+```
+
+## Grouped totals template
+
+```bash
+python - <<'PY'
+import csv
+from collections import defaultdict
+
+totals = defaultdict(float)
+with open('/mnt/data/your-file.csv', newline='') as f:
+    for row in csv.DictReader(f):
+        totals[row['region']] += float(row['amount'])
+
+for region in sorted(totals):
+    print(region, round(totals[region], 2))
+PY
+```
diff --git a/pyproject.toml b/pyproject.toml
index 51290c124f..1dc8cab877 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "support@openai.com" }]
 dependencies = [
-    "openai>=2.9.0,<3",
+    "openai>=2.19.0,<3",
     "pydantic>=2.12.3, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
index c4f1de30f2..ea5e6fdb97 100644
--- a/src/agents/__init__.py
+++ b/src/agents/__init__.py
@@ -125,6 +125,20 @@
     ShellExecutor,
     ShellResult,
     ShellTool,
+    ShellToolContainerAutoEnvironment,
+    ShellToolContainerNetworkPolicy,
+    ShellToolContainerNetworkPolicyAllowlist,
+    ShellToolContainerNetworkPolicyDisabled,
+    ShellToolContainerNetworkPolicyDomainSecret,
+    ShellToolContainerReferenceEnvironment,
+    ShellToolContainerSkill,
+    ShellToolEnvironment,
+    ShellToolHostedEnvironment,
+    ShellToolInlineSkill,
+    ShellToolInlineSkillSource,
+    ShellToolLocalEnvironment,
+    ShellToolLocalSkill,
+    ShellToolSkillReference,
     Tool,
     ToolOutputFileContent,
     ToolOutputFileContentDict,
@@ -351,6 +365,20 @@ def enable_verbose_stdout_logging():
     "ShellCallOutcome",
     "ShellCommandOutput",
     "ShellCommandRequest",
+    "ShellToolLocalSkill",
+    "ShellToolSkillReference",
+    "ShellToolInlineSkillSource",
+    "ShellToolInlineSkill",
+    "ShellToolContainerSkill",
+    "ShellToolContainerNetworkPolicyDomainSecret",
+    "ShellToolContainerNetworkPolicyAllowlist",
+    "ShellToolContainerNetworkPolicyDisabled",
+    "ShellToolContainerNetworkPolicy",
+    "ShellToolLocalEnvironment",
+    "ShellToolContainerAutoEnvironment",
+    "ShellToolContainerReferenceEnvironment",
+    "ShellToolHostedEnvironment",
+    "ShellToolEnvironment",
     "ShellExecutor",
     "ShellResult",
     "ShellTool",
diff --git a/src/agents/items.py b/src/agents/items.py
index 94ab5daa35..e6100a2e01 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -11,6 +11,7 @@
     Response,
     ResponseComputerToolCall,
     ResponseFileSearchToolCall,
+    ResponseFunctionShellToolCallOutput,
     ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
     ResponseInputItemParam,
@@ -253,6 +254,7 @@ class ToolCallItem(RunItemBase[Any]):
     FunctionCallOutput,
     ComputerCallOutput,
     LocalShellCallOutput,
+    ResponseFunctionShellToolCallOutput,
     dict[str, Any],
 ]
 
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index ce7a08b7f5..9ebbbfe7eb 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import json
-from collections.abc import AsyncIterator
+from collections.abc import AsyncIterator, Mapping
 from contextvars import ContextVar
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Literal, Union, cast, overload
@@ -37,6 +37,7 @@
     ImageGenerationTool,
     LocalShellTool,
     ShellTool,
+    ShellToolEnvironment,
     Tool,
     WebSearchTool,
 )
@@ -411,6 +412,19 @@ class ConvertedTools:
 
 
 class Converter:
+    @classmethod
+    def _convert_shell_environment(cls, environment: ShellToolEnvironment | None) -> dict[str, Any]:
+        """Build the shell `environment` payload as a plain dict, defaulting to local."""
+        if environment is None:
+            return {"type": "local"}
+        if not isinstance(environment, Mapping):
+            raise UserError("Shell environment must be a mapping.")
+
+        # Copy the mapping so filling in a missing type never touches the caller's dict.
+        env_payload = dict(environment)
+        env_payload.setdefault("type", "local")
+        return env_payload
+
     @classmethod
     def convert_tool_choice(
         cls, tool_choice: Literal["auto", "required", "none"] | str | MCPToolChoice | None
@@ -561,7 +575,13 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None
             converted_tool = cast(ToolParam, {"type": "apply_patch"})
             includes = None
         elif isinstance(tool, ShellTool):
-            converted_tool = cast(ToolParam, {"type": "shell"})
+            converted_tool = cast(
+                ToolParam,
+                {
+                    "type": "shell",
+                    "environment": cls._convert_shell_environment(tool.environment),
+                },
+            )
             includes = None
         elif isinstance(tool, ImageGenerationTool):
             converted_tool = tool.tool_config
diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py
index 07e1fce560..29086b4c67 100644
--- a/src/agents/run_internal/tool_actions.py
+++ b/src/agents/run_internal/tool_actions.py
@@ -284,7 +284,10 @@ async def execute(
         )
 
         try:
-            executor_result = call.shell_tool.executor(request)
+            executor = call.shell_tool.executor
+            if executor is None:
+                raise ModelBehaviorError("Shell tool has no local executor configured.")
+            executor_result = executor(request)
             result = (
                 await executor_result if inspect.isawaitable(executor_result) else executor_result
             )
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
index fed661ea9a..07dd913e63 100644
--- a/src/agents/run_internal/turn_resolution.py
+++ b/src/agents/run_internal/turn_resolution.py
@@ -10,6 +10,7 @@
     ResponseComputerToolCall,
     ResponseCustomToolCall,
     ResponseFileSearchToolCall,
+    ResponseFunctionShellToolCallOutput,
     ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
     ResponseOutputMessage,
@@ -601,33 +602,37 @@ async def execute_tools_and_side_effects(
     )
 
     if not processed_response.has_tools_or_approvals_to_run():
-        if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
-            final_output = output_schema.validate_json(potential_final_output_text)
-            return await execute_final_output_call(
-                agent=agent,
-                original_input=original_input,
-                new_response=new_response,
-                pre_step_items=pre_step_items,
-                new_step_items=new_step_items,
-                final_output=final_output,
-                hooks=hooks,
-                context_wrapper=context_wrapper,
-                tool_input_guardrail_results=tool_input_guardrail_results,
-                tool_output_guardrail_results=tool_output_guardrail_results,
-            )
-        if not output_schema or output_schema.is_plain_text():
-            return await execute_final_output_call(
-                agent=agent,
-                original_input=original_input,
-                new_response=new_response,
-                pre_step_items=pre_step_items,
-                new_step_items=new_step_items,
-                final_output=potential_final_output_text or "",
-                hooks=hooks,
-                context_wrapper=context_wrapper,
-                tool_input_guardrail_results=tool_input_guardrail_results,
-                tool_output_guardrail_results=tool_output_guardrail_results,
-            )
+        has_tool_activity_without_message = potential_final_output_text is None and bool(
+            processed_response.tools_used
+        )
+        if not has_tool_activity_without_message:
+            if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
+                final_output = output_schema.validate_json(potential_final_output_text)
+                return await execute_final_output_call(
+                    agent=agent,
+                    original_input=original_input,
+                    new_response=new_response,
+                    pre_step_items=pre_step_items,
+                    new_step_items=new_step_items,
+                    final_output=final_output,
+                    hooks=hooks,
+                    context_wrapper=context_wrapper,
+                    tool_input_guardrail_results=tool_input_guardrail_results,
+                    tool_output_guardrail_results=tool_output_guardrail_results,
+                )
+            if not output_schema or output_schema.is_plain_text():
+                return await execute_final_output_call(
+                    agent=agent,
+                    original_input=original_input,
+                    new_response=new_response,
+                    pre_step_items=pre_step_items,
+                    new_step_items=new_step_items,
+                    final_output=potential_final_output_text or "",
+                    hooks=hooks,
+                    context_wrapper=context_wrapper,
+                    tool_input_guardrail_results=tool_input_guardrail_results,
+                    tool_output_guardrail_results=tool_output_guardrail_results,
+                )
 
     return SingleStepResult(
         original_input=original_input,
@@ -1234,7 +1239,22 @@ def process_model_response(
             output.__class__.__name__ if hasattr(output, "__class__") else type(output),
         )
         if output_type == "shell_call":
-            items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
+            if isinstance(output, dict):
+                shell_call_raw = dict(output)
+            elif hasattr(output, "model_dump"):
+                shell_call_raw = cast(Any, output).model_dump(exclude_unset=True)
+            else:
+                shell_call_raw = {
+                    "type": "shell_call",
+                    "id": get_mapping_or_attr(output, "id"),
+                    "call_id": get_mapping_or_attr(output, "call_id"),
+                    "status": get_mapping_or_attr(output, "status"),
+                    "action": get_mapping_or_attr(output, "action"),
+                    "environment": get_mapping_or_attr(output, "environment"),
+                    "created_by": get_mapping_or_attr(output, "created_by"),
+                }
+            shell_call_raw.pop("created_by", None)
+            items.append(ToolCallItem(raw_item=cast(Any, shell_call_raw), agent=agent))
             if not shell_tool:
                 tools_used.append("shell")
                 _error_tracing.attach_error_to_current_span(
@@ -1245,19 +1265,71 @@ def process_model_response(
                 )
                 raise ModelBehaviorError("Model produced shell call without a shell tool.")
             tools_used.append(shell_tool.name)
+            shell_environment = shell_tool.environment
+            if shell_environment is None or shell_environment["type"] != "local":
+                logger.debug(
+                    "Skipping local shell execution for hosted shell tool %s", shell_tool.name
+                )
+                continue
+            if shell_tool.executor is None:
+                _error_tracing.attach_error_to_current_span(
+                    SpanError(
+                        message="Local shell executor not found",
+                        data={},
+                    )
+                )
+                raise ModelBehaviorError(
+                    "Model produced local shell call without a local shell executor."
+                )
             call_identifier = get_mapping_or_attr(output, "call_id")
             logger.debug("Queuing shell_call %s", call_identifier)
             shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool))
             continue
+        if output_type == "shell_call_output" and isinstance(
+            output, (dict, ResponseFunctionShellToolCallOutput)
+        ):
+            tools_used.append(shell_tool.name if shell_tool else "shell")
+            if isinstance(output, dict):
+                shell_output_raw = dict(output)
+            else:
+                shell_output_raw = output.model_dump(exclude_unset=True)
+            shell_output_raw.pop("created_by", None)
+            shell_outputs = shell_output_raw.get("output")
+            if isinstance(shell_outputs, list):
+                for shell_output in shell_outputs:
+                    if isinstance(shell_output, dict):
+                        shell_output.pop("created_by", None)
+            items.append(
+                ToolCallOutputItem(
+                    raw_item=cast(Any, shell_output_raw),
+                    output=shell_output_raw.get("output"),
+                    agent=agent,
+                )
+            )
+            continue
         if output_type == "apply_patch_call":
-            items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
+            if isinstance(output, dict):
+                apply_patch_call_raw = dict(output)
+            elif hasattr(output, "model_dump"):
+                apply_patch_call_raw = cast(Any, output).model_dump(exclude_unset=True)
+            else:
+                apply_patch_call_raw = {
+                    "type": "apply_patch_call",
+                    "id": get_mapping_or_attr(output, "id"),
+                    "call_id": get_mapping_or_attr(output, "call_id"),
+                    "status": get_mapping_or_attr(output, "status"),
+                    "operation": get_mapping_or_attr(output, "operation"),
+                    "created_by": get_mapping_or_attr(output, "created_by"),
+                }
+            apply_patch_call_raw.pop("created_by", None)
+            items.append(ToolCallItem(raw_item=cast(Any, apply_patch_call_raw), agent=agent))
             if apply_patch_tool:
                 tools_used.append(apply_patch_tool.name)
-                call_identifier = get_mapping_or_attr(output, "call_id")
+                call_identifier = get_mapping_or_attr(apply_patch_call_raw, "call_id")
                 logger.debug("Queuing apply_patch_call %s", call_identifier)
                 apply_patch_calls.append(
                     ToolRunApplyPatchCall(
-                        tool_call=output,
+                        tool_call=apply_patch_call_raw,
                         apply_patch_tool=apply_patch_tool,
                     )
                 )
diff --git a/src/agents/tool.py b/src/agents/tool.py
index db1df63d69..a47475e591 100644
--- a/src/agents/tool.py
+++ b/src/agents/tool.py
@@ -4,7 +4,7 @@
 import inspect
 import json
 import weakref
-from collections.abc import Awaitable
+from collections.abc import Awaitable, Mapping
 from dataclasses import dataclass, field
 from typing import (
     TYPE_CHECKING,
@@ -608,6 +608,106 @@ def name(self):
         return "local_shell"
 
 
+class ShellToolLocalSkill(TypedDict):
+    """Skill metadata for local shell environments: a name, a description, and a path."""
+
+    description: str
+    name: str
+    path: str
+
+
+class ShellToolSkillReference(TypedDict):
+    """Reference to a hosted shell skill by `skill_id`, with an optional `version`."""
+
+    type: Literal["skill_reference"]
+    skill_id: str
+    version: NotRequired[str]
+
+
+class ShellToolInlineSkillSource(TypedDict):
+    """Inline skill source payload: `data` tagged as base64 with a zip media type."""
+
+    data: str
+    media_type: Literal["application/zip"]
+    type: Literal["base64"]
+
+
+class ShellToolInlineSkill(TypedDict):
+    """Inline hosted shell skill bundle: name, description, and an embedded source."""
+
+    description: str
+    name: str
+    source: ShellToolInlineSkillSource
+    type: Literal["inline"]
+
+
+ShellToolContainerSkill = Union[ShellToolSkillReference, ShellToolInlineSkill]
+"""Container skill configuration."""
+
+
+class ShellToolContainerNetworkPolicyDomainSecret(TypedDict):
+    """A named secret value bound to a single domain in allowlist mode."""
+
+    domain: str
+    name: str
+    value: str
+
+
+class ShellToolContainerNetworkPolicyAllowlist(TypedDict):
+    """Allowlist network policy for hosted containers, with optional per-domain secrets."""
+
+    allowed_domains: list[str]
+    type: Literal["allowlist"]
+    domain_secrets: NotRequired[list[ShellToolContainerNetworkPolicyDomainSecret]]
+
+
+class ShellToolContainerNetworkPolicyDisabled(TypedDict):
+    """Disabled network policy for hosted containers."""
+
+    type: Literal["disabled"]
+
+
+ShellToolContainerNetworkPolicy = Union[
+    ShellToolContainerNetworkPolicyAllowlist,
+    ShellToolContainerNetworkPolicyDisabled,
+]
+"""Network policy configuration for hosted shell containers."""
+
+
+class ShellToolLocalEnvironment(TypedDict):
+    """Local shell execution environment, optionally listing local skills."""
+
+    type: Literal["local"]
+    skills: NotRequired[list[ShellToolLocalSkill]]
+
+
+class ShellToolContainerAutoEnvironment(TypedDict):
+    """Auto-provisioned hosted container environment; every field except `type` is optional."""
+
+    type: Literal["container_auto"]
+    file_ids: NotRequired[list[str]]
+    memory_limit: NotRequired[Literal["1g", "4g", "16g", "64g"] | None]
+    network_policy: NotRequired[ShellToolContainerNetworkPolicy]
+    skills: NotRequired[list[ShellToolContainerSkill]]
+
+
+class ShellToolContainerReferenceEnvironment(TypedDict):
+    """Reference to an existing hosted container, identified by `container_id`."""
+
+    type: Literal["container_reference"]
+    container_id: str
+
+
+ShellToolHostedEnvironment = Union[
+    ShellToolContainerAutoEnvironment,
+    ShellToolContainerReferenceEnvironment,
+]
+"""Hosted shell environment variants."""
+
+ShellToolEnvironment = Union[ShellToolLocalEnvironment, ShellToolHostedEnvironment]
+"""All supported shell environments."""
+
+
 @dataclass
 class ShellCallOutcome:
     """Describes the terminal condition of a shell command."""
@@ -675,11 +775,26 @@ class ShellCommandRequest:
 """Executes a shell command sequence and returns either text or structured output."""
 
 
+def _normalize_shell_tool_environment(
+    environment: ShellToolEnvironment | None,
+) -> ShellToolEnvironment:
+    """Return a copy of the environment with an explicit type, defaulting to local."""
+    if environment is None:
+        return {"type": "local"}
+    if not isinstance(environment, Mapping):
+        raise UserError("ShellTool environment must be a mapping.")
+
+    # Work on a shallow copy so the caller's mapping is left untouched.
+    env_copy = dict(environment)
+    env_copy.setdefault("type", "local")
+    return cast(ShellToolEnvironment, env_copy)
+
+
 @dataclass
 class ShellTool:
     """Next-generation shell tool. LocalShellTool will be deprecated in favor of this."""
 
-    executor: ShellExecutor
+    executor: ShellExecutor | None = None
     name: str = "shell"
     needs_approval: bool | ShellApprovalFunction = False
     """Whether the shell tool needs approval before execution. If True, the run will be interrupted
@@ -692,6 +807,31 @@ class ShellTool:
     """Optional handler to auto-approve or reject when approval is required.
     If provided, it will be invoked immediately when an approval is needed.
     """
+    environment: ShellToolEnvironment | None = None
+    """Execution environment for shell commands.
+
+    If omitted, local mode is used.
+    """
+
+    def __post_init__(self) -> None:
+        """Normalize the environment and reject options that do not fit its type."""
+        resolved_environment = _normalize_shell_tool_environment(self.environment)
+        self.environment = resolved_environment
+
+        if resolved_environment["type"] == "local":
+            if self.executor is None:
+                raise UserError("ShellTool with local environment requires an executor.")
+            return
+
+        # Any non-local (hosted) environment accepts neither an executor nor approval hooks.
+        if self.executor is not None:
+            raise UserError("ShellTool with hosted environment does not accept an executor.")
+        if self.needs_approval is not False or self.on_approval is not None:
+            raise UserError(
+                "ShellTool with hosted environment does not support needs_approval or on_approval."
+            )
+        self.needs_approval = False
+        self.on_approval = None
 
     @property
     def type(self) -> str:
diff --git a/tests/test_local_shell_tool.py b/tests/test_local_shell_tool.py
index 013c1d1fc2..cdc0d9a7f1 100644
--- a/tests/test_local_shell_tool.py
+++ b/tests/test_local_shell_tool.py
@@ -144,6 +144,7 @@ async def test_runner_executes_local_shell_calls() -> None:
 
     local_shell_output = items[2]
     assert isinstance(local_shell_output, ToolCallOutputItem)
+    assert isinstance(local_shell_output.raw_item, dict)
     assert local_shell_output.raw_item.get("type") == "local_shell_call_output"
     assert local_shell_output.output == "shell result"
 
diff --git a/tests/test_openai_responses_converter.py b/tests/test_openai_responses_converter.py
index 62e058e00e..376bf67a1b 100644
--- a/tests/test_openai_responses_converter.py
+++ b/tests/test_openai_responses_converter.py
@@ -23,6 +23,8 @@
   one `ComputerTool`.
 """
 
+from typing import Any, cast
+
 import pytest
 from openai import omit
 from pydantic import BaseModel
@@ -34,6 +36,7 @@
     ComputerTool,
     FileSearchTool,
     Handoff,
+    ShellTool,
     Tool,
     UserError,
     WebSearchTool,
@@ -187,6 +190,127 @@ def test_convert_tools_basic_types_and_includes():
         Converter.convert_tools(tools=[comp_tool, comp_tool], handoffs=[])
 
 
+def test_convert_tools_shell_local_environment() -> None:
+    shell_tool = ShellTool(executor=lambda request: "ok")  # no environment is given
+
+    converted = Converter.convert_tools(tools=[shell_tool], handoffs=[])
+
+    assert converted.tools == [{"type": "shell", "environment": {"type": "local"}}]
+    assert converted.includes == []  # expects no include entries for a shell tool
+
+
+def test_convert_tools_shell_container_reference_environment() -> None:
+    shell_tool = ShellTool(environment={"type": "container_reference", "container_id": "cntr_123"})
+
+    converted = Converter.convert_tools(tools=[shell_tool], handoffs=[])
+
+    assert converted.tools == [  # environment is expected to be forwarded field-for-field
+        {
+            "type": "shell",
+            "environment": {
+                "type": "container_reference",
+                "container_id": "cntr_123",
+            },
+        }
+    ]
+
+
+def test_convert_tools_shell_container_auto_environment() -> None:
+    shell_tool = ShellTool(  # hosted configuration: no executor is passed
+        environment={
+            "type": "container_auto",
+            "file_ids": ["file-123"],
+            "memory_limit": "1g",
+            "network_policy": {
+                "type": "allowlist",
+                "allowed_domains": ["example.com"],
+                "domain_secrets": [{"domain": "example.com", "name": "TOKEN", "value": "secret"}],
+            },
+            "skills": [
+                {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"},
+                {
+                    "type": "inline",
+                    "name": "csv-workbench",
+                    "description": "Analyze CSV files.",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "application/zip",
+                        "data": "ZmFrZS16aXA=",
+                    },
+                },
+            ],
+        }
+    )
+
+    converted = Converter.convert_tools(tools=[shell_tool], handoffs=[])
+
+    assert converted.tools == [  # nested policy and skills are expected to survive conversion
+        {
+            "type": "shell",
+            "environment": {
+                "type": "container_auto",
+                "file_ids": ["file-123"],
+                "memory_limit": "1g",
+                "network_policy": {
+                    "type": "allowlist",
+                    "allowed_domains": ["example.com"],
+                    "domain_secrets": [
+                        {"domain": "example.com", "name": "TOKEN", "value": "secret"}
+                    ],
+                },
+                "skills": [
+                    {
+                        "type": "skill_reference",
+                        "skill_id": "skill_123",
+                        "version": "latest",
+                    },
+                    {
+                        "type": "inline",
+                        "name": "csv-workbench",
+                        "description": "Analyze CSV files.",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "application/zip",
+                            "data": "ZmFrZS16aXA=",
+                        },
+                    },
+                ],
+            },
+        }
+    ]
+
+
+def test_convert_tools_shell_environment_passes_through_unknown_fields() -> None:
+    shell_tool = ShellTool(
+        environment=cast(  # cast to Any, presumably so the unknown fields type-check
+            Any,
+            {
+                "type": "container_auto",
+                "network_policy": {
+                    "type": "future_mode",
+                    "allowed_domains": ["example.com"],
+                    "some_new_field": "keep-me",  # unrecognized key that must reach the payload
+                },
+            },
+        )
+    )
+
+    converted = Converter.convert_tools(tools=[shell_tool], handoffs=[])
+    assert converted.tools == [
+        {
+            "type": "shell",
+            "environment": {
+                "type": "container_auto",
+                "network_policy": {
+                    "type": "future_mode",
+                    "allowed_domains": ["example.com"],
+                    "some_new_field": "keep-me",
+                },
+            },
+        }
+    ]
+
+
 def test_convert_tools_includes_handoffs():
     """
     When handoff objects are included, `convert_tools` should append their
diff --git a/tests/test_process_model_response.py b/tests/test_process_model_response.py
index d26559a68d..ee51f5b521 100644
--- a/tests/test_process_model_response.py
+++ b/tests/test_process_model_response.py
@@ -1,9 +1,16 @@
+from typing import Any, cast
+
 import pytest
-from openai.types.responses import ResponseCompactionItem
+from openai.types.responses import (
+    ResponseApplyPatchToolCall,
+    ResponseCompactionItem,
+    ResponseFunctionShellToolCall,
+    ResponseFunctionShellToolCallOutput,
+)
 
-from agents import Agent, ApplyPatchTool, CompactionItem
+from agents import Agent, ApplyPatchTool, CompactionItem, ShellTool
 from agents.exceptions import ModelBehaviorError
-from agents.items import ModelResponse
+from agents.items import ModelResponse, ToolCallItem, ToolCallOutputItem
 from agents.run_internal import run_loop
 from agents.usage import Usage
 from tests.fake_model import FakeModel
@@ -35,6 +42,143 @@ def test_process_model_response_shell_call_without_tool_raises() -> None:
         )
 
 
+def test_process_model_response_skips_local_shell_execution_for_hosted_environment() -> None:
+    shell_tool = ShellTool(environment={"type": "container_auto"})  # hosted: no executor
+    agent = Agent(name="hosted-shell", model=FakeModel(), tools=[shell_tool])
+    shell_call = make_shell_call("shell-hosted-1")
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[shell_tool],
+        response=_response([shell_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 1
+    assert isinstance(processed.new_items[0], ToolCallItem)
+    assert processed.shell_calls == []  # expects nothing queued for local execution
+    assert processed.tools_used == ["shell"]  # the call is still recorded as tool usage
+
+
+def test_process_model_response_sanitizes_shell_call_model_object() -> None:
+    shell_call = ResponseFunctionShellToolCall(
+        type="shell_call",
+        id="sh_call_2",
+        call_id="call_shell_2",
+        status="completed",
+        created_by="server",  # field the assertions below expect to be removed
+        action=cast(Any, {"commands": ["echo hi"], "timeout_ms": 1000}),
+    )
+    shell_tool = ShellTool(environment={"type": "container_auto"})
+    agent = Agent(name="hosted-shell-model", model=FakeModel(), tools=[shell_tool])
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[shell_tool],
+        response=_response([shell_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 1
+    item = processed.new_items[0]
+    assert isinstance(item, ToolCallItem)
+    assert isinstance(item.raw_item, dict)  # model object is expected to become a plain dict
+    assert item.raw_item["type"] == "shell_call"
+    assert "created_by" not in item.raw_item
+    next_input = item.to_input_item()  # the input-item form must omit created_by as well
+    assert isinstance(next_input, dict)
+    assert next_input["type"] == "shell_call"
+    assert "created_by" not in next_input
+    assert processed.shell_calls == []
+    assert processed.tools_used == ["shell"]
+
+
+def test_process_model_response_preserves_shell_call_output() -> None:
+    shell_output = {
+        "type": "shell_call_output",
+        "id": "sh_out_1",
+        "call_id": "call_shell_1",
+        "status": "completed",
+        "max_output_length": 1000,
+        "output": [
+            {
+                "stdout": "ok\n",
+                "stderr": "",
+                "outcome": {"type": "exit", "exit_code": 0},
+            }
+        ],
+    }
+    agent = Agent(name="shell-output", model=FakeModel())  # no tools are registered
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([shell_output]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 1
+    assert isinstance(processed.new_items[0], ToolCallOutputItem)
+    assert processed.new_items[0].raw_item == shell_output  # dict payload is kept unchanged
+    assert processed.tools_used == ["shell"]
+    assert processed.shell_calls == []
+
+
+def test_process_model_response_sanitizes_shell_call_output_model_object() -> None:
+    shell_output = ResponseFunctionShellToolCallOutput(
+        type="shell_call_output",
+        id="sh_out_2",
+        call_id="call_shell_2",
+        status="completed",
+        created_by="server",  # expected to be dropped from the top-level item
+        output=cast(
+            Any,
+            [
+                {
+                    "stdout": "ok\n",
+                    "stderr": "",
+                    "outcome": {"type": "exit", "exit_code": 0},
+                    "created_by": "server",  # expected to be dropped from each entry too
+                }
+            ],
+        ),
+    )
+    agent = Agent(name="shell-output-model", model=FakeModel())
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([shell_output]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 1
+    item = processed.new_items[0]
+    assert isinstance(item, ToolCallOutputItem)
+    assert isinstance(item.raw_item, dict)
+    assert item.raw_item["type"] == "shell_call_output"
+    assert "created_by" not in item.raw_item
+    shell_outputs = item.raw_item.get("output")
+    assert isinstance(shell_outputs, list)
+    assert isinstance(shell_outputs[0], dict)
+    assert "created_by" not in shell_outputs[0]
+
+    next_input = item.to_input_item()
+    assert isinstance(next_input, dict)
+    assert next_input["type"] == "shell_call_output"
+    assert "status" not in next_input  # status is expected to be absent from the input form
+    assert "created_by" not in next_input
+    next_outputs = next_input.get("output")
+    assert isinstance(next_outputs, list)
+    assert isinstance(next_outputs[0], dict)
+    assert "created_by" not in next_outputs[0]
+    assert processed.tools_used == ["shell"]
+
+
 def test_process_model_response_apply_patch_call_without_tool_raises() -> None:
     agent = Agent(name="no-apply", model=FakeModel())
     apply_patch_call = make_apply_patch_dict("apply-1", diff="-old\n+new\n")
@@ -49,6 +193,48 @@ def test_process_model_response_apply_patch_call_without_tool_raises() -> None:
         )
 
 
+def test_process_model_response_sanitizes_apply_patch_call_model_object() -> None:
+    editor = RecordingEditor()
+    apply_patch_tool = ApplyPatchTool(editor=editor)
+    agent = Agent(name="apply-agent-model", model=FakeModel(), tools=[apply_patch_tool])
+    apply_patch_call = ResponseApplyPatchToolCall(
+        type="apply_patch_call",
+        id="ap_call_1",
+        call_id="call_apply_1",
+        status="completed",
+        created_by="server",  # field the assertions below expect to be removed
+        operation=cast(
+            Any,
+            {"type": "update_file", "path": "test.md", "diff": "-old\n+new\n"},
+        ),
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[apply_patch_tool],
+        response=_response([apply_patch_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 1
+    item = processed.new_items[0]
+    assert isinstance(item, ToolCallItem)
+    assert isinstance(item.raw_item, dict)
+    assert item.raw_item["type"] == "apply_patch_call"
+    assert "created_by" not in item.raw_item
+    next_input = item.to_input_item()
+    assert isinstance(next_input, dict)
+    assert next_input["type"] == "apply_patch_call"
+    assert "created_by" not in next_input
+    assert len(processed.apply_patch_calls) == 1  # exactly one call is queued
+    queued_call = processed.apply_patch_calls[0].tool_call
+    assert isinstance(queued_call, dict)  # the queued copy is expected to be sanitized too
+    assert queued_call["type"] == "apply_patch_call"
+    assert "created_by" not in queued_call
+    assert processed.tools_used == [apply_patch_tool.name]
+
+
 def test_process_model_response_converts_custom_apply_patch_call() -> None:
     editor = RecordingEditor()
     apply_patch_tool = ApplyPatchTool(editor=editor)
diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py
index a9c747cd25..720b0611cd 100644
--- a/tests/test_run_step_execution.py
+++ b/tests/test_run_step_execution.py
@@ -175,6 +175,77 @@ async def test_plaintext_agent_with_tool_call_is_run_again():
     assert isinstance(result.next_step, NextStepRunAgain)
 
 
+@pytest.mark.asyncio
+async def test_plaintext_agent_hosted_shell_items_without_message_runs_again():
+    shell_tool = ShellTool(environment={"type": "container_auto"})
+    agent = Agent(name="test", tools=[shell_tool])
+    response = ModelResponse(  # a shell call plus its output, with no assistant message
+        output=[
+            make_shell_call(
+                "call_shell_hosted", id_value="shell_call_hosted", commands=["echo hi"]
+            ),
+            cast(
+                Any,
+                {
+                    "type": "shell_call_output",
+                    "id": "sh_out_hosted",
+                    "call_id": "call_shell_hosted",
+                    "status": "completed",
+                    "output": [
+                        {
+                            "stdout": "hi\n",
+                            "stderr": "",
+                            "outcome": {"type": "exit", "exit_code": 0},
+                        }
+                    ],
+                },
+            ),
+        ],
+        usage=Usage(),
+        response_id=None,
+    )
+
+    result = await get_execute_result(agent, response)
+
+    assert len(result.generated_items) == 2
+    assert isinstance(result.generated_items[0], ToolCallItem)
+    assert isinstance(result.generated_items[1], ToolCallOutputItem)
+    assert isinstance(result.next_step, NextStepRunAgain)  # no message, so run again
+
+
+@pytest.mark.asyncio
+async def test_plaintext_agent_shell_output_only_without_message_runs_again():
+    agent = Agent(name="test")  # no shell tool is configured on this agent
+    response = ModelResponse(
+        output=[
+            cast(
+                Any,
+                {
+                    "type": "shell_call_output",
+                    "id": "sh_out_only",
+                    "call_id": "call_shell_only",
+                    "status": "completed",
+                    "output": [
+                        {
+                            "stdout": "hi\n",
+                            "stderr": "",
+                            "outcome": {"type": "exit", "exit_code": 0},
+                        }
+                    ],
+                },
+            ),
+        ],
+        usage=Usage(),
+        response_id=None,
+    )
+
+    result = await get_execute_result(agent, response)
+
+    assert len(result.generated_items) == 1  # the lone output item is still surfaced
+    assert isinstance(result.generated_items[0], ToolCallOutputItem)
+    assert isinstance(result.next_step, NextStepRunAgain)  # no message, so run again
+
+
 @pytest.mark.asyncio
 async def test_multiple_tool_calls():
     agent = Agent(
diff --git a/tests/test_shell_tool.py b/tests/test_shell_tool.py
index 35fb1fcee8..2ec532f049 100644
--- a/tests/test_shell_tool.py
+++ b/tests/test_shell_tool.py
@@ -13,9 +13,10 @@
     ShellCommandOutput,
     ShellResult,
     ShellTool,
+    UserError,
 )
 from agents.items import ToolApprovalItem, ToolCallOutputItem
-from agents.run_internal.run_loop import ShellAction, ToolRunShellCall
+from agents.run_internal.run_loop import ShellAction, ToolRunShellCall, execute_shell_calls
 
 from .utils.hitl import (
     HITL_REJECTION_MSG,
@@ -40,6 +41,169 @@ def _shell_call(call_id: str = "call_shell") -> dict[str, Any]:
     )
 
 
+def test_shell_tool_defaults_to_local_environment() -> None:
+    shell_tool = ShellTool(executor=lambda request: "ok")  # environment left unset
+
+    assert shell_tool.environment == {"type": "local"}  # default filled in at construction
+    assert shell_tool.executor is not None
+
+
+def test_shell_tool_supports_hosted_environment_without_executor() -> None:
+    shell_tool = ShellTool(  # constructed without an executor
+        environment={
+            "type": "container_reference",
+            "container_id": "cntr_123",
+        }
+    )
+
+    assert shell_tool.environment == {"type": "container_reference", "container_id": "cntr_123"}
+    assert shell_tool.executor is None  # executor keeps its None default
+
+
+def test_shell_tool_normalizes_container_auto_environment() -> None:
+    shell_tool = ShellTool(
+        environment={
+            "type": "container_auto",
+            "file_ids": ["file_123"],
+            "memory_limit": "4g",
+            "network_policy": {
+                "type": "allowlist",
+                "allowed_domains": ["example.com"],
+                "domain_secrets": [
+                    {
+                        "domain": "example.com",
+                        "name": "API_TOKEN",
+                        "value": "secret",
+                    }
+                ],
+            },
+            "skills": [
+                {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"},
+                {
+                    "type": "inline",
+                    "name": "csv-workbench",
+                    "description": "Analyze CSV files.",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "application/zip",
+                        "data": "ZmFrZS16aXA=",
+                    },
+                },
+            ],
+        }
+    )
+
+    assert shell_tool.environment == {  # expected to equal the mapping passed in above
+        "type": "container_auto",
+        "file_ids": ["file_123"],
+        "memory_limit": "4g",
+        "network_policy": {
+            "type": "allowlist",
+            "allowed_domains": ["example.com"],
+            "domain_secrets": [
+                {
+                    "domain": "example.com",
+                    "name": "API_TOKEN",
+                    "value": "secret",
+                }
+            ],
+        },
+        "skills": [
+            {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"},
+            {
+                "type": "inline",
+                "name": "csv-workbench",
+                "description": "Analyze CSV files.",
+                "source": {
+                    "type": "base64",
+                    "media_type": "application/zip",
+                    "data": "ZmFrZS16aXA=",
+                },
+            },
+        ],
+    }
+
+
+def test_shell_tool_rejects_local_mode_without_executor() -> None:
+    with pytest.raises(UserError, match="requires an executor"):
+        ShellTool()  # environment omitted, so local mode applies
+
+    with pytest.raises(UserError, match="requires an executor"):
+        ShellTool(environment={"type": "local"})  # local mode requested explicitly
+
+
+def test_shell_tool_allows_unvalidated_hosted_environment_shapes() -> None:
+    shell_tool = ShellTool(environment=cast(Any, {"type": "container_reference"}))
+    assert shell_tool.environment == {"type": "container_reference"}  # no container_id needed
+
+    shell_tool = ShellTool(  # construction is expected to succeed without raising
+        environment=cast(
+            Any,
+            {
+                "type": "container_auto",
+                "network_policy": {
+                    "type": "future_mode",
+                    "allowed_domains": ["example.com"],
+                    "some_new_field": True,
+                },
+                "skills": [{"type": "skill_reference"}],
+            },
+        )
+    )
+    assert isinstance(shell_tool.environment, dict)
+    assert shell_tool.environment["type"] == "container_auto"
+
+
+def test_shell_tool_rejects_local_executor_and_approval_for_hosted_environment() -> None:
+    with pytest.raises(UserError, match="does not accept an executor"):
+        ShellTool(
+            executor=lambda request: "ok",  # executors are rejected for hosted environments
+            environment={"type": "container_reference", "container_id": "cntr_123"},
+        )
+
+    with pytest.raises(UserError, match="does not support needs_approval or on_approval"):
+        ShellTool(
+            environment={"type": "container_reference", "container_id": "cntr_123"},
+            needs_approval=True,  # any value other than False is rejected
+        )
+
+    with pytest.raises(UserError, match="does not support needs_approval or on_approval"):
+        ShellTool(
+            environment={"type": "container_reference", "container_id": "cntr_123"},
+            on_approval=lambda _context, _item: {"approve": True},  # any handler is rejected
+        )
+
+
+@pytest.mark.asyncio
+async def test_execute_shell_calls_surfaces_missing_local_executor() -> None:
+    shell_tool = ShellTool(  # hosted tool, so its executor stays None
+        environment={
+            "type": "container_reference",
+            "container_id": "cntr_123",
+        }
+    )
+    tool_run = ToolRunShellCall(tool_call=_shell_call(), shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await execute_shell_calls(  # called directly with the executor-less tool
+        agent=agent,
+        calls=[tool_run],
+        context_wrapper=context_wrapper,
+        hooks=RunHooks[Any](),
+        config=RunConfig(),
+    )
+
+    assert len(result) == 1
+    output_item = result[0]
+    assert isinstance(output_item, ToolCallOutputItem)
+    assert output_item.output == "Shell tool has no local executor configured."
+    raw_item = cast(dict[str, Any], output_item.raw_item)
+    assert raw_item["type"] == "shell_call_output"
+    assert raw_item["call_id"] == "call_shell"
+    assert raw_item["status"] == "failed"  # surfaced as a failed output item, not an exception
+
+
 @pytest.mark.asyncio
 async def test_shell_tool_structured_output_is_rendered() -> None:
     shell_tool = ShellTool(
diff --git a/tests/test_tool_metadata.py b/tests/test_tool_metadata.py
index ad6395e9b1..42440ea9d6 100644
--- a/tests/test_tool_metadata.py
+++ b/tests/test_tool_metadata.py
@@ -46,7 +46,9 @@ def test_tool_name_properties() -> None:
     assert CodeInterpreterTool(tool_config=dummy_code).name == "code_interpreter"
     assert ImageGenerationTool(tool_config=dummy_image).name == "image_generation"
     assert LocalShellTool(executor=lambda req: "ok").name == "local_shell"
-    assert ShellTool(executor=lambda req: "ok").type == "shell"
+    shell_tool = ShellTool(executor=lambda req: "ok")
+    assert shell_tool.type == "shell"
+    assert shell_tool.environment == {"type": "local"}
     assert ApplyPatchTool(editor=DummyEditor()).type == "apply_patch"
 
 
diff --git a/uv.lock b/uv.lock
index 8d7915f0b3..38bc480342 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2151,7 +2151,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "2.9.0"
+version = "2.20.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2163,9 +2163,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = "2025-12-04T18:15:09.01Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6e/5a/f495777c02625bfa18212b6e3b73f1893094f2bf660976eb4bc6f43a1ca2/openai-2.20.0.tar.gz", hash = "sha256:2654a689208cd0bf1098bb9462e8d722af5cbe961e6bba54e6f19fb843d88db1", size = 642355, upload-time = "2026-02-10T19:02:54.145Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/a0/cf4297aa51bbc21e83ef0ac018947fa06aea8f2364aad7c96cbf148590e6/openai-2.20.0-py3-none-any.whl", hash = "sha256:38d989c4b1075cd1f76abc68364059d822327cf1a932531d429795f4fc18be99", size = 1098479, upload-time = "2026-02-10T19:02:52.157Z" },
 ]
 
 [[package]]
@@ -2256,7 +2256,7 @@ requires-dist = [
     { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.81.0,<2" },
     { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.19.0,<2" },
     { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" },
-    { name = "openai", specifier = ">=2.9.0,<3" },
+    { name = "openai", specifier = ">=2.19.0,<3" },
     { name = "pydantic", specifier = ">=2.12.3,<3" },
     { name = "redis", marker = "extra == 'redis'", specifier = ">=7" },
     { name = "requests", specifier = ">=2.0,<3" },

From c5797fca23b4faee21a662eff54fd899303de190 Mon Sep 17 00:00:00 2001
From: Kazuhiro Sera <seratch@openai.com>
Date: Wed, 11 Feb 2026 09:27:22 -0800
Subject: [PATCH 2/2] fix review comment

---
 src/agents/run_internal/turn_resolution.py |  2 +-
 tests/test_run_step_execution.py           | 52 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
index 07dd913e63..e51fa801c0 100644
--- a/src/agents/run_internal/turn_resolution.py
+++ b/src/agents/run_internal/turn_resolution.py
@@ -602,7 +602,7 @@ async def execute_tools_and_side_effects(
     )
 
     if not processed_response.has_tools_or_approvals_to_run():
-        has_tool_activity_without_message = potential_final_output_text is None and bool(
+        has_tool_activity_without_message = not message_items and bool(
             processed_response.tools_used
         )
         if not has_tool_activity_without_message:
diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py
index 720b0611cd..a9ebc225a6 100644
--- a/tests/test_run_step_execution.py
+++ b/tests/test_run_step_execution.py
@@ -6,6 +6,8 @@
 
 import pytest
 from openai.types.responses.response_output_item import McpApprovalRequest
+from openai.types.responses.response_output_message import ResponseOutputMessage
+from openai.types.responses.response_output_refusal import ResponseOutputRefusal
 from pydantic import BaseModel
 
 from agents import (
@@ -246,6 +248,56 @@ async def test_plaintext_agent_shell_output_only_without_message_runs_again():
     assert isinstance(result.next_step, NextStepRunAgain)
 
 
+@pytest.mark.asyncio
+async def test_plaintext_agent_hosted_shell_with_refusal_message_is_final_output():
+    shell_tool = ShellTool(environment={"type": "container_auto"})
+    agent = Agent(name="test", tools=[shell_tool])
+    refusal_message = ResponseOutputMessage(  # message whose only content is a refusal
+        id="msg_refusal",
+        type="message",
+        role="assistant",
+        content=[ResponseOutputRefusal(type="refusal", refusal="I cannot help with that.")],
+        status="completed",
+    )
+    response = ModelResponse(
+        output=[
+            make_shell_call(
+                "call_shell_hosted_refusal",
+                id_value="shell_call_hosted_refusal",
+                commands=["echo hi"],
+            ),
+            cast(
+                Any,
+                {
+                    "type": "shell_call_output",
+                    "id": "sh_out_hosted_refusal",
+                    "call_id": "call_shell_hosted_refusal",
+                    "status": "completed",
+                    "output": [
+                        {
+                            "stdout": "hi\n",
+                            "stderr": "",
+                            "outcome": {"type": "exit", "exit_code": 0},
+                        }
+                    ],
+                },
+            ),
+            refusal_message,
+        ],
+        usage=Usage(),
+        response_id=None,
+    )
+
+    result = await get_execute_result(agent, response)
+
+    assert len(result.generated_items) == 3
+    assert isinstance(result.generated_items[0], ToolCallItem)
+    assert isinstance(result.generated_items[1], ToolCallOutputItem)
+    assert isinstance(result.generated_items[2], MessageOutputItem)
+    assert isinstance(result.next_step, NextStepFinalOutput)  # a message item ends the turn
+    assert result.next_step.output == ""  # the refusal-only message yields empty output text
+
+
 @pytest.mark.asyncio
 async def test_multiple_tool_calls():
     agent = Agent(