From 1937dc8a082a0a3a4e38b37e8184f04f08cde4bd Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Tue, 10 Feb 2026 17:45:16 -0800 Subject: [PATCH 1/2] feat: add container shell support --- .../tools/container_shell_inline_skill.py | 117 +++++++++++ .../tools/container_shell_skill_reference.py | 112 ++++++++++ examples/tools/skills/csv-workbench/SKILL.md | 20 ++ .../tools/skills/csv-workbench/playbook.md | 32 +++ pyproject.toml | 2 +- src/agents/__init__.py | 28 +++ src/agents/items.py | 2 + src/agents/models/openai_responses.py | 24 ++- src/agents/run_internal/tool_actions.py | 5 +- src/agents/run_internal/turn_resolution.py | 134 +++++++++--- src/agents/tool.py | 144 ++++++++++++- tests/test_local_shell_tool.py | 1 + tests/test_openai_responses_converter.py | 124 +++++++++++ tests/test_process_model_response.py | 192 +++++++++++++++++- tests/test_run_step_execution.py | 71 +++++++ tests/test_shell_tool.py | 166 ++++++++++++++- tests/test_tool_metadata.py | 4 +- uv.lock | 8 +- 18 files changed, 1140 insertions(+), 46 deletions(-) create mode 100644 examples/tools/container_shell_inline_skill.py create mode 100644 examples/tools/container_shell_skill_reference.py create mode 100644 examples/tools/skills/csv-workbench/SKILL.md create mode 100644 examples/tools/skills/csv-workbench/playbook.md diff --git a/examples/tools/container_shell_inline_skill.py b/examples/tools/container_shell_inline_skill.py new file mode 100644 index 0000000000..13ea544006 --- /dev/null +++ b/examples/tools/container_shell_inline_skill.py @@ -0,0 +1,117 @@ +import argparse +import asyncio +import base64 +from pathlib import Path +from tempfile import TemporaryDirectory +from zipfile import ZIP_DEFLATED, ZipFile + +from openai.types.responses import ResponseFunctionShellToolCall +from openai.types.responses.response_container_reference import ResponseContainerReference + +from agents import Agent, Runner, ShellTool, ShellToolInlineSkill, trace +from agents.items import ModelResponse + +SKILL_NAME = "csv-workbench" +SKILL_DIR = Path(__file__).resolve().parent / "skills" / SKILL_NAME + + +def build_skill_zip_bundle() -> bytes: + with TemporaryDirectory(prefix="agents-inline-skill-") as temp_dir: + zip_path = Path(temp_dir) / f"{SKILL_NAME}.zip" + with ZipFile(zip_path, "w", compression=ZIP_DEFLATED) as archive: + for path in sorted(SKILL_DIR.rglob("*")): + if path.is_file(): + archive.write(path, f"{SKILL_NAME}/{path.relative_to(SKILL_DIR)}") + return zip_path.read_bytes() + + +def build_inline_skill() -> ShellToolInlineSkill: + bundle = build_skill_zip_bundle() + return { + "type": "inline", + "name": SKILL_NAME, + "description": "Analyze CSV files in /mnt/data and return concise numeric summaries.", + "source": { + "type": "base64", + "media_type": "application/zip", + "data": base64.b64encode(bundle).decode("ascii"), + }, + } + + +def extract_container_id(raw_responses: list[ModelResponse]) -> str | None: + for response in raw_responses: + for item in response.output: + if isinstance(item, ResponseFunctionShellToolCall) and isinstance( + item.environment, ResponseContainerReference + ): + return item.environment.container_id + + return None + + +async def main(model: str) -> None: + inline_skill = build_inline_skill() + + with trace("container_shell_inline_skill_example"): + agent1 = Agent( + name="Container Shell Agent (Inline Skill)", + model=model, + instructions="Use the available container skill to answer user requests.", + tools=[ + ShellTool( + environment={ + "type": "container_auto", + "network_policy": {"type": "disabled"}, + "skills": [inline_skill], + } + ) + ], + ) + + result1 = await Runner.run( + agent1, + ( + "Use the csv-workbench skill. Create /mnt/data/orders.csv with columns " + "id,region,amount,status and at least 6 rows. Then report total amount by " + "region and count failed orders." + ), + ) + print(f"Agent: {result1.final_output}") + + container_id = extract_container_id(result1.raw_responses) + if not container_id: + raise RuntimeError("Container ID was not returned in shell call output.") + + print(f"[info] Reusing container_id={container_id}") + + agent2 = Agent( + name="Container Reference Shell Agent", + model=model, + instructions="Reuse the existing shell container and answer concisely.", + tools=[ + ShellTool( + environment={ + "type": "container_reference", + "container_id": container_id, + } + ) + ], + ) + + result2 = await Runner.run( + agent2, + "Run `ls -la /mnt/data`, then summarize in one sentence.", + ) + print(f"Agent (container reuse): {result2.final_output}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", + default="gpt-5.2", + help="Model name to use.", + ) + args = parser.parse_args() + asyncio.run(main(args.model)) diff --git a/examples/tools/container_shell_skill_reference.py b/examples/tools/container_shell_skill_reference.py new file mode 100644 index 0000000000..b9d942cea7 --- /dev/null +++ b/examples/tools/container_shell_skill_reference.py @@ -0,0 +1,112 @@ +import argparse +import asyncio +import os + +from openai.types.responses import ResponseFunctionShellToolCall +from openai.types.responses.response_container_reference import ResponseContainerReference + +from agents import Agent, Runner, ShellTool, ShellToolSkillReference, trace +from agents.items import ModelResponse + +SHELL_SKILL_ID_ENV = "OPENAI_SHELL_SKILL_ID" +SHELL_SKILL_VERSION_ENV = "OPENAI_SHELL_SKILL_VERSION" +DEFAULT_SKILL_REFERENCE: ShellToolSkillReference = { + "type": "skill_reference", + "skill_id": "skill_698bbe879adc81918725cbc69dcae7960bc5613dadaed377", + "version": "1", +} + + +def resolve_skill_reference() -> ShellToolSkillReference: + skill_id = os.environ.get(SHELL_SKILL_ID_ENV) + if not skill_id: + return DEFAULT_SKILL_REFERENCE + + reference: ShellToolSkillReference = {"type": "skill_reference", "skill_id": skill_id} + skill_version = os.environ.get(SHELL_SKILL_VERSION_ENV) + if skill_version: + reference["version"] = skill_version + return reference + + +def extract_container_id(raw_responses: list[ModelResponse]) -> str | None: + for response in raw_responses: + for item in response.output: + if isinstance(item, ResponseFunctionShellToolCall) and isinstance( + item.environment, ResponseContainerReference + ): + return item.environment.container_id + + return None + + +async def main(model: str) -> None: + skill_reference = resolve_skill_reference() + print( + "[info] Using skill reference:", + skill_reference["skill_id"], + f"(version {skill_reference.get('version', 'default')})", + ) + + with trace("container_shell_skill_reference_example"): + agent1 = Agent( + name="Container Shell Agent (Skill Reference)", + model=model, + instructions="Use the available container skill to answer user requests.", + tools=[ + ShellTool( + environment={ + "type": "container_auto", + "network_policy": {"type": "disabled"}, + "skills": [skill_reference], + } + ) + ], + ) + + result1 = await Runner.run( + agent1, + ( + "Use the csv-workbench skill. Create /mnt/data/orders.csv with columns " + "id,region,amount,status and at least 6 rows. Then report total amount by " + "region and count failed orders." + ), + ) + print(f"Agent: {result1.final_output}") + + container_id = extract_container_id(result1.raw_responses) + if not container_id: + raise RuntimeError("Container ID was not returned in shell call output.") + + print(f"[info] Reusing container_id={container_id}") + + agent2 = Agent( + name="Container Reference Shell Agent", + model=model, + instructions="Reuse the existing shell container and answer concisely.", + tools=[ + ShellTool( + environment={ + "type": "container_reference", + "container_id": container_id, + } + ) + ], + ) + + result2 = await Runner.run( + agent2, + "Run `ls -la /mnt/data`, then summarize in one sentence.", + ) + print(f"Agent (container reuse): {result2.final_output}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", + default="gpt-5.2", + help="Model name to use.", + ) + args = parser.parse_args() + asyncio.run(main(args.model)) diff --git a/examples/tools/skills/csv-workbench/SKILL.md b/examples/tools/skills/csv-workbench/SKILL.md new file mode 100644 index 0000000000..a954e42fb7 --- /dev/null +++ b/examples/tools/skills/csv-workbench/SKILL.md @@ -0,0 +1,20 @@ +--- +name: csv-workbench +description: Analyze CSV files in /mnt/data and return concise numeric summaries. +--- + +# CSV Workbench + +Use this skill when the user asks for quick analysis of tabular data. + +## Workflow + +1. Inspect the CSV schema first (`head`, `python csv.DictReader`, or both). +2. Compute requested aggregates with a short Python script. +3. Return concise results with concrete numbers and units when available. + +## Constraints + +- Prefer Python stdlib for portability. +- If data is missing or malformed, state assumptions clearly. +- Keep the final answer short and actionable. diff --git a/examples/tools/skills/csv-workbench/playbook.md b/examples/tools/skills/csv-workbench/playbook.md new file mode 100644 index 0000000000..95cacedeb6 --- /dev/null +++ b/examples/tools/skills/csv-workbench/playbook.md @@ -0,0 +1,32 @@ +# CSV Playbook + +## Quick checks + +- Preview rows: `head -n 10 /mnt/data/your-file.csv`. +- Count rows: + +```bash +python - <<'PY' +import csv + +with open('/mnt/data/your-file.csv', newline='') as f: + print(sum(1 for _ in csv.DictReader(f))) +PY +``` + +## Grouped totals template + +```bash +python - <<'PY' +import csv +from collections import defaultdict + +totals = defaultdict(float) +with open('/mnt/data/your-file.csv', newline='') as f: + for row in csv.DictReader(f): + totals[row['region']] += float(row['amount']) + +for region in sorted(totals): + print(region, round(totals[region], 2)) +PY +``` diff --git a/pyproject.toml b/pyproject.toml index 51290c124f..1dc8cab877 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.9" license = "MIT" authors = [{ name = "OpenAI", email = "support@openai.com" }] dependencies = [ - "openai>=2.9.0,<3", + "openai>=2.19.0,<3", "pydantic>=2.12.3, <3", "griffe>=1.5.6, <2", "typing-extensions>=4.12.2, <5", diff --git a/src/agents/__init__.py b/src/agents/__init__.py index c4f1de30f2..ea5e6fdb97 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -125,6 +125,20 @@ ShellExecutor, ShellResult, ShellTool, + ShellToolContainerAutoEnvironment, + ShellToolContainerNetworkPolicy, + ShellToolContainerNetworkPolicyAllowlist, + ShellToolContainerNetworkPolicyDisabled, + ShellToolContainerNetworkPolicyDomainSecret, + ShellToolContainerReferenceEnvironment, + ShellToolContainerSkill, + ShellToolEnvironment, + ShellToolHostedEnvironment, + ShellToolInlineSkill, + ShellToolInlineSkillSource, + ShellToolLocalEnvironment, + ShellToolLocalSkill, + ShellToolSkillReference, Tool, ToolOutputFileContent, ToolOutputFileContentDict, @@ -351,6 +365,20 @@ def enable_verbose_stdout_logging(): "ShellCallOutcome", "ShellCommandOutput", "ShellCommandRequest", + "ShellToolLocalSkill", + "ShellToolSkillReference", + "ShellToolInlineSkillSource", + "ShellToolInlineSkill", + "ShellToolContainerSkill", + "ShellToolContainerNetworkPolicyDomainSecret", + "ShellToolContainerNetworkPolicyAllowlist", + "ShellToolContainerNetworkPolicyDisabled", + "ShellToolContainerNetworkPolicy", + "ShellToolLocalEnvironment", + "ShellToolContainerAutoEnvironment", + "ShellToolContainerReferenceEnvironment", + "ShellToolHostedEnvironment", + "ShellToolEnvironment", "ShellExecutor", "ShellResult", "ShellTool", diff --git a/src/agents/items.py b/src/agents/items.py index 94ab5daa35..e6100a2e01 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -11,6 +11,7 @@ Response, ResponseComputerToolCall, ResponseFileSearchToolCall, + ResponseFunctionShellToolCallOutput, ResponseFunctionToolCall, ResponseFunctionWebSearch, ResponseInputItemParam, @@ -253,6 +254,7 @@ class ToolCallItem(RunItemBase[Any]): FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput, + ResponseFunctionShellToolCallOutput, dict[str, Any], ] diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index ce7a08b7f5..9ebbbfe7eb 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Mapping from contextvars import ContextVar from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Literal, Union, cast, overload @@ -37,6 +37,7 @@ ImageGenerationTool, LocalShellTool, ShellTool, + ShellToolEnvironment, Tool, WebSearchTool, ) @@ -411,6 +412,19 @@ class ConvertedTools: class Converter: + @classmethod + def _convert_shell_environment(cls, environment: ShellToolEnvironment | None) -> dict[str, Any]: + """Convert shell environment settings to OpenAI payload shape.""" + if environment is None: + return {"type": "local"} + if not isinstance(environment, Mapping): + raise UserError("Shell environment must be a mapping.") + + payload = dict(environment) + if "type" not in payload: + payload["type"] = "local" + return payload + @classmethod def convert_tool_choice( cls, tool_choice: Literal["auto", "required", "none"] | str | MCPToolChoice | None @@ -561,7 +575,13 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None converted_tool = cast(ToolParam, {"type": "apply_patch"}) includes = None elif isinstance(tool, ShellTool): - converted_tool = cast(ToolParam, {"type": "shell"}) + converted_tool = cast( + ToolParam, + { + "type": "shell", + "environment": cls._convert_shell_environment(tool.environment), + }, + ) includes = None elif isinstance(tool, ImageGenerationTool): converted_tool = tool.tool_config diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py index 07e1fce560..29086b4c67 100644 --- a/src/agents/run_internal/tool_actions.py +++ b/src/agents/run_internal/tool_actions.py @@ -284,7 +284,10 @@ async def execute( ) try: - executor_result = call.shell_tool.executor(request) + executor = call.shell_tool.executor + if executor is None: + raise ModelBehaviorError("Shell tool has no local executor configured.") + executor_result = executor(request) result = ( await executor_result if inspect.isawaitable(executor_result) else executor_result ) diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index fed661ea9a..07dd913e63 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -10,6 +10,7 @@ ResponseComputerToolCall, ResponseCustomToolCall, ResponseFileSearchToolCall, + ResponseFunctionShellToolCallOutput, ResponseFunctionToolCall, ResponseFunctionWebSearch, ResponseOutputMessage, @@ -601,33 +602,37 @@ async def execute_tools_and_side_effects( ) if not processed_response.has_tools_or_approvals_to_run(): - if output_schema and not output_schema.is_plain_text() and potential_final_output_text: - final_output = output_schema.validate_json(potential_final_output_text) - return await execute_final_output_call( - agent=agent, - original_input=original_input, - new_response=new_response, - pre_step_items=pre_step_items, - new_step_items=new_step_items, - final_output=final_output, - hooks=hooks, - context_wrapper=context_wrapper, - tool_input_guardrail_results=tool_input_guardrail_results, - tool_output_guardrail_results=tool_output_guardrail_results, - ) - if not output_schema or output_schema.is_plain_text(): - return await execute_final_output_call( - agent=agent, - original_input=original_input, - new_response=new_response, - pre_step_items=pre_step_items, - new_step_items=new_step_items, - final_output=potential_final_output_text or "", - hooks=hooks, - context_wrapper=context_wrapper, - tool_input_guardrail_results=tool_input_guardrail_results, - tool_output_guardrail_results=tool_output_guardrail_results, - ) + has_tool_activity_without_message = potential_final_output_text is None and bool( + processed_response.tools_used + ) + if not has_tool_activity_without_message: + if output_schema and not output_schema.is_plain_text() and potential_final_output_text: + final_output = output_schema.validate_json(potential_final_output_text) + return await execute_final_output_call( + agent=agent, + original_input=original_input, + new_response=new_response, + pre_step_items=pre_step_items, + new_step_items=new_step_items, + final_output=final_output, + hooks=hooks, + context_wrapper=context_wrapper, + tool_input_guardrail_results=tool_input_guardrail_results, + tool_output_guardrail_results=tool_output_guardrail_results, + ) + if not output_schema or output_schema.is_plain_text(): + return await execute_final_output_call( + agent=agent, + original_input=original_input, + new_response=new_response, + pre_step_items=pre_step_items, + new_step_items=new_step_items, + final_output=potential_final_output_text or "", + hooks=hooks, + context_wrapper=context_wrapper, + tool_input_guardrail_results=tool_input_guardrail_results, + tool_output_guardrail_results=tool_output_guardrail_results, + ) return SingleStepResult( original_input=original_input, @@ -1234,7 +1239,22 @@ def process_model_response( output.__class__.__name__ if hasattr(output, "__class__") else type(output), ) if output_type == "shell_call": - items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) + if isinstance(output, dict): + shell_call_raw = dict(output) + elif hasattr(output, "model_dump"): + shell_call_raw = cast(Any, output).model_dump(exclude_unset=True) + else: + shell_call_raw = { + "type": "shell_call", + "id": get_mapping_or_attr(output, "id"), + "call_id": get_mapping_or_attr(output, "call_id"), + "status": get_mapping_or_attr(output, "status"), + "action": get_mapping_or_attr(output, "action"), + "environment": get_mapping_or_attr(output, "environment"), + "created_by": get_mapping_or_attr(output, "created_by"), + } + shell_call_raw.pop("created_by", None) + items.append(ToolCallItem(raw_item=cast(Any, shell_call_raw), agent=agent)) if not shell_tool: tools_used.append("shell") _error_tracing.attach_error_to_current_span( @@ -1245,19 +1265,71 @@ def process_model_response( ) raise ModelBehaviorError("Model produced shell call without a shell tool.") tools_used.append(shell_tool.name) + shell_environment = shell_tool.environment + if shell_environment is None or shell_environment["type"] != "local": + logger.debug( + "Skipping local shell execution for hosted shell tool %s", shell_tool.name + ) + continue + if shell_tool.executor is None: + _error_tracing.attach_error_to_current_span( + SpanError( + message="Local shell executor not found", + data={}, + ) + ) + raise ModelBehaviorError( + "Model produced local shell call without a local shell executor." + ) call_identifier = get_mapping_or_attr(output, "call_id") logger.debug("Queuing shell_call %s", call_identifier) shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool)) continue + if output_type == "shell_call_output" and isinstance( + output, (dict, ResponseFunctionShellToolCallOutput) + ): + tools_used.append(shell_tool.name if shell_tool else "shell") + if isinstance(output, dict): + shell_output_raw = dict(output) + else: + shell_output_raw = output.model_dump(exclude_unset=True) + shell_output_raw.pop("created_by", None) + shell_outputs = shell_output_raw.get("output") + if isinstance(shell_outputs, list): + for shell_output in shell_outputs: + if isinstance(shell_output, dict): + shell_output.pop("created_by", None) + items.append( + ToolCallOutputItem( + raw_item=cast(Any, shell_output_raw), + output=shell_output_raw.get("output"), + agent=agent, + ) + ) + continue if output_type == "apply_patch_call": - items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) + if isinstance(output, dict): + apply_patch_call_raw = dict(output) + elif hasattr(output, "model_dump"): + apply_patch_call_raw = cast(Any, output).model_dump(exclude_unset=True) + else: + apply_patch_call_raw = { + "type": "apply_patch_call", + "id": get_mapping_or_attr(output, "id"), + "call_id": get_mapping_or_attr(output, "call_id"), + "status": get_mapping_or_attr(output, "status"), + "operation": get_mapping_or_attr(output, "operation"), + "created_by": get_mapping_or_attr(output, "created_by"), + } + apply_patch_call_raw.pop("created_by", None) + items.append(ToolCallItem(raw_item=cast(Any, apply_patch_call_raw), agent=agent)) if apply_patch_tool: tools_used.append(apply_patch_tool.name) - call_identifier = get_mapping_or_attr(output, "call_id") + call_identifier = get_mapping_or_attr(apply_patch_call_raw, "call_id") logger.debug("Queuing apply_patch_call %s", call_identifier) apply_patch_calls.append( ToolRunApplyPatchCall( - tool_call=output, + tool_call=apply_patch_call_raw, apply_patch_tool=apply_patch_tool, ) ) diff --git a/src/agents/tool.py b/src/agents/tool.py index db1df63d69..a47475e591 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -4,7 +4,7 @@ import inspect import json import weakref -from collections.abc import Awaitable +from collections.abc import Awaitable, Mapping from dataclasses import dataclass, field from typing import ( TYPE_CHECKING, @@ -608,6 +608,106 @@ def name(self): return "local_shell" +class ShellToolLocalSkill(TypedDict): + """Skill metadata for local shell environments.""" + + description: str + name: str + path: str + + +class ShellToolSkillReference(TypedDict): + """Reference to a hosted shell skill.""" + + type: Literal["skill_reference"] + skill_id: str + version: NotRequired[str] + + +class ShellToolInlineSkillSource(TypedDict): + """Inline skill source payload.""" + + data: str + media_type: Literal["application/zip"] + type: Literal["base64"] + + +class ShellToolInlineSkill(TypedDict): + """Inline hosted shell skill bundle.""" + + description: str + name: str + source: ShellToolInlineSkillSource + type: Literal["inline"] + + +ShellToolContainerSkill = Union[ShellToolSkillReference, ShellToolInlineSkill] +"""Container skill configuration.""" + + +class ShellToolContainerNetworkPolicyDomainSecret(TypedDict): + """A secret bound to a single domain in allowlist mode.""" + + domain: str + name: str + value: str + + +class ShellToolContainerNetworkPolicyAllowlist(TypedDict): + """Allowlist network policy for hosted containers.""" + + allowed_domains: list[str] + type: Literal["allowlist"] + domain_secrets: NotRequired[list[ShellToolContainerNetworkPolicyDomainSecret]] + + +class ShellToolContainerNetworkPolicyDisabled(TypedDict): + """Disabled network policy for hosted containers.""" + + type: Literal["disabled"] + + +ShellToolContainerNetworkPolicy = Union[ + ShellToolContainerNetworkPolicyAllowlist, + ShellToolContainerNetworkPolicyDisabled, +] +"""Network policy configuration for hosted shell containers.""" + + +class ShellToolLocalEnvironment(TypedDict): + """Local shell execution environment.""" + + type: Literal["local"] + skills: NotRequired[list[ShellToolLocalSkill]] + + +class ShellToolContainerAutoEnvironment(TypedDict): + """Auto-provisioned hosted container environment.""" + + type: Literal["container_auto"] + file_ids: NotRequired[list[str]] + memory_limit: NotRequired[Literal["1g", "4g", "16g", "64g"] | None] + network_policy: NotRequired[ShellToolContainerNetworkPolicy] + skills: NotRequired[list[ShellToolContainerSkill]] + + +class ShellToolContainerReferenceEnvironment(TypedDict): + """Reference to an existing hosted container.""" + + type: Literal["container_reference"] + container_id: str + + +ShellToolHostedEnvironment = Union[ + ShellToolContainerAutoEnvironment, + ShellToolContainerReferenceEnvironment, +] +"""Hosted shell environment variants.""" + +ShellToolEnvironment = Union[ShellToolLocalEnvironment, ShellToolHostedEnvironment] +"""All supported shell environments.""" + + @dataclass class ShellCallOutcome: """Describes the terminal condition of a shell command.""" @@ -675,11 +775,26 @@ class ShellCommandRequest: """Executes a shell command sequence and returns either text or structured output.""" +def _normalize_shell_tool_environment( + environment: ShellToolEnvironment | None, +) -> ShellToolEnvironment: + """Normalize shell environment into a predictable mapping shape.""" + if environment is None: + return {"type": "local"} + if not isinstance(environment, Mapping): + raise UserError("ShellTool environment must be a mapping.") + + normalized = dict(environment) + if "type" not in normalized: + normalized["type"] = "local" + return cast(ShellToolEnvironment, normalized) + + @dataclass class ShellTool: """Next-generation shell tool. LocalShellTool will be deprecated in favor of this.""" - executor: ShellExecutor + executor: ShellExecutor | None = None name: str = "shell" needs_approval: bool | ShellApprovalFunction = False """Whether the shell tool needs approval before execution. If True, the run will be interrupted @@ -692,6 +807,31 @@ class ShellTool: """Optional handler to auto-approve or reject when approval is required. If provided, it will be invoked immediately when an approval is needed. """ + environment: ShellToolEnvironment | None = None + """Execution environment for shell commands. + + If omitted, local mode is used. + """ + + def __post_init__(self) -> None: + """Validate shell tool configuration and normalize environment fields.""" + normalized_environment = _normalize_shell_tool_environment(self.environment) + self.environment = normalized_environment + + environment_type = normalized_environment["type"] + if environment_type == "local": + if self.executor is None: + raise UserError("ShellTool with local environment requires an executor.") + return + + if self.executor is not None: + raise UserError("ShellTool with hosted environment does not accept an executor.") + if self.needs_approval is not False or self.on_approval is not None: + raise UserError( + "ShellTool with hosted environment does not support needs_approval or on_approval." + ) + self.needs_approval = False + self.on_approval = None @property def type(self) -> str: diff --git a/tests/test_local_shell_tool.py b/tests/test_local_shell_tool.py index 013c1d1fc2..cdc0d9a7f1 100644 --- a/tests/test_local_shell_tool.py +++ b/tests/test_local_shell_tool.py @@ -144,6 +144,7 @@ async def test_runner_executes_local_shell_calls() -> None: local_shell_output = items[2] assert isinstance(local_shell_output, ToolCallOutputItem) + assert isinstance(local_shell_output.raw_item, dict) assert local_shell_output.raw_item.get("type") == "local_shell_call_output" assert local_shell_output.output == "shell result" diff --git a/tests/test_openai_responses_converter.py b/tests/test_openai_responses_converter.py index 62e058e00e..376bf67a1b 100644 --- a/tests/test_openai_responses_converter.py +++ b/tests/test_openai_responses_converter.py @@ -23,6 +23,8 @@ one `ComputerTool`. """ +from typing import Any, cast + import pytest from openai import omit from pydantic import BaseModel @@ -34,6 +36,7 @@ ComputerTool, FileSearchTool, Handoff, + ShellTool, Tool, UserError, WebSearchTool, @@ -187,6 +190,127 @@ def test_convert_tools_basic_types_and_includes(): Converter.convert_tools(tools=[comp_tool, comp_tool], handoffs=[]) +def test_convert_tools_shell_local_environment() -> None: + shell_tool = ShellTool(executor=lambda request: "ok") + + converted = Converter.convert_tools(tools=[shell_tool], handoffs=[]) + + assert converted.tools == [{"type": "shell", "environment": {"type": "local"}}] + assert converted.includes == [] + + +def test_convert_tools_shell_container_reference_environment() -> None: + shell_tool = ShellTool(environment={"type": "container_reference", "container_id": "cntr_123"}) + + converted = Converter.convert_tools(tools=[shell_tool], handoffs=[]) + + assert converted.tools == [ + { + "type": "shell", + "environment": { + "type": "container_reference", + "container_id": "cntr_123", + }, + } + ] + + +def test_convert_tools_shell_container_auto_environment() -> None: + shell_tool = ShellTool( + environment={ + "type": "container_auto", + "file_ids": ["file-123"], + "memory_limit": "1g", + "network_policy": { + "type": "allowlist", + "allowed_domains": ["example.com"], + "domain_secrets": [{"domain": "example.com", "name": "TOKEN", "value": "secret"}], + }, + "skills": [ + {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"}, + { + "type": "inline", + "name": "csv-workbench", + "description": "Analyze CSV files.", + "source": { + "type": "base64", + "media_type": "application/zip", + "data": "ZmFrZS16aXA=", + }, + }, + ], + } + ) + + converted = Converter.convert_tools(tools=[shell_tool], handoffs=[]) + + assert converted.tools == [ + { + "type": "shell", + "environment": { + "type": "container_auto", + "file_ids": ["file-123"], + "memory_limit": "1g", + "network_policy": { + "type": "allowlist", + "allowed_domains": ["example.com"], + "domain_secrets": [ + {"domain": "example.com", "name": "TOKEN", "value": "secret"} + ], + }, + "skills": [ + { + "type": "skill_reference", + "skill_id": "skill_123", + "version": "latest", + }, + { + "type": "inline", + "name": "csv-workbench", + "description": "Analyze CSV files.", + "source": { + "type": "base64", + "media_type": "application/zip", + "data": "ZmFrZS16aXA=", + }, + }, + ], + }, + } + ] + + +def test_convert_tools_shell_environment_passes_through_unknown_fields() -> None: + shell_tool = ShellTool( + environment=cast( + Any, + { + "type": "container_auto", + "network_policy": { + "type": "future_mode", + "allowed_domains": ["example.com"], + "some_new_field": "keep-me", + }, + }, + ) + ) + + converted = Converter.convert_tools(tools=[shell_tool], handoffs=[]) + assert converted.tools == [ + { + "type": "shell", + "environment": { + "type": "container_auto", + "network_policy": { + "type": "future_mode", + "allowed_domains": ["example.com"], + "some_new_field": "keep-me", + }, + }, + } + ] + + def test_convert_tools_includes_handoffs(): """ When handoff objects are included, `convert_tools` should append their diff --git a/tests/test_process_model_response.py b/tests/test_process_model_response.py index d26559a68d..ee51f5b521 100644 --- a/tests/test_process_model_response.py +++ b/tests/test_process_model_response.py @@ -1,9 +1,16 @@ +from typing import Any, cast + import pytest -from openai.types.responses import ResponseCompactionItem +from openai.types.responses import ( + ResponseApplyPatchToolCall, + ResponseCompactionItem, + ResponseFunctionShellToolCall, + ResponseFunctionShellToolCallOutput, +) -from agents import Agent, ApplyPatchTool, CompactionItem +from agents import Agent, ApplyPatchTool, CompactionItem, ShellTool from agents.exceptions import ModelBehaviorError -from agents.items import ModelResponse +from agents.items import ModelResponse, ToolCallItem, ToolCallOutputItem from agents.run_internal import run_loop from agents.usage import Usage from tests.fake_model import FakeModel @@ -35,6 +42,143 @@ def test_process_model_response_shell_call_without_tool_raises() -> None: ) +def test_process_model_response_skips_local_shell_execution_for_hosted_environment() -> None: + shell_tool = ShellTool(environment={"type": "container_auto"}) + agent = Agent(name="hosted-shell", model=FakeModel(), tools=[shell_tool]) + shell_call = make_shell_call("shell-hosted-1") + + processed = run_loop.process_model_response( + agent=agent, + all_tools=[shell_tool], + response=_response([shell_call]), + output_schema=None, + handoffs=[], + ) + + assert len(processed.new_items) == 1 + assert isinstance(processed.new_items[0], ToolCallItem) + assert processed.shell_calls == [] + assert processed.tools_used == ["shell"] + + +def test_process_model_response_sanitizes_shell_call_model_object() -> None: + shell_call = ResponseFunctionShellToolCall( + type="shell_call", + id="sh_call_2", + call_id="call_shell_2", + status="completed", + created_by="server", + action=cast(Any, {"commands": ["echo hi"], "timeout_ms": 1000}), + ) + shell_tool = ShellTool(environment={"type": "container_auto"}) + agent = Agent(name="hosted-shell-model", model=FakeModel(), tools=[shell_tool]) + + processed = run_loop.process_model_response( + agent=agent, + all_tools=[shell_tool], + response=_response([shell_call]), + output_schema=None, + handoffs=[], + ) + + assert len(processed.new_items) == 1 + item = processed.new_items[0] + assert isinstance(item, ToolCallItem) + assert isinstance(item.raw_item, dict) + assert item.raw_item["type"] == "shell_call" + assert "created_by" not in item.raw_item + next_input = item.to_input_item() + assert isinstance(next_input, dict) + assert next_input["type"] == "shell_call" + assert "created_by" not in next_input + assert processed.shell_calls == [] + assert processed.tools_used == ["shell"] + + +def test_process_model_response_preserves_shell_call_output() -> None: + shell_output = { + "type": "shell_call_output", + "id": "sh_out_1", + "call_id": "call_shell_1", + "status": "completed", + "max_output_length": 1000, + "output": [ + { + "stdout": "ok\n", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}, + } + ], + } + agent = Agent(name="shell-output", model=FakeModel()) + + processed = run_loop.process_model_response( + agent=agent, + all_tools=[], + response=_response([shell_output]), + output_schema=None, + handoffs=[], + ) + + assert len(processed.new_items) == 1 + assert isinstance(processed.new_items[0], ToolCallOutputItem) + assert processed.new_items[0].raw_item == shell_output + assert processed.tools_used == ["shell"] + assert processed.shell_calls == [] + + +def test_process_model_response_sanitizes_shell_call_output_model_object() -> None: + shell_output = ResponseFunctionShellToolCallOutput( + type="shell_call_output", + id="sh_out_2", + call_id="call_shell_2", + status="completed", + created_by="server", + output=cast( + Any, + [ + { + "stdout": "ok\n", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}, + "created_by": "server", + } + ], + ), + ) + agent = Agent(name="shell-output-model", model=FakeModel()) + + processed = run_loop.process_model_response( + agent=agent, + all_tools=[], + response=_response([shell_output]), + output_schema=None, + handoffs=[], + ) + + assert len(processed.new_items) == 1 + item = processed.new_items[0] + assert isinstance(item, ToolCallOutputItem) + assert isinstance(item.raw_item, dict) + assert item.raw_item["type"] == "shell_call_output" + assert "created_by" not in item.raw_item + shell_outputs = item.raw_item.get("output") + assert isinstance(shell_outputs, list) + assert isinstance(shell_outputs[0], dict) + assert "created_by" not in shell_outputs[0] + + next_input = item.to_input_item() + assert isinstance(next_input, dict) + assert next_input["type"] == "shell_call_output" + assert "status" not in next_input + assert "created_by" not in next_input + next_outputs = next_input.get("output") + assert isinstance(next_outputs, list) + assert isinstance(next_outputs[0], dict) + assert "created_by" not in next_outputs[0] + assert processed.tools_used == ["shell"] + + def test_process_model_response_apply_patch_call_without_tool_raises() -> None: agent = Agent(name="no-apply", model=FakeModel()) apply_patch_call = make_apply_patch_dict("apply-1", diff="-old\n+new\n") @@ -49,6 +193,48 @@ def test_process_model_response_apply_patch_call_without_tool_raises() -> None: ) +def test_process_model_response_sanitizes_apply_patch_call_model_object() -> None: + editor = RecordingEditor() + apply_patch_tool = ApplyPatchTool(editor=editor) + agent = Agent(name="apply-agent-model", model=FakeModel(), tools=[apply_patch_tool]) + apply_patch_call = ResponseApplyPatchToolCall( + type="apply_patch_call", + id="ap_call_1", + call_id="call_apply_1", + status="completed", + created_by="server", + operation=cast( + Any, + {"type": "update_file", "path": "test.md", "diff": "-old\n+new\n"}, + ), + ) + + processed = run_loop.process_model_response( + agent=agent, + all_tools=[apply_patch_tool], + response=_response([apply_patch_call]), + output_schema=None, + handoffs=[], + ) + + assert len(processed.new_items) == 1 + item = processed.new_items[0] + assert isinstance(item, ToolCallItem) + assert isinstance(item.raw_item, dict) + assert item.raw_item["type"] == "apply_patch_call" + assert "created_by" not in item.raw_item + next_input = item.to_input_item() + assert isinstance(next_input, dict) + assert next_input["type"] == "apply_patch_call" + assert "created_by" not in next_input + assert len(processed.apply_patch_calls) == 1 + queued_call = processed.apply_patch_calls[0].tool_call + assert isinstance(queued_call, dict) + assert queued_call["type"] == "apply_patch_call" + assert "created_by" not in queued_call + assert processed.tools_used == [apply_patch_tool.name] + + def test_process_model_response_converts_custom_apply_patch_call() -> None: editor = RecordingEditor() apply_patch_tool = ApplyPatchTool(editor=editor) diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index a9c747cd25..720b0611cd 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -175,6 +175,77 @@ async def test_plaintext_agent_with_tool_call_is_run_again(): assert isinstance(result.next_step, NextStepRunAgain) +@pytest.mark.asyncio +async def test_plaintext_agent_hosted_shell_items_without_message_runs_again(): + shell_tool = ShellTool(environment={"type": "container_auto"}) + agent = Agent(name="test", tools=[shell_tool]) + response = ModelResponse( + output=[ + make_shell_call( + "call_shell_hosted", id_value="shell_call_hosted", commands=["echo hi"] + ), + cast( + Any, + { + "type": "shell_call_output", + "id": "sh_out_hosted", + "call_id": "call_shell_hosted", + "status": "completed", + "output": [ + { + "stdout": "hi\n", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}, + } + ], + }, + ), + ], + usage=Usage(), + response_id=None, + ) + + result = await get_execute_result(agent, response) + + assert len(result.generated_items) == 2 + assert isinstance(result.generated_items[0], ToolCallItem) + assert isinstance(result.generated_items[1], ToolCallOutputItem) + assert isinstance(result.next_step, NextStepRunAgain) + + +@pytest.mark.asyncio +async def test_plaintext_agent_shell_output_only_without_message_runs_again(): + agent = Agent(name="test") + response = ModelResponse( + output=[ + cast( + Any, + { + "type": "shell_call_output", + "id": "sh_out_only", + "call_id": "call_shell_only", + "status": "completed", + "output": [ + { + "stdout": "hi\n", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}, + } + ], + }, + ), + ], + usage=Usage(), + response_id=None, + ) + + result = await get_execute_result(agent, response) + + assert len(result.generated_items) == 1 + assert isinstance(result.generated_items[0], ToolCallOutputItem) + assert isinstance(result.next_step, NextStepRunAgain) + + @pytest.mark.asyncio async def test_multiple_tool_calls(): agent = Agent( diff --git a/tests/test_shell_tool.py b/tests/test_shell_tool.py index 35fb1fcee8..2ec532f049 100644 --- a/tests/test_shell_tool.py +++ b/tests/test_shell_tool.py @@ -13,9 +13,10 @@ ShellCommandOutput, ShellResult, ShellTool, + UserError, ) from agents.items import ToolApprovalItem, ToolCallOutputItem -from agents.run_internal.run_loop import ShellAction, ToolRunShellCall +from agents.run_internal.run_loop import ShellAction, ToolRunShellCall, execute_shell_calls from .utils.hitl import ( HITL_REJECTION_MSG, @@ -40,6 +41,169 @@ def _shell_call(call_id: str = "call_shell") -> dict[str, Any]: ) +def test_shell_tool_defaults_to_local_environment() -> None: + shell_tool = ShellTool(executor=lambda request: "ok") + + assert shell_tool.environment == {"type": "local"} + assert shell_tool.executor is not None + + +def test_shell_tool_supports_hosted_environment_without_executor() -> None: + shell_tool = ShellTool( + environment={ + "type": "container_reference", + "container_id": "cntr_123", + } + ) + + assert shell_tool.environment == {"type": "container_reference", "container_id": "cntr_123"} + assert shell_tool.executor is None + + +def test_shell_tool_normalizes_container_auto_environment() -> None: + shell_tool = ShellTool( + environment={ + "type": "container_auto", + "file_ids": ["file_123"], + "memory_limit": "4g", + "network_policy": { + "type": "allowlist", + "allowed_domains": ["example.com"], + "domain_secrets": [ + { + "domain": "example.com", + "name": "API_TOKEN", + "value": "secret", + } + ], + }, + "skills": [ + {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"}, + { + "type": "inline", + "name": "csv-workbench", + "description": "Analyze CSV files.", + "source": { + "type": "base64", + "media_type": "application/zip", + "data": "ZmFrZS16aXA=", + }, + }, + ], + } + ) + + assert shell_tool.environment == { + "type": "container_auto", + "file_ids": ["file_123"], + "memory_limit": "4g", + "network_policy": { + "type": "allowlist", + "allowed_domains": ["example.com"], + "domain_secrets": [ + { + "domain": "example.com", + "name": "API_TOKEN", + "value": "secret", + } + ], + }, + "skills": [ + {"type": "skill_reference", "skill_id": "skill_123", "version": "latest"}, + { + "type": "inline", + "name": "csv-workbench", + "description": "Analyze CSV files.", + "source": { + "type": "base64", + "media_type": "application/zip", + "data": "ZmFrZS16aXA=", + }, + }, + ], + } + + +def test_shell_tool_rejects_local_mode_without_executor() -> None: + with pytest.raises(UserError, match="requires an executor"): + ShellTool() + + with pytest.raises(UserError, match="requires an executor"): + ShellTool(environment={"type": "local"}) + + +def test_shell_tool_allows_unvalidated_hosted_environment_shapes() -> None: + shell_tool = ShellTool(environment=cast(Any, {"type": "container_reference"})) + assert shell_tool.environment == {"type": "container_reference"} + + shell_tool = ShellTool( + environment=cast( + Any, + { + "type": "container_auto", + "network_policy": { + "type": "future_mode", + "allowed_domains": ["example.com"], + "some_new_field": True, + }, + "skills": [{"type": "skill_reference"}], + }, + ) + ) + assert isinstance(shell_tool.environment, dict) + assert shell_tool.environment["type"] == "container_auto" + + +def test_shell_tool_rejects_local_executor_and_approval_for_hosted_environment() -> None: + with pytest.raises(UserError, match="does not accept an executor"): + ShellTool( + executor=lambda request: "ok", + environment={"type": "container_reference", "container_id": "cntr_123"}, + ) + + with pytest.raises(UserError, match="does not support needs_approval or on_approval"): + ShellTool( + environment={"type": "container_reference", "container_id": "cntr_123"}, + needs_approval=True, + ) + + with pytest.raises(UserError, match="does not support needs_approval or on_approval"): + ShellTool( + environment={"type": "container_reference", "container_id": "cntr_123"}, + on_approval=lambda _context, _item: {"approve": True}, + ) + + +@pytest.mark.asyncio +async def test_execute_shell_calls_surfaces_missing_local_executor() -> None: + shell_tool = ShellTool( + environment={ + "type": "container_reference", + "container_id": "cntr_123", + } + ) + tool_run = ToolRunShellCall(tool_call=_shell_call(), shell_tool=shell_tool) + agent = Agent(name="shell-agent", tools=[shell_tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await execute_shell_calls( + agent=agent, + calls=[tool_run], + context_wrapper=context_wrapper, + hooks=RunHooks[Any](), + config=RunConfig(), + ) + + assert len(result) == 1 + output_item = result[0] + assert isinstance(output_item, ToolCallOutputItem) + assert output_item.output == "Shell tool has no local executor configured." + raw_item = cast(dict[str, Any], output_item.raw_item) + assert raw_item["type"] == "shell_call_output" + assert raw_item["call_id"] == "call_shell" + assert raw_item["status"] == "failed" + + @pytest.mark.asyncio async def test_shell_tool_structured_output_is_rendered() -> None: shell_tool = ShellTool( diff --git a/tests/test_tool_metadata.py b/tests/test_tool_metadata.py index ad6395e9b1..42440ea9d6 100644 --- a/tests/test_tool_metadata.py +++ b/tests/test_tool_metadata.py @@ -46,7 +46,9 @@ def test_tool_name_properties() -> None: assert CodeInterpreterTool(tool_config=dummy_code).name == "code_interpreter" assert ImageGenerationTool(tool_config=dummy_image).name == "image_generation" assert LocalShellTool(executor=lambda req: "ok").name == "local_shell" - assert ShellTool(executor=lambda req: "ok").type == "shell" + shell_tool = ShellTool(executor=lambda req: "ok") + assert shell_tool.type == "shell" + assert shell_tool.environment == {"type": "local"} assert ApplyPatchTool(editor=DummyEditor()).type == "apply_patch" diff --git a/uv.lock b/uv.lock index 8d7915f0b3..38bc480342 100644 --- a/uv.lock +++ b/uv.lock @@ -2151,7 +2151,7 @@ wheels = [ [[package]] name = "openai" -version = "2.9.0" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2163,9 +2163,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = "2025-12-04T18:15:09.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/5a/f495777c02625bfa18212b6e3b73f1893094f2bf660976eb4bc6f43a1ca2/openai-2.20.0.tar.gz", hash = "sha256:2654a689208cd0bf1098bb9462e8d722af5cbe961e6bba54e6f19fb843d88db1", size = 642355, upload-time = "2026-02-10T19:02:54.145Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a0/cf4297aa51bbc21e83ef0ac018947fa06aea8f2364aad7c96cbf148590e6/openai-2.20.0-py3-none-any.whl", hash = "sha256:38d989c4b1075cd1f76abc68364059d822327cf1a932531d429795f4fc18be99", size = 1098479, upload-time = "2026-02-10T19:02:52.157Z" }, ] [[package]] @@ -2256,7 +2256,7 @@ requires-dist = [ { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.81.0,<2" }, { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.19.0,<2" }, { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" }, - { name = "openai", specifier = ">=2.9.0,<3" }, + { name = "openai", specifier = ">=2.19.0,<3" }, { name = "pydantic", specifier = ">=2.12.3,<3" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=7" }, { name = "requests", specifier = ">=2.0,<3" }, From c5797fca23b4faee21a662eff54fd899303de190 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Wed, 11 Feb 2026 09:27:22 -0800 Subject: [PATCH 2/2] fix review comment --- src/agents/run_internal/turn_resolution.py | 2 +- tests/test_run_step_execution.py | 52 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index 07dd913e63..e51fa801c0 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -602,7 +602,7 @@ async def execute_tools_and_side_effects( ) if not processed_response.has_tools_or_approvals_to_run(): - has_tool_activity_without_message = potential_final_output_text is None and bool( + has_tool_activity_without_message = not message_items and bool( processed_response.tools_used ) if not has_tool_activity_without_message: diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index 720b0611cd..a9ebc225a6 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -6,6 +6,8 @@ import pytest from openai.types.responses.response_output_item import McpApprovalRequest +from openai.types.responses.response_output_message import ResponseOutputMessage +from openai.types.responses.response_output_refusal import ResponseOutputRefusal from pydantic import BaseModel from agents import ( @@ -246,6 +248,56 @@ async def test_plaintext_agent_shell_output_only_without_message_runs_again(): assert isinstance(result.next_step, NextStepRunAgain) +@pytest.mark.asyncio +async def test_plaintext_agent_hosted_shell_with_refusal_message_is_final_output(): + shell_tool = ShellTool(environment={"type": "container_auto"}) + agent = Agent(name="test", tools=[shell_tool]) + refusal_message = ResponseOutputMessage( + id="msg_refusal", + type="message", + role="assistant", + content=[ResponseOutputRefusal(type="refusal", refusal="I cannot help with that.")], + status="completed", + ) + response = ModelResponse( + output=[ + make_shell_call( + "call_shell_hosted_refusal", + id_value="shell_call_hosted_refusal", + commands=["echo hi"], + ), + cast( + Any, + { + "type": "shell_call_output", + "id": "sh_out_hosted_refusal", + "call_id": "call_shell_hosted_refusal", + "status": "completed", + "output": [ + { + "stdout": "hi\n", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}, + } + ], + }, + ), + refusal_message, + ], + usage=Usage(), + response_id=None, + ) + + result = await get_execute_result(agent, response) + + assert len(result.generated_items) == 3 + assert isinstance(result.generated_items[0], ToolCallItem) + assert isinstance(result.generated_items[1], ToolCallOutputItem) + assert isinstance(result.generated_items[2], MessageOutputItem) + assert isinstance(result.next_step, NextStepFinalOutput) + assert result.next_step.output == "" + + @pytest.mark.asyncio async def test_multiple_tool_calls(): agent = Agent(