diff --git a/docs/context.md b/docs/context.md index 1c7f19bef..9fde4be29 100644 --- a/docs/context.md +++ b/docs/context.md @@ -126,6 +126,7 @@ plus additional fields specific to the current tool call: - `tool_arguments` – the raw argument string passed to the tool - `tool_namespace` – the Responses namespace for the tool call, when the tool was loaded through `tool_namespace()` or another namespaced surface - `qualified_tool_name` – the tool name qualified with the namespace when one is available +- `conversation_history` – a visible history snapshot available to the tool at invocation time. For local function tools in non-streaming runs, this includes the current input plus prior visible run items that can be represented as model input. Use `ToolContext` when you need tool-level metadata during execution. For general context sharing between agents and tools, `RunContextWrapper` remains sufficient. Because `ToolContext` extends `RunContextWrapper`, it can also expose `.tool_input` when a nested `Agent.as_tool()` run supplied structured input. diff --git a/src/agents/agent.py b/src/agents/agent.py index 67247afd0..9cd7d9b97 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -602,6 +602,7 @@ async def _run_agent_impl(context: ToolContext, input_json: str) -> Any: tool_namespace=context.tool_namespace, agent=context.agent, run_config=resolved_run_config, + conversation_history=context.conversation_history, ) set_agent_tool_state_scope(nested_context, tool_state_scope_id) if should_capture_tool_input: diff --git a/src/agents/result.py b/src/agents/result.py index 774c90dc4..59fd389bf 100644 --- a/src/agents/result.py +++ b/src/agents/result.py @@ -99,6 +99,10 @@ def _populate_state_from_result( state._reasoning_item_id_policy = getattr(result, "_reasoning_item_id_policy", None) interruptions = list(getattr(result, "interruptions", [])) + if interruptions: + state._interrupted_turn_input = copy.deepcopy(result.context_wrapper._tool_history_input) + else: + state._interrupted_turn_input = None if interruptions: state._current_step = NextStepInterruption(interruptions=interruptions) diff --git a/src/agents/run.py b/src/agents/run.py index 047d454d3..d0725b7e2 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -570,6 +570,13 @@ async def run( session_items = [] model_responses = [] context_wrapper = ensure_context_wrapper(context) + preserve_tool_history = ( + conversation_id is not None + and context_wrapper._tool_history_conversation_id == conversation_id + ) + if not preserve_tool_history: + context_wrapper._tool_history_input = [] + context_wrapper._tool_history_conversation_id = None set_agent_tool_state_scope(context_wrapper, None) run_state = RunState( context=context_wrapper, @@ -1505,6 +1512,13 @@ def run_streamed( auto_previous_response_id=auto_previous_response_id, ) context_wrapper = ensure_context_wrapper(context) + preserve_tool_history = ( + conversation_id is not None + and context_wrapper._tool_history_conversation_id == conversation_id + ) + if not preserve_tool_history: + context_wrapper._tool_history_input = [] + context_wrapper._tool_history_conversation_id = None set_agent_tool_state_scope(context_wrapper, None) # input_for_state is the same as input_for_result here input_for_state = input_for_result diff --git a/src/agents/run_context.py b/src/agents/run_context.py index df7047eb3..7c55a4950 100644 --- a/src/agents/run_context.py +++ b/src/agents/run_context.py @@ -57,6 +57,10 @@ class RunContextWrapper(Generic[TContext]): """ turn_input: list[TResponseInputItem] = field(default_factory=list) + _tool_history_input: list[TResponseInputItem] = field( + default_factory=list, repr=False, init=False + ) + _tool_history_conversation_id: str | None = field(default=None, repr=False, init=False) _approvals: dict[str, _ApprovalRecord] = field(default_factory=dict) tool_input: Any | None = None """Structured input for the current agent tool run, when available.""" @@ -460,6 +464,8 @@ def _fork_with_tool_input(self, tool_input: Any) -> RunContextWrapper[TContext]: fork.usage = self.usage fork._approvals = self._approvals fork.turn_input = self.turn_input + fork._tool_history_input = self._tool_history_input + fork._tool_history_conversation_id = self._tool_history_conversation_id fork.tool_input = tool_input return fork @@ -469,6 +475,8 @@ def _fork_without_tool_input(self) -> RunContextWrapper[TContext]: fork.usage = self.usage fork._approvals = self._approvals fork.turn_input = self.turn_input + fork._tool_history_input = self._tool_history_input + fork._tool_history_conversation_id = self._tool_history_conversation_id return fork diff --git a/src/agents/run_internal/agent_runner_helpers.py b/src/agents/run_internal/agent_runner_helpers.py index 776e40670..90a9af9a1 100644 --- a/src/agents/run_internal/agent_runner_helpers.py +++ b/src/agents/run_internal/agent_runner_helpers.py @@ -2,6 +2,7 @@ from __future__ import annotations +import copy from typing import Any, cast from ..agent import Agent @@ -311,6 +312,11 @@ def update_run_state_for_interruption( run_state._session_items = list(session_items) run_state._current_step = next_step run_state._current_turn = current_turn + run_state._interrupted_turn_input = ( + copy.deepcopy(run_state._context._tool_history_input) + if run_state._context is not None + else None + ) async def save_turn_items_if_needed( diff --git a/src/agents/run_internal/run_loop.py b/src/agents/run_internal/run_loop.py index 3d21d89fd..f9f55f7ed 100644 --- a/src/agents/run_internal/run_loop.py +++ b/src/agents/run_internal/run_loop.py @@ -6,6 +6,7 @@ from __future__ import annotations import asyncio +import copy import dataclasses as _dc import json from collections.abc import Awaitable, Callable, Mapping @@ -986,6 +987,9 @@ async def _save_stream_items_without_count( run_state._session_items = list(streamed_result.new_items) run_state._current_step = turn_result.next_step run_state._current_turn = current_turn + run_state._interrupted_turn_input = copy.deepcopy( + context_wrapper._tool_history_input + ) run_state._current_turn_persisted_item_count = ( streamed_result._current_turn_persisted_item_count ) @@ -1189,6 +1193,7 @@ def _tool_search_fingerprint(raw_item: Any) -> str: reasoning_item_id_policy, ) + prior_tool_history_input = list(context_wrapper._tool_history_input) filtered = await maybe_filter_model_input( agent=agent, run_config=run_config, @@ -1198,6 +1203,13 @@ def _tool_search_fingerprint(raw_item: Any) -> str: ) if isinstance(filtered.input, list): filtered.input = deduplicate_input_items_preferring_latest(filtered.input) + context_wrapper._tool_history_input = list(filtered.input) + if server_conversation_tracker is not None: + context_wrapper._tool_history_input = prepare_model_input_items( + prior_tool_history_input, + context_wrapper._tool_history_input, + ) + context_wrapper._tool_history_conversation_id = server_conversation_tracker.conversation_id hosted_mcp_tool_metadata = collect_mcp_list_tools_metadata(streamed_result._model_input_items) if isinstance(filtered.input, list): hosted_mcp_tool_metadata.update(collect_mcp_list_tools_metadata(filtered.input)) @@ -1529,6 +1541,7 @@ async def run_single_turn( else: input = _prepare_turn_input_items(original_input, generated_items, reasoning_item_id_policy) + prior_tool_history_input = list(context_wrapper._tool_history_input) new_response = await get_new_response( agent, system_prompt, @@ -1545,6 +1558,12 @@ async def run_single_turn( session=session, session_items_to_rewind=session_items_to_rewind, ) + if server_conversation_tracker is not None: + context_wrapper._tool_history_input = prepare_model_input_items( + prior_tool_history_input, + context_wrapper._tool_history_input, + ) + context_wrapper._tool_history_conversation_id = server_conversation_tracker.conversation_id return await get_single_step_result_from_response( agent=agent, @@ -1587,6 +1606,7 @@ async def get_new_response( ) if isinstance(filtered.input, list): filtered.input = deduplicate_input_items_preferring_latest(filtered.input) + context_wrapper._tool_history_input = list(filtered.input) model = get_model(agent, run_config) model_settings = agent.model_settings.resolve(run_config.model_settings) diff --git a/src/agents/run_internal/tool_execution.py b/src/agents/run_internal/tool_execution.py index 349066ba4..c5cd78622 100644 --- a/src/agents/run_internal/tool_execution.py +++ b/src/agents/run_internal/tool_execution.py @@ -55,6 +55,7 @@ RunItemBase, ToolApprovalItem, ToolCallOutputItem, + TResponseInputItem, ) from ..logger import logger from ..model_settings import ModelSettings @@ -1284,6 +1285,7 @@ def __init__( hooks: RunHooks[Any], context_wrapper: RunContextWrapper[Any], config: RunConfig, + conversation_history: list[TResponseInputItem] | None, isolate_parallel_failures: bool | None, ) -> None: self.agent = agent @@ -1291,6 +1293,9 @@ def __init__( self.hooks = hooks self.context_wrapper = context_wrapper self.config = config + self.conversation_history = ( + list(conversation_history) if conversation_history is not None else None + ) self.isolate_parallel_failures = ( len(tool_runs) > 1 if isolate_parallel_failures is None else isolate_parallel_failures ) @@ -1465,6 +1470,7 @@ async def _run_single_tool( tool_namespace=tool_context_namespace, agent=self.agent, run_config=self.config, + conversation_history=self.conversation_history, ) agent_hooks = self.agent.hooks if self.config.trace_include_sensitive_data: @@ -1797,6 +1803,7 @@ async def execute_function_tool_calls( hooks: RunHooks[Any], context_wrapper: RunContextWrapper[Any], config: RunConfig, + conversation_history: list[TResponseInputItem] | None = None, isolate_parallel_failures: bool | None = None, ) -> tuple[ list[FunctionToolResult], list[ToolInputGuardrailResult], list[ToolOutputGuardrailResult] @@ -1808,6 +1815,7 @@ async def execute_function_tool_calls( hooks=hooks, context_wrapper=context_wrapper, config=config, + conversation_history=conversation_history, isolate_parallel_failures=isolate_parallel_failures, ).execute() diff --git a/src/agents/run_internal/tool_planning.py b/src/agents/run_internal/tool_planning.py index dabb83b4a..1abec1a9f 100644 --- a/src/agents/run_internal/tool_planning.py +++ b/src/agents/run_internal/tool_planning.py @@ -20,6 +20,7 @@ ToolApprovalItem, ToolCallItem, ToolCallOutputItem, + TResponseInputItem, ) from ..run_context import RunContextWrapper from ..tool import FunctionTool, MCPToolApprovalRequest @@ -522,6 +523,7 @@ async def _execute_tool_plan( hooks, context_wrapper: RunContextWrapper[Any], run_config, + conversation_history: list[TResponseInputItem] | None = None, parallel: bool = True, ) -> tuple[ list[Any], @@ -556,6 +558,7 @@ async def _execute_tool_plan( hooks=hooks, context_wrapper=context_wrapper, config=run_config, + conversation_history=conversation_history, isolate_parallel_failures=isolate_function_tool_failures, ), execute_computer_actions( @@ -598,6 +601,7 @@ async def _execute_tool_plan( hooks=hooks, context_wrapper=context_wrapper, config=run_config, + conversation_history=conversation_history, isolate_parallel_failures=isolate_function_tool_failures, ) computer_results = await execute_computer_actions( diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index c34c720fc..b3f151881 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import copy import inspect from collections.abc import Awaitable, Callable, Mapping, Sequence from typing import Any, Literal, cast @@ -87,7 +88,10 @@ from .items import ( REJECTION_MESSAGE, apply_patch_rejection_item, + deduplicate_input_items_preferring_latest, function_rejection_item, + prepare_model_input_items, + run_items_to_input_items, shell_rejection_item, ) from .run_steps import ( @@ -139,6 +143,7 @@ _make_unique_item_appender, _select_function_tool_runs_for_resume, ) +from .turn_preparation import maybe_filter_model_input __all__ = [ "execute_final_output_step", @@ -153,6 +158,13 @@ ] +def _build_function_tool_conversation_history( + turn_input: Sequence[TResponseInputItem], +) -> list[TResponseInputItem]: + """Build the visible history snapshot for a local function tool invocation.""" + return list(turn_input) + + async def _maybe_finalize_from_tool_results( *, agent: Agent[TContext], @@ -528,6 +540,10 @@ async def execute_tools_and_side_effects( new_items=processed_response.new_items, ) + conversation_history = _build_function_tool_conversation_history( + context_wrapper._tool_history_input + ) + ( function_results, tool_input_guardrail_results, @@ -542,6 +558,7 @@ async def execute_tools_and_side_effects( hooks=hooks, context_wrapper=context_wrapper, run_config=run_config, + conversation_history=conversation_history, ) new_step_items.extend( _build_tool_result_items( @@ -1103,6 +1120,35 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None: apply_patch_calls=approved_apply_patch_calls, ) + resolved_reasoning_item_id_policy = ( + run_config.reasoning_item_id_policy + if run_config.reasoning_item_id_policy is not None + else (run_state._reasoning_item_id_policy if run_state is not None else None) + ) + if run_state is not None and isinstance(run_state._interrupted_turn_input, list): + context_wrapper._tool_history_input = copy.deepcopy(run_state._interrupted_turn_input) + else: + reconstructed_turn_input = prepare_model_input_items( + ItemHelpers.input_to_new_input_list(original_input), + run_items_to_input_items(original_pre_step_items, resolved_reasoning_item_id_policy), + ) + system_prompt = await agent.get_system_prompt(context_wrapper) + filtered_model_input = await maybe_filter_model_input( + agent=agent, + run_config=run_config, + context_wrapper=context_wrapper, + input_items=reconstructed_turn_input, + system_instructions=system_prompt, + ) + if isinstance(filtered_model_input.input, list): + filtered_model_input.input = deduplicate_input_items_preferring_latest( + filtered_model_input.input + ) + context_wrapper._tool_history_input = list(filtered_model_input.input) + conversation_history = _build_function_tool_conversation_history( + context_wrapper._tool_history_input + ) + ( function_results, tool_input_guardrail_results, @@ -1117,6 +1163,7 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None: hooks=hooks, context_wrapper=context_wrapper, run_config=run_config, + conversation_history=conversation_history, ) for interruption in _collect_tool_interruptions( diff --git a/src/agents/run_state.py b/src/agents/run_state.py index dcda9e073..99416251b 100644 --- a/src/agents/run_state.py +++ b/src/agents/run_state.py @@ -73,6 +73,7 @@ ) from .logger import logger from .run_context import RunContextWrapper +from .run_internal.items import normalize_input_items_for_api from .tool import ( ApplyPatchTool, ComputerTool, @@ -118,9 +119,9 @@ # 3. to_json() always emits CURRENT_SCHEMA_VERSION. # 4. Forward compatibility is intentionally fail-fast (older SDKs reject newer or unsupported # versions). -CURRENT_SCHEMA_VERSION = "1.6" +CURRENT_SCHEMA_VERSION = "1.7" SUPPORTED_SCHEMA_VERSIONS = frozenset( - {"1.0", "1.1", "1.2", "1.3", "1.4", "1.5", CURRENT_SCHEMA_VERSION} + {"1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", CURRENT_SCHEMA_VERSION} ) _FUNCTION_OUTPUT_ADAPTER: TypeAdapter[FunctionCallOutput] = TypeAdapter(FunctionCallOutput) @@ -187,6 +188,9 @@ class RunState(Generic[TContext, TAgent]): _reasoning_item_id_policy: Literal["preserve", "omit"] | None = None """How reasoning item IDs are represented in next-turn model input.""" + _interrupted_turn_input: list[TResponseInputItem] | None = None + """Filtered turn input snapshot for the currently interrupted turn, if any.""" + _input_guardrail_results: list[InputGuardrailResult] = field(default_factory=list) """Results from input guardrails applied to the run.""" @@ -240,6 +244,7 @@ def __init__( self._previous_response_id = previous_response_id self._auto_previous_response_id = auto_previous_response_id self._reasoning_item_id_policy = None + self._interrupted_turn_input = None self._model_responses = [] self._generated_items = [] self._session_items = [] @@ -657,6 +662,11 @@ def to_json( "previous_response_id": self._previous_response_id, "auto_previous_response_id": self._auto_previous_response_id, "reasoning_item_id_policy": self._reasoning_item_id_policy, + "interrupted_turn_input": ( + normalize_input_items_for_api(copy.deepcopy(self._interrupted_turn_input)) + if isinstance(self._interrupted_turn_input, list) + else None + ), } generated_items = self._merge_generated_items_with_processed() @@ -2288,6 +2298,11 @@ async def _build_run_state_from_json( state._reasoning_item_id_policy = cast(Literal["preserve", "omit"], serialized_policy) else: state._reasoning_item_id_policy = None + serialized_interrupted_turn_input = state_json.get("interrupted_turn_input") + if isinstance(serialized_interrupted_turn_input, list): + state._interrupted_turn_input = copy.deepcopy(serialized_interrupted_turn_input) + else: + state._interrupted_turn_input = None state.set_tool_use_tracker_snapshot(state_json.get("tool_use_tracker", {})) trace_data = state_json.get("trace") if isinstance(trace_data, Mapping): diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py index d8ea1aa13..324f98128 100644 --- a/src/agents/tool_context.py +++ b/src/agents/tool_context.py @@ -57,6 +57,9 @@ class ToolContext(RunContextWrapper[TContext]): run_config: RunConfig | None = None """The active run config for this tool call, when available.""" + conversation_history: list[TResponseInputItem] = field(default_factory=list) + """Visible conversation history snapshot available when this tool is invoked.""" + def __init__( self, context: TContext, @@ -69,6 +72,7 @@ def __init__( tool_namespace: str | None = None, agent: AgentBase[Any] | None = None, run_config: RunConfig | None = None, + conversation_history: list[TResponseInputItem] | None = None, turn_input: list[TResponseInputItem] | None = None, _approvals: dict[str, _ApprovalRecord] | None = None, tool_input: Any | None = None, @@ -82,6 +86,7 @@ def __init__( _approvals={} if _approvals is None else _approvals, tool_input=tool_input, ) + self._tool_history_input = list(turn_input or []) self.tool_name = ( _assert_must_pass_tool_name() if tool_name is _MISSING else cast(str, tool_name) ) @@ -103,6 +108,7 @@ def __init__( ) self.agent = agent self.run_config = run_config + self.conversation_history = list(conversation_history or []) @property def qualified_tool_name(self) -> str: @@ -119,6 +125,7 @@ def from_agent_context( *, tool_namespace: str | None = None, run_config: RunConfig | None = None, + conversation_history: list[TResponseInputItem] | None = None, ) -> ToolContext: """ Create a ToolContext from a RunContextWrapper. @@ -137,6 +144,9 @@ def from_agent_context( tool_run_config = run_config if tool_run_config is None and isinstance(context, ToolContext): tool_run_config = context.run_config + tool_conversation_history = conversation_history + if tool_conversation_history is None and isinstance(context, ToolContext): + tool_conversation_history = context.conversation_history tool_context = cls( tool_name=tool_name, @@ -155,7 +165,10 @@ def from_agent_context( ), agent=tool_agent, run_config=tool_run_config, + conversation_history=tool_conversation_history, **base_values, ) + tool_context._tool_history_input = list(context._tool_history_input) + tool_context._tool_history_conversation_id = context._tool_history_conversation_id set_agent_tool_state_scope(tool_context, get_agent_tool_state_scope(context)) return tool_context diff --git a/tests/test_agent_as_tool.py b/tests/test_agent_as_tool.py index f54e16197..3b396097f 100644 --- a/tests/test_agent_as_tool.py +++ b/tests/test_agent_as_tool.py @@ -697,6 +697,43 @@ async def fake_run( assert run_context.tool_input is None +@pytest.mark.asyncio +async def test_agent_as_tool_preserves_conversation_history_for_nested_tool_context( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Nested ToolContext instances should inherit conversation history.""" + + class DummyResult: + def __init__(self) -> None: + self.final_output = "ok" + self.interruptions: list[ToolApprovalItem] = [] + + agent = Agent(name="history-agent") + tool = agent.as_tool(tool_name="history_tool", tool_description="History tool") + history: list[TResponseInputItem] = [{"role": "user", "content": "hello"}] + + async def fake_run(cls, /, starting_agent, input, **kwargs) -> DummyResult: + del cls, starting_agent, input + nested_context = kwargs.get("context") + assert isinstance(nested_context, ToolContext) + assert nested_context.conversation_history == history + assert nested_context.conversation_history is not history + return DummyResult() + + monkeypatch.setattr(Runner, "run", classmethod(fake_run)) + + tool_context = ToolContext( + context=None, + tool_name="history_tool", + tool_call_id="history-call", + tool_arguments='{"input":"hello"}', + conversation_history=history, + ) + + output = await tool.on_invoke_tool(tool_context, '{"input":"hello"}') + assert output == "ok" + + @pytest.mark.asyncio async def test_agent_as_tool_clears_stale_tool_input_for_plain_tools( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 8b0729716..8b6263f91 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -813,6 +813,259 @@ def foo(context: ToolContext[Any]) -> str: assert captured_contexts[0].agent is agent +@pytest.mark.asyncio +async def test_tool_call_context_includes_conversation_history_snapshot() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent( + name="test", + model=model, + tools=[foo], + ) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("foo", "{}")], + [get_text_message("done")], + ] + ) + + result = await Runner.run(agent, input="user_message") + + assert result.final_output == "done" + assert len(captured_contexts) == 1 + assert captured_contexts[0].conversation_history == [get_text_input_item("user_message")] + + +@pytest.mark.asyncio +async def test_tool_call_context_conversation_history_includes_prior_session_turns() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent(name="test", model=model, tools=[foo]) + session = SimpleListSession() + + model.add_multiple_turn_outputs( + [ + [get_text_message("first_done")], + [get_function_tool_call("foo", "{}")], + [get_text_message("second_done")], + ] + ) + + first_result = await Runner.run(agent, input="first_user", session=session) + second_result = await Runner.run(agent, input="second_user", session=session) + + assert first_result.final_output == "first_done" + assert second_result.final_output == "second_done" + assert len(captured_contexts) == 1 + history = captured_contexts[0].conversation_history + assert any(isinstance(item, dict) and item.get("content") == "first_user" for item in history) + assert any(isinstance(item, dict) and item.get("content") == "second_user" for item in history) + assert ( + sum(1 for item in history if isinstance(item, dict) and item.get("content") == "first_user") + == 1 + ) + + +@pytest.mark.asyncio +async def test_tool_context_history_does_not_leak_across_reused_context_wrapper_runs() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent(name="test", model=model, tools=[foo]) + shared_context = RunContextWrapper(context=None) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("foo", "{}")], + [get_text_message("done1")], + [get_function_tool_call("foo", "{}")], + [get_text_message("done2")], + ] + ) + + first_result = await Runner.run( + agent, + input="first_user", + conversation_id="conv-1", + context=shared_context, + ) + second_result = await Runner.run( + agent, + input="second_user", + conversation_id="conv-2", + context=shared_context, + ) + + assert first_result.final_output == "done1" + assert second_result.final_output == "done2" + assert len(captured_contexts) == 2 + first_history = captured_contexts[0].conversation_history + second_history = captured_contexts[1].conversation_history + assert any( + isinstance(item, dict) and item.get("content") == "first_user" for item in first_history + ) + assert not any( + isinstance(item, dict) and item.get("content") == "first_user" for item in second_history + ) + assert any( + isinstance(item, dict) and item.get("content") == "second_user" for item in second_history + ) + + +@pytest.mark.asyncio +async def test_tool_context_history_persists_across_reused_wrapper_same_conversation_id() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent(name="test", model=model, tools=[foo]) + shared_context = RunContextWrapper(context=None) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("foo", "{}")], + [get_text_message("done1")], + [get_function_tool_call("foo", "{}")], + [get_text_message("done2")], + ] + ) + + first_result = await Runner.run( + agent, + input="first_user", + conversation_id="conv-same", + context=shared_context, + ) + second_result = await Runner.run( + agent, + input="second_user", + conversation_id="conv-same", + context=shared_context, + ) + + assert first_result.final_output == "done1" + assert second_result.final_output == "done2" + assert len(captured_contexts) == 2 + second_history = captured_contexts[1].conversation_history + assert any( + isinstance(item, dict) and item.get("content") == "first_user" for item in second_history + ) + assert any( + isinstance(item, dict) and item.get("content") == "second_user" for item in second_history + ) + + +@pytest.mark.asyncio +async def test_tool_call_context_conversation_history_uses_filtered_turn_input() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent(name="test", model=model, tools=[foo]) + + def redact_input(data): + return type(data.model_data)( + input=[get_text_input_item("redacted_user")], + instructions=data.model_data.instructions, + ) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("foo", "{}")], + [get_text_message("done")], + ] + ) + + result = await Runner.run( + agent, + input="original_user", + run_config=RunConfig(call_model_input_filter=redact_input), + ) + + assert result.final_output == "done" + assert len(captured_contexts) == 1 + assert captured_contexts[0].conversation_history == [get_text_input_item("redacted_user")] + + +@pytest.mark.asyncio +async def test_tool_call_context_conversation_history_applies_reasoning_id_policy() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + session = SimpleListSession() + + @function_tool(name_override="foo") + def foo(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "tool_result" + + agent = Agent(name="test", model=model, tools=[foo]) + + model.add_multiple_turn_outputs( + [ + [ + ResponseReasoningItem( + id="rs_history", + type="reasoning", + summary=[Summary(text="Thinking...", type="summary_text")], + ), + get_text_message("first_done"), + ], + [get_function_tool_call("foo", "{}")], + [get_text_message("done")], + ] + ) + + first_result = await Runner.run( + agent, + input="user_message", + session=session, + run_config=RunConfig(reasoning_item_id_policy="omit"), + ) + second_result = await Runner.run( + agent, + input="follow_up", + session=session, + run_config=RunConfig(reasoning_item_id_policy="omit"), + ) + + assert first_result.final_output == "first_done" + assert second_result.final_output == "done" + assert len(captured_contexts) == 1 + reasoning_items = [ + item + for item in captured_contexts[0].conversation_history + if isinstance(item, dict) and item.get("type") == "reasoning" + ] + assert len(reasoning_items) == 1 + assert "id" not in reasoning_items[0] + + @pytest.mark.asyncio async def test_handoffs(): model = FakeModel() @@ -972,6 +1225,48 @@ def capture_model_input(data): assert "reasoning" not in handoff_input_types +@pytest.mark.asyncio +async def test_resumed_tool_context_conversation_history_uses_filtered_turn_input() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="approval_tool", needs_approval=True) + def approval_tool(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "ok" + + agent = Agent(name="test", model=model, tools=[approval_tool]) + + def redact_input(data): + return type(data.model_data)( + input=[get_text_input_item("redacted_user")], + instructions=data.model_data.instructions, + ) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("approval_tool", "{}", call_id="approval-call")], + [get_text_message("done")], + ] + ) + + first = await Runner.run( + agent, + input="original_user", + run_config=RunConfig(call_model_input_filter=redact_input), + ) + assert first.interruptions + + state = first.to_state() + state.approve(first.interruptions[0]) + + resumed = await Runner.run(agent, state) + + assert resumed.final_output == "done" + assert len(captured_contexts) == 1 + assert captured_contexts[0].conversation_history == [get_text_input_item("redacted_user")] + + @pytest.mark.asyncio async def test_resume_preserves_filtered_model_input_after_handoff(): model = FakeModel() @@ -2775,6 +3070,41 @@ async def test_previous_response_id_passed_between_runs_streamed_multi_turn(): assert model.last_turn_args.get("previous_response_id") == "resp-789" +@pytest.mark.asyncio +async def test_tool_context_history_includes_prior_context_in_conversation_id_mode() -> None: + model = FakeModel() + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="first_tool") + def first_tool() -> str: + return "first_result" + + @function_tool(name_override="second_tool") + def second_tool(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "second_result" + + agent = Agent(name="test", model=model, tools=[first_tool, second_tool]) + + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("first_tool", "{}")], + [get_function_tool_call("second_tool", "{}")], + [get_text_message("done")], + ] + ) + + result = await Runner.run(agent, input="first_user", conversation_id="conv-test") + + assert result.final_output == "done" + assert len(captured_contexts) == 1 + history = captured_contexts[0].conversation_history + assert any(isinstance(item, dict) and item.get("content") == "first_user" for item in history) + assert any( + isinstance(item, dict) and item.get("type") == "function_call_output" for item in history + ) + + @pytest.mark.asyncio async def test_conversation_id_only_sends_new_items_multi_turn(): """Test that conversation_id mode only sends new items on subsequent turns.""" diff --git a/tests/test_run_state.py b/tests/test_run_state.py index 56cd61fab..3ef0f6085 100644 --- a/tests/test_run_state.py +++ b/tests/test_run_state.py @@ -3969,7 +3969,7 @@ async def test_from_json_missing_schema_version(self): await RunState.from_json(agent, state_json) @pytest.mark.asyncio - @pytest.mark.parametrize("schema_version", ["1.7", "2.0"]) + @pytest.mark.parametrize("schema_version", ["1.8", "2.0"]) async def test_from_json_unsupported_schema_version(self, schema_version: str): """Test that from_json raises error when schema version is unsupported.""" agent = Agent(name="TestAgent") @@ -4021,7 +4021,7 @@ async def test_from_json_accepts_previous_schema_version(self): def test_supported_schema_versions_match_released_boundary(self): """The support set should include released versions plus the current unreleased writer.""" assert SUPPORTED_SCHEMA_VERSIONS == frozenset( - {"1.0", "1.1", "1.2", "1.3", "1.4", "1.5", CURRENT_SCHEMA_VERSION} + {"1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", CURRENT_SCHEMA_VERSION} ) @pytest.mark.asyncio @@ -4957,6 +4957,39 @@ async def test_from_json_handles_string_original_input(self): # Should handle string original_input (line 762-763) assert state._original_input == "string_input" + async def test_run_state_round_trips_interrupted_turn_input(self): + context: RunContextWrapper[dict[str, str]] = RunContextWrapper(context={}) + agent = Agent(name="TestAgent") + state = make_state(agent, context=context, original_input="input", max_turns=5) + state._interrupted_turn_input = [{"role": "user", "content": "redacted"}] + + restored = await RunState.from_json(agent, state.to_json()) + + assert restored._interrupted_turn_input == state._interrupted_turn_input + assert restored._interrupted_turn_input is not state._interrupted_turn_input + + async def test_run_state_serializes_model_like_interrupted_turn_input(self): + context: RunContextWrapper[dict[str, str]] = RunContextWrapper(context={}) + agent = Agent(name="TestAgent") + state = make_state(agent, context=context, original_input="input", max_turns=5) + state._interrupted_turn_input = [ + cast( + TResponseInputItem, + ResponseOutputMessage( + id="msg_1", + type="message", + role="assistant", + status="completed", + content=[ResponseOutputText(text="hello", annotations=[], type="output_text")], + ), + ) + ] + + json_data = state.to_json() + + assert isinstance(json_data["interrupted_turn_input"], list) + assert isinstance(json_data["interrupted_turn_input"][0], dict) + async def test_from_string_handles_non_dict_items_in_original_input(self): """Test that from_string handles non-dict items in original_input list.""" context: RunContextWrapper[dict[str, str]] = RunContextWrapper(context={}) diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index c1cb3fe74..cae4eed7f 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -1218,6 +1218,45 @@ async def _second_tool() -> str: assert output_guardrail_results == [] +@pytest.mark.asyncio +async def test_execute_function_tool_calls_preserves_parent_tool_context_history_when_unset(): + captured_contexts: list[ToolContext[Any]] = [] + + @function_tool(name_override="history_tool") + def history_tool(context: ToolContext[Any]) -> str: + captured_contexts.append(context) + return "ok" + + tool_run = ToolRunFunction( + tool_call=cast( + ResponseFunctionToolCall, + get_function_tool_call("history_tool", "{}", call_id="call-history"), + ), + function_tool=history_tool, + ) + parent_history: list[TResponseInputItem] = [get_text_input_item("hello")] + context_wrapper = ToolContext( + context=None, + tool_name="parent_tool", + tool_call_id="parent-call", + tool_arguments="{}", + conversation_history=parent_history, + ) + + function_results, _, _ = await execute_function_tool_calls( + agent=Agent(name="test", tools=[history_tool]), + tool_runs=[tool_run], + hooks=RunHooks(), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert [result.output for result in function_results] == ["ok"] + assert len(captured_contexts) == 1 + assert captured_contexts[0].conversation_history == parent_history + assert captured_contexts[0].conversation_history is not parent_history + + @pytest.mark.asyncio async def test_execute_function_tool_calls_collapse_trace_name_for_top_level_deferred_tools(): async def _shipping_eta(tracking_number: str) -> str: diff --git a/tests/test_source_compat_constructors.py b/tests/test_source_compat_constructors.py index c0f881817..4efc8da2e 100644 --- a/tests/test_source_compat_constructors.py +++ b/tests/test_source_compat_constructors.py @@ -22,6 +22,7 @@ tool_input_guardrail, tool_output_guardrail, ) +from agents.run_context import _ApprovalRecord from agents.tool_context import ToolContext @@ -159,6 +160,18 @@ def test_tool_context_supports_agent_keyword_argument() -> None: assert context.agent is agent +def test_run_context_wrapper_positional_constructor_preserves_legacy_slots() -> None: + usage = Usage() + turn_input: list[Any] = [{"role": "user", "content": "hello"}] + approvals = {"tool": _ApprovalRecord(approved=True)} + + wrapper = RunContextWrapper(None, usage, turn_input, approvals) + + assert wrapper.usage is usage + assert wrapper.turn_input == turn_input + assert wrapper._approvals is approvals + + def test_run_result_v070_positional_constructor_still_works() -> None: result = RunResult( "x", diff --git a/tests/test_tool_context.py b/tests/test_tool_context.py index a4579e8fb..1a817b39c 100644 --- a/tests/test_tool_context.py +++ b/tests/test_tool_context.py @@ -4,6 +4,7 @@ from openai.types.responses import ResponseFunctionToolCall from agents import Agent +from agents.items import TResponseInputItem from agents.run_config import RunConfig from agents.run_context import RunContextWrapper from agents.tool import FunctionTool, invoke_function_tool @@ -51,6 +52,27 @@ def test_tool_context_from_agent_context_populates_fields() -> None: assert tool_ctx.agent is agent +def test_tool_context_from_agent_context_copies_conversation_history() -> None: + tool_call = ResponseFunctionToolCall( + type="function_call", + name="test_tool", + call_id="call-history", + arguments="{}", + ) + ctx = make_context_wrapper() + history: list[TResponseInputItem] = [{"role": "user", "content": "hello"}] + + tool_ctx = ToolContext.from_agent_context( + ctx, + tool_call_id="call-history", + tool_call=tool_call, + conversation_history=history, + ) + + assert tool_ctx.conversation_history == history + assert tool_ctx.conversation_history is not history + + def test_tool_context_agent_none_by_default() -> None: tool_call = ResponseFunctionToolCall( type="function_call", @@ -184,6 +206,39 @@ def test_tool_context_from_tool_context_inherits_run_config() -> None: assert derived_context.run_config is parent_run_config +def test_tool_context_from_tool_context_inherits_conversation_history() -> None: + original_call = ResponseFunctionToolCall( + type="function_call", + name="test_tool", + call_id="call-3", + arguments="{}", + ) + derived_call = ResponseFunctionToolCall( + type="function_call", + name="test_tool", + call_id="call-4", + arguments="{}", + ) + history: list[TResponseInputItem] = [{"role": "user", "content": "hello"}] + parent_context: ToolContext[dict[str, object]] = ToolContext( + context={}, + tool_name="test_tool", + tool_call_id="call-3", + tool_arguments="{}", + tool_call=original_call, + conversation_history=history, + ) + + derived_context = ToolContext.from_agent_context( + parent_context, + tool_call_id="call-4", + tool_call=derived_call, + ) + + assert derived_context.conversation_history == history + assert derived_context.conversation_history is not history + + def test_tool_context_from_agent_context_prefers_explicit_run_config() -> None: tool_call = ResponseFunctionToolCall( type="function_call",