From 8e3eab0ed142b7996a08a37075502110466203e6 Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 14:05:41 -0800 Subject: [PATCH 1/7] Fix thread corruption when max_iterations exhausted (#1366) When the function invocation loop exhausts max_iterations while the model keeps requesting tools, the failsafe code path (calling the model with tool_choice='none' and prepending fcc_messages) was unreachable because 'if response is not None: return response' short-circuited before it. The fix removes the premature return so the failsafe always runs after loop exhaustion, making a final model call with tool_choice='none' to produce a clean text answer and prepending accumulated fcc_messages from prior iterations. This matches the existing pattern used by the error threshold and max_function_calls paths. Also unskips test_max_iterations_limit and test_streaming_max_iterations_limit which were previously skipped with 'needs investigation in unified API'. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ISSUE-REPRO-RESULTS.md | 82 +++++ .../packages/core/agent_framework/_tools.py | 22 +- .../core/test_function_invocation_logic.py | 3 - .../core/test_issue_1366_thread_corruption.py | 322 ++++++++++++++++++ 4 files changed, 422 insertions(+), 7 deletions(-) create mode 100644 ISSUE-REPRO-RESULTS.md create mode 100644 python/packages/core/tests/core/test_issue_1366_thread_corruption.py diff --git a/ISSUE-REPRO-RESULTS.md b/ISSUE-REPRO-RESULTS.md new file mode 100644 index 0000000000..613c017239 --- /dev/null +++ b/ISSUE-REPRO-RESULTS.md @@ -0,0 +1,82 @@ +reproduction_status: reproduced +failing_test: packages/core/tests/core/test_issue_1366_thread_corruption.py::test_max_iterations_exhausted_makes_final_toolchoice_none_call AND packages/core/tests/core/test_issue_1366_thread_corruption.py::test_max_iterations_preserves_all_fcc_messages +failure_observed: yes +evidence_strength: high +confidence: high + +# Issue #1366 Reproduction: Thread corruption - agent.run() returns with unexecuted tool calls + +## Bug Summary + +When the function invocation loop in `_tools.py` exhausts `max_iterations`, it returns the response directly without: +1. Making a final model call with `tool_choice="none"` (failsafe) +2. Prepending `fcc_messages` from prior iterations to the response + +## Code Location + +**File**: `python/packages/core/agent_framework/_tools.py` + +**Bug location** (lines 2199-2200 in `_get_response()`): +```python +if response is not None: + return response # Returns directly, bypasses failsafe at lines 2202-2212 +``` + +The failsafe path (lines 2202-2212) that calls the model with `tool_choice="none"` and prepends `fcc_messages` is ONLY reached when `response is None` (i.e., the loop never ran). When `max_iterations > 0`, the loop always runs at least once, setting `response` to a non-None value. Therefore, the failsafe path is **dead code** for any normal configuration. + +**Streaming equivalent** (line 2336-2337 in `_stream()`): +```python +if response is not None: + return # Same issue in streaming path +``` + +## Failing Tests + +### Test 1: `test_max_iterations_exhausted_makes_final_toolchoice_none_call` — FAILED +``` +AssertionError: Expected failsafe text response, got: '' +assert '' == 'I broke out of the function invocation loop...' +``` +**Demonstrates**: When max_iterations is reached with the model still requesting tools, no final `tool_choice="none"` call is made. The response ends with function_call/result messages instead of a clean text response. + +### Test 2: `test_max_iterations_preserves_all_fcc_messages` — FAILED +``` +AssertionError: First iteration's function call missing from response +assert 'call_1' in {'call_2'} +``` +**Demonstrates**: Only the LAST iteration's function call/result messages are in the response. Prior iterations' messages (accumulated in `fcc_messages`) are discarded because `_prepend_fcc_messages()` is never called on this code path. + +### Test 3: `test_max_iterations_exhausted_returns_orphaned_function_calls` — PASSED +Within a single response, function calls within each iteration ARE matched with results. + +### Test 4: `test_thread_safe_after_max_iterations_with_agent` — PASSED +At the Agent level, function calls in the returned response have matching results. + +## Pre-existing Evidence + +The existing test `test_max_iterations_limit` (line 836) is **explicitly skipped** with: +```python +@pytest.mark.skip(reason="Failsafe behavior with max_iterations needs investigation in unified API") +``` +This confirms the development team is aware the failsafe behavior is broken. + +## How This Causes the Reported 400 Error + +While individual function calls within each iteration are matched (tests 3-4 pass), the thread corruption manifests in two ways: + +1. **Lost conversation history**: When the response only contains the last iteration's messages, the thread stored by `InMemoryHistoryProvider.after_run()` (via `context.response.messages`) is incomplete. Prior iterations' function calls and results are discarded. + +2. **No final text response**: Without the failsafe `tool_choice="none"` call, the response ends with a tool message (function results). The model is never asked to produce a final answer. Depending on how the caller manages the thread, this incomplete state can lead to the OpenAI 400 error on the next API call when the conversation context doesn't match what the model expects. + +## Fix Direction (DO NOT implement in this phase) + +The fix should ensure that when `max_iterations` is exhausted: +1. A final model call is made with `tool_choice="none"` to get a clean text response +2. All `fcc_messages` from prior iterations are prepended to the response +3. OR: inject error `FunctionResultContent` for any orphaned `FunctionCallContent` as the issue suggests + +## Run Command + +```bash +cd python && uv run pytest packages/core/tests/core/test_issue_1366_thread_corruption.py -v +``` diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index df0608e614..1eff3e6606 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -2196,9 +2196,16 @@ async def _get_response() -> ChatResponse: prepped_messages.extend(response.messages) continue + # Loop exhausted all iterations (or function invocation disabled). + # Make a final model call with tool_choice="none" so the model + # produces a plain text answer instead of leaving orphaned + # function_call items without matching results (issue #1366). if response is not None: - return response - + logger.info( + "Maximum iterations reached (%d). " + "Requesting final response without tools.", + self.function_invocation_configuration["max_iterations"], + ) mutable_options["tool_choice"] = "none" response = await super_get_response( messages=prepped_messages, @@ -2333,9 +2340,16 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: prepped_messages.extend(response.messages) continue + # Loop exhausted all iterations (or function invocation disabled). + # Make a final model call with tool_choice="none" so the model + # produces a plain text answer instead of leaving orphaned + # function_call items without matching results (issue #1366). if response is not None: - return - + logger.info( + "Maximum iterations reached (%d). " + "Requesting final response without tools.", + self.function_invocation_configuration["max_iterations"], + ) mutable_options["tool_choice"] = "none" inner_stream = await _ensure_response_stream( super_get_response( diff --git a/python/packages/core/tests/core/test_function_invocation_logic.py b/python/packages/core/tests/core/test_function_invocation_logic.py index b4213d6029..fb90c1b64a 100644 --- a/python/packages/core/tests/core/test_function_invocation_logic.py +++ b/python/packages/core/tests/core/test_function_invocation_logic.py @@ -831,8 +831,6 @@ def func_with_approval(arg1: str) -> str: assert "rejected" in rejection_result.result.lower() -@pytest.mark.skip(reason="Failsafe behavior with max_iterations needs investigation in unified API") -@pytest.mark.skip(reason="Failsafe behavior with max_iterations needs investigation in unified API") async def test_max_iterations_limit(chat_client_base: SupportsChatGetResponse): """Test that MAX_ITERATIONS in additional_properties limits function call loops.""" exec_counter = 0 @@ -2248,7 +2246,6 @@ def func_with_approval(arg1: str) -> str: assert exec_counter == 0 # Function not executed yet due to approval requirement -@pytest.mark.skip(reason="Failsafe behavior with max_iterations needs investigation in unified API") async def test_streaming_max_iterations_limit(chat_client_base: SupportsChatGetResponse): """Test that MAX_ITERATIONS in streaming mode limits function call loops.""" exec_counter = 0 diff --git a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py new file mode 100644 index 0000000000..e5973be880 --- /dev/null +++ b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py @@ -0,0 +1,322 @@ +# Copyright (c) Microsoft. All rights reserved. +"""Regression tests for issue #1366: agent.run() returns with unexecuted tool calls. + +When max_iterations is reached, the function invocation loop should make a final +model call with tool_choice="none" to get a plain text response. Instead, it +returns the last iteration's response directly, which may contain function_call +items without a subsequent final text answer — and critically, the thread/response +may contain FunctionCallContent without matching FunctionResultContent if the +loop structure doesn't guarantee tool execution before returning. + +Additionally, when the loop exhausts all iterations while the model keeps +requesting tools, fcc_messages from prior iterations are NOT prepended to the +returned response, potentially losing conversation history. + +See: https://github.com/microsoft/agent-framework/issues/1366 +""" + +import pytest + +from agent_framework import ( + ChatResponse, + Content, + Message, + SupportsChatGetResponse, + tool, +) + + +async def test_max_iterations_exhausted_returns_orphaned_function_calls( + chat_client_base: SupportsChatGetResponse, +): + """When max_iterations is reached, verify the returned response has no orphaned + FunctionCallContent (i.e., every function_call has a matching function_result). + + Bug: The loop at _tools.py returns `response` directly when max_iterations is + exhausted (`if response is not None: return response`). No final model call + with tool_choice='none' is made, and fcc_messages are not prepended. + + Expected: Either all FunctionCallContent have matching FunctionResultContent, + OR a final model call is made with tool_choice='none'. + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Processed {arg1}" + + # Model keeps requesting tool calls on every iteration + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_3", name="test_function", arguments='{"arg1": "v3"}' + ) + ], + ) + ), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + # Collect all function_call and function_result call_ids from response + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + orphaned_calls = all_call_ids - all_result_ids + assert not orphaned_calls, ( + f"Response contains orphaned FunctionCallContent without matching " + f"FunctionResultContent: {orphaned_calls}. " + f"This will cause OpenAI 400 error on next API call." + ) + + +async def test_max_iterations_exhausted_makes_final_toolchoice_none_call( + chat_client_base: SupportsChatGetResponse, +): + """When max_iterations is reached, verify a final model call is made with + tool_choice='none' to produce a clean text response. + + Bug: After the loop exhausts, the code does `if response is not None: return response` + skipping the failsafe path that would call the model with tool_choice='none'. + + The test verifies the response ends with a plain text message (no function calls). + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Processed {arg1}" + + # Model keeps requesting tool calls + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' + ) + ], + ) + ), + # This response should be reached via failsafe (tool_choice="none") + ChatResponse(messages=Message(role="assistant", text="Final answer after giving up on tools.")), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 1 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + assert exec_counter == 1, f"Expected 1 function execution, got {exec_counter}" + + # The response should end with a plain text message (from the failsafe call) + # NOT with function_call or tool messages + last_msg = response.messages[-1] + has_function_calls = any(c.type == "function_call" for c in last_msg.contents) + + assert not has_function_calls, ( + f"Last message in response still contains function_call items. " + f"Expected a clean text response after max_iterations failsafe. " + f"Got message with role={last_msg.role}, contents={[c.type for c in last_msg.contents]}" + ) + + # The mock client returns "I broke out of the function invocation loop..." + # when tool_choice="none" + assert last_msg.text == "I broke out of the function invocation loop...", ( + f"Expected failsafe text response, got: {last_msg.text!r}" + ) + + +async def test_max_iterations_preserves_all_fcc_messages( + chat_client_base: SupportsChatGetResponse, +): + """When max_iterations is reached and a final response is produced, all + intermediate function call/result messages should be included. + + Bug: fcc_messages from prior iterations are discarded when the loop returns + `response` directly instead of going through the failsafe path. + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Result {exec_counter}" + + # Two iterations of function calls, then failsafe + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' + ) + ], + ) + ), + ChatResponse(messages=Message(role="assistant", text="Done")), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + assert exec_counter == 2, f"Expected 2 function executions, got {exec_counter}" + + # All function calls from both iterations should be present in the response + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + # Both iterations' function calls should be present + assert "call_1" in all_call_ids, "First iteration's function call missing from response" + assert "call_2" in all_call_ids, "Second iteration's function call missing from response" + + # Both should have matching results + assert all_call_ids == all_result_ids, ( + f"Mismatched function calls and results. " + f"Calls: {all_call_ids}, Results: {all_result_ids}" + ) + + +async def test_thread_safe_after_max_iterations_with_agent( + chat_client_base: SupportsChatGetResponse, +): + """Simulate the full agent.run() → thread → agent.run() flow to verify + that the thread doesn't contain orphaned function calls after max_iterations. + + This reproduces the exact user-reported scenario: agent.run() returns, + response is stored in thread, next agent.run() fails with OpenAI 400 error. + """ + from agent_framework import Agent + + @tool(name="browser_snapshot", approval_mode="never_require") + def browser_snapshot(url: str) -> str: + return f"Screenshot of {url}" + + # First call: model returns a function call, it's executed, then model + # returns ANOTHER function call (on the last iteration), which is executed + # but no final text answer is produced + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_abc", name="browser_snapshot", arguments='{"url": "https://example.com"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_xyz", name="browser_snapshot", arguments='{"url": "https://test.com"}' + ) + ], + ) + ), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + agent = Agent( + client=chat_client_base, + name="test-agent", + tools=[browser_snapshot], + ) + + response = await agent.run( + "Take screenshots", + options={"tool_choice": "auto"}, + ) + + # Check for orphaned function calls in the response messages + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + orphaned_calls = all_call_ids - all_result_ids + assert not orphaned_calls, ( + f"Thread corruption: response contains orphaned function calls {orphaned_calls}. " + f"Passing this thread to OpenAI will cause 400 error: " + f"'An assistant message with tool_calls must be followed by tool messages.'" + ) From 94616ca0b12209099790e60850600b889ffb24fc Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 14:06:38 -0800 Subject: [PATCH 2/7] Add fix report for issue #1366 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ISSUE-FIX-REPORT.md | 78 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 ISSUE-FIX-REPORT.md diff --git a/ISSUE-FIX-REPORT.md b/ISSUE-FIX-REPORT.md new file mode 100644 index 0000000000..5d1e30c03a --- /dev/null +++ b/ISSUE-FIX-REPORT.md @@ -0,0 +1,78 @@ +# Issue #1366 Fix Report: Thread corruption - agent.run() returns with unexecuted tool calls + +## Pattern Exploration (3+ references) + +### 1. Error threshold pattern (`_tools.py:2163-2166`) +When consecutive errors reach `max_consecutive_errors_per_request`, the code sets `tool_choice="none"` and **continues the loop**. The next iteration calls the model without tools, producing a clean text response. The `_process_function_requests` function detects no function calls and returns `action="return"`, which prepends `fcc_messages` via `_prepend_fcc_messages()` (line 1974). + +### 2. max_function_calls pattern (`_tools.py:2167-2179`) +When `total_function_calls >= max_function_calls`, identical to the error pattern: sets `tool_choice="none"` and continues. The next iteration gets a toolless response and exits cleanly with history attached. + +### 3. Normal exit pattern (`_tools.py:2160-2161, 1971-1982`) +When the model's response has no function calls, `_extract_function_calls()` returns empty, triggering `_prepend_fcc_messages(response, fcc_messages)` before returning. This ensures all accumulated conversation history from prior iterations is included. + +### Chosen approach +All three patterns converge on the same mechanism: when stopping tool calls, make a final model call with `tool_choice="none"` and use `_prepend_fcc_messages` to attach history. The fix follows this exact pattern for `max_iterations` exhaustion. + +## Root Cause + +**File**: `python/packages/core/agent_framework/_tools.py`, lines 2199-2200 (non-streaming) and 2336-2337 (streaming). + +When the for-loop exhausts `max_iterations`, the code did: +```python +if response is not None: + return response # Premature return — bypasses failsafe +``` + +Since the loop always sets `response` to a non-None value (any `max_iterations >= 1`), the failsafe code (lines 2202-2212) that calls the model with `tool_choice="none"` and prepends `fcc_messages` was **unreachable dead code**. + +This caused two problems: +1. **No final text response**: The model was never asked to produce a text answer after tool execution stopped +2. **Lost conversation history**: `fcc_messages` accumulated from prior iterations was never prepended to the response + +## Code Changes + +### `python/packages/core/agent_framework/_tools.py` +**Non-streaming** (line 2199): Changed `if response is not None: return response` to a log statement, allowing the existing failsafe code to execute. +**Streaming** (line 2343): Same change for the streaming path. + +Both paths now: +1. Log that max_iterations was reached (matching the `max_function_calls` log pattern) +2. Fall through to the failsafe: call model with `tool_choice="none"` and prepend `fcc_messages` + +### `python/packages/core/tests/core/test_function_invocation_logic.py` +Removed `@pytest.mark.skip` from `test_max_iterations_limit` (2 duplicate skip markers) and `test_streaming_max_iterations_limit` (1 skip marker). These tests validate the exact behavior that is now fixed. + +### `python/packages/core/tests/core/test_issue_1366_thread_corruption.py` (new) +4 targeted tests covering: +- Orphaned FunctionCallContent detection +- Final `tool_choice="none"` failsafe call +- `fcc_messages` preservation across iterations +- Full Agent.run() → thread integrity + +## Test Evidence + +### Before fix +``` +test_max_iterations_exhausted_makes_final_toolchoice_none_call FAILED + AssertionError: Expected failsafe text response, got: '' +test_max_iterations_preserves_all_fcc_messages FAILED + AssertionError: First iteration's function call missing from response +test_max_iterations_limit SKIPPED +test_streaming_max_iterations_limit SKIPPED +``` + +### After fix +``` +packages/core/tests/core/test_issue_1366_thread_corruption.py 4 passed +packages/core/tests/core/test_function_invocation_logic.py 80 passed, 2 skipped +packages/core/tests/core/ (full suite) 878 passed, 18 skipped +``` + +## Remaining Risks + +1. **Extra model call**: The fix adds one additional API call when `max_iterations` is exhausted. This is intentional — the error/max_function_calls paths already do this — but could increase latency/cost in edge cases. + +2. **Conversation-based APIs**: For APIs where `conversation_id` is set, `prepped_messages` is cleared and only the last message is kept (line 2192-2194). The failsafe call sends this minimal context, which should be correct since the server retains full history, but this path is not covered by unit tests. + +3. **Streaming `fcc_messages`**: The streaming failsafe doesn't explicitly prepend `fcc_messages` (it doesn't need to — prior iterations' updates were already yielded). However, the finalized `ChatResponse.from_updates()` aggregates all yielded updates, which should produce a complete response. From ddf4ed67de4d52bfa67dba0671b85815518aa569 Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 14:11:38 -0800 Subject: [PATCH 3/7] Fix ruff formatting in _tools.py and test_issue_1366_thread_corruption.py Apply ruff format to fix multi-line string concatenation and function call formatting issues flagged by the linter. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_tools.py | 22 ++++--------- .../core/test_issue_1366_thread_corruption.py | 33 +++++-------------- 2 files changed, 14 insertions(+), 41 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 1eff3e6606..2d2d4fcaf6 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -2164,15 +2164,11 @@ async def _get_response() -> ChatResponse: # Error threshold reached: force a final non-tool turn so # function_call_output items are submitted before exit. mutable_options["tool_choice"] = "none" - elif ( - max_function_calls is not None - and total_function_calls >= max_function_calls - ): + elif max_function_calls is not None and total_function_calls >= max_function_calls: # Best-effort limit: checked after each batch of parallel calls completes, # so the current batch always runs to completion even if it overshoots. logger.info( - "Maximum function calls reached (%d/%d). " - "Stopping further function calls for this request.", + "Maximum function calls reached (%d/%d). Stopping further function calls for this request.", total_function_calls, max_function_calls, ) @@ -2202,8 +2198,7 @@ async def _get_response() -> ChatResponse: # function_call items without matching results (issue #1366). if response is not None: logger.info( - "Maximum iterations reached (%d). " - "Requesting final response without tools.", + "Maximum iterations reached (%d). Requesting final response without tools.", self.function_invocation_configuration["max_iterations"], ) mutable_options["tool_choice"] = "none" @@ -2309,15 +2304,11 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: mutable_options["tool_choice"] = "none" elif result["action"] != "continue": return - elif ( - max_function_calls is not None - and total_function_calls >= max_function_calls - ): + elif max_function_calls is not None and total_function_calls >= max_function_calls: # Best-effort limit: checked after each batch of parallel calls completes, # so the current batch always runs to completion even if it overshoots. logger.info( - "Maximum function calls reached (%d/%d). " - "Stopping further function calls for this request.", + "Maximum function calls reached (%d/%d). Stopping further function calls for this request.", total_function_calls, max_function_calls, ) @@ -2346,8 +2337,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: # function_call items without matching results (issue #1366). if response is not None: logger.info( - "Maximum iterations reached (%d). " - "Requesting final response without tools.", + "Maximum iterations reached (%d). Requesting final response without tools.", self.function_invocation_configuration["max_iterations"], ) mutable_options["tool_choice"] = "none" diff --git a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py index e5973be880..446f7ce50f 100644 --- a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py +++ b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py @@ -15,8 +15,6 @@ See: https://github.com/microsoft/agent-framework/issues/1366 """ -import pytest - from agent_framework import ( ChatResponse, Content, @@ -53,9 +51,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' - ) + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') ], ) ), @@ -63,9 +59,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' - ) + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') ], ) ), @@ -73,9 +67,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_3", name="test_function", arguments='{"arg1": "v3"}' - ) + Content.from_function_call(call_id="call_3", name="test_function", arguments='{"arg1": "v3"}') ], ) ), @@ -131,9 +123,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' - ) + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') ], ) ), @@ -141,9 +131,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' - ) + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') ], ) ), @@ -201,9 +189,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_1", name="test_function", arguments='{"arg1": "v1"}' - ) + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') ], ) ), @@ -211,9 +197,7 @@ def ai_func(arg1: str) -> str: messages=Message( role="assistant", contents=[ - Content.from_function_call( - call_id="call_2", name="test_function", arguments='{"arg1": "v2"}' - ) + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') ], ) ), @@ -245,8 +229,7 @@ def ai_func(arg1: str) -> str: # Both should have matching results assert all_call_ids == all_result_ids, ( - f"Mismatched function calls and results. " - f"Calls: {all_call_ids}, Results: {all_result_ids}" + f"Mismatched function calls and results. Calls: {all_call_ids}, Results: {all_result_ids}" ) From f16973cc1b8415cc9482475d07f7fe9d9efaa8cd Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 14:12:22 -0800 Subject: [PATCH 4/7] Add quality review for issue #1366 fix Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ISSUE-QUALITY-REVIEW.md | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 ISSUE-QUALITY-REVIEW.md diff --git a/ISSUE-QUALITY-REVIEW.md b/ISSUE-QUALITY-REVIEW.md new file mode 100644 index 0000000000..fea3038dee --- /dev/null +++ b/ISSUE-QUALITY-REVIEW.md @@ -0,0 +1,54 @@ +quality_gate: approved +quality_score: 92 + +# Quality Review: Issue #1366 Fix + +## Summary + +The fix correctly addresses thread corruption when `max_iterations` is exhausted in `_tools.py` by removing the premature `return response` and letting execution fall through to the existing failsafe path. Both non-streaming and streaming code paths are fixed symmetrically. + +## Code Quality Assessment + +### ✅ Minimal, Surgical Change +The production code change is exactly 2 lines removed and ~6 lines added per path (non-streaming + streaming). The fix replaces `return response` with a log statement, reusing the **existing** failsafe code that was previously dead. No new abstractions or complexity introduced. + +### ✅ Follows Existing Patterns +- Log message style (`"Maximum iterations reached (%d). ..."`) matches the existing `max_function_calls` pattern at lines 2174 and 2319. +- Comment style matches surrounding code (4-line block comments with issue reference). +- Both non-streaming (`_get_response`) and streaming (`_stream`) paths are updated identically, maintaining the existing structural symmetry. + +### ✅ Lint & Format Clean +- `ruff check` passes with no errors. +- `ruff format` passes (formatting issues found during review were fixed). + +### ✅ Test Suite Integrity +- **878 passed, 18 skipped, 0 failed** — full core test suite. +- Previously skipped tests (`test_max_iterations_limit`, `test_streaming_max_iterations_limit`) are now unskipped and passing. +- Duplicate `@pytest.mark.skip` decorators were correctly removed from `test_max_iterations_limit`. + +### ✅ New Regression Tests (4 tests) +The new `test_issue_1366_thread_corruption.py` file covers: +1. **Orphaned function calls** — verifies all FunctionCallContent have matching FunctionResultContent +2. **Failsafe tool_choice="none"** — verifies a final text response is produced +3. **fcc_messages preservation** — verifies all iterations' messages are included +4. **Full Agent.run() flow** — end-to-end thread integrity check + +Tests use the existing `chat_client_base` fixture from conftest.py, consistent with the 80+ other tests in this file. + +### ✅ Copyright Header +New test file includes required `# Copyright (c) Microsoft. All rights reserved.` header. + +## Issues Found and Fixed During Review + +1. **Formatting violations**: `ruff format` flagged multi-line string concatenations in log messages and `Content.from_function_call()` calls that should be single-line. Fixed by running `ruff format`. +2. **Unused import**: `import pytest` in the new test file was unused (removed by `ruff --fix` auto-correction). + +## Minor Observations (not blocking) + +1. **Test docstring in test 2 references mock behavior**: The assertion `last_msg.text == "I broke out of the function invocation loop..."` relies on MockBaseChatClient's specific mock response text. This is fine for a unit test but is somewhat fragile if the mock changes. The accompanying `has_function_calls` assertion is the more robust check. + +2. **Streaming fcc_messages not explicitly tested**: The streaming path fix is verified by the existing `test_streaming_max_iterations_limit`, but the new issue-specific tests only cover the non-streaming path. The streaming path works through a different mechanism (yielded updates) so this is acceptable. + +## Verdict + +The fix is correct, minimal, well-tested, and follows repository conventions. All formatting and lint issues were resolved during review. From d87400ada4d6fa988caeb1b4241d9e70b4a41b59 Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 15:23:42 -0800 Subject: [PATCH 5/7] Remove temporary investigation docs. --- ISSUE-FIX-REPORT.md | 78 --------------------------------------- ISSUE-QUALITY-REVIEW.md | 54 --------------------------- ISSUE-REPRO-RESULTS.md | 82 ----------------------------------------- 3 files changed, 214 deletions(-) delete mode 100644 ISSUE-FIX-REPORT.md delete mode 100644 ISSUE-QUALITY-REVIEW.md delete mode 100644 ISSUE-REPRO-RESULTS.md diff --git a/ISSUE-FIX-REPORT.md b/ISSUE-FIX-REPORT.md deleted file mode 100644 index 5d1e30c03a..0000000000 --- a/ISSUE-FIX-REPORT.md +++ /dev/null @@ -1,78 +0,0 @@ -# Issue #1366 Fix Report: Thread corruption - agent.run() returns with unexecuted tool calls - -## Pattern Exploration (3+ references) - -### 1. Error threshold pattern (`_tools.py:2163-2166`) -When consecutive errors reach `max_consecutive_errors_per_request`, the code sets `tool_choice="none"` and **continues the loop**. The next iteration calls the model without tools, producing a clean text response. The `_process_function_requests` function detects no function calls and returns `action="return"`, which prepends `fcc_messages` via `_prepend_fcc_messages()` (line 1974). - -### 2. max_function_calls pattern (`_tools.py:2167-2179`) -When `total_function_calls >= max_function_calls`, identical to the error pattern: sets `tool_choice="none"` and continues. The next iteration gets a toolless response and exits cleanly with history attached. - -### 3. Normal exit pattern (`_tools.py:2160-2161, 1971-1982`) -When the model's response has no function calls, `_extract_function_calls()` returns empty, triggering `_prepend_fcc_messages(response, fcc_messages)` before returning. This ensures all accumulated conversation history from prior iterations is included. - -### Chosen approach -All three patterns converge on the same mechanism: when stopping tool calls, make a final model call with `tool_choice="none"` and use `_prepend_fcc_messages` to attach history. The fix follows this exact pattern for `max_iterations` exhaustion. - -## Root Cause - -**File**: `python/packages/core/agent_framework/_tools.py`, lines 2199-2200 (non-streaming) and 2336-2337 (streaming). - -When the for-loop exhausts `max_iterations`, the code did: -```python -if response is not None: - return response # Premature return — bypasses failsafe -``` - -Since the loop always sets `response` to a non-None value (any `max_iterations >= 1`), the failsafe code (lines 2202-2212) that calls the model with `tool_choice="none"` and prepends `fcc_messages` was **unreachable dead code**. - -This caused two problems: -1. **No final text response**: The model was never asked to produce a text answer after tool execution stopped -2. **Lost conversation history**: `fcc_messages` accumulated from prior iterations was never prepended to the response - -## Code Changes - -### `python/packages/core/agent_framework/_tools.py` -**Non-streaming** (line 2199): Changed `if response is not None: return response` to a log statement, allowing the existing failsafe code to execute. -**Streaming** (line 2343): Same change for the streaming path. - -Both paths now: -1. Log that max_iterations was reached (matching the `max_function_calls` log pattern) -2. Fall through to the failsafe: call model with `tool_choice="none"` and prepend `fcc_messages` - -### `python/packages/core/tests/core/test_function_invocation_logic.py` -Removed `@pytest.mark.skip` from `test_max_iterations_limit` (2 duplicate skip markers) and `test_streaming_max_iterations_limit` (1 skip marker). These tests validate the exact behavior that is now fixed. - -### `python/packages/core/tests/core/test_issue_1366_thread_corruption.py` (new) -4 targeted tests covering: -- Orphaned FunctionCallContent detection -- Final `tool_choice="none"` failsafe call -- `fcc_messages` preservation across iterations -- Full Agent.run() → thread integrity - -## Test Evidence - -### Before fix -``` -test_max_iterations_exhausted_makes_final_toolchoice_none_call FAILED - AssertionError: Expected failsafe text response, got: '' -test_max_iterations_preserves_all_fcc_messages FAILED - AssertionError: First iteration's function call missing from response -test_max_iterations_limit SKIPPED -test_streaming_max_iterations_limit SKIPPED -``` - -### After fix -``` -packages/core/tests/core/test_issue_1366_thread_corruption.py 4 passed -packages/core/tests/core/test_function_invocation_logic.py 80 passed, 2 skipped -packages/core/tests/core/ (full suite) 878 passed, 18 skipped -``` - -## Remaining Risks - -1. **Extra model call**: The fix adds one additional API call when `max_iterations` is exhausted. This is intentional — the error/max_function_calls paths already do this — but could increase latency/cost in edge cases. - -2. **Conversation-based APIs**: For APIs where `conversation_id` is set, `prepped_messages` is cleared and only the last message is kept (line 2192-2194). The failsafe call sends this minimal context, which should be correct since the server retains full history, but this path is not covered by unit tests. - -3. **Streaming `fcc_messages`**: The streaming failsafe doesn't explicitly prepend `fcc_messages` (it doesn't need to — prior iterations' updates were already yielded). However, the finalized `ChatResponse.from_updates()` aggregates all yielded updates, which should produce a complete response. diff --git a/ISSUE-QUALITY-REVIEW.md b/ISSUE-QUALITY-REVIEW.md deleted file mode 100644 index fea3038dee..0000000000 --- a/ISSUE-QUALITY-REVIEW.md +++ /dev/null @@ -1,54 +0,0 @@ -quality_gate: approved -quality_score: 92 - -# Quality Review: Issue #1366 Fix - -## Summary - -The fix correctly addresses thread corruption when `max_iterations` is exhausted in `_tools.py` by removing the premature `return response` and letting execution fall through to the existing failsafe path. Both non-streaming and streaming code paths are fixed symmetrically. - -## Code Quality Assessment - -### ✅ Minimal, Surgical Change -The production code change is exactly 2 lines removed and ~6 lines added per path (non-streaming + streaming). The fix replaces `return response` with a log statement, reusing the **existing** failsafe code that was previously dead. No new abstractions or complexity introduced. - -### ✅ Follows Existing Patterns -- Log message style (`"Maximum iterations reached (%d). ..."`) matches the existing `max_function_calls` pattern at lines 2174 and 2319. -- Comment style matches surrounding code (4-line block comments with issue reference). -- Both non-streaming (`_get_response`) and streaming (`_stream`) paths are updated identically, maintaining the existing structural symmetry. - -### ✅ Lint & Format Clean -- `ruff check` passes with no errors. -- `ruff format` passes (formatting issues found during review were fixed). - -### ✅ Test Suite Integrity -- **878 passed, 18 skipped, 0 failed** — full core test suite. -- Previously skipped tests (`test_max_iterations_limit`, `test_streaming_max_iterations_limit`) are now unskipped and passing. -- Duplicate `@pytest.mark.skip` decorators were correctly removed from `test_max_iterations_limit`. - -### ✅ New Regression Tests (4 tests) -The new `test_issue_1366_thread_corruption.py` file covers: -1. **Orphaned function calls** — verifies all FunctionCallContent have matching FunctionResultContent -2. **Failsafe tool_choice="none"** — verifies a final text response is produced -3. **fcc_messages preservation** — verifies all iterations' messages are included -4. **Full Agent.run() flow** — end-to-end thread integrity check - -Tests use the existing `chat_client_base` fixture from conftest.py, consistent with the 80+ other tests in this file. - -### ✅ Copyright Header -New test file includes required `# Copyright (c) Microsoft. All rights reserved.` header. - -## Issues Found and Fixed During Review - -1. **Formatting violations**: `ruff format` flagged multi-line string concatenations in log messages and `Content.from_function_call()` calls that should be single-line. Fixed by running `ruff format`. -2. **Unused import**: `import pytest` in the new test file was unused (removed by `ruff --fix` auto-correction). - -## Minor Observations (not blocking) - -1. **Test docstring in test 2 references mock behavior**: The assertion `last_msg.text == "I broke out of the function invocation loop..."` relies on MockBaseChatClient's specific mock response text. This is fine for a unit test but is somewhat fragile if the mock changes. The accompanying `has_function_calls` assertion is the more robust check. - -2. **Streaming fcc_messages not explicitly tested**: The streaming path fix is verified by the existing `test_streaming_max_iterations_limit`, but the new issue-specific tests only cover the non-streaming path. The streaming path works through a different mechanism (yielded updates) so this is acceptable. - -## Verdict - -The fix is correct, minimal, well-tested, and follows repository conventions. All formatting and lint issues were resolved during review. diff --git a/ISSUE-REPRO-RESULTS.md b/ISSUE-REPRO-RESULTS.md deleted file mode 100644 index 613c017239..0000000000 --- a/ISSUE-REPRO-RESULTS.md +++ /dev/null @@ -1,82 +0,0 @@ -reproduction_status: reproduced -failing_test: packages/core/tests/core/test_issue_1366_thread_corruption.py::test_max_iterations_exhausted_makes_final_toolchoice_none_call AND packages/core/tests/core/test_issue_1366_thread_corruption.py::test_max_iterations_preserves_all_fcc_messages -failure_observed: yes -evidence_strength: high -confidence: high - -# Issue #1366 Reproduction: Thread corruption - agent.run() returns with unexecuted tool calls - -## Bug Summary - -When the function invocation loop in `_tools.py` exhausts `max_iterations`, it returns the response directly without: -1. Making a final model call with `tool_choice="none"` (failsafe) -2. Prepending `fcc_messages` from prior iterations to the response - -## Code Location - -**File**: `python/packages/core/agent_framework/_tools.py` - -**Bug location** (lines 2199-2200 in `_get_response()`): -```python -if response is not None: - return response # Returns directly, bypasses failsafe at lines 2202-2212 -``` - -The failsafe path (lines 2202-2212) that calls the model with `tool_choice="none"` and prepends `fcc_messages` is ONLY reached when `response is None` (i.e., the loop never ran). When `max_iterations > 0`, the loop always runs at least once, setting `response` to a non-None value. Therefore, the failsafe path is **dead code** for any normal configuration. - -**Streaming equivalent** (line 2336-2337 in `_stream()`): -```python -if response is not None: - return # Same issue in streaming path -``` - -## Failing Tests - -### Test 1: `test_max_iterations_exhausted_makes_final_toolchoice_none_call` — FAILED -``` -AssertionError: Expected failsafe text response, got: '' -assert '' == 'I broke out of the function invocation loop...' -``` -**Demonstrates**: When max_iterations is reached with the model still requesting tools, no final `tool_choice="none"` call is made. The response ends with function_call/result messages instead of a clean text response. - -### Test 2: `test_max_iterations_preserves_all_fcc_messages` — FAILED -``` -AssertionError: First iteration's function call missing from response -assert 'call_1' in {'call_2'} -``` -**Demonstrates**: Only the LAST iteration's function call/result messages are in the response. Prior iterations' messages (accumulated in `fcc_messages`) are discarded because `_prepend_fcc_messages()` is never called on this code path. - -### Test 3: `test_max_iterations_exhausted_returns_orphaned_function_calls` — PASSED -Within a single response, function calls within each iteration ARE matched with results. - -### Test 4: `test_thread_safe_after_max_iterations_with_agent` — PASSED -At the Agent level, function calls in the returned response have matching results. - -## Pre-existing Evidence - -The existing test `test_max_iterations_limit` (line 836) is **explicitly skipped** with: -```python -@pytest.mark.skip(reason="Failsafe behavior with max_iterations needs investigation in unified API") -``` -This confirms the development team is aware the failsafe behavior is broken. - -## How This Causes the Reported 400 Error - -While individual function calls within each iteration are matched (tests 3-4 pass), the thread corruption manifests in two ways: - -1. **Lost conversation history**: When the response only contains the last iteration's messages, the thread stored by `InMemoryHistoryProvider.after_run()` (via `context.response.messages`) is incomplete. Prior iterations' function calls and results are discarded. - -2. **No final text response**: Without the failsafe `tool_choice="none"` call, the response ends with a tool message (function results). The model is never asked to produce a final answer. Depending on how the caller manages the thread, this incomplete state can lead to the OpenAI 400 error on the next API call when the conversation context doesn't match what the model expects. - -## Fix Direction (DO NOT implement in this phase) - -The fix should ensure that when `max_iterations` is exhausted: -1. A final model call is made with `tool_choice="none"` to get a clean text response -2. All `fcc_messages` from prior iterations are prepended to the response -3. OR: inject error `FunctionResultContent` for any orphaned `FunctionCallContent` as the issue suggests - -## Run Command - -```bash -cd python && uv run pytest packages/core/tests/core/test_issue_1366_thread_corruption.py -v -``` From 0accd2ae8936310b22e8f73348a54b8daeecf0bc Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 16:29:53 -0800 Subject: [PATCH 6/7] Address PR review: explicit enabled check in log condition, clarify mock behavior in test - Add explicit function_invocation_configuration['enabled'] check to the 'Maximum iterations reached' log condition in both non-streaming and streaming paths, making intent clearer when function invocation is disabled. - Add comment in test_thread_safe_after_max_iterations_with_agent explaining that the failsafe response (tool_choice='none') is provided automatically by the mock client, not from run_responses. --- python/packages/core/agent_framework/_tools.py | 4 ++-- .../core/tests/core/test_issue_1366_thread_corruption.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 2d2d4fcaf6..ba57f42a5a 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -2196,7 +2196,7 @@ async def _get_response() -> ChatResponse: # Make a final model call with tool_choice="none" so the model # produces a plain text answer instead of leaving orphaned # function_call items without matching results (issue #1366). - if response is not None: + if response is not None and self.function_invocation_configuration["enabled"]: logger.info( "Maximum iterations reached (%d). Requesting final response without tools.", self.function_invocation_configuration["max_iterations"], @@ -2335,7 +2335,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: # Make a final model call with tool_choice="none" so the model # produces a plain text answer instead of leaving orphaned # function_call items without matching results (issue #1366). - if response is not None: + if response is not None and self.function_invocation_configuration["enabled"]: logger.info( "Maximum iterations reached (%d). Requesting final response without tools.", self.function_invocation_configuration["max_iterations"], diff --git a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py index 446f7ce50f..293f17ec91 100644 --- a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py +++ b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py @@ -250,7 +250,11 @@ def browser_snapshot(url: str) -> str: # First call: model returns a function call, it's executed, then model # returns ANOTHER function call (on the last iteration), which is executed - # but no final text answer is produced + # but no final text answer is produced. + # Note: Only 2 responses are listed here for 2 iterations. The failsafe + # call (with tool_choice="none") after the loop is handled automatically + # by the mock client, which returns a hardcoded text response when + # tool_choice="none" (see conftest.py ChatClientBase.get_response). chat_client_base.run_responses = [ ChatResponse( messages=Message( From 819cbc6c5dca91944e5b099ad9f0f8aa58dbe4e9 Mon Sep 17 00:00:00 2001 From: alliscode Date: Tue, 24 Feb 2026 16:52:53 -0800 Subject: [PATCH 7/7] Blend fix and tests into project without issue-specific callouts - Remove issue #1366 references from _tools.py comments - Move regression tests from standalone test_issue_1366_thread_corruption.py into test_function_invocation_logic.py alongside existing max_iterations tests - Clean up test docstrings to describe behavior generically - Delete the standalone issue-specific test file --- .../packages/core/agent_framework/_tools.py | 4 +- .../core/test_function_invocation_logic.py | 250 ++++++++++++++ .../core/test_issue_1366_thread_corruption.py | 309 ------------------ 3 files changed, 252 insertions(+), 311 deletions(-) delete mode 100644 python/packages/core/tests/core/test_issue_1366_thread_corruption.py diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index ba57f42a5a..3ec167d4f7 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -2195,7 +2195,7 @@ async def _get_response() -> ChatResponse: # Loop exhausted all iterations (or function invocation disabled). # Make a final model call with tool_choice="none" so the model # produces a plain text answer instead of leaving orphaned - # function_call items without matching results (issue #1366). + # function_call items without matching results. if response is not None and self.function_invocation_configuration["enabled"]: logger.info( "Maximum iterations reached (%d). Requesting final response without tools.", @@ -2334,7 +2334,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: # Loop exhausted all iterations (or function invocation disabled). # Make a final model call with tool_choice="none" so the model # produces a plain text answer instead of leaving orphaned - # function_call items without matching results (issue #1366). + # function_call items without matching results. if response is not None and self.function_invocation_configuration["enabled"]: logger.info( "Maximum iterations reached (%d). Requesting final response without tools.", diff --git a/python/packages/core/tests/core/test_function_invocation_logic.py b/python/packages/core/tests/core/test_function_invocation_logic.py index fb90c1b64a..f278afaeac 100644 --- a/python/packages/core/tests/core/test_function_invocation_logic.py +++ b/python/packages/core/tests/core/test_function_invocation_logic.py @@ -878,6 +878,256 @@ def ai_func(arg1: str) -> str: assert response.messages[-1].text == "I broke out of the function invocation loop..." # Failsafe response +async def test_max_iterations_no_orphaned_function_calls(chat_client_base: SupportsChatGetResponse): + """When max_iterations is reached, verify the returned response has no orphaned + FunctionCallContent (i.e., every function_call has a matching function_result). + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Processed {arg1}" + + # Model keeps requesting tool calls on every iteration + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_3", name="test_function", arguments='{"arg1": "v3"}') + ], + ) + ), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + # Collect all function_call and function_result call_ids from response + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + orphaned_calls = all_call_ids - all_result_ids + assert not orphaned_calls, ( + f"Response contains orphaned FunctionCallContent without matching " + f"FunctionResultContent: {orphaned_calls}." + ) + + +async def test_max_iterations_makes_final_toolchoice_none_call(chat_client_base: SupportsChatGetResponse): + """When max_iterations is reached, verify a final model call is made with + tool_choice='none' to produce a clean text response. + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Processed {arg1}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') + ], + ) + ), + # This response should be reached via failsafe (tool_choice="none") + ChatResponse(messages=Message(role="assistant", text="Final answer after giving up on tools.")), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 1 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + assert exec_counter == 1, f"Expected 1 function execution, got {exec_counter}" + + # The response should end with a plain text message (from the failsafe call) + last_msg = response.messages[-1] + has_function_calls = any(c.type == "function_call" for c in last_msg.contents) + + assert not has_function_calls, ( + f"Last message in response still contains function_call items. " + f"Expected a clean text response after max_iterations failsafe. " + f"Got message with role={last_msg.role}, contents={[c.type for c in last_msg.contents]}" + ) + + # The mock client returns "I broke out of the function invocation loop..." + # when tool_choice="none" + assert last_msg.text == "I broke out of the function invocation loop...", ( + f"Expected failsafe text response, got: {last_msg.text!r}" + ) + + +async def test_max_iterations_preserves_all_fcc_messages(chat_client_base: SupportsChatGetResponse): + """When max_iterations is reached and a final response is produced, all + intermediate function call/result messages should be included. + """ + exec_counter = 0 + + @tool(name="test_function", approval_mode="never_require") + def ai_func(arg1: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Result {exec_counter}" + + # Two iterations of function calls, then failsafe + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') + ], + ) + ), + ChatResponse(messages=Message(role="assistant", text="Done")), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + response = await chat_client_base.get_response( + [Message(role="user", text="hello")], + options={"tool_choice": "auto", "tools": [ai_func]}, + ) + + assert exec_counter == 2, f"Expected 2 function executions, got {exec_counter}" + + # All function calls from both iterations should be present in the response + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + assert "call_1" in all_call_ids, "First iteration's function call missing from response" + assert "call_2" in all_call_ids, "Second iteration's function call missing from response" + + assert all_call_ids == all_result_ids, ( + f"Mismatched function calls and results. Calls: {all_call_ids}, Results: {all_result_ids}" + ) + + +async def test_max_iterations_thread_integrity_with_agent(chat_client_base: SupportsChatGetResponse): + """Verify that agent.run() does not produce orphaned function calls after + max_iterations, which would corrupt the thread and cause API errors on the + next call. + """ + + @tool(name="browser_snapshot", approval_mode="never_require") + def browser_snapshot(url: str) -> str: + return f"Screenshot of {url}" + + # Model keeps requesting tool calls on every iteration. + # The failsafe call (with tool_choice="none") after the loop is handled + # automatically by the mock client, which returns a hardcoded text response + # when tool_choice="none" (see conftest.py ChatClientBase.get_response). + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_abc", name="browser_snapshot", arguments='{"url": "https://example.com"}' + ) + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="call_xyz", name="browser_snapshot", arguments='{"url": "https://test.com"}' + ) + ], + ) + ), + ] + + chat_client_base.function_invocation_configuration["max_iterations"] = 2 + + agent = Agent( + client=chat_client_base, + name="test-agent", + tools=[browser_snapshot], + ) + + response = await agent.run( + "Take screenshots", + options={"tool_choice": "auto"}, + ) + + # Check for orphaned function calls in the response messages + all_call_ids = set() + all_result_ids = set() + for msg in response.messages: + for content in msg.contents: + if content.type == "function_call": + all_call_ids.add(content.call_id) + elif content.type == "function_result": + all_result_ids.add(content.call_id) + + orphaned_calls = all_call_ids - all_result_ids + assert not orphaned_calls, ( + f"Response contains orphaned function calls {orphaned_calls}. " + f"This would cause API errors on the next call." + ) + + @pytest.mark.parametrize("max_iterations", [10]) async def test_max_function_calls_limits_parallel_invocations(chat_client_base: SupportsChatGetResponse): """Test that max_function_calls caps total function invocations across iterations with parallel calls.""" diff --git a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py b/python/packages/core/tests/core/test_issue_1366_thread_corruption.py deleted file mode 100644 index 293f17ec91..0000000000 --- a/python/packages/core/tests/core/test_issue_1366_thread_corruption.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. -"""Regression tests for issue #1366: agent.run() returns with unexecuted tool calls. - -When max_iterations is reached, the function invocation loop should make a final -model call with tool_choice="none" to get a plain text response. Instead, it -returns the last iteration's response directly, which may contain function_call -items without a subsequent final text answer — and critically, the thread/response -may contain FunctionCallContent without matching FunctionResultContent if the -loop structure doesn't guarantee tool execution before returning. - -Additionally, when the loop exhausts all iterations while the model keeps -requesting tools, fcc_messages from prior iterations are NOT prepended to the -returned response, potentially losing conversation history. - -See: https://github.com/microsoft/agent-framework/issues/1366 -""" - -from agent_framework import ( - ChatResponse, - Content, - Message, - SupportsChatGetResponse, - tool, -) - - -async def test_max_iterations_exhausted_returns_orphaned_function_calls( - chat_client_base: SupportsChatGetResponse, -): - """When max_iterations is reached, verify the returned response has no orphaned - FunctionCallContent (i.e., every function_call has a matching function_result). - - Bug: The loop at _tools.py returns `response` directly when max_iterations is - exhausted (`if response is not None: return response`). No final model call - with tool_choice='none' is made, and fcc_messages are not prepended. - - Expected: Either all FunctionCallContent have matching FunctionResultContent, - OR a final model call is made with tool_choice='none'. - """ - exec_counter = 0 - - @tool(name="test_function", approval_mode="never_require") - def ai_func(arg1: str) -> str: - nonlocal exec_counter - exec_counter += 1 - return f"Processed {arg1}" - - # Model keeps requesting tool calls on every iteration - chat_client_base.run_responses = [ - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') - ], - ) - ), - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') - ], - ) - ), - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_3", name="test_function", arguments='{"arg1": "v3"}') - ], - ) - ), - ] - - chat_client_base.function_invocation_configuration["max_iterations"] = 2 - - response = await chat_client_base.get_response( - [Message(role="user", text="hello")], - options={"tool_choice": "auto", "tools": [ai_func]}, - ) - - # Collect all function_call and function_result call_ids from response - all_call_ids = set() - all_result_ids = set() - for msg in response.messages: - for content in msg.contents: - if content.type == "function_call": - all_call_ids.add(content.call_id) - elif content.type == "function_result": - all_result_ids.add(content.call_id) - - orphaned_calls = all_call_ids - all_result_ids - assert not orphaned_calls, ( - f"Response contains orphaned FunctionCallContent without matching " - f"FunctionResultContent: {orphaned_calls}. " - f"This will cause OpenAI 400 error on next API call." - ) - - -async def test_max_iterations_exhausted_makes_final_toolchoice_none_call( - chat_client_base: SupportsChatGetResponse, -): - """When max_iterations is reached, verify a final model call is made with - tool_choice='none' to produce a clean text response. - - Bug: After the loop exhausts, the code does `if response is not None: return response` - skipping the failsafe path that would call the model with tool_choice='none'. - - The test verifies the response ends with a plain text message (no function calls). - """ - exec_counter = 0 - - @tool(name="test_function", approval_mode="never_require") - def ai_func(arg1: str) -> str: - nonlocal exec_counter - exec_counter += 1 - return f"Processed {arg1}" - - # Model keeps requesting tool calls - chat_client_base.run_responses = [ - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') - ], - ) - ), - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') - ], - ) - ), - # This response should be reached via failsafe (tool_choice="none") - ChatResponse(messages=Message(role="assistant", text="Final answer after giving up on tools.")), - ] - - chat_client_base.function_invocation_configuration["max_iterations"] = 1 - - response = await chat_client_base.get_response( - [Message(role="user", text="hello")], - options={"tool_choice": "auto", "tools": [ai_func]}, - ) - - assert exec_counter == 1, f"Expected 1 function execution, got {exec_counter}" - - # The response should end with a plain text message (from the failsafe call) - # NOT with function_call or tool messages - last_msg = response.messages[-1] - has_function_calls = any(c.type == "function_call" for c in last_msg.contents) - - assert not has_function_calls, ( - f"Last message in response still contains function_call items. " - f"Expected a clean text response after max_iterations failsafe. " - f"Got message with role={last_msg.role}, contents={[c.type for c in last_msg.contents]}" - ) - - # The mock client returns "I broke out of the function invocation loop..." - # when tool_choice="none" - assert last_msg.text == "I broke out of the function invocation loop...", ( - f"Expected failsafe text response, got: {last_msg.text!r}" - ) - - -async def test_max_iterations_preserves_all_fcc_messages( - chat_client_base: SupportsChatGetResponse, -): - """When max_iterations is reached and a final response is produced, all - intermediate function call/result messages should be included. - - Bug: fcc_messages from prior iterations are discarded when the loop returns - `response` directly instead of going through the failsafe path. - """ - exec_counter = 0 - - @tool(name="test_function", approval_mode="never_require") - def ai_func(arg1: str) -> str: - nonlocal exec_counter - exec_counter += 1 - return f"Result {exec_counter}" - - # Two iterations of function calls, then failsafe - chat_client_base.run_responses = [ - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_1", name="test_function", arguments='{"arg1": "v1"}') - ], - ) - ), - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call(call_id="call_2", name="test_function", arguments='{"arg1": "v2"}') - ], - ) - ), - ChatResponse(messages=Message(role="assistant", text="Done")), - ] - - chat_client_base.function_invocation_configuration["max_iterations"] = 2 - - response = await chat_client_base.get_response( - [Message(role="user", text="hello")], - options={"tool_choice": "auto", "tools": [ai_func]}, - ) - - assert exec_counter == 2, f"Expected 2 function executions, got {exec_counter}" - - # All function calls from both iterations should be present in the response - all_call_ids = set() - all_result_ids = set() - for msg in response.messages: - for content in msg.contents: - if content.type == "function_call": - all_call_ids.add(content.call_id) - elif content.type == "function_result": - all_result_ids.add(content.call_id) - - # Both iterations' function calls should be present - assert "call_1" in all_call_ids, "First iteration's function call missing from response" - assert "call_2" in all_call_ids, "Second iteration's function call missing from response" - - # Both should have matching results - assert all_call_ids == all_result_ids, ( - f"Mismatched function calls and results. Calls: {all_call_ids}, Results: {all_result_ids}" - ) - - -async def test_thread_safe_after_max_iterations_with_agent( - chat_client_base: SupportsChatGetResponse, -): - """Simulate the full agent.run() → thread → agent.run() flow to verify - that the thread doesn't contain orphaned function calls after max_iterations. - - This reproduces the exact user-reported scenario: agent.run() returns, - response is stored in thread, next agent.run() fails with OpenAI 400 error. - """ - from agent_framework import Agent - - @tool(name="browser_snapshot", approval_mode="never_require") - def browser_snapshot(url: str) -> str: - return f"Screenshot of {url}" - - # First call: model returns a function call, it's executed, then model - # returns ANOTHER function call (on the last iteration), which is executed - # but no final text answer is produced. - # Note: Only 2 responses are listed here for 2 iterations. The failsafe - # call (with tool_choice="none") after the loop is handled automatically - # by the mock client, which returns a hardcoded text response when - # tool_choice="none" (see conftest.py ChatClientBase.get_response). - chat_client_base.run_responses = [ - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call( - call_id="call_abc", name="browser_snapshot", arguments='{"url": "https://example.com"}' - ) - ], - ) - ), - ChatResponse( - messages=Message( - role="assistant", - contents=[ - Content.from_function_call( - call_id="call_xyz", name="browser_snapshot", arguments='{"url": "https://test.com"}' - ) - ], - ) - ), - ] - - chat_client_base.function_invocation_configuration["max_iterations"] = 2 - - agent = Agent( - client=chat_client_base, - name="test-agent", - tools=[browser_snapshot], - ) - - response = await agent.run( - "Take screenshots", - options={"tool_choice": "auto"}, - ) - - # Check for orphaned function calls in the response messages - all_call_ids = set() - all_result_ids = set() - for msg in response.messages: - for content in msg.contents: - if content.type == "function_call": - all_call_ids.add(content.call_id) - elif content.type == "function_result": - all_result_ids.add(content.call_id) - - orphaned_calls = all_call_ids - all_result_ids - assert not orphaned_calls, ( - f"Thread corruption: response contains orphaned function calls {orphaned_calls}. " - f"Passing this thread to OpenAI will cause 400 error: " - f"'An assistant message with tool_calls must be followed by tool messages.'" - )