From e7061bd1d30e34bbd4b4cf3c894bb954a7e0473d Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Mon, 12 Jan 2026 12:29:45 +0900 Subject: [PATCH 1/3] fix(core): simplify FunctionResultContent ordering in WorkflowAgent.merge_updates --- .../core/agent_framework/_workflows/_agent.py | 24 +- .../tests/workflow/test_workflow_agent.py | 252 ++++++++++++++++++ 2 files changed, 274 insertions(+), 2 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_agent.py b/python/packages/core/agent_framework/_workflows/_agent.py index 7eec2472f0..a8b4533bd8 100644 --- a/python/packages/core/agent_framework/_workflows/_agent.py +++ b/python/packages/core/agent_framework/_workflows/_agent.py @@ -463,12 +463,30 @@ def merge_updates(updates: list[AgentRunResponseUpdate], response_id: str) -> Ag An AgentRunResponse with messages in processing order and aggregated metadata. """ # PHASE 1: GROUP UPDATES BY RESPONSE_ID AND MESSAGE_ID + # First pass: build call_id -> response_id map from FunctionCallContent updates + call_id_to_response_id: dict[str, str] = {} + for u in updates: + if u.response_id: + for content in u.contents: + if isinstance(content, FunctionCallContent) and content.call_id: + call_id_to_response_id[content.call_id] = u.response_id + + # Second pass: group updates, associating FunctionResultContent with their calls states: dict[str, WorkflowAgent._ResponseState] = {} global_dangling: list[AgentRunResponseUpdate] = [] for u in updates: - if u.response_id: - state = states.setdefault(u.response_id, {"by_msg": {}, "dangling": []}) + effective_response_id = u.response_id + # If no response_id, check if this is a FunctionResultContent that matches a call + if not effective_response_id: + for content in u.contents: + if isinstance(content, FunctionResultContent) and content.call_id: + effective_response_id = call_id_to_response_id.get(content.call_id) + if effective_response_id: + break + + if effective_response_id: + state = states.setdefault(effective_response_id, {"by_msg": {}, "dangling": []}) by_msg = state["by_msg"] dangling = state["dangling"] if u.message_id: @@ -569,6 +587,8 @@ def _add_raw(value: object) -> None: raw_representations.append(cast_value) # PHASE 3: HANDLE GLOBAL DANGLING UPDATES (NO RESPONSE_ID) + # These are updates that couldn't be associated with any response_id + # (e.g., orphan FunctionResultContent with no matching FunctionCallContent) if global_dangling: flattened = AgentRunResponse.from_agent_run_response_updates(global_dangling) final_messages.extend(flattened.messages) diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py index d2ed8d1394..9be6f7e8e2 100644 --- a/python/packages/core/tests/workflow/test_workflow_agent.py +++ b/python/packages/core/tests/workflow/test_workflow_agent.py @@ -19,6 +19,7 @@ FunctionApprovalRequestContent, FunctionApprovalResponseContent, FunctionCallContent, + FunctionResultContent, Role, TextContent, UriContent, @@ -957,3 +958,254 @@ def test_merge_updates_metadata_aggregation(self): # properties only include final merged result from its own updates } assert result.additional_properties == expected_properties + + def test_merge_updates_function_result_ordering_github_2977(self): + """Test that FunctionResultContent updates are placed after their FunctionCallContent. + + This test reproduces GitHub issue #2977: When using a thread with WorkflowAgent, + FunctionResultContent updates without response_id were being added to global_dangling + and placed at the end of messages. This caused OpenAI to reject the conversation because + "An assistant message with 'tool_calls' must be followed by tool messages responding + to each 'tool_call_id'." + + The expected ordering should be: + - User Question + - FunctionCallContent (assistant) + - FunctionResultContent (tool) + - Assistant Answer + + NOT: + - User Question + - FunctionCallContent (assistant) + - Assistant Answer + - FunctionResultContent (tool) <-- This was the bug + """ + call_id = "call_F09je20iUue6DlFRDLLh3dGK" + + updates = [ + # User question + AgentRunResponseUpdate( + contents=[TextContent(text="What is the weather?")], + role=Role.USER, + response_id="resp-1", + message_id="msg-1", + created_at="2024-01-01T12:00:00Z", + ), + # Assistant with function call + AgentRunResponseUpdate( + contents=[FunctionCallContent(call_id=call_id, name="get_weather", arguments='{"location": "NYC"}')], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-2", + created_at="2024-01-01T12:00:01Z", + ), + # Function result (no response_id - this was being placed at the end) + AgentRunResponseUpdate( + contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")], + role=Role.TOOL, + response_id=None, # Bug: no response_id causes this to go to global_dangling + message_id="msg-3", + created_at="2024-01-01T12:00:02Z", + ), + # Final assistant answer + AgentRunResponseUpdate( + contents=[TextContent(text="The weather in NYC is sunny and 72F.")], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-4", + created_at="2024-01-01T12:00:03Z", + ), + ] + + result = WorkflowAgent.merge_updates(updates, "final-response") + + assert len(result.messages) == 4 + + # Extract content types for verification + content_sequence = [] + for msg in result.messages: + for content in msg.contents: + if isinstance(content, TextContent): + content_sequence.append(("text", msg.role)) + elif isinstance(content, FunctionCallContent): + content_sequence.append(("function_call", msg.role)) + elif isinstance(content, FunctionResultContent): + content_sequence.append(("function_result", msg.role)) + + # Verify correct ordering: user -> function_call -> function_result -> assistant_answer + expected_sequence = [ + ("text", Role.USER), + ("function_call", Role.ASSISTANT), + ("function_result", Role.TOOL), + ("text", Role.ASSISTANT), + ] + + assert content_sequence == expected_sequence, ( + f"FunctionResultContent should come immediately after FunctionCallContent. " + f"Got: {content_sequence}, Expected: {expected_sequence}" + ) + + # Additional check: verify FunctionResultContent call_id matches FunctionCallContent + function_call_idx = None + function_result_idx = None + for i, msg in enumerate(result.messages): + for content in msg.contents: + if isinstance(content, FunctionCallContent): + function_call_idx = i + assert content.call_id == call_id + elif isinstance(content, FunctionResultContent): + function_result_idx = i + assert content.call_id == call_id + + assert function_call_idx is not None + assert function_result_idx is not None + assert function_result_idx == function_call_idx + 1, ( + f"FunctionResultContent at index {function_result_idx} should immediately follow " + f"FunctionCallContent at index {function_call_idx}" + ) + + def test_merge_updates_multiple_function_results_ordering_github_2977(self): + """Test ordering with multiple FunctionCallContent/FunctionResultContent pairs. + + Validates that multiple tool calls and results appear before the final assistant + answer, even when results arrive without response_id and in different order than calls. + + OpenAI requires that tool results appear after their calls and before the next + assistant text message, but doesn't require strict interleaving (result_1 immediately + after call_1). The key constraint is: calls -> results -> final_answer. + """ + call_id_1 = "call_weather_001" + call_id_2 = "call_time_002" + + updates = [ + # User question + AgentRunResponseUpdate( + contents=[TextContent(text="What's the weather and time?")], + role=Role.USER, + response_id="resp-1", + message_id="msg-1", + created_at="2024-01-01T12:00:00Z", + ), + # Assistant with first function call + AgentRunResponseUpdate( + contents=[FunctionCallContent(call_id=call_id_1, name="get_weather", arguments='{"location": "NYC"}')], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-2", + created_at="2024-01-01T12:00:01Z", + ), + # Assistant with second function call + AgentRunResponseUpdate( + contents=[FunctionCallContent(call_id=call_id_2, name="get_time", arguments='{"timezone": "EST"}')], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-3", + created_at="2024-01-01T12:00:02Z", + ), + # Second function result arrives first (no response_id) + AgentRunResponseUpdate( + contents=[FunctionResultContent(call_id=call_id_2, result="3:00 PM EST")], + role=Role.TOOL, + response_id=None, + message_id="msg-4", + created_at="2024-01-01T12:00:03Z", + ), + # First function result arrives second (no response_id) + AgentRunResponseUpdate( + contents=[FunctionResultContent(call_id=call_id_1, result="Sunny, 72F")], + role=Role.TOOL, + response_id=None, + message_id="msg-5", + created_at="2024-01-01T12:00:04Z", + ), + # Final assistant answer + AgentRunResponseUpdate( + contents=[TextContent(text="It's sunny (72F) and 3 PM in NYC.")], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-6", + created_at="2024-01-01T12:00:05Z", + ), + ] + + result = WorkflowAgent.merge_updates(updates, "final-response") + + assert len(result.messages) == 6 + + # Build a sequence of (content_type, call_id_if_applicable) + content_sequence = [] + for msg in result.messages: + for content in msg.contents: + if isinstance(content, TextContent): + content_sequence.append(("text", None)) + elif isinstance(content, FunctionCallContent): + content_sequence.append(("function_call", content.call_id)) + elif isinstance(content, FunctionResultContent): + content_sequence.append(("function_result", content.call_id)) + + # Verify all function results appear before the final assistant text + # Find indices + call_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_call"] + result_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_result"] + final_text_idx = len(content_sequence) - 1 # Last item should be final text + + # All calls should have corresponding results + call_ids_in_calls = {content_sequence[i][1] for i in call_indices} + call_ids_in_results = {content_sequence[i][1] for i in result_indices} + assert call_ids_in_calls == call_ids_in_results, "All function calls should have matching results" + + # All results should appear after all calls and before final text + assert all(r > max(call_indices) for r in result_indices), ( + "All function results should appear after all function calls" + ) + assert all(r < final_text_idx for r in result_indices), ( + "All function results should appear before the final assistant answer" + ) + assert content_sequence[final_text_idx] == ("text", None), "Final message should be assistant text" + + def test_merge_updates_function_result_no_matching_call(self): + """Test that FunctionResultContent without matching FunctionCallContent still appears. + + If a FunctionResultContent has a call_id that doesn't match any FunctionCallContent + in the messages, it should be appended at the end (fallback behavior). + """ + updates = [ + AgentRunResponseUpdate( + contents=[TextContent(text="Hello")], + role=Role.USER, + response_id="resp-1", + message_id="msg-1", + created_at="2024-01-01T12:00:00Z", + ), + # Function result with no matching call + AgentRunResponseUpdate( + contents=[FunctionResultContent(call_id="orphan_call_id", result="orphan result")], + role=Role.TOOL, + response_id=None, + message_id="msg-2", + created_at="2024-01-01T12:00:01Z", + ), + AgentRunResponseUpdate( + contents=[TextContent(text="Goodbye")], + role=Role.ASSISTANT, + response_id="resp-1", + message_id="msg-3", + created_at="2024-01-01T12:00:02Z", + ), + ] + + result = WorkflowAgent.merge_updates(updates, "final-response") + + assert len(result.messages) == 3 + + # Orphan function result should be at the end since it can't be matched + content_types = [] + for msg in result.messages: + for content in msg.contents: + if isinstance(content, TextContent): + content_types.append("text") + elif isinstance(content, FunctionResultContent): + content_types.append("function_result") + + # Order: text (user), text (assistant), function_result (orphan at end) + assert content_types == ["text", "text", "function_result"] From 85f04e3c1171804cfe9b0870d09f822d08d96f25 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Mon, 12 Jan 2026 13:30:23 +0900 Subject: [PATCH 2/3] improve comment --- python/packages/core/tests/workflow/test_workflow_agent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py index 9be6f7e8e2..6b6c20583a 100644 --- a/python/packages/core/tests/workflow/test_workflow_agent.py +++ b/python/packages/core/tests/workflow/test_workflow_agent.py @@ -999,11 +999,12 @@ def test_merge_updates_function_result_ordering_github_2977(self): message_id="msg-2", created_at="2024-01-01T12:00:01Z", ), - # Function result (no response_id - this was being placed at the end) + # Function result: no response_id previously caused this to go to global_dangling + # and be placed at the end (the bug); fix now correctly associates via call_id AgentRunResponseUpdate( contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")], role=Role.TOOL, - response_id=None, # Bug: no response_id causes this to go to global_dangling + response_id=None, message_id="msg-3", created_at="2024-01-01T12:00:02Z", ), From 434641d67b745eb043e0af90424c95ff33769778 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 15 Jan 2026 10:25:17 +0900 Subject: [PATCH 3/3] Fix name --- .../tests/workflow/test_workflow_agent.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py index 7135e6f883..7e47a82c9c 100644 --- a/python/packages/core/tests/workflow/test_workflow_agent.py +++ b/python/packages/core/tests/workflow/test_workflow_agent.py @@ -984,7 +984,7 @@ def test_merge_updates_function_result_ordering_github_2977(self): updates = [ # User question - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="What is the weather?")], role=Role.USER, response_id="resp-1", @@ -992,7 +992,7 @@ def test_merge_updates_function_result_ordering_github_2977(self): created_at="2024-01-01T12:00:00Z", ), # Assistant with function call - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionCallContent(call_id=call_id, name="get_weather", arguments='{"location": "NYC"}')], role=Role.ASSISTANT, response_id="resp-1", @@ -1001,7 +1001,7 @@ def test_merge_updates_function_result_ordering_github_2977(self): ), # Function result: no response_id previously caused this to go to global_dangling # and be placed at the end (the bug); fix now correctly associates via call_id - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")], role=Role.TOOL, response_id=None, @@ -1009,7 +1009,7 @@ def test_merge_updates_function_result_ordering_github_2977(self): created_at="2024-01-01T12:00:02Z", ), # Final assistant answer - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="The weather in NYC is sunny and 72F.")], role=Role.ASSISTANT, response_id="resp-1", @@ -1080,7 +1080,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): updates = [ # User question - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="What's the weather and time?")], role=Role.USER, response_id="resp-1", @@ -1088,7 +1088,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): created_at="2024-01-01T12:00:00Z", ), # Assistant with first function call - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionCallContent(call_id=call_id_1, name="get_weather", arguments='{"location": "NYC"}')], role=Role.ASSISTANT, response_id="resp-1", @@ -1096,7 +1096,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): created_at="2024-01-01T12:00:01Z", ), # Assistant with second function call - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionCallContent(call_id=call_id_2, name="get_time", arguments='{"timezone": "EST"}')], role=Role.ASSISTANT, response_id="resp-1", @@ -1104,7 +1104,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): created_at="2024-01-01T12:00:02Z", ), # Second function result arrives first (no response_id) - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionResultContent(call_id=call_id_2, result="3:00 PM EST")], role=Role.TOOL, response_id=None, @@ -1112,7 +1112,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): created_at="2024-01-01T12:00:03Z", ), # First function result arrives second (no response_id) - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionResultContent(call_id=call_id_1, result="Sunny, 72F")], role=Role.TOOL, response_id=None, @@ -1120,7 +1120,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self): created_at="2024-01-01T12:00:04Z", ), # Final assistant answer - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="It's sunny (72F) and 3 PM in NYC.")], role=Role.ASSISTANT, response_id="resp-1", @@ -1171,7 +1171,7 @@ def test_merge_updates_function_result_no_matching_call(self): in the messages, it should be appended at the end (fallback behavior). """ updates = [ - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="Hello")], role=Role.USER, response_id="resp-1", @@ -1179,14 +1179,14 @@ def test_merge_updates_function_result_no_matching_call(self): created_at="2024-01-01T12:00:00Z", ), # Function result with no matching call - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[FunctionResultContent(call_id="orphan_call_id", result="orphan result")], role=Role.TOOL, response_id=None, message_id="msg-2", created_at="2024-01-01T12:00:01Z", ), - AgentRunResponseUpdate( + AgentResponseUpdate( contents=[TextContent(text="Goodbye")], role=Role.ASSISTANT, response_id="resp-1",