From e7061bd1d30e34bbd4b4cf3c894bb954a7e0473d Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Mon, 12 Jan 2026 12:29:45 +0900
Subject: [PATCH 1/3] fix(core): simplify FunctionResultContent ordering in
 WorkflowAgent.merge_updates

---
 .../core/agent_framework/_workflows/_agent.py |  24 +-
 .../tests/workflow/test_workflow_agent.py     | 252 ++++++++++++++++++
 2 files changed, 274 insertions(+), 2 deletions(-)

diff --git a/python/packages/core/agent_framework/_workflows/_agent.py b/python/packages/core/agent_framework/_workflows/_agent.py
index 7eec2472f0..a8b4533bd8 100644
--- a/python/packages/core/agent_framework/_workflows/_agent.py
+++ b/python/packages/core/agent_framework/_workflows/_agent.py
@@ -463,12 +463,30 @@ def merge_updates(updates: list[AgentRunResponseUpdate], response_id: str) -> Ag
             An AgentRunResponse with messages in processing order and aggregated metadata.
         """
         # PHASE 1: GROUP UPDATES BY RESPONSE_ID AND MESSAGE_ID
+        # First pass: build call_id -> response_id map from FunctionCallContent updates
+        call_id_to_response_id: dict[str, str] = {}
+        for u in updates:
+            if u.response_id:
+                for content in u.contents:
+                    if isinstance(content, FunctionCallContent) and content.call_id:
+                        call_id_to_response_id[content.call_id] = u.response_id
+
+        # Second pass: group updates, associating FunctionResultContent with their calls
         states: dict[str, WorkflowAgent._ResponseState] = {}
         global_dangling: list[AgentRunResponseUpdate] = []
 
         for u in updates:
-            if u.response_id:
-                state = states.setdefault(u.response_id, {"by_msg": {}, "dangling": []})
+            effective_response_id = u.response_id
+            # If no response_id, check if this is a FunctionResultContent that matches a call
+            if not effective_response_id:
+                for content in u.contents:
+                    if isinstance(content, FunctionResultContent) and content.call_id:
+                        effective_response_id = call_id_to_response_id.get(content.call_id)
+                        if effective_response_id:
+                            break
+
+            if effective_response_id:
+                state = states.setdefault(effective_response_id, {"by_msg": {}, "dangling": []})
                 by_msg = state["by_msg"]
                 dangling = state["dangling"]
                 if u.message_id:
@@ -569,6 +587,8 @@ def _add_raw(value: object) -> None:
                         raw_representations.append(cast_value)
 
         # PHASE 3: HANDLE GLOBAL DANGLING UPDATES (NO RESPONSE_ID)
+        # These are updates that couldn't be associated with any response_id
+        # (e.g., orphan FunctionResultContent with no matching FunctionCallContent)
         if global_dangling:
             flattened = AgentRunResponse.from_agent_run_response_updates(global_dangling)
             final_messages.extend(flattened.messages)
diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py
index d2ed8d1394..9be6f7e8e2 100644
--- a/python/packages/core/tests/workflow/test_workflow_agent.py
+++ b/python/packages/core/tests/workflow/test_workflow_agent.py
@@ -19,6 +19,7 @@
     FunctionApprovalRequestContent,
     FunctionApprovalResponseContent,
     FunctionCallContent,
+    FunctionResultContent,
     Role,
     TextContent,
     UriContent,
@@ -957,3 +958,254 @@ def test_merge_updates_metadata_aggregation(self):
             # properties only include final merged result from its own updates
         }
         assert result.additional_properties == expected_properties
+
+    def test_merge_updates_function_result_ordering_github_2977(self):
+        """Test that FunctionResultContent updates are placed after their FunctionCallContent.
+
+        This test reproduces GitHub issue #2977: When using a thread with WorkflowAgent,
+        FunctionResultContent updates without response_id were being added to global_dangling
+        and placed at the end of messages. This caused OpenAI to reject the conversation because
+        "An assistant message with 'tool_calls' must be followed by tool messages responding
+        to each 'tool_call_id'."
+
+        The expected ordering should be:
+        - User Question
+        - FunctionCallContent (assistant)
+        - FunctionResultContent (tool)
+        - Assistant Answer
+
+        NOT:
+        - User Question
+        - FunctionCallContent (assistant)
+        - Assistant Answer
+        - FunctionResultContent (tool)  <-- This was the bug
+        """
+        call_id = "call_F09je20iUue6DlFRDLLh3dGK"
+
+        updates = [
+            # User question
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="What is the weather?")],
+                role=Role.USER,
+                response_id="resp-1",
+                message_id="msg-1",
+                created_at="2024-01-01T12:00:00Z",
+            ),
+            # Assistant with function call
+            AgentRunResponseUpdate(
+                contents=[FunctionCallContent(call_id=call_id, name="get_weather", arguments='{"location": "NYC"}')],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-2",
+                created_at="2024-01-01T12:00:01Z",
+            ),
+            # Function result (no response_id - this was being placed at the end)
+            AgentRunResponseUpdate(
+                contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")],
+                role=Role.TOOL,
+                response_id=None,  # Bug: no response_id causes this to go to global_dangling
+                message_id="msg-3",
+                created_at="2024-01-01T12:00:02Z",
+            ),
+            # Final assistant answer
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="The weather in NYC is sunny and 72F.")],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-4",
+                created_at="2024-01-01T12:00:03Z",
+            ),
+        ]
+
+        result = WorkflowAgent.merge_updates(updates, "final-response")
+
+        assert len(result.messages) == 4
+
+        # Extract content types for verification
+        content_sequence = []
+        for msg in result.messages:
+            for content in msg.contents:
+                if isinstance(content, TextContent):
+                    content_sequence.append(("text", msg.role))
+                elif isinstance(content, FunctionCallContent):
+                    content_sequence.append(("function_call", msg.role))
+                elif isinstance(content, FunctionResultContent):
+                    content_sequence.append(("function_result", msg.role))
+
+        # Verify correct ordering: user -> function_call -> function_result -> assistant_answer
+        expected_sequence = [
+            ("text", Role.USER),
+            ("function_call", Role.ASSISTANT),
+            ("function_result", Role.TOOL),
+            ("text", Role.ASSISTANT),
+        ]
+
+        assert content_sequence == expected_sequence, (
+            f"FunctionResultContent should come immediately after FunctionCallContent. "
+            f"Got: {content_sequence}, Expected: {expected_sequence}"
+        )
+
+        # Additional check: verify FunctionResultContent call_id matches FunctionCallContent
+        function_call_idx = None
+        function_result_idx = None
+        for i, msg in enumerate(result.messages):
+            for content in msg.contents:
+                if isinstance(content, FunctionCallContent):
+                    function_call_idx = i
+                    assert content.call_id == call_id
+                elif isinstance(content, FunctionResultContent):
+                    function_result_idx = i
+                    assert content.call_id == call_id
+
+        assert function_call_idx is not None
+        assert function_result_idx is not None
+        assert function_result_idx == function_call_idx + 1, (
+            f"FunctionResultContent at index {function_result_idx} should immediately follow "
+            f"FunctionCallContent at index {function_call_idx}"
+        )
+
+    def test_merge_updates_multiple_function_results_ordering_github_2977(self):
+        """Test ordering with multiple FunctionCallContent/FunctionResultContent pairs.
+
+        Validates that multiple tool calls and results appear before the final assistant
+        answer, even when results arrive without response_id and in different order than calls.
+
+        OpenAI requires that tool results appear after their calls and before the next
+        assistant text message, but doesn't require strict interleaving (result_1 immediately
+        after call_1). The key constraint is: calls -> results -> final_answer.
+        """
+        call_id_1 = "call_weather_001"
+        call_id_2 = "call_time_002"
+
+        updates = [
+            # User question
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="What's the weather and time?")],
+                role=Role.USER,
+                response_id="resp-1",
+                message_id="msg-1",
+                created_at="2024-01-01T12:00:00Z",
+            ),
+            # Assistant with first function call
+            AgentRunResponseUpdate(
+                contents=[FunctionCallContent(call_id=call_id_1, name="get_weather", arguments='{"location": "NYC"}')],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-2",
+                created_at="2024-01-01T12:00:01Z",
+            ),
+            # Assistant with second function call
+            AgentRunResponseUpdate(
+                contents=[FunctionCallContent(call_id=call_id_2, name="get_time", arguments='{"timezone": "EST"}')],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-3",
+                created_at="2024-01-01T12:00:02Z",
+            ),
+            # Second function result arrives first (no response_id)
+            AgentRunResponseUpdate(
+                contents=[FunctionResultContent(call_id=call_id_2, result="3:00 PM EST")],
+                role=Role.TOOL,
+                response_id=None,
+                message_id="msg-4",
+                created_at="2024-01-01T12:00:03Z",
+            ),
+            # First function result arrives second (no response_id)
+            AgentRunResponseUpdate(
+                contents=[FunctionResultContent(call_id=call_id_1, result="Sunny, 72F")],
+                role=Role.TOOL,
+                response_id=None,
+                message_id="msg-5",
+                created_at="2024-01-01T12:00:04Z",
+            ),
+            # Final assistant answer
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="It's sunny (72F) and 3 PM in NYC.")],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-6",
+                created_at="2024-01-01T12:00:05Z",
+            ),
+        ]
+
+        result = WorkflowAgent.merge_updates(updates, "final-response")
+
+        assert len(result.messages) == 6
+
+        # Build a sequence of (content_type, call_id_if_applicable)
+        content_sequence = []
+        for msg in result.messages:
+            for content in msg.contents:
+                if isinstance(content, TextContent):
+                    content_sequence.append(("text", None))
+                elif isinstance(content, FunctionCallContent):
+                    content_sequence.append(("function_call", content.call_id))
+                elif isinstance(content, FunctionResultContent):
+                    content_sequence.append(("function_result", content.call_id))
+
+        # Verify all function results appear before the final assistant text
+        # Find indices
+        call_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_call"]
+        result_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_result"]
+        final_text_idx = len(content_sequence) - 1  # Last item should be final text
+
+        # All calls should have corresponding results
+        call_ids_in_calls = {content_sequence[i][1] for i in call_indices}
+        call_ids_in_results = {content_sequence[i][1] for i in result_indices}
+        assert call_ids_in_calls == call_ids_in_results, "All function calls should have matching results"
+
+        # All results should appear after all calls and before final text
+        assert all(r > max(call_indices) for r in result_indices), (
+            "All function results should appear after all function calls"
+        )
+        assert all(r < final_text_idx for r in result_indices), (
+            "All function results should appear before the final assistant answer"
+        )
+        assert content_sequence[final_text_idx] == ("text", None), "Final message should be assistant text"
+
+    def test_merge_updates_function_result_no_matching_call(self):
+        """Test that FunctionResultContent without matching FunctionCallContent still appears.
+
+        If a FunctionResultContent has a call_id that doesn't match any FunctionCallContent
+        in the messages, it should be appended at the end (fallback behavior).
+        """
+        updates = [
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="Hello")],
+                role=Role.USER,
+                response_id="resp-1",
+                message_id="msg-1",
+                created_at="2024-01-01T12:00:00Z",
+            ),
+            # Function result with no matching call
+            AgentRunResponseUpdate(
+                contents=[FunctionResultContent(call_id="orphan_call_id", result="orphan result")],
+                role=Role.TOOL,
+                response_id=None,
+                message_id="msg-2",
+                created_at="2024-01-01T12:00:01Z",
+            ),
+            AgentRunResponseUpdate(
+                contents=[TextContent(text="Goodbye")],
+                role=Role.ASSISTANT,
+                response_id="resp-1",
+                message_id="msg-3",
+                created_at="2024-01-01T12:00:02Z",
+            ),
+        ]
+
+        result = WorkflowAgent.merge_updates(updates, "final-response")
+
+        assert len(result.messages) == 3
+
+        # Orphan function result should be at the end since it can't be matched
+        content_types = []
+        for msg in result.messages:
+            for content in msg.contents:
+                if isinstance(content, TextContent):
+                    content_types.append("text")
+                elif isinstance(content, FunctionResultContent):
+                    content_types.append("function_result")
+
+        # Order: text (user), text (assistant), function_result (orphan at end)
+        assert content_types == ["text", "text", "function_result"]

From 85f04e3c1171804cfe9b0870d09f822d08d96f25 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Mon, 12 Jan 2026 13:30:23 +0900
Subject: [PATCH 2/3] improve comment

---
 python/packages/core/tests/workflow/test_workflow_agent.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py
index 9be6f7e8e2..6b6c20583a 100644
--- a/python/packages/core/tests/workflow/test_workflow_agent.py
+++ b/python/packages/core/tests/workflow/test_workflow_agent.py
@@ -999,11 +999,12 @@ def test_merge_updates_function_result_ordering_github_2977(self):
                 message_id="msg-2",
                 created_at="2024-01-01T12:00:01Z",
             ),
-            # Function result (no response_id - this was being placed at the end)
+            # Function result: no response_id previously caused this to go to global_dangling
+            # and be placed at the end (the bug); fix now correctly associates via call_id
             AgentRunResponseUpdate(
                 contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")],
                 role=Role.TOOL,
-                response_id=None,  # Bug: no response_id causes this to go to global_dangling
+                response_id=None,
                 message_id="msg-3",
                 created_at="2024-01-01T12:00:02Z",
             ),

From 434641d67b745eb043e0af90424c95ff33769778 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Thu, 15 Jan 2026 10:25:17 +0900
Subject: [PATCH 3/3] Fix name

---
 .../tests/workflow/test_workflow_agent.py     | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/python/packages/core/tests/workflow/test_workflow_agent.py b/python/packages/core/tests/workflow/test_workflow_agent.py
index 7135e6f883..7e47a82c9c 100644
--- a/python/packages/core/tests/workflow/test_workflow_agent.py
+++ b/python/packages/core/tests/workflow/test_workflow_agent.py
@@ -984,7 +984,7 @@ def test_merge_updates_function_result_ordering_github_2977(self):
 
         updates = [
             # User question
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="What is the weather?")],
                 role=Role.USER,
                 response_id="resp-1",
@@ -992,7 +992,7 @@ def test_merge_updates_function_result_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:00Z",
             ),
             # Assistant with function call
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionCallContent(call_id=call_id, name="get_weather", arguments='{"location": "NYC"}')],
                 role=Role.ASSISTANT,
                 response_id="resp-1",
@@ -1001,7 +1001,7 @@ def test_merge_updates_function_result_ordering_github_2977(self):
             ),
             # Function result: no response_id previously caused this to go to global_dangling
             # and be placed at the end (the bug); fix now correctly associates via call_id
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")],
                 role=Role.TOOL,
                 response_id=None,
@@ -1009,7 +1009,7 @@ def test_merge_updates_function_result_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:02Z",
             ),
             # Final assistant answer
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="The weather in NYC is sunny and 72F.")],
                 role=Role.ASSISTANT,
                 response_id="resp-1",
@@ -1080,7 +1080,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
 
         updates = [
             # User question
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="What's the weather and time?")],
                 role=Role.USER,
                 response_id="resp-1",
@@ -1088,7 +1088,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:00Z",
             ),
             # Assistant with first function call
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionCallContent(call_id=call_id_1, name="get_weather", arguments='{"location": "NYC"}')],
                 role=Role.ASSISTANT,
                 response_id="resp-1",
@@ -1096,7 +1096,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:01Z",
             ),
             # Assistant with second function call
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionCallContent(call_id=call_id_2, name="get_time", arguments='{"timezone": "EST"}')],
                 role=Role.ASSISTANT,
                 response_id="resp-1",
@@ -1104,7 +1104,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:02Z",
             ),
             # Second function result arrives first (no response_id)
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionResultContent(call_id=call_id_2, result="3:00 PM EST")],
                 role=Role.TOOL,
                 response_id=None,
@@ -1112,7 +1112,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:03Z",
             ),
             # First function result arrives second (no response_id)
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionResultContent(call_id=call_id_1, result="Sunny, 72F")],
                 role=Role.TOOL,
                 response_id=None,
@@ -1120,7 +1120,7 @@ def test_merge_updates_multiple_function_results_ordering_github_2977(self):
                 created_at="2024-01-01T12:00:04Z",
             ),
             # Final assistant answer
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="It's sunny (72F) and 3 PM in NYC.")],
                 role=Role.ASSISTANT,
                 response_id="resp-1",
@@ -1171,7 +1171,7 @@ def test_merge_updates_function_result_no_matching_call(self):
         in the messages, it should be appended at the end (fallback behavior).
         """
         updates = [
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="Hello")],
                 role=Role.USER,
                 response_id="resp-1",
@@ -1179,14 +1179,14 @@ def test_merge_updates_function_result_no_matching_call(self):
                 created_at="2024-01-01T12:00:00Z",
             ),
             # Function result with no matching call
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[FunctionResultContent(call_id="orphan_call_id", result="orphan result")],
                 role=Role.TOOL,
                 response_id=None,
                 message_id="msg-2",
                 created_at="2024-01-01T12:00:01Z",
             ),
-            AgentRunResponseUpdate(
+            AgentResponseUpdate(
                 contents=[TextContent(text="Goodbye")],
                 role=Role.ASSISTANT,
                 response_id="resp-1",