Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions python/packages/core/agent_framework/_workflows/_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,12 +463,30 @@ def merge_updates(updates: list[AgentResponseUpdate], response_id: str) -> Agent
An AgentResponse with messages in processing order and aggregated metadata.
"""
# PHASE 1: GROUP UPDATES BY RESPONSE_ID AND MESSAGE_ID
# First pass: build call_id -> response_id map from FunctionCallContent updates
call_id_to_response_id: dict[str, str] = {}
for u in updates:
if u.response_id:
for content in u.contents:
if isinstance(content, FunctionCallContent) and content.call_id:
call_id_to_response_id[content.call_id] = u.response_id

# Second pass: group updates, associating FunctionResultContent with their calls
states: dict[str, WorkflowAgent._ResponseState] = {}
global_dangling: list[AgentResponseUpdate] = []

for u in updates:
if u.response_id:
state = states.setdefault(u.response_id, {"by_msg": {}, "dangling": []})
effective_response_id = u.response_id
# If no response_id, check if this is a FunctionResultContent that matches a call
if not effective_response_id:
for content in u.contents:
if isinstance(content, FunctionResultContent) and content.call_id:
effective_response_id = call_id_to_response_id.get(content.call_id)
if effective_response_id:
break

if effective_response_id:
state = states.setdefault(effective_response_id, {"by_msg": {}, "dangling": []})
by_msg = state["by_msg"]
dangling = state["dangling"]
if u.message_id:
Expand Down Expand Up @@ -569,6 +587,8 @@ def _add_raw(value: object) -> None:
raw_representations.append(cast_value)

# PHASE 3: HANDLE GLOBAL DANGLING UPDATES (NO RESPONSE_ID)
# These are updates that couldn't be associated with any response_id
# (e.g., orphan FunctionResultContent with no matching FunctionCallContent)
if global_dangling:
flattened = AgentResponse.from_agent_run_response_updates(global_dangling)
final_messages.extend(flattened.messages)
Expand Down
253 changes: 253 additions & 0 deletions python/packages/core/tests/workflow/test_workflow_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
FunctionApprovalRequestContent,
FunctionApprovalResponseContent,
FunctionCallContent,
FunctionResultContent,
Role,
TextContent,
UriContent,
Expand Down Expand Up @@ -957,3 +958,255 @@ def test_merge_updates_metadata_aggregation(self):
# properties only include final merged result from its own updates
}
assert result.additional_properties == expected_properties

def test_merge_updates_function_result_ordering_github_2977(self):
"""Test that FunctionResultContent updates are placed after their FunctionCallContent.

This test reproduces GitHub issue #2977: When using a thread with WorkflowAgent,
FunctionResultContent updates without response_id were being added to global_dangling
and placed at the end of messages. This caused OpenAI to reject the conversation because
"An assistant message with 'tool_calls' must be followed by tool messages responding
to each 'tool_call_id'."

The expected ordering should be:
- User Question
- FunctionCallContent (assistant)
- FunctionResultContent (tool)
- Assistant Answer

NOT:
- User Question
- FunctionCallContent (assistant)
- Assistant Answer
- FunctionResultContent (tool) <-- This was the bug
"""
call_id = "call_F09je20iUue6DlFRDLLh3dGK"

updates = [
# User question
AgentResponseUpdate(
contents=[TextContent(text="What is the weather?")],
role=Role.USER,
response_id="resp-1",
message_id="msg-1",
created_at="2024-01-01T12:00:00Z",
),
# Assistant with function call
AgentResponseUpdate(
contents=[FunctionCallContent(call_id=call_id, name="get_weather", arguments='{"location": "NYC"}')],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-2",
created_at="2024-01-01T12:00:01Z",
),
# Function result: no response_id previously caused this to go to global_dangling
# and be placed at the end (the bug); fix now correctly associates via call_id
AgentResponseUpdate(
contents=[FunctionResultContent(call_id=call_id, result="Sunny, 72F")],
role=Role.TOOL,
response_id=None,
message_id="msg-3",
created_at="2024-01-01T12:00:02Z",
),
# Final assistant answer
AgentResponseUpdate(
contents=[TextContent(text="The weather in NYC is sunny and 72F.")],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-4",
created_at="2024-01-01T12:00:03Z",
),
]

result = WorkflowAgent.merge_updates(updates, "final-response")

assert len(result.messages) == 4

# Extract content types for verification
content_sequence = []
for msg in result.messages:
for content in msg.contents:
if isinstance(content, TextContent):
content_sequence.append(("text", msg.role))
elif isinstance(content, FunctionCallContent):
content_sequence.append(("function_call", msg.role))
elif isinstance(content, FunctionResultContent):
content_sequence.append(("function_result", msg.role))

# Verify correct ordering: user -> function_call -> function_result -> assistant_answer
expected_sequence = [
("text", Role.USER),
("function_call", Role.ASSISTANT),
("function_result", Role.TOOL),
("text", Role.ASSISTANT),
]

assert content_sequence == expected_sequence, (
f"FunctionResultContent should come immediately after FunctionCallContent. "
f"Got: {content_sequence}, Expected: {expected_sequence}"
)

# Additional check: verify FunctionResultContent call_id matches FunctionCallContent
function_call_idx = None
function_result_idx = None
for i, msg in enumerate(result.messages):
for content in msg.contents:
if isinstance(content, FunctionCallContent):
function_call_idx = i
assert content.call_id == call_id
elif isinstance(content, FunctionResultContent):
function_result_idx = i
assert content.call_id == call_id

assert function_call_idx is not None
assert function_result_idx is not None
assert function_result_idx == function_call_idx + 1, (
f"FunctionResultContent at index {function_result_idx} should immediately follow "
f"FunctionCallContent at index {function_call_idx}"
)

def test_merge_updates_multiple_function_results_ordering_github_2977(self):
"""Test ordering with multiple FunctionCallContent/FunctionResultContent pairs.

Validates that multiple tool calls and results appear before the final assistant
answer, even when results arrive without response_id and in different order than calls.

OpenAI requires that tool results appear after their calls and before the next
assistant text message, but doesn't require strict interleaving (result_1 immediately
after call_1). The key constraint is: calls -> results -> final_answer.
"""
call_id_1 = "call_weather_001"
call_id_2 = "call_time_002"

updates = [
# User question
AgentResponseUpdate(
contents=[TextContent(text="What's the weather and time?")],
role=Role.USER,
response_id="resp-1",
message_id="msg-1",
created_at="2024-01-01T12:00:00Z",
),
# Assistant with first function call
AgentResponseUpdate(
contents=[FunctionCallContent(call_id=call_id_1, name="get_weather", arguments='{"location": "NYC"}')],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-2",
created_at="2024-01-01T12:00:01Z",
),
# Assistant with second function call
AgentResponseUpdate(
contents=[FunctionCallContent(call_id=call_id_2, name="get_time", arguments='{"timezone": "EST"}')],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-3",
created_at="2024-01-01T12:00:02Z",
),
# Second function result arrives first (no response_id)
AgentResponseUpdate(
contents=[FunctionResultContent(call_id=call_id_2, result="3:00 PM EST")],
role=Role.TOOL,
response_id=None,
message_id="msg-4",
created_at="2024-01-01T12:00:03Z",
),
# First function result arrives second (no response_id)
AgentResponseUpdate(
contents=[FunctionResultContent(call_id=call_id_1, result="Sunny, 72F")],
role=Role.TOOL,
response_id=None,
message_id="msg-5",
created_at="2024-01-01T12:00:04Z",
),
# Final assistant answer
AgentResponseUpdate(
contents=[TextContent(text="It's sunny (72F) and 3 PM in NYC.")],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-6",
created_at="2024-01-01T12:00:05Z",
),
]

result = WorkflowAgent.merge_updates(updates, "final-response")

assert len(result.messages) == 6

# Build a sequence of (content_type, call_id_if_applicable)
content_sequence = []
for msg in result.messages:
for content in msg.contents:
if isinstance(content, TextContent):
content_sequence.append(("text", None))
elif isinstance(content, FunctionCallContent):
content_sequence.append(("function_call", content.call_id))
elif isinstance(content, FunctionResultContent):
content_sequence.append(("function_result", content.call_id))

# Verify all function results appear before the final assistant text
# Find indices
call_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_call"]
result_indices = [i for i, (t, _) in enumerate(content_sequence) if t == "function_result"]
final_text_idx = len(content_sequence) - 1 # Last item should be final text

# All calls should have corresponding results
call_ids_in_calls = {content_sequence[i][1] for i in call_indices}
call_ids_in_results = {content_sequence[i][1] for i in result_indices}
assert call_ids_in_calls == call_ids_in_results, "All function calls should have matching results"

# All results should appear after all calls and before final text
assert all(r > max(call_indices) for r in result_indices), (
"All function results should appear after all function calls"
)
assert all(r < final_text_idx for r in result_indices), (
"All function results should appear before the final assistant answer"
)
assert content_sequence[final_text_idx] == ("text", None), "Final message should be assistant text"

def test_merge_updates_function_result_no_matching_call(self):
"""Test that FunctionResultContent without matching FunctionCallContent still appears.

If a FunctionResultContent has a call_id that doesn't match any FunctionCallContent
in the messages, it should be appended at the end (fallback behavior).
"""
updates = [
AgentResponseUpdate(
contents=[TextContent(text="Hello")],
role=Role.USER,
response_id="resp-1",
message_id="msg-1",
created_at="2024-01-01T12:00:00Z",
),
# Function result with no matching call
AgentResponseUpdate(
contents=[FunctionResultContent(call_id="orphan_call_id", result="orphan result")],
role=Role.TOOL,
response_id=None,
message_id="msg-2",
created_at="2024-01-01T12:00:01Z",
),
AgentResponseUpdate(
contents=[TextContent(text="Goodbye")],
role=Role.ASSISTANT,
response_id="resp-1",
message_id="msg-3",
created_at="2024-01-01T12:00:02Z",
),
]

result = WorkflowAgent.merge_updates(updates, "final-response")

assert len(result.messages) == 3

# Orphan function result should be at the end since it can't be matched
content_types = []
for msg in result.messages:
for content in msg.contents:
if isinstance(content, TextContent):
content_types.append("text")
elif isinstance(content, FunctionResultContent):
content_types.append("function_result")

# Order: text (user), text (assistant), function_result (orphan at end)
assert content_types == ["text", "text", "function_result"]
Loading