diff --git a/python/packages/ag-ui/tests/test_events_comprehensive.py b/python/packages/ag-ui/tests/test_events_comprehensive.py
index cfd45ea5c8..30d302e7be 100644
--- a/python/packages/ag-ui/tests/test_events_comprehensive.py
+++ b/python/packages/ag-ui/tests/test_events_comprehensive.py
@@ -152,6 +152,42 @@ async def test_tool_call_streaming_args():
     assert events1[0].tool_call_id == events2[0].tool_call_id == events3[0].tool_call_id
 
 
+async def test_streaming_tool_call_no_duplicate_start_events():
+    """Test that streaming tool calls emit exactly one ToolCallStartEvent.
+
+    Regression test for the Anthropic streaming fix: input_json_delta events used to
+    repeat the tool name on every chunk, which caused duplicate ToolCallStartEvents.
+
+    The correct behavior is:
+    - Initial FunctionCallContent with name -> emits ToolCallStartEvent
+    - Subsequent FunctionCallContent with name="" -> emits only ToolCallArgsEvent
+
+    See: https://github.com/microsoft/agent-framework/pull/3051
+    """
+    from agent_framework_ag_ui._events import AgentFrameworkEventBridge
+
+    bridge = AgentFrameworkEventBridge(run_id="test_run", thread_id="test_thread")
+
+    # Simulate a streamed tool call: the first chunk carries the name, later chunks use name=""
+    update1 = AgentRunResponseUpdate(contents=[FunctionCallContent(name="get_weather", call_id="call_789")])
+    update2 = AgentRunResponseUpdate(contents=[FunctionCallContent(name="", call_id="call_789", arguments='{"loc":')])
+    update3 = AgentRunResponseUpdate(contents=[FunctionCallContent(name="", call_id="call_789", arguments='"SF"}')])
+
+    events1 = await bridge.from_agent_run_update(update1)
+    events2 = await bridge.from_agent_run_update(update2)
+    events3 = await bridge.from_agent_run_update(update3)
+
+    # Across all three updates there must be exactly one TOOL_CALL_START event
+    all_events = events1 + events2 + events3
+    tool_call_start_count = sum(1 for e in all_events if e.type == "TOOL_CALL_START")
+    assert tool_call_start_count == 1, f"Expected 1 ToolCallStartEvent, got {tool_call_start_count}"
+
+    # The first update opens the tool call; each later chunk leads with TOOL_CALL_ARGS
+    assert events1[0].type == "TOOL_CALL_START"
+    assert events2[0].type == "TOOL_CALL_ARGS"
+    assert events3[0].type == "TOOL_CALL_ARGS"
+
+
 async def test_tool_result_with_dict():
     """Test FunctionResultContent with dict result."""
     from agent_framework_ag_ui._events import AgentFrameworkEventBridge
diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
index b29b13fbd3..81655c16a4 100644
--- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
+++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
@@ -832,11 +832,16 @@ def _parse_contents_from_anthropic(
                         )
                     )
                 case "input_json_delta":
-                    call_id, name = self._last_call_id_name if self._last_call_id_name else ("", "")
+                    # Streaming argument deltas carry only the call_id and the partial arguments.
+                    # The name is deliberately left empty: ag-ui starts a new tool call whenever
+                    # `if content.name:` is true, so repeating the name on every delta would emit
+                    # duplicate ToolCallStartEvents. The initial tool_use event already provides
+                    # the name. This matches OpenAI's behavior where streaming chunks have name="".
+                    call_id, _ = self._last_call_id_name if self._last_call_id_name else ("", "")
                     contents.append(
                         FunctionCallContent(
                             call_id=call_id,
-                            name=name,
+                            name="",
                             arguments=content_block.partial_json,
                             raw_representation=content_block,
                         )
diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
index e8a3ac9cb0..8d5a42d3e1 100644
--- a/python/packages/anthropic/tests/test_anthropic_client.py
+++ b/python/packages/anthropic/tests/test_anthropic_client.py
@@ -595,6 +595,53 @@ def test_parse_contents_from_anthropic_tool_use(mock_anthropic_client: MagicMock
     assert result[0].name == "get_weather"
 
 
+def test_parse_contents_from_anthropic_input_json_delta_no_duplicate_name(mock_anthropic_client: MagicMock) -> None:
+    """Test that input_json_delta events have empty name to prevent duplicate ToolCallStartEvents.
+
+    When streaming tool calls, the initial tool_use event provides the name,
+    and every subsequent input_json_delta event should carry name="" so that
+    ag-ui does not emit duplicate ToolCallStartEvents.
+    """
+    chat_client = create_test_anthropic_client(mock_anthropic_client)
+
+    # First, simulate a tool_use event that sets _last_call_id_name
+    tool_use_content = MagicMock()
+    tool_use_content.type = "tool_use"
+    tool_use_content.id = "call_123"
+    tool_use_content.name = "get_weather"
+    tool_use_content.input = {}
+
+    result = chat_client._parse_contents_from_anthropic([tool_use_content])
+    assert len(result) == 1
+    assert isinstance(result[0], FunctionCallContent)
+    assert result[0].call_id == "call_123"
+    assert result[0].name == "get_weather"  # Initial event has name
+
+    # Now simulate input_json_delta events (argument streaming)
+    delta_content_1 = MagicMock()
+    delta_content_1.type = "input_json_delta"
+    delta_content_1.partial_json = '{"location":'
+
+    result = chat_client._parse_contents_from_anthropic([delta_content_1])
+    assert len(result) == 1
+    assert isinstance(result[0], FunctionCallContent)
+    assert result[0].call_id == "call_123"
+    assert result[0].name == ""  # Delta events should have empty name
+    assert result[0].arguments == '{"location":'
+
+    # A second delta must still reuse the call_id and keep the name empty
+    delta_content_2 = MagicMock()
+    delta_content_2.type = "input_json_delta"
+    delta_content_2.partial_json = '"San Francisco"}'
+
+    result = chat_client._parse_contents_from_anthropic([delta_content_2])
+    assert len(result) == 1
+    assert isinstance(result[0], FunctionCallContent)
+    assert result[0].call_id == "call_123"
+    assert result[0].name == ""  # Still empty name for subsequent deltas
+    assert result[0].arguments == '"San Francisco"}'
+
+
 # Stream Processing Tests