From 9fd01a8b66fe8aed91c4c55c0d350bb4619c5356 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Tue, 17 Feb 2026 17:28:30 +0100 Subject: [PATCH 1/3] Python: Fix Anthropic kwargs and manager parse retries - Strip unsupported Anthropic kwargs from prepared run options while preserving provider-specific mappings. - Keep stream mode explicit at Anthropic SDK call sites and prevent duplicate stream kwarg conflicts. - Add bounded default retries with strict retry prompt for agent-based group chat manager parse failures. - Add regression tests in anthropic and orchestrations packages covering #3371, #3827, and #3078. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework_anthropic/_chat_client.py | 4 + .../anthropic/tests/test_anthropic_client.py | 65 ++++++++- .../_group_chat.py | 13 +- .../orchestrations/tests/test_group_chat.py | 130 ++++++++++++++++++ 4 files changed, 208 insertions(+), 4 deletions(-) diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index 4e7ffafc2d..29b3a62416 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -488,6 +488,10 @@ def _prepare_options( run_options: dict[str, Any] = { k: v for k, v in options.items() if v is not None and k not in {"instructions", "response_format"} } + # Framework-level options handled elsewhere; do not forward as raw Anthropic request kwargs. + run_options.pop("allow_multiple_tool_calls", None) + # Stream mode is controlled explicitly at call sites. + run_options.pop("stream", None) # Translation between options keys and Anthropic Messages API for old_key, new_key in OPTION_TRANSLATIONS.items(): diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py index ff9234f60b..261b0e0139 100644 --- a/python/packages/anthropic/tests/test_anthropic_client.py +++ b/python/packages/anthropic/tests/test_anthropic_client.py @@ -396,11 +396,13 @@ async def test_prepare_options_with_tool_choice_auto(mock_anthropic_client: Magi client = create_test_anthropic_client(mock_anthropic_client) messages = [Message(role="user", text="Hello")] - chat_options = ChatOptions(tool_choice="auto") + chat_options = ChatOptions(tool_choice="auto", allow_multiple_tool_calls=False) run_options = client._prepare_options(messages, chat_options) assert run_options["tool_choice"]["type"] == "auto" + assert run_options["tool_choice"]["disable_parallel_tool_use"] is True + assert "allow_multiple_tool_calls" not in run_options async def test_prepare_options_with_tool_choice_required(mock_anthropic_client: MagicMock) -> None: @@ -471,6 +473,18 @@ async def test_prepare_options_with_top_p(mock_anthropic_client: MagicMock) -> N assert run_options["top_p"] == 0.9 +async def test_prepare_options_excludes_stream_option(mock_anthropic_client: MagicMock) -> None: + """Test _prepare_options excludes stream when stream is provided in options.""" + client = create_test_anthropic_client(mock_anthropic_client) + + messages = [Message(role="user", text="Hello")] + chat_options: dict[str, Any] = {"stream": True, "max_tokens": 100} + + run_options = client._prepare_options(messages, chat_options) + + assert "stream" not in run_options + + async def test_prepare_options_filters_internal_kwargs(mock_anthropic_client: MagicMock) -> None: """Test _prepare_options filters internal framework kwargs. @@ -700,6 +714,30 @@ async def test_inner_get_response(mock_anthropic_client: MagicMock) -> None: assert len(response.messages) == 1 +async def test_inner_get_response_ignores_options_stream_non_streaming(mock_anthropic_client: MagicMock) -> None: + """Test stream option in options does not conflict in non-streaming mode.""" + client = create_test_anthropic_client(mock_anthropic_client) + + mock_message = MagicMock(spec=BetaMessage) + mock_message.id = "msg_test" + mock_message.model = "claude-3-5-sonnet-20241022" + mock_message.content = [BetaTextBlock(type="text", text="Hello!")] + mock_message.usage = BetaUsage(input_tokens=5, output_tokens=3) + mock_message.stop_reason = "end_turn" + mock_anthropic_client.beta.messages.create.return_value = mock_message + + messages = [Message(role="user", text="Hi")] + options: dict[str, Any] = {"max_tokens": 10, "stream": True} + + await client._inner_get_response( # type: ignore[attr-defined] + messages=messages, + options=options, + ) + + assert mock_anthropic_client.beta.messages.create.call_count == 1 + assert mock_anthropic_client.beta.messages.create.call_args.kwargs["stream"] is False + + async def test_inner_get_response_streaming(mock_anthropic_client: MagicMock) -> None: """Test _inner_get_response method with streaming.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -726,6 +764,31 @@ async def mock_stream(): assert isinstance(chunks, list) +async def test_inner_get_response_ignores_options_stream_streaming(mock_anthropic_client: MagicMock) -> None: + """Test stream option in options does not conflict in streaming mode.""" + client = create_test_anthropic_client(mock_anthropic_client) + + async def mock_stream(): + mock_event = MagicMock() + mock_event.type = "message_stop" + yield mock_event + + mock_anthropic_client.beta.messages.create.return_value = mock_stream() + + messages = [Message(role="user", text="Hi")] + options: dict[str, Any] = {"max_tokens": 10, "stream": False} + + async for _ in client._inner_get_response( # type: ignore[attr-defined] + messages=messages, + options=options, + stream=True, + ): + pass + + assert mock_anthropic_client.beta.messages.create.call_count == 1 + assert mock_anthropic_client.beta.messages.create.call_args.kwargs["stream"] is True + + # Integration Tests diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py index a99e221409..f302ff50c6 100644 --- a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py +++ b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py @@ -302,6 +302,7 @@ def __init__( max_rounds: Optional limit on selection rounds to prevent infinite loops. termination_condition: Optional callable that halts the conversation when it returns True retry_attempts: Optional number of retry attempts for the agent in case of failure. + Defaults to 2 retries when not provided. session: Optional agent session to use for the orchestrator agent. """ super().__init__( @@ -312,7 +313,7 @@ def __init__( termination_condition=termination_condition, ) self._agent = agent - self._retry_attempts = retry_attempts + self._retry_attempts = retry_attempts if retry_attempts is not None else 2 self._session = session or agent.create_session() # Cache for messages since last agent invocation # This is different from the full conversation history maintained by the base orchestrator @@ -485,6 +486,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio # We only need the last message for context since history is maintained in the thread current_conversation = self._cache.copy() self._cache.clear() + participant_names = ", ".join(self._participant_registry.participants.keys()) instruction = ( "Decide what to do next. Respond with a JSON object of the following format:\n" "{\n" @@ -507,7 +509,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio return await _invoke_agent_helper(current_conversation) except Exception as ex: logger.error(f"Agent orchestration invocation failed: {ex}") - if retry_attempts is None or retry_attempts <= 0: + if retry_attempts <= 0: raise retry_attempts -= 1 logger.debug(f"Retrying agent orchestration invocation, attempts left: {retry_attempts}") @@ -515,7 +517,12 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio current_conversation = [ Message( role="user", - text=f"Your input could not be parsed due to an error: {ex}. Please try again.", + text=( + "Your previous response could not be parsed as valid orchestration JSON. " + f"Error: {ex}. Respond with ONLY one JSON object and no extra text. " + "Use exactly these keys: terminate, reason, next_speaker, final_message. " + f"If terminate is false, next_speaker must be one of: {participant_names}." + ), ) ] diff --git a/python/packages/orchestrations/tests/test_group_chat.py b/python/packages/orchestrations/tests/test_group_chat.py index 7550f820c7..3f6a5c658b 100644 --- a/python/packages/orchestrations/tests/test_group_chat.py +++ b/python/packages/orchestrations/tests/test_group_chat.py @@ -166,6 +166,96 @@ async def run( ) +class NonJsonManagerAgent(Agent): + """Manager agent that emits only non-JSON assistant text.""" + + def __init__(self) -> None: + super().__init__(client=MockChatClient(), name="non_json_manager", description="Non-JSON manager") + self._call_count = 0 + + async def run( + self, + messages: str | Content | Message | Sequence[str | Content | Message] | None = None, + *, + session: AgentSession | None = None, + **kwargs: Any, + ) -> AgentResponse: + self._call_count += 1 + return AgentResponse( + messages=[ + Message( + role="assistant", + text="Good! Continue with the audit.", + author_name=self.name, + ) + ] + ) + + +class RetryableNonJsonManagerAgent(Agent): + """Manager agent that emits non-JSON once, then valid orchestration JSON.""" + + def __init__(self) -> None: + super().__init__(client=MockChatClient(), name="retry_manager", description="Retry manager") + self._call_count = 0 + + async def run( + self, + messages: str | Content | Message | Sequence[str | Content | Message] | None = None, + *, + session: AgentSession | None = None, + **kwargs: Any, + ) -> AgentResponse: + if self._call_count == 0: + self._call_count += 1 + return AgentResponse( + messages=[ + Message( + role="assistant", + text="Good! Continue with the audit.", + author_name=self.name, + ) + ] + ) + if self._call_count == 1: + self._call_count += 1 + payload = {"terminate": False, "reason": "Selecting agent", "next_speaker": "agent", "final_message": None} + return AgentResponse( + messages=[ + Message( + role="assistant", + text=( + '{"terminate": false, "reason": "Selecting agent", ' + '"next_speaker": "agent", "final_message": null}' + ), + author_name=self.name, + ) + ], + value=payload, + ) + + self._call_count += 1 + payload = { + "terminate": True, + "reason": "Task complete", + "next_speaker": None, + "final_message": "retry manager final", + } + return AgentResponse( + messages=[ + Message( + role="assistant", + text=( + '{"terminate": true, "reason": "Task complete", ' + '"next_speaker": null, "final_message": "retry manager final"}' + ), + author_name=self.name, + ) + ], + value=payload, + ) + + def make_sequence_selector() -> Callable[[GroupChatState], str]: state_counter = {"value": 0} @@ -289,6 +379,46 @@ async def test_agent_manager_handles_concatenated_json_output() -> None: assert conversation[-1].text == "concatenated manager final" +async def test_agent_manager_non_json_output_raises_parse_error() -> None: + manager = NonJsonManagerAgent() + worker = StubAgent("agent", "worker response") + + workflow = GroupChatBuilder( + participants=[worker], + orchestrator_agent=manager, + ).build() + + with pytest.raises(ValueError, match="Failed to parse agent orchestration output."): + async for _ in workflow.run("coordinate task", stream=True): + pass + + assert manager._call_count > 1 + + +async def test_agent_manager_retries_once_then_recovers_from_non_json_output() -> None: + manager = RetryableNonJsonManagerAgent() + worker = StubAgent("agent", "worker response") + + workflow = GroupChatBuilder( + participants=[worker], + orchestrator_agent=manager, + ).build() + + outputs: list[list[Message]] = [] + async for event in workflow.run("coordinate task", stream=True): + if event.type == "output": + data = event.data + if isinstance(data, list): + outputs.append(cast(list[Message], data)) + + assert outputs + conversation = outputs[-1] + assert any(msg.author_name == "agent" and msg.text == "worker response" for msg in conversation) + assert conversation[-1].author_name == manager.name + assert conversation[-1].text == "retry manager final" + assert manager._call_count == 3 + + # Comprehensive tests for group chat functionality From 97dcb9836ab637351904d5ed988277f6eacc8123 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Tue, 17 Feb 2026 17:34:04 +0100 Subject: [PATCH 2/3] Python: Fix Anthropic test lint import Add missing Any import in anthropic test module to fix ruff F821 failures in Package Checks. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/anthropic/tests/test_anthropic_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py index 261b0e0139..2e0d6f8147 100644 --- a/python/packages/anthropic/tests/test_anthropic_client.py +++ b/python/packages/anthropic/tests/test_anthropic_client.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import os from pathlib import Path -from typing import Annotated +from typing import Annotated, Any from unittest.mock import MagicMock, patch import pytest From 32084356f938f3ab5f102abf02b3d7af7682d7b0 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Tue, 17 Feb 2026 20:20:27 +0100 Subject: [PATCH 3/3] Python: Revert group chat changes from PR 4000 Revert orchestrations changes so this PR only contains Anthropic client fixes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_group_chat.py | 13 +- .../orchestrations/tests/test_group_chat.py | 130 ------------------ 2 files changed, 3 insertions(+), 140 deletions(-) diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py index f302ff50c6..a99e221409 100644 --- a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py +++ b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py @@ -302,7 +302,6 @@ def __init__( max_rounds: Optional limit on selection rounds to prevent infinite loops. termination_condition: Optional callable that halts the conversation when it returns True retry_attempts: Optional number of retry attempts for the agent in case of failure. - Defaults to 2 retries when not provided. session: Optional agent session to use for the orchestrator agent. """ super().__init__( @@ -313,7 +312,7 @@ def __init__( termination_condition=termination_condition, ) self._agent = agent - self._retry_attempts = retry_attempts if retry_attempts is not None else 2 + self._retry_attempts = retry_attempts self._session = session or agent.create_session() # Cache for messages since last agent invocation # This is different from the full conversation history maintained by the base orchestrator @@ -486,7 +485,6 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio # We only need the last message for context since history is maintained in the thread current_conversation = self._cache.copy() self._cache.clear() - participant_names = ", ".join(self._participant_registry.participants.keys()) instruction = ( "Decide what to do next. Respond with a JSON object of the following format:\n" "{\n" @@ -509,7 +507,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio return await _invoke_agent_helper(current_conversation) except Exception as ex: logger.error(f"Agent orchestration invocation failed: {ex}") - if retry_attempts <= 0: + if retry_attempts is None or retry_attempts <= 0: raise retry_attempts -= 1 logger.debug(f"Retrying agent orchestration invocation, attempts left: {retry_attempts}") @@ -517,12 +515,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio current_conversation = [ Message( role="user", - text=( - "Your previous response could not be parsed as valid orchestration JSON. " - f"Error: {ex}. Respond with ONLY one JSON object and no extra text. " - "Use exactly these keys: terminate, reason, next_speaker, final_message. " - f"If terminate is false, next_speaker must be one of: {participant_names}." - ), + text=f"Your input could not be parsed due to an error: {ex}. Please try again.", ) ] diff --git a/python/packages/orchestrations/tests/test_group_chat.py b/python/packages/orchestrations/tests/test_group_chat.py index 3f6a5c658b..7550f820c7 100644 --- a/python/packages/orchestrations/tests/test_group_chat.py +++ b/python/packages/orchestrations/tests/test_group_chat.py @@ -166,96 +166,6 @@ async def run( ) -class NonJsonManagerAgent(Agent): - """Manager agent that emits only non-JSON assistant text.""" - - def __init__(self) -> None: - super().__init__(client=MockChatClient(), name="non_json_manager", description="Non-JSON manager") - self._call_count = 0 - - async def run( - self, - messages: str | Content | Message | Sequence[str | Content | Message] | None = None, - *, - session: AgentSession | None = None, - **kwargs: Any, - ) -> AgentResponse: - self._call_count += 1 - return AgentResponse( - messages=[ - Message( - role="assistant", - text="Good! Continue with the audit.", - author_name=self.name, - ) - ] - ) - - -class RetryableNonJsonManagerAgent(Agent): - """Manager agent that emits non-JSON once, then valid orchestration JSON.""" - - def __init__(self) -> None: - super().__init__(client=MockChatClient(), name="retry_manager", description="Retry manager") - self._call_count = 0 - - async def run( - self, - messages: str | Content | Message | Sequence[str | Content | Message] | None = None, - *, - session: AgentSession | None = None, - **kwargs: Any, - ) -> AgentResponse: - if self._call_count == 0: - self._call_count += 1 - return AgentResponse( - messages=[ - Message( - role="assistant", - text="Good! Continue with the audit.", - author_name=self.name, - ) - ] - ) - if self._call_count == 1: - self._call_count += 1 - payload = {"terminate": False, "reason": "Selecting agent", "next_speaker": "agent", "final_message": None} - return AgentResponse( - messages=[ - Message( - role="assistant", - text=( - '{"terminate": false, "reason": "Selecting agent", ' - '"next_speaker": "agent", "final_message": null}' - ), - author_name=self.name, - ) - ], - value=payload, - ) - - self._call_count += 1 - payload = { - "terminate": True, - "reason": "Task complete", - "next_speaker": None, - "final_message": "retry manager final", - } - return AgentResponse( - messages=[ - Message( - role="assistant", - text=( - '{"terminate": true, "reason": "Task complete", ' - '"next_speaker": null, "final_message": "retry manager final"}' - ), - author_name=self.name, - ) - ], - value=payload, - ) - - def make_sequence_selector() -> Callable[[GroupChatState], str]: state_counter = {"value": 0} @@ -379,46 +289,6 @@ async def test_agent_manager_handles_concatenated_json_output() -> None: assert conversation[-1].text == "concatenated manager final" -async def test_agent_manager_non_json_output_raises_parse_error() -> None: - manager = NonJsonManagerAgent() - worker = StubAgent("agent", "worker response") - - workflow = GroupChatBuilder( - participants=[worker], - orchestrator_agent=manager, - ).build() - - with pytest.raises(ValueError, match="Failed to parse agent orchestration output."): - async for _ in workflow.run("coordinate task", stream=True): - pass - - assert manager._call_count > 1 - - -async def test_agent_manager_retries_once_then_recovers_from_non_json_output() -> None: - manager = RetryableNonJsonManagerAgent() - worker = StubAgent("agent", "worker response") - - workflow = GroupChatBuilder( - participants=[worker], - orchestrator_agent=manager, - ).build() - - outputs: list[list[Message]] = [] - async for event in workflow.run("coordinate task", stream=True): - if event.type == "output": - data = event.data - if isinstance(data, list): - outputs.append(cast(list[Message], data)) - - assert outputs - conversation = outputs[-1] - assert any(msg.author_name == "agent" and msg.text == "worker response" for msg in conversation) - assert conversation[-1].author_name == manager.name - assert conversation[-1].text == "retry manager final" - assert manager._call_count == 3 - - # Comprehensive tests for group chat functionality