From 9fd01a8b66fe8aed91c4c55c0d350bb4619c5356 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 17 Feb 2026 17:28:30 +0100
Subject: [PATCH 1/3] Python: Fix Anthropic kwargs and manager parse retries

- Strip unsupported Anthropic kwargs from prepared run options while preserving provider-specific mappings.
- Keep stream mode explicit at Anthropic SDK call sites and prevent duplicate stream kwarg conflicts.
- Add bounded default retries with strict retry prompt for agent-based group chat manager parse failures.
- Add regression tests in anthropic and orchestrations packages covering #3371, #3827, and #3078.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../agent_framework_anthropic/_chat_client.py |   4 +
 .../anthropic/tests/test_anthropic_client.py  |  65 ++++++++-
 .../_group_chat.py                            |  13 +-
 .../orchestrations/tests/test_group_chat.py   | 130 ++++++++++++++++++
 4 files changed, 208 insertions(+), 4 deletions(-)

diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
index 4e7ffafc2d..29b3a62416 100644
--- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
+++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
@@ -488,6 +488,10 @@ def _prepare_options(
         run_options: dict[str, Any] = {
             k: v for k, v in options.items() if v is not None and k not in {"instructions", "response_format"}
         }
+        # Framework-level options handled elsewhere; do not forward as raw Anthropic request kwargs.
+        run_options.pop("allow_multiple_tool_calls", None)
+        # Stream mode is controlled explicitly at call sites.
+        run_options.pop("stream", None)
 
         # Translation between options keys and Anthropic Messages API
         for old_key, new_key in OPTION_TRANSLATIONS.items():
diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
index ff9234f60b..261b0e0139 100644
--- a/python/packages/anthropic/tests/test_anthropic_client.py
+++ b/python/packages/anthropic/tests/test_anthropic_client.py
@@ -396,11 +396,13 @@ async def test_prepare_options_with_tool_choice_auto(mock_anthropic_client: Magi
     client = create_test_anthropic_client(mock_anthropic_client)
 
     messages = [Message(role="user", text="Hello")]
-    chat_options = ChatOptions(tool_choice="auto")
+    chat_options = ChatOptions(tool_choice="auto", allow_multiple_tool_calls=False)
 
     run_options = client._prepare_options(messages, chat_options)
 
     assert run_options["tool_choice"]["type"] == "auto"
+    assert run_options["tool_choice"]["disable_parallel_tool_use"] is True
+    assert "allow_multiple_tool_calls" not in run_options
 
 
 async def test_prepare_options_with_tool_choice_required(mock_anthropic_client: MagicMock) -> None:
@@ -471,6 +473,18 @@ async def test_prepare_options_with_top_p(mock_anthropic_client: MagicMock) -> N
     assert run_options["top_p"] == 0.9
 
 
+async def test_prepare_options_excludes_stream_option(mock_anthropic_client: MagicMock) -> None:
+    """Test _prepare_options excludes stream when stream is provided in options."""
+    client = create_test_anthropic_client(mock_anthropic_client)
+
+    messages = [Message(role="user", text="Hello")]
+    chat_options: dict[str, Any] = {"stream": True, "max_tokens": 100}
+
+    run_options = client._prepare_options(messages, chat_options)
+
+    assert "stream" not in run_options
+
+
 async def test_prepare_options_filters_internal_kwargs(mock_anthropic_client: MagicMock) -> None:
     """Test _prepare_options filters internal framework kwargs.
 
@@ -700,6 +714,30 @@ async def test_inner_get_response(mock_anthropic_client: MagicMock) -> None:
     assert len(response.messages) == 1
 
 
+async def test_inner_get_response_ignores_options_stream_non_streaming(mock_anthropic_client: MagicMock) -> None:
+    """Test stream option in options does not conflict in non-streaming mode."""
+    client = create_test_anthropic_client(mock_anthropic_client)
+
+    mock_message = MagicMock(spec=BetaMessage)
+    mock_message.id = "msg_test"
+    mock_message.model = "claude-3-5-sonnet-20241022"
+    mock_message.content = [BetaTextBlock(type="text", text="Hello!")]
+    mock_message.usage = BetaUsage(input_tokens=5, output_tokens=3)
+    mock_message.stop_reason = "end_turn"
+    mock_anthropic_client.beta.messages.create.return_value = mock_message
+
+    messages = [Message(role="user", text="Hi")]
+    options: dict[str, Any] = {"max_tokens": 10, "stream": True}
+
+    await client._inner_get_response(  # type: ignore[attr-defined]
+        messages=messages,
+        options=options,
+    )
+
+    assert mock_anthropic_client.beta.messages.create.call_count == 1
+    assert mock_anthropic_client.beta.messages.create.call_args.kwargs["stream"] is False
+
+
 async def test_inner_get_response_streaming(mock_anthropic_client: MagicMock) -> None:
     """Test _inner_get_response method with streaming."""
     client = create_test_anthropic_client(mock_anthropic_client)
@@ -726,6 +764,31 @@ async def mock_stream():
     assert isinstance(chunks, list)
 
 
+async def test_inner_get_response_ignores_options_stream_streaming(mock_anthropic_client: MagicMock) -> None:
+    """Test stream option in options does not conflict in streaming mode."""
+    client = create_test_anthropic_client(mock_anthropic_client)
+
+    async def mock_stream():
+        mock_event = MagicMock()
+        mock_event.type = "message_stop"
+        yield mock_event
+
+    mock_anthropic_client.beta.messages.create.return_value = mock_stream()
+
+    messages = [Message(role="user", text="Hi")]
+    options: dict[str, Any] = {"max_tokens": 10, "stream": False}
+
+    async for _ in client._inner_get_response(  # type: ignore[attr-defined]
+        messages=messages,
+        options=options,
+        stream=True,
+    ):
+        pass
+
+    assert mock_anthropic_client.beta.messages.create.call_count == 1
+    assert mock_anthropic_client.beta.messages.create.call_args.kwargs["stream"] is True
+
+
 # Integration Tests
 
 
diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
index a99e221409..f302ff50c6 100644
--- a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
+++ b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
@@ -302,6 +302,7 @@ def __init__(
             max_rounds: Optional limit on selection rounds to prevent infinite loops.
             termination_condition: Optional callable that halts the conversation when it returns True
             retry_attempts: Optional number of retry attempts for the agent in case of failure.
+                Defaults to 2 retries when not provided.
             session: Optional agent session to use for the orchestrator agent.
         """
         super().__init__(
@@ -312,7 +313,7 @@ def __init__(
             termination_condition=termination_condition,
         )
         self._agent = agent
-        self._retry_attempts = retry_attempts
+        self._retry_attempts = retry_attempts if retry_attempts is not None else 2
         self._session = session or agent.create_session()
         # Cache for messages since last agent invocation
         # This is different from the full conversation history maintained by the base orchestrator
@@ -485,6 +486,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
         # We only need the last message for context since history is maintained in the thread
         current_conversation = self._cache.copy()
         self._cache.clear()
+        participant_names = ", ".join(self._participant_registry.participants.keys())
         instruction = (
             "Decide what to do next. Respond with a JSON object of the following format:\n"
             "{\n"
@@ -507,7 +509,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
                 return await _invoke_agent_helper(current_conversation)
             except Exception as ex:
                 logger.error(f"Agent orchestration invocation failed: {ex}")
-                if retry_attempts is None or retry_attempts <= 0:
+                if retry_attempts <= 0:
                     raise
                 retry_attempts -= 1
                 logger.debug(f"Retrying agent orchestration invocation, attempts left: {retry_attempts}")
@@ -515,7 +517,12 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
                 current_conversation = [
                     Message(
                         role="user",
-                        text=f"Your input could not be parsed due to an error: {ex}. Please try again.",
+                        text=(
+                            "Your previous response could not be parsed as valid orchestration JSON. "
+                            f"Error: {ex}. Respond with ONLY one JSON object and no extra text. "
+                            "Use exactly these keys: terminate, reason, next_speaker, final_message. "
+                            f"If terminate is false, next_speaker must be one of: {participant_names}."
+                        ),
                     )
                 ]
 
diff --git a/python/packages/orchestrations/tests/test_group_chat.py b/python/packages/orchestrations/tests/test_group_chat.py
index 7550f820c7..3f6a5c658b 100644
--- a/python/packages/orchestrations/tests/test_group_chat.py
+++ b/python/packages/orchestrations/tests/test_group_chat.py
@@ -166,6 +166,96 @@ async def run(
         )
 
 
+class NonJsonManagerAgent(Agent):
+    """Manager agent that emits only non-JSON assistant text."""
+
+    def __init__(self) -> None:
+        super().__init__(client=MockChatClient(), name="non_json_manager", description="Non-JSON manager")
+        self._call_count = 0
+
+    async def run(
+        self,
+        messages: str | Content | Message | Sequence[str | Content | Message] | None = None,
+        *,
+        session: AgentSession | None = None,
+        **kwargs: Any,
+    ) -> AgentResponse:
+        self._call_count += 1
+        return AgentResponse(
+            messages=[
+                Message(
+                    role="assistant",
+                    text="Good! Continue with the audit.",
+                    author_name=self.name,
+                )
+            ]
+        )
+
+
+class RetryableNonJsonManagerAgent(Agent):
+    """Manager agent that emits non-JSON once, then valid orchestration JSON."""
+
+    def __init__(self) -> None:
+        super().__init__(client=MockChatClient(), name="retry_manager", description="Retry manager")
+        self._call_count = 0
+
+    async def run(
+        self,
+        messages: str | Content | Message | Sequence[str | Content | Message] | None = None,
+        *,
+        session: AgentSession | None = None,
+        **kwargs: Any,
+    ) -> AgentResponse:
+        if self._call_count == 0:
+            self._call_count += 1
+            return AgentResponse(
+                messages=[
+                    Message(
+                        role="assistant",
+                        text="Good! Continue with the audit.",
+                        author_name=self.name,
+                    )
+                ]
+            )
+        if self._call_count == 1:
+            self._call_count += 1
+            payload = {"terminate": False, "reason": "Selecting agent", "next_speaker": "agent", "final_message": None}
+            return AgentResponse(
+                messages=[
+                    Message(
+                        role="assistant",
+                        text=(
+                            '{"terminate": false, "reason": "Selecting agent", '
+                            '"next_speaker": "agent", "final_message": null}'
+                        ),
+                        author_name=self.name,
+                    )
+                ],
+                value=payload,
+            )
+
+        self._call_count += 1
+        payload = {
+            "terminate": True,
+            "reason": "Task complete",
+            "next_speaker": None,
+            "final_message": "retry manager final",
+        }
+        return AgentResponse(
+            messages=[
+                Message(
+                    role="assistant",
+                    text=(
+                        '{"terminate": true, "reason": "Task complete", '
+                        '"next_speaker": null, "final_message": "retry manager final"}'
+                    ),
+                    author_name=self.name,
+                )
+            ],
+            value=payload,
+        )
+
+
 def make_sequence_selector() -> Callable[[GroupChatState], str]:
     state_counter = {"value": 0}
 
@@ -289,6 +379,46 @@ async def test_agent_manager_handles_concatenated_json_output() -> None:
     assert conversation[-1].text == "concatenated manager final"
 
 
+async def test_agent_manager_non_json_output_raises_parse_error() -> None:
+    manager = NonJsonManagerAgent()
+    worker = StubAgent("agent", "worker response")
+
+    workflow = GroupChatBuilder(
+        participants=[worker],
+        orchestrator_agent=manager,
+    ).build()
+
+    with pytest.raises(ValueError, match="Failed to parse agent orchestration output."):
+        async for _ in workflow.run("coordinate task", stream=True):
+            pass
+
+    assert manager._call_count > 1
+
+
+async def test_agent_manager_retries_once_then_recovers_from_non_json_output() -> None:
+    manager = RetryableNonJsonManagerAgent()
+    worker = StubAgent("agent", "worker response")
+
+    workflow = GroupChatBuilder(
+        participants=[worker],
+        orchestrator_agent=manager,
+    ).build()
+
+    outputs: list[list[Message]] = []
+    async for event in workflow.run("coordinate task", stream=True):
+        if event.type == "output":
+            data = event.data
+            if isinstance(data, list):
+                outputs.append(cast(list[Message], data))
+
+    assert outputs
+    conversation = outputs[-1]
+    assert any(msg.author_name == "agent" and msg.text == "worker response" for msg in conversation)
+    assert conversation[-1].author_name == manager.name
+    assert conversation[-1].text == "retry manager final"
+    assert manager._call_count == 3
+
+
 # Comprehensive tests for group chat functionality
 
 

From 97dcb9836ab637351904d5ed988277f6eacc8123 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 17 Feb 2026 17:34:04 +0100
Subject: [PATCH 2/3] Python: Fix Anthropic test lint import

Add missing Any import in anthropic test module to fix ruff F821 failures in Package Checks.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 python/packages/anthropic/tests/test_anthropic_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
index 261b0e0139..2e0d6f8147 100644
--- a/python/packages/anthropic/tests/test_anthropic_client.py
+++ b/python/packages/anthropic/tests/test_anthropic_client.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.
 import os
 from pathlib import Path
-from typing import Annotated
+from typing import Annotated, Any
 from unittest.mock import MagicMock, patch
 
 import pytest

From 32084356f938f3ab5f102abf02b3d7af7682d7b0 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 17 Feb 2026 20:20:27 +0100
Subject: [PATCH 3/3] Python: Revert group chat changes from PR 4000

Revert orchestrations changes so this PR only contains Anthropic client fixes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../_group_chat.py                            |  13 +-
 .../orchestrations/tests/test_group_chat.py   | 130 ------------------
 2 files changed, 3 insertions(+), 140 deletions(-)

diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
index f302ff50c6..a99e221409 100644
--- a/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
+++ b/python/packages/orchestrations/agent_framework_orchestrations/_group_chat.py
@@ -302,7 +302,6 @@ def __init__(
             max_rounds: Optional limit on selection rounds to prevent infinite loops.
             termination_condition: Optional callable that halts the conversation when it returns True
             retry_attempts: Optional number of retry attempts for the agent in case of failure.
-                Defaults to 2 retries when not provided.
             session: Optional agent session to use for the orchestrator agent.
         """
         super().__init__(
@@ -313,7 +312,7 @@ def __init__(
             termination_condition=termination_condition,
         )
         self._agent = agent
-        self._retry_attempts = retry_attempts if retry_attempts is not None else 2
+        self._retry_attempts = retry_attempts
         self._session = session or agent.create_session()
         # Cache for messages since last agent invocation
         # This is different from the full conversation history maintained by the base orchestrator
@@ -486,7 +485,6 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
         # We only need the last message for context since history is maintained in the thread
         current_conversation = self._cache.copy()
         self._cache.clear()
-        participant_names = ", ".join(self._participant_registry.participants.keys())
         instruction = (
             "Decide what to do next. Respond with a JSON object of the following format:\n"
             "{\n"
@@ -509,7 +507,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
                 return await _invoke_agent_helper(current_conversation)
             except Exception as ex:
                 logger.error(f"Agent orchestration invocation failed: {ex}")
-                if retry_attempts <= 0:
+                if retry_attempts is None or retry_attempts <= 0:
                     raise
                 retry_attempts -= 1
                 logger.debug(f"Retrying agent orchestration invocation, attempts left: {retry_attempts}")
@@ -517,12 +515,7 @@ async def _invoke_agent_helper(conversation: list[Message]) -> AgentOrchestratio
                 current_conversation = [
                     Message(
                         role="user",
-                        text=(
-                            "Your previous response could not be parsed as valid orchestration JSON. "
-                            f"Error: {ex}. Respond with ONLY one JSON object and no extra text. "
-                            "Use exactly these keys: terminate, reason, next_speaker, final_message. "
-                            f"If terminate is false, next_speaker must be one of: {participant_names}."
-                        ),
+                        text=f"Your input could not be parsed due to an error: {ex}. Please try again.",
                     )
                 ]
 
diff --git a/python/packages/orchestrations/tests/test_group_chat.py b/python/packages/orchestrations/tests/test_group_chat.py
index 3f6a5c658b..7550f820c7 100644
--- a/python/packages/orchestrations/tests/test_group_chat.py
+++ b/python/packages/orchestrations/tests/test_group_chat.py
@@ -166,96 +166,6 @@ async def run(
         )
 
 
-class NonJsonManagerAgent(Agent):
-    """Manager agent that emits only non-JSON assistant text."""
-
-    def __init__(self) -> None:
-        super().__init__(client=MockChatClient(), name="non_json_manager", description="Non-JSON manager")
-        self._call_count = 0
-
-    async def run(
-        self,
-        messages: str | Content | Message | Sequence[str | Content | Message] | None = None,
-        *,
-        session: AgentSession | None = None,
-        **kwargs: Any,
-    ) -> AgentResponse:
-        self._call_count += 1
-        return AgentResponse(
-            messages=[
-                Message(
-                    role="assistant",
-                    text="Good! Continue with the audit.",
-                    author_name=self.name,
-                )
-            ]
-        )
-
-
-class RetryableNonJsonManagerAgent(Agent):
-    """Manager agent that emits non-JSON once, then valid orchestration JSON."""
-
-    def __init__(self) -> None:
-        super().__init__(client=MockChatClient(), name="retry_manager", description="Retry manager")
-        self._call_count = 0
-
-    async def run(
-        self,
-        messages: str | Content | Message | Sequence[str | Content | Message] | None = None,
-        *,
-        session: AgentSession | None = None,
-        **kwargs: Any,
-    ) -> AgentResponse:
-        if self._call_count == 0:
-            self._call_count += 1
-            return AgentResponse(
-                messages=[
-                    Message(
-                        role="assistant",
-                        text="Good! Continue with the audit.",
-                        author_name=self.name,
-                    )
-                ]
-            )
-        if self._call_count == 1:
-            self._call_count += 1
-            payload = {"terminate": False, "reason": "Selecting agent", "next_speaker": "agent", "final_message": None}
-            return AgentResponse(
-                messages=[
-                    Message(
-                        role="assistant",
-                        text=(
-                            '{"terminate": false, "reason": "Selecting agent", '
-                            '"next_speaker": "agent", "final_message": null}'
-                        ),
-                        author_name=self.name,
-                    )
-                ],
-                value=payload,
-            )
-
-        self._call_count += 1
-        payload = {
-            "terminate": True,
-            "reason": "Task complete",
-            "next_speaker": None,
-            "final_message": "retry manager final",
-        }
-        return AgentResponse(
-            messages=[
-                Message(
-                    role="assistant",
-                    text=(
-                        '{"terminate": true, "reason": "Task complete", '
-                        '"next_speaker": null, "final_message": "retry manager final"}'
-                    ),
-                    author_name=self.name,
-                )
-            ],
-            value=payload,
-        )
-
-
 def make_sequence_selector() -> Callable[[GroupChatState], str]:
     state_counter = {"value": 0}
 
@@ -379,46 +289,6 @@ async def test_agent_manager_handles_concatenated_json_output() -> None:
     assert conversation[-1].text == "concatenated manager final"
 
 
-async def test_agent_manager_non_json_output_raises_parse_error() -> None:
-    manager = NonJsonManagerAgent()
-    worker = StubAgent("agent", "worker response")
-
-    workflow = GroupChatBuilder(
-        participants=[worker],
-        orchestrator_agent=manager,
-    ).build()
-
-    with pytest.raises(ValueError, match="Failed to parse agent orchestration output."):
-        async for _ in workflow.run("coordinate task", stream=True):
-            pass
-
-    assert manager._call_count > 1
-
-
-async def test_agent_manager_retries_once_then_recovers_from_non_json_output() -> None:
-    manager = RetryableNonJsonManagerAgent()
-    worker = StubAgent("agent", "worker response")
-
-    workflow = GroupChatBuilder(
-        participants=[worker],
-        orchestrator_agent=manager,
-    ).build()
-
-    outputs: list[list[Message]] = []
-    async for event in workflow.run("coordinate task", stream=True):
-        if event.type == "output":
-            data = event.data
-            if isinstance(data, list):
-                outputs.append(cast(list[Message], data))
-
-    assert outputs
-    conversation = outputs[-1]
-    assert any(msg.author_name == "agent" and msg.text == "worker response" for msg in conversation)
-    assert conversation[-1].author_name == manager.name
-    assert conversation[-1].text == "retry manager final"
-    assert manager._call_count == 3
-
-
 # Comprehensive tests for group chat functionality