microsoft · eavanvalkenburg · Feb 5, 2026 · Jan 20, 2026 · Jan 22, 2026 · Jan 23, 2026
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
@@ -96,8 +96,7 @@ jobs:
         uses: ./.github/actions/azure-functions-integration-setup
         id: azure-functions-setup
       - name: Test with pytest
-        timeout-minutes: 10
-        run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout 900 --retries 3 --retry-delay 10
+        run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
         working-directory: ./python
       - name: Test core samples
         timeout-minutes: 10
@@ -153,8 +152,8 @@ jobs:
           tenant-id: ${{ secrets.AZURE_TENANT_ID }}
           subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
       - name: Test with pytest
-        timeout-minutes: 10
-        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout 300 --retries 3 --retry-delay 10
+        timeout-minutes: 15
+        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
         working-directory: ./python
       - name: Test Azure AI samples
         timeout-minutes: 10

diff --git a/docs/decisions/0012-python-typeddict-options.md b/docs/decisions/0012-python-typeddict-options.md
@@ -126,4 +126,4 @@ response = await client.get_response(
 
 Chosen option: **"Option 2: TypedDict with Generic Type Parameters"**, because it provides full type safety, excellent IDE support with autocompletion, and allows users to extend provider-specific options for their use cases. Extended this Generic to ChatAgents in order to also properly type the options used in agent construction and run methods.
 
-See [typed_options.py](../../python/samples/getting_started/chat_client/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.
+See [typed_options.py](../../python/samples/concepts/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.
diff --git a/python/.cspell.json b/python/.cspell.json
@@ -38,6 +38,8 @@
         "endregion",
         "entra",
         "faiss",
+        "finalizer",
+        "finalizers",
         "genai",
         "generativeai",
         "hnsw",

diff --git a/python/CODING_STANDARD.md b/python/CODING_STANDARD.md
@@ -484,3 +484,56 @@ otel_messages.append(_to_otel_message(message)) # this already serializes
 message_data = message.to_dict(exclude_none=True)  # and this does so again!
 logger.info(message_data, extra={...})
 ```
+
+## Test Organization
+
+### Test Directory Structure
+
+Test folders require specific organization to avoid pytest conflicts when running tests across packages:
+
+1. **No `__init__.py` in test folders**: Test directories should NOT contain `__init__.py` files, because their presence can cause import conflicts when pytest collects tests across multiple packages.
+
+2. **File naming**: Files starting with `test_` are treated as test files by pytest. Do not use this prefix for helper modules or utilities. If you need shared test utilities, put them in `conftest.py` or a file with a different name pattern (e.g., `helpers.py`, `fixtures.py`).
+
+3. **Package-specific conftest location**: The `tests/conftest.py` path is reserved for the core package (`packages/core/tests/conftest.py`). Other packages must place their tests in a uniquely-named subdirectory:
+
+```plaintext
+# ✅ Correct structure for non-core packages
+packages/devui/
+├── tests/
+│   └── devui/           # Unique subdirectory matching package name
+│       ├── conftest.py  # Package-specific fixtures
+│       ├── test_server.py
+│       └── test_mapper.py
+
+packages/anthropic/
+├── tests/
+│   └── anthropic/       # Unique subdirectory
+│       ├── conftest.py
+│       └── test_client.py
+
+# ❌ Incorrect - will conflict with core package
+packages/devui/
+├── tests/
+│   ├── conftest.py      # Conflicts when running all tests
+│   ├── test_server.py
+│   └── test_helpers.py  # Bad name - looks like a test file
+
+# ✅ Core package can use tests/ directly
+packages/core/
+├── tests/
+│   ├── conftest.py      # Core's conftest.py
+│   ├── core/
+│   │   └── test_agents.py
+│   └── openai/
+│       └── test_client.py
+```
+
+4. **Keep the `tests/` folder**: Even when using a subdirectory, keep the `tests/` folder at the package root. Some test discovery commands and tooling rely on this convention.
+
+### Fixture Guidelines
+
+- Use `conftest.py` for shared fixtures within a test directory
+- Factory functions with parameters should be regular functions, not fixtures (a test cannot pass arguments directly to a fixture)
+- Import factory functions explicitly: `from conftest import create_test_request`
+- Fixtures should use simple names that describe what they provide: `mapper`, `test_request`, `mock_client`
diff --git a/python/packages/a2a/agent_framework_a2a/_agent.py b/python/packages/a2a/agent_framework_a2a/_agent.py
@@ -4,8 +4,8 @@
 import json
 import re
 import uuid
-from collections.abc import AsyncIterable, Sequence
-from typing import Any, Final, cast
+from collections.abc import AsyncIterable, Awaitable, Sequence
+from typing import Any, Final, Literal, cast, overload
 
 import httpx
 from a2a.client import Client, ClientConfig, ClientFactory, minimal_agent_card
@@ -32,10 +32,11 @@
     BaseAgent,
     ChatMessage,
     Content,
+    ResponseStream,
     normalize_messages,
     prepend_agent_framework_to_user_agent,
 )
-from agent_framework.observability import use_agent_instrumentation
+from agent_framework.observability import AgentTelemetryLayer
 
 __all__ = ["A2AAgent"]
 
@@ -56,8 +57,7 @@ def _get_uri_data(uri: str) -> str:
     return match.group("base64_data")
 
 
-@use_agent_instrumentation
-class A2AAgent(BaseAgent):
+class A2AAgent(AgentTelemetryLayer, BaseAgent):
     """Agent2Agent (A2A) protocol implementation.
 
     Wraps an A2A Client to connect the Agent Framework with external A2A-compliant agents
@@ -184,44 +184,92 @@ async def __aexit__(
         if self._http_client is not None and self._close_http_client:
             await self._http_client.aclose()
 
-    async def run(
+    @overload
+    def run(
         self,
-        messages: str | Content | ChatMessage | Sequence[str | Content | ChatMessage] | None = None,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
         *,
+        stream: Literal[False] = ...,
         thread: AgentThread | None = None,
         **kwargs: Any,
-    ) -> AgentResponse:
+    ) -> Awaitable[AgentResponse[Any]]: ...
+
+    @overload
+    def run(
+        self,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
+        *,
+        stream: Literal[True],
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> ResponseStream[AgentResponseUpdate, AgentResponse[Any]]: ...
+
+    def run(
+        self,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
+        *,
+        stream: bool = False,
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> Awaitable[AgentResponse[Any]] | ResponseStream[AgentResponseUpdate, AgentResponse[Any]]:
         """Get a response from the agent.
 
         This method returns the final result of the agent's execution
-        as a single AgentResponse object. The caller is blocked until
-        the final result is available.
+        as a single AgentResponse object when stream=False. When stream=True,
+        it returns a ResponseStream that yields AgentResponseUpdate objects.
 
         Args:
             messages: The message(s) to send to the agent.
 
         Keyword Args:
+            stream: Whether to stream the response. Defaults to False.
             thread: The conversation thread associated with the message(s).
             kwargs: Additional keyword arguments.
 
         Returns:
-            An agent response item.
+            When stream=False: An Awaitable[AgentResponse].
+            When stream=True: A ResponseStream of AgentResponseUpdate items.
         """
+        if stream:
+            return self._run_stream_impl(messages=messages, thread=thread, **kwargs)
+        return self._run_impl(messages=messages, thread=thread, **kwargs)
+
+    async def _run_impl(
+        self,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
+        *,
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> AgentResponse[Any]:
+        """Non-streaming implementation of run."""
         # Collect all updates and use framework to consolidate updates into response
-        updates = [update async for update in self.run_stream(messages, thread=thread, **kwargs)]
+        updates: list[AgentResponseUpdate] = []
+        async for update in self._stream_updates(messages, thread=thread, **kwargs):
+            updates.append(update)
         return AgentResponse.from_updates(updates)
 
-    async def run_stream(
+    def _run_stream_impl(
         self,
-        messages: str | Content | ChatMessage | Sequence[str | Content | ChatMessage] | None = None,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
         *,
         thread: AgentThread | None = None,
         **kwargs: Any,
-    ) -> AsyncIterable[AgentResponseUpdate]:
-        """Run the agent as a stream.
+    ) -> ResponseStream[AgentResponseUpdate, AgentResponse[Any]]:
+        """Streaming implementation of run."""
+
+        def _finalize(updates: Sequence[AgentResponseUpdate]) -> AgentResponse[Any]:
+            return AgentResponse.from_updates(list(updates))
+
+        return ResponseStream(self._stream_updates(messages, thread=thread, **kwargs), finalizer=_finalize)
 
-        This method will return the intermediate steps and final results of the
-        agent's execution as a stream of AgentResponseUpdate objects to the caller.
+    async def _stream_updates(
+        self,
+        messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
+        *,
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[AgentResponseUpdate]:
+        """Internal method to stream updates from the A2A agent.
 
         Args:
             messages: The message(s) to send to the agent.
@@ -231,10 +279,10 @@ async def run_stream(
             kwargs: Additional keyword arguments.
 
         Yields:
-            An agent response item.
+            AgentResponseUpdate items from the A2A agent.
         """
-        messages = normalize_messages(messages)
-        a2a_message = self._prepare_message_for_a2a(messages[-1])
+        normalized_messages = normalize_messages(messages)
+        a2a_message = self._prepare_message_for_a2a(normalized_messages[-1])
 
         response_stream = self.client.send_message(a2a_message)
 

diff --git a/python/packages/a2a/tests/test_a2a_agent.py b/python/packages/a2a/tests/test_a2a_agent.py
@@ -295,7 +295,7 @@ def test_prepare_message_for_a2a_with_error_content(a2a_agent: A2AAgent) -> None
 
     # Create ChatMessage with ErrorContent
     error_content = Content.from_error(message="Test error message")
-    message = ChatMessage("user", [error_content])
+    message = ChatMessage(role="user", contents=[error_content])
 
     # Convert to A2A message
     a2a_message = a2a_agent._prepare_message_for_a2a(message)
@@ -310,7 +310,7 @@ def test_prepare_message_for_a2a_with_uri_content(a2a_agent: A2AAgent) -> None:
 
     # Create ChatMessage with UriContent
     uri_content = Content.from_uri(uri="http://example.com/file.pdf", media_type="application/pdf")
-    message = ChatMessage("user", [uri_content])
+    message = ChatMessage(role="user", contents=[uri_content])
 
     # Convert to A2A message
     a2a_message = a2a_agent._prepare_message_for_a2a(message)
@@ -326,7 +326,7 @@ def test_prepare_message_for_a2a_with_data_content(a2a_agent: A2AAgent) -> None:
 
     # Create ChatMessage with DataContent (base64 data URI)
     data_content = Content.from_uri(uri="data:text/plain;base64,SGVsbG8gV29ybGQ=", media_type="text/plain")
-    message = ChatMessage("user", [data_content])
+    message = ChatMessage(role="user", contents=[data_content])
 
     # Convert to A2A message
     a2a_message = a2a_agent._prepare_message_for_a2a(message)
@@ -340,20 +340,20 @@ def test_prepare_message_for_a2a_with_data_content(a2a_agent: A2AAgent) -> None:
 def test_prepare_message_for_a2a_empty_contents_raises_error(a2a_agent: A2AAgent) -> None:
     """Test _prepare_message_for_a2a with empty contents raises ValueError."""
     # Create ChatMessage with no contents
-    message = ChatMessage("user", [])
+    message = ChatMessage(role="user", contents=[])
 
     # Should raise ValueError for empty contents
     with raises(ValueError, match="ChatMessage.contents is empty"):
         a2a_agent._prepare_message_for_a2a(message)
 
 
-async def test_run_stream_with_message_response(a2a_agent: A2AAgent, mock_a2a_client: MockA2AClient) -> None:
-    """Test run_stream() method with immediate Message response."""
+async def test_run_streaming_with_message_response(a2a_agent: A2AAgent, mock_a2a_client: MockA2AClient) -> None:
+    """Test run(stream=True) method with immediate Message response."""
     mock_a2a_client.add_message_response("msg-stream-123", "Streaming response from agent!", "agent")
 
     # Collect streaming updates
     updates: list[AgentResponseUpdate] = []
-    async for update in a2a_agent.run_stream("Hello agent"):
+    async for update in a2a_agent.run("Hello agent", stream=True):
         updates.append(update)
 
     # Verify streaming response

diff --git a/python/packages/ag-ui/README.md b/python/packages/ag-ui/README.md
@@ -46,7 +46,7 @@ from agent_framework.ag_ui import AGUIChatClient
 async def main():
     async with AGUIChatClient(endpoint="http://localhost:8000/") as client:
         # Stream responses
-        async for update in client.get_streaming_response("Hello!"):
+        async for update in client.get_response("Hello!", stream=True):
             for content in update.contents:
                 if isinstance(content, TextContent):
                     print(content.text, end="", flush=True)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -126,4 +126,4 @@ response = await client.get_response(

		Chosen option: "Option 2: TypedDict with Generic Type Parameters", because it provides full type safety, excellent IDE support with autocompletion, and allows users to extend provider-specific options for their use cases. Extended this Generic to ChatAgents in order to also properly type the options used in agent construction and run methods.

		See [typed_options.py](../../python/samples/getting_started/chat_client/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.
		See [typed_options.py](../../python/samples/concepts/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.