Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
102 commits
Select commit Hold shift + click to select a range
32be8db
WIP
eavanvalkenburg Jan 20, 2026
0951dc2
big update to new ResponseStream model
eavanvalkenburg Jan 22, 2026
d387101
fixed tests and typing
eavanvalkenburg Jan 23, 2026
f902741
fixed tests and typing
eavanvalkenburg Feb 4, 2026
1b2c99d
fixed tools typevar import
eavanvalkenburg Jan 23, 2026
3c35c6e
fix
eavanvalkenburg Jan 23, 2026
7bd266e
mypy fix
eavanvalkenburg Jan 23, 2026
6ad1ba9
mypy fixes and some cleanup
eavanvalkenburg Jan 23, 2026
76b9399
fix missing quoted names
eavanvalkenburg Feb 4, 2026
bc8b890
and client
eavanvalkenburg Jan 23, 2026
eb51ba3
fix imports agui
eavanvalkenburg Jan 23, 2026
c28f1ef
fix anthropic override
eavanvalkenburg Jan 23, 2026
dc7c92b
fix agui
eavanvalkenburg Jan 23, 2026
57a5531
fix ag ui
eavanvalkenburg Feb 4, 2026
1d931f5
fix import
eavanvalkenburg Jan 23, 2026
778e4db
fix anthropic types
eavanvalkenburg Jan 23, 2026
5b24489
fix mypy
eavanvalkenburg Jan 23, 2026
fa9ab9d
refactoring
eavanvalkenburg Jan 23, 2026
2c48a63
updated typing
eavanvalkenburg Jan 29, 2026
4e37d69
fix 3.11
eavanvalkenburg Jan 29, 2026
006ba98
fixes
eavanvalkenburg Feb 4, 2026
c52c476
redid layering of chat clients and agents
eavanvalkenburg Feb 4, 2026
f2c6bcf
redid layering of chat clients and agents
eavanvalkenburg Feb 4, 2026
2c1f36b
Fix lint, type, and test issues after rebase
eavanvalkenburg Jan 30, 2026
4447c26
Fix AgentExecutionException import error in test_agents.py
eavanvalkenburg Jan 30, 2026
c9f3e36
Fix test import and asyncio deprecation issues
eavanvalkenburg Jan 30, 2026
3bd0086
Fix azure-ai test failures
eavanvalkenburg Jan 30, 2026
3052ae9
Convert ag-ui utils_test_ag_ui.py to conftest.py
eavanvalkenburg Jan 30, 2026
bbada82
fix: use relative imports for ag-ui test utilities
eavanvalkenburg Jan 30, 2026
70eaece
fix agui
eavanvalkenburg Jan 30, 2026
5cc21eb
Rename Bare*Client to Raw*Client and BaseChatClient
eavanvalkenburg Jan 30, 2026
7772f2e
Fix layer ordering: FunctionInvocationLayer before ChatTelemetryLayer
eavanvalkenburg Jan 30, 2026
1f58d6a
Remove run_stream usage
eavanvalkenburg Feb 4, 2026
4f7536c
Fix conversation_id propagation
eavanvalkenburg Feb 4, 2026
58cae68
Python: Add BaseAgent implementation for Claude Agent SDK (#3509)
dmytrostruk Jan 30, 2026
87007e9
Update Claude agent connector layering
eavanvalkenburg Feb 4, 2026
0bd1a8b
fix test and plugin
eavanvalkenburg Feb 1, 2026
3fe9907
Store function middleware in invocation layer
eavanvalkenburg Feb 2, 2026
747cc8a
Fix telemetry streaming and ag-ui tests
eavanvalkenburg Feb 2, 2026
6a00f5b
Remove legacy ag-ui tests folder
eavanvalkenburg Feb 4, 2026
ec7dd84
updates
eavanvalkenburg Feb 4, 2026
716a351
Remove terminate flag from FunctionInvocationContext, use MiddlewareT…
eavanvalkenburg Feb 3, 2026
1d3c929
fix: remove references to removed terminate flag in purview tests, ad…
eavanvalkenburg Feb 3, 2026
8a8113a
fix: move _test_utils.py from package to test folder
eavanvalkenburg Feb 3, 2026
d1b003b
fix: call get_final_response() to trigger context provider notificati…
eavanvalkenburg Feb 3, 2026
6bbe7e7
fix: correct broken links in tools README
eavanvalkenburg Feb 3, 2026
648a332
docs: clarify default middleware behavior in summary table
eavanvalkenburg Feb 3, 2026
ddab5cb
fix: ensure inner stream result hooks are called when using map()/fro…
eavanvalkenburg Feb 3, 2026
b571b4e
Fix mypy type errors
eavanvalkenburg Feb 4, 2026
443667e
Address PR review comments on observability.py
eavanvalkenburg Feb 3, 2026
7eaf94c
Remove gen_ai.client.operation.duration from span attributes
eavanvalkenburg Feb 3, 2026
04c1e26
Remove duration from _get_response_attributes, pass directly to _capt…
eavanvalkenburg Feb 3, 2026
49d3d55
Remove redundant _close_span cleanup hook in AgentTelemetryLayer
eavanvalkenburg Feb 3, 2026
c9049a0
Use weakref.finalize to close span when stream is garbage collected
eavanvalkenburg Feb 3, 2026
7ea9ccc
Fix _get_finalizers_from_stream to use _result_hooks attribute
eavanvalkenburg Feb 4, 2026
d634da0
Add missing asyncio import in test_request_info_mixin.py
eavanvalkenburg Feb 4, 2026
7637a26
Fix leftover merge conflict marker in image_generation sample
eavanvalkenburg Feb 4, 2026
ed74996
Update integration tests
eavanvalkenburg Feb 4, 2026
27122ac
Fix integration tests: increase max_iterations from 1 to 2
eavanvalkenburg Feb 4, 2026
985eb8f
Fix duplicate function call error in conversation-based APIs
eavanvalkenburg Feb 4, 2026
d5e1f82
Add regression test for conversation_id propagation between tool iter…
eavanvalkenburg Feb 4, 2026
7ddde04
Fix tool_choice=required to return after tool execution
eavanvalkenburg Feb 4, 2026
55414d9
Document tool_choice behavior in tools README
eavanvalkenburg Feb 4, 2026
7c8f911
Fix tool_choice=None behavior - don't default to 'auto'
eavanvalkenburg Feb 4, 2026
d8d6ab9
Fix tool_choice=none should not remove tools
eavanvalkenburg Feb 4, 2026
447600f
Add test for tool_choice=none preserving tools
eavanvalkenburg Feb 4, 2026
6943352
Fix tool_choice=none should not remove tools in all clients
eavanvalkenburg Feb 4, 2026
2911b27
Keep tool_choice even when tools is None
eavanvalkenburg Feb 4, 2026
cdb016f
Update test to match new parallel_tool_calls behavior
eavanvalkenburg Feb 4, 2026
a2d1b17
Fix ChatMessage API and Role enum usage after rebase
eavanvalkenburg Feb 4, 2026
8f78b71
Fix additional ChatMessage API and method name changes
eavanvalkenburg Feb 4, 2026
1449a1e
Fix remaining ChatMessage API usage in test files
eavanvalkenburg Feb 4, 2026
0f8f992
Fix more ChatMessage and Role API changes in source and test files
eavanvalkenburg Feb 4, 2026
5426f2f
Fix ChatMessage and Role API changes across packages
eavanvalkenburg Feb 4, 2026
51a51b9
Fix ChatMessage and Role API changes in github_copilot tests
eavanvalkenburg Feb 4, 2026
ba7f817
Fix ChatMessage and Role API changes in redis and github_copilot pack…
eavanvalkenburg Feb 4, 2026
c39b961
Fix ChatMessage and Role API changes in devui package
eavanvalkenburg Feb 4, 2026
2c14735
Fix ChatMessage and Role API changes in a2a and lab packages
eavanvalkenburg Feb 4, 2026
0ce2b34
Remove duplicate test files from ag-ui/tests (tests are in ag_ui_tests)
eavanvalkenburg Feb 4, 2026
b7ca460
Fix ChatMessage and Role API changes across packages
eavanvalkenburg Feb 4, 2026
6e5e9cb
Fix mypy errors for ChatMessage and Role API changes
eavanvalkenburg Feb 4, 2026
1d60c46
Improve CI test timeout configuration
eavanvalkenburg Feb 4, 2026
7074c6c
Fix ChatMessage API usage in docstrings and source
eavanvalkenburg Feb 4, 2026
8b7561a
Revert tool_choice/parallel_tool_calls changes - must be removed when…
eavanvalkenburg Feb 4, 2026
0aeb6c3
fixed issue in tests
eavanvalkenburg Feb 4, 2026
278c7a1
fix: resolve merge conflict markers in ag-ui tests
eavanvalkenburg Feb 5, 2026
791d208
fix: restructure ag-ui tests and fix Role/FinishReason to use string …
eavanvalkenburg Feb 5, 2026
1c823a8
fix: streaming function invocation and middleware termination
eavanvalkenburg Feb 5, 2026
07b699a
fix all tests command
eavanvalkenburg Feb 5, 2026
e95d8f4
Refactor integration tests to use pytest fixtures
eavanvalkenburg Feb 5, 2026
5a83cfb
Fix pytest_collection_modifyitems to only skip integration tests
eavanvalkenburg Feb 5, 2026
67d9ca5
Fix mem0 tests failing on Python 3.13
eavanvalkenburg Feb 5, 2026
49fe1e5
fix mem0
eavanvalkenburg Feb 5, 2026
d9552e0
another attempt for mem0
eavanvalkenburg Feb 5, 2026
e82a7ab
fix for mem0
eavanvalkenburg Feb 5, 2026
67ec9f5
fix mem0
eavanvalkenburg Feb 5, 2026
a86ebf6
Increase worker initialization wait time in durabletask tests
eavanvalkenburg Feb 5, 2026
5f4d3cf
Fix streaming test to use ResponseStream with finalizer
eavanvalkenburg Feb 5, 2026
2c3545a
Fix MockToolCallingAgent to use new ResponseStream API and update sam…
eavanvalkenburg Feb 5, 2026
7990238
small updates to run_stream to run
eavanvalkenburg Feb 5, 2026
e370e22
fix sub workflow
eavanvalkenburg Feb 5, 2026
6107218
temp fix for az func test
eavanvalkenburg Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
7 changes: 3 additions & 4 deletions .github/workflows/python-merge-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ jobs:
uses: ./.github/actions/azure-functions-integration-setup
id: azure-functions-setup
- name: Test with pytest
timeout-minutes: 10
run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout 900 --retries 3 --retry-delay 10
run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
working-directory: ./python
- name: Test core samples
timeout-minutes: 10
Expand Down Expand Up @@ -153,8 +152,8 @@ jobs:
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
- name: Test with pytest
timeout-minutes: 10
run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout 300 --retries 3 --retry-delay 10
timeout-minutes: 15
run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
working-directory: ./python
- name: Test Azure AI samples
timeout-minutes: 10
Expand Down
2 changes: 1 addition & 1 deletion docs/decisions/0012-python-typeddict-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,4 @@ response = await client.get_response(

Chosen option: **"Option 2: TypedDict with Generic Type Parameters"**, because it provides full type safety, excellent IDE support with autocompletion, and allows users to extend provider-specific options for their use cases. This generic was also extended to ChatAgents so that the options used in agent construction and run methods are properly typed as well.

See [typed_options.py](../../python/samples/getting_started/chat_client/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.
See [typed_options.py](../../python/samples/concepts/typed_options.py) for a complete example demonstrating the usage of typed options with custom extensions.
2 changes: 2 additions & 0 deletions python/.cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
"endregion",
"entra",
"faiss",
"finalizer",
"finalizers",
"genai",
"generativeai",
"hnsw",
Expand Down
53 changes: 53 additions & 0 deletions python/CODING_STANDARD.md
Original file line number Diff line number Diff line change
Expand Up @@ -484,3 +484,56 @@ otel_messages.append(_to_otel_message(message)) # this already serializes
message_data = message.to_dict(exclude_none=True) # and this does so again!
logger.info(message_data, extra={...})
```

## Test Organization

### Test Directory Structure

Test folders require specific organization to avoid pytest conflicts when running tests across packages:

1. **No `__init__.py` in test folders**: Test directories should NOT contain `__init__.py` files. The presence of these files can cause import conflicts when pytest collects tests across multiple packages.

2. **File naming**: Files starting with `test_` are treated as test files by pytest. Do not use this prefix for helper modules or utilities. If you need shared test utilities, put them in `conftest.py` or a file with a different name pattern (e.g., `helpers.py`, `fixtures.py`).

3. **Package-specific conftest location**: The `tests/conftest.py` path is reserved for the core package (`packages/core/tests/conftest.py`). Other packages must place their tests in a uniquely-named subdirectory:

```plaintext
# ✅ Correct structure for non-core packages
packages/devui/
├── tests/
│ └── devui/ # Unique subdirectory matching package name
│ ├── conftest.py # Package-specific fixtures
│ ├── test_server.py
│ └── test_mapper.py

packages/anthropic/
├── tests/
│ └── anthropic/ # Unique subdirectory
│ ├── conftest.py
│ └── test_client.py

# ❌ Incorrect - will conflict with core package
packages/devui/
├── tests/
│ ├── conftest.py # Conflicts when running all tests
│ ├── test_server.py
│ └── test_helpers.py # Bad name - looks like a test file

# ✅ Core package can use tests/ directly
packages/core/
├── tests/
│ ├── conftest.py # Core's conftest.py
│ ├── core/
│ │ └── test_agents.py
│ └── openai/
│ └── test_client.py
```

4. **Keep the `tests/` folder**: Even when using a subdirectory, keep the `tests/` folder at the package root. Some test discovery commands and tooling rely on this convention.

### Fixture Guidelines

- Use `conftest.py` for shared fixtures within a test directory
- Factory functions with parameters should be regular functions, not fixtures (pytest injects fixtures by name, so a test cannot pass arguments to them directly)
- Import factory functions explicitly: `from conftest import create_test_request`
- Fixtures should use simple names that describe what they provide: `mapper`, `test_request`, `mock_client`
90 changes: 69 additions & 21 deletions python/packages/a2a/agent_framework_a2a/_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import json
import re
import uuid
from collections.abc import AsyncIterable, Sequence
from typing import Any, Final, cast
from collections.abc import AsyncIterable, Awaitable, Sequence
from typing import Any, Final, Literal, cast, overload

import httpx
from a2a.client import Client, ClientConfig, ClientFactory, minimal_agent_card
Expand All @@ -32,10 +32,11 @@
BaseAgent,
ChatMessage,
Content,
ResponseStream,
normalize_messages,
prepend_agent_framework_to_user_agent,
)
from agent_framework.observability import use_agent_instrumentation
from agent_framework.observability import AgentTelemetryLayer

__all__ = ["A2AAgent"]

Expand All @@ -56,8 +57,7 @@ def _get_uri_data(uri: str) -> str:
return match.group("base64_data")


@use_agent_instrumentation
class A2AAgent(BaseAgent):
class A2AAgent(AgentTelemetryLayer, BaseAgent):
"""Agent2Agent (A2A) protocol implementation.

Wraps an A2A Client to connect the Agent Framework with external A2A-compliant agents
Expand Down Expand Up @@ -184,44 +184,92 @@ async def __aexit__(
if self._http_client is not None and self._close_http_client:
await self._http_client.aclose()

async def run(
@overload
def run(
self,
messages: str | Content | ChatMessage | Sequence[str | Content | ChatMessage] | None = None,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
stream: Literal[False] = ...,
thread: AgentThread | None = None,
**kwargs: Any,
) -> AgentResponse:
) -> Awaitable[AgentResponse[Any]]: ...

@overload
def run(
self,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
stream: Literal[True],
thread: AgentThread | None = None,
**kwargs: Any,
) -> ResponseStream[AgentResponseUpdate, AgentResponse[Any]]: ...

def run(
self,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
stream: bool = False,
thread: AgentThread | None = None,
**kwargs: Any,
) -> Awaitable[AgentResponse[Any]] | ResponseStream[AgentResponseUpdate, AgentResponse[Any]]:
"""Get a response from the agent.

This method returns the final result of the agent's execution
as a single AgentResponse object. The caller is blocked until
the final result is available.
as a single AgentResponse object when stream=False. When stream=True,
it returns a ResponseStream that yields AgentResponseUpdate objects.

Args:
messages: The message(s) to send to the agent.

Keyword Args:
stream: Whether to stream the response. Defaults to False.
thread: The conversation thread associated with the message(s).
kwargs: Additional keyword arguments.

Returns:
An agent response item.
When stream=False: An Awaitable[AgentResponse].
When stream=True: A ResponseStream of AgentResponseUpdate items.
"""
if stream:
return self._run_stream_impl(messages=messages, thread=thread, **kwargs)
return self._run_impl(messages=messages, thread=thread, **kwargs)

async def _run_impl(
self,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
thread: AgentThread | None = None,
**kwargs: Any,
) -> AgentResponse[Any]:
"""Non-streaming implementation of run."""
# Collect all updates and use framework to consolidate updates into response
updates = [update async for update in self.run_stream(messages, thread=thread, **kwargs)]
updates: list[AgentResponseUpdate] = []
async for update in self._stream_updates(messages, thread=thread, **kwargs):
updates.append(update)
return AgentResponse.from_updates(updates)

async def run_stream(
def _run_stream_impl(
self,
messages: str | Content | ChatMessage | Sequence[str | Content | ChatMessage] | None = None,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
thread: AgentThread | None = None,
**kwargs: Any,
) -> AsyncIterable[AgentResponseUpdate]:
"""Run the agent as a stream.
) -> ResponseStream[AgentResponseUpdate, AgentResponse[Any]]:
"""Streaming implementation of run."""

def _finalize(updates: Sequence[AgentResponseUpdate]) -> AgentResponse[Any]:
return AgentResponse.from_updates(list(updates))

return ResponseStream(self._stream_updates(messages, thread=thread, **kwargs), finalizer=_finalize)

This method will return the intermediate steps and final results of the
agent's execution as a stream of AgentResponseUpdate objects to the caller.
async def _stream_updates(
self,
messages: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
*,
thread: AgentThread | None = None,
**kwargs: Any,
) -> AsyncIterable[AgentResponseUpdate]:
"""Internal method to stream updates from the A2A agent.

Args:
messages: The message(s) to send to the agent.
Expand All @@ -231,10 +279,10 @@ async def run_stream(
kwargs: Additional keyword arguments.

Yields:
An agent response item.
AgentResponseUpdate items from the A2A agent.
"""
messages = normalize_messages(messages)
a2a_message = self._prepare_message_for_a2a(messages[-1])
normalized_messages = normalize_messages(messages)
a2a_message = self._prepare_message_for_a2a(normalized_messages[-1])

response_stream = self.client.send_message(a2a_message)

Expand Down
14 changes: 7 additions & 7 deletions python/packages/a2a/tests/test_a2a_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def test_prepare_message_for_a2a_with_error_content(a2a_agent: A2AAgent) -> None

# Create ChatMessage with ErrorContent
error_content = Content.from_error(message="Test error message")
message = ChatMessage("user", [error_content])
message = ChatMessage(role="user", contents=[error_content])

# Convert to A2A message
a2a_message = a2a_agent._prepare_message_for_a2a(message)
Expand All @@ -310,7 +310,7 @@ def test_prepare_message_for_a2a_with_uri_content(a2a_agent: A2AAgent) -> None:

# Create ChatMessage with UriContent
uri_content = Content.from_uri(uri="http://example.com/file.pdf", media_type="application/pdf")
message = ChatMessage("user", [uri_content])
message = ChatMessage(role="user", contents=[uri_content])

# Convert to A2A message
a2a_message = a2a_agent._prepare_message_for_a2a(message)
Expand All @@ -326,7 +326,7 @@ def test_prepare_message_for_a2a_with_data_content(a2a_agent: A2AAgent) -> None:

# Create ChatMessage with DataContent (base64 data URI)
data_content = Content.from_uri(uri="data:text/plain;base64,SGVsbG8gV29ybGQ=", media_type="text/plain")
message = ChatMessage("user", [data_content])
message = ChatMessage(role="user", contents=[data_content])

# Convert to A2A message
a2a_message = a2a_agent._prepare_message_for_a2a(message)
Expand All @@ -340,20 +340,20 @@ def test_prepare_message_for_a2a_with_data_content(a2a_agent: A2AAgent) -> None:
def test_prepare_message_for_a2a_empty_contents_raises_error(a2a_agent: A2AAgent) -> None:
"""Test _prepare_message_for_a2a with empty contents raises ValueError."""
# Create ChatMessage with no contents
message = ChatMessage("user", [])
message = ChatMessage(role="user", contents=[])

# Should raise ValueError for empty contents
with raises(ValueError, match="ChatMessage.contents is empty"):
a2a_agent._prepare_message_for_a2a(message)


async def test_run_stream_with_message_response(a2a_agent: A2AAgent, mock_a2a_client: MockA2AClient) -> None:
"""Test run_stream() method with immediate Message response."""
async def test_run_streaming_with_message_response(a2a_agent: A2AAgent, mock_a2a_client: MockA2AClient) -> None:
"""Test run(stream=True) method with immediate Message response."""
mock_a2a_client.add_message_response("msg-stream-123", "Streaming response from agent!", "agent")

# Collect streaming updates
updates: list[AgentResponseUpdate] = []
async for update in a2a_agent.run_stream("Hello agent"):
async for update in a2a_agent.run("Hello agent", stream=True):
updates.append(update)

# Verify streaming response
Expand Down
2 changes: 1 addition & 1 deletion python/packages/ag-ui/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ from agent_framework.ag_ui import AGUIChatClient
async def main():
async with AGUIChatClient(endpoint="http://localhost:8000/") as client:
# Stream responses
async for update in client.get_streaming_response("Hello!"):
async for update in client.get_response("Hello!", stream=True):
for content in update.contents:
if isinstance(content, TextContent):
print(content.text, end="", flush=True)
Expand Down
Loading