From 1243b77eac1dd773dd2edfbbb8144f1390f13a70 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 10:03:02 +0100 Subject: [PATCH 1/5] Fix system message content sent as list instead of string Some OpenAI-compatible endpoints (e.g. NVIDIA NIM) reject system messages when content is a list of content parts. This change flattens system and developer message content to a plain string in the Chat Completions client. Fixes https://github.com/microsoft/agent-framework/issues/1407 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/openai/_chat_client.py | 24 ++++++- .../tests/openai/test_openai_chat_client.py | 67 ++++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index 60e0daaf2b..4ee9abb181 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -5,7 +5,14 @@ import json import logging import sys -from collections.abc import AsyncIterable, Awaitable, Callable, Mapping, MutableMapping, Sequence +from collections.abc import ( + AsyncIterable, + Awaitable, + Callable, + Mapping, + MutableMapping, + Sequence, +) from datetime import datetime, timezone from itertools import chain from typing import Any, Generic, Literal @@ -16,7 +23,9 @@ from openai.types.chat.chat_completion import ChatCompletion, Choice from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice -from openai.types.chat.chat_completion_message_custom_tool_call import ChatCompletionMessageCustomToolCall +from openai.types.chat.chat_completion_message_custom_tool_call import ( + ChatCompletionMessageCustomToolCall, +) from openai.types.chat.completion_create_params import WebSearchOptions from pydantic import BaseModel @@ -532,6 +541,17 @@ def _prepare_messages_for_openai( def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: """Prepare a chat message for OpenAI.""" + # System/developer messages must use plain string content because some + # OpenAI-compatible endpoints reject list content for non-user roles. + if message.role in ("system", "developer"): + texts = [content.text for content in message.contents if content.type == "text" and content.text] + if texts: + args: dict[str, Any] = {"role": message.role, "content": "\n".join(texts)} + if message.author_name: + args["name"] = message.author_name + return [args] + return [] + all_messages: list[dict[str, Any]] = [] for content in message.contents: # Skip approval content - it's internal framework state, not for the LLM diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py index d2d027fcb1..9b6baadaea 100644 --- a/python/packages/core/tests/openai/test_openai_chat_client.py +++ b/python/packages/core/tests/openai/test_openai_chat_client.py @@ -814,7 +814,7 @@ def test_prepare_options_with_instructions(openai_unit_test_env: dict[str, str]) assert "messages" in prepared_options assert len(prepared_options["messages"]) == 2 assert prepared_options["messages"][0]["role"] == "system" - assert prepared_options["messages"][0]["content"][0]["text"] == "You are a helpful assistant." + assert prepared_options["messages"][0]["content"] == "You are a helpful assistant." def test_prepare_message_with_author_name(openai_unit_test_env: dict[str, str]) -> None: @@ -851,6 +851,71 @@ def test_prepare_message_with_tool_result_author_name(openai_unit_test_env: dict assert "name" not in prepared[0] +def test_prepare_system_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that system message content is a plain string, not a list. + + Some OpenAI-compatible endpoints (e.g. NVIDIA NIM) reject system messages + with list content. See https://github.com/microsoft/agent-framework/issues/1407 + """ + client = OpenAIChatClient() + + message = Message(role="system", contents=[Content.from_text(text="You are a helpful assistant.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "system" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "You are a helpful assistant." + + +def test_prepare_developer_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that developer message content is a plain string, not a list.""" + client = OpenAIChatClient() + + message = Message(role="developer", contents=[Content.from_text(text="Follow these rules.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "developer" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Follow these rules." + + +def test_prepare_system_message_multiple_text_contents_joined(openai_unit_test_env: dict[str, str]) -> None: + """Test that system messages with multiple text contents are joined into a single string.""" + client = OpenAIChatClient() + + message = Message( + role="system", + contents=[ + Content.from_text(text="You are a helpful assistant."), + Content.from_text(text="Be concise."), + ], + ) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "system" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "You are a helpful assistant.\nBe concise." + + +def test_prepare_user_message_content_remains_list(openai_unit_test_env: dict[str, str]) -> None: + """Test that user message content remains a list to support multimodal content.""" + client = OpenAIChatClient() + + message = Message(role="user", contents=[Content.from_text(text="Hello")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "user" + assert isinstance(prepared[0]["content"], list) + + def test_tool_choice_required_with_function_name(openai_unit_test_env: dict[str, str]) -> None: """Test that tool_choice with required mode and function name is correctly prepared.""" client = OpenAIChatClient() From 2706e1b407b082922ac684656ef358575da29cf0 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 10:11:59 +0100 Subject: [PATCH 2/5] Fix compatibility with opentelemetry-semantic-conventions-ai 0.4.14 Version 0.4.14 removed several LLM_* attributes from SpanAttributes (LLM_SYSTEM, LLM_REQUEST_MODEL, LLM_RESPONSE_MODEL, LLM_REQUEST_MAX_TOKENS, LLM_REQUEST_TEMPERATURE, LLM_REQUEST_TOP_P, LLM_TOKEN_TYPE). Move these to the OtelAttr enum with their well-known gen_ai.* string values and update all references in observability.py and tests. Fixes https://github.com/microsoft/agent-framework/issues/4160 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../core/agent_framework/observability.py | 32 ++++++++++++------- .../core/tests/core/test_observability.py | 21 ++++++------ 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py index 8f581a605d..338e4c6ab7 100644 --- a/python/packages/core/agent_framework/observability.py +++ b/python/packages/core/agent_framework/observability.py @@ -203,6 +203,14 @@ class OtelAttr(str, Enum): INPUT_MESSAGES = "gen_ai.input.messages" OUTPUT_MESSAGES = "gen_ai.output.messages" SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + # Attributes previously from opentelemetry-semantic-conventions-ai SpanAttributes, + # removed in v0.4.14. Defined here for forward compatibility. + SYSTEM = "gen_ai.system" + REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" + REQUEST_TEMPERATURE = "gen_ai.request.temperature" + REQUEST_TOP_P = "gen_ai.request.top_p" + REQUEST_MODEL = "gen_ai.request.model" + RESPONSE_MODEL = "gen_ai.response.model" # Workflow attributes WORKFLOW_ID = "workflow.id" @@ -1167,7 +1175,7 @@ def get_response( # in a different async context than creation — using use_span() would # cause "Failed to detach context" errors from OpenTelemetry. operation = attributes.get(OtelAttr.OPERATION, "operation") - span_name = attributes.get(SpanAttributes.LLM_REQUEST_MODEL, "unknown") + span_name = attributes.get(OtelAttr.REQUEST_MODEL, "unknown") span = get_tracer().start_span(f"{operation} {span_name}") span.set_attributes(attributes) if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages: @@ -1229,7 +1237,7 @@ async def _finalize_stream() -> None: return wrapped_stream async def _get_response() -> ChatResponse: - with _get_span(attributes=attributes, span_name_attribute=SpanAttributes.LLM_REQUEST_MODEL) as span: + with _get_span(attributes=attributes, span_name_attribute=OtelAttr.REQUEST_MODEL) as span: if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages: _capture_messages( span=span, @@ -1539,16 +1547,16 @@ def _get_instructions_from_options(options: Any) -> str | None: OTEL_ATTR_MAP: dict[str | tuple[str, ...], tuple[str, Callable[[Any], Any] | None, bool, Any]] = { "choice_count": (OtelAttr.CHOICE_COUNT, None, False, 1), "operation_name": (OtelAttr.OPERATION, None, False, None), - "system_name": (SpanAttributes.LLM_SYSTEM, None, False, None), + "system_name": (OtelAttr.SYSTEM, None, False, None), "provider_name": (OtelAttr.PROVIDER_NAME, None, False, None), "service_url": (OtelAttr.ADDRESS, None, False, None), "conversation_id": (OtelAttr.CONVERSATION_ID, None, True, None), "seed": (OtelAttr.SEED, None, True, None), "frequency_penalty": (OtelAttr.FREQUENCY_PENALTY, None, True, None), - "max_tokens": (SpanAttributes.LLM_REQUEST_MAX_TOKENS, None, True, None), + "max_tokens": (OtelAttr.REQUEST_MAX_TOKENS, None, True, None), "stop": (OtelAttr.STOP_SEQUENCES, None, True, None), - "temperature": (SpanAttributes.LLM_REQUEST_TEMPERATURE, None, True, None), - "top_p": (SpanAttributes.LLM_REQUEST_TOP_P, None, True, None), + "temperature": (OtelAttr.REQUEST_TEMPERATURE, None, True, None), + "top_p": (OtelAttr.REQUEST_TOP_P, None, True, None), "presence_penalty": (OtelAttr.PRESENCE_PENALTY, None, True, None), "top_k": (OtelAttr.TOP_K, None, True, None), "encoding_formats": ( @@ -1561,7 +1569,7 @@ def _get_instructions_from_options(options: Any) -> str | None: "agent_name": (OtelAttr.AGENT_NAME, None, False, None), "agent_description": (OtelAttr.AGENT_DESCRIPTION, None, False, None), # Multiple source keys - checks model_id in options, then model in kwargs, then model_id in kwargs - ("model_id", "model"): (SpanAttributes.LLM_REQUEST_MODEL, None, True, None), + ("model_id", "model"): (OtelAttr.REQUEST_MODEL, None, True, None), # Tools with validation - returns None if no valid tools "tools": ( OtelAttr.TOOL_DEFINITIONS, @@ -1718,7 +1726,7 @@ def _get_response_attributes( if finish_reason: attributes[OtelAttr.FINISH_REASONS] = json.dumps([finish_reason]) if model_id := getattr(response, "model_id", None): - attributes[SpanAttributes.LLM_RESPONSE_MODEL] = model_id + attributes[OtelAttr.RESPONSE_MODEL] = model_id if capture_usage and (usage := response.usage_details): if usage.get("input_token_count"): attributes[OtelAttr.INPUT_TOKENS] = usage["input_token_count"] @@ -1730,8 +1738,8 @@ def _get_response_attributes( GEN_AI_METRIC_ATTRIBUTES = ( OtelAttr.OPERATION, OtelAttr.PROVIDER_NAME, - SpanAttributes.LLM_REQUEST_MODEL, - SpanAttributes.LLM_RESPONSE_MODEL, + OtelAttr.REQUEST_MODEL, + OtelAttr.RESPONSE_MODEL, OtelAttr.ADDRESS, OtelAttr.PORT, ) @@ -1749,10 +1757,10 @@ def _capture_response( attrs: dict[str, Any] = {k: v for k, v in attributes.items() if k in GEN_AI_METRIC_ATTRIBUTES} if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)): token_usage_histogram.record( - input_tokens, attributes={**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_INPUT} + input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT} ) if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)): - token_usage_histogram.record(output_tokens, {**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_OUTPUT}) + token_usage_histogram.record(output_tokens, {**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_OUTPUT}) if operation_duration_histogram and duration is not None: if OtelAttr.ERROR_TYPE in attributes: attrs[OtelAttr.ERROR_TYPE] = attributes[OtelAttr.ERROR_TYPE] diff --git a/python/packages/core/tests/core/test_observability.py b/python/packages/core/tests/core/test_observability.py index fccaf2f9f1..0e81b7580c 100644 --- a/python/packages/core/tests/core/test_observability.py +++ b/python/packages/core/tests/core/test_observability.py @@ -7,7 +7,6 @@ import pytest from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from opentelemetry.semconv_ai import SpanAttributes from opentelemetry.trace import StatusCode from agent_framework import ( @@ -48,8 +47,8 @@ def test_role_event_map(): def test_enum_values(): """Test that OtelAttr enum has expected values.""" assert OtelAttr.OPERATION == "gen_ai.operation.name" - assert SpanAttributes.LLM_SYSTEM == "gen_ai.system" - assert SpanAttributes.LLM_REQUEST_MODEL == "gen_ai.request.model" + assert OtelAttr.SYSTEM == "gen_ai.system" + assert OtelAttr.REQUEST_MODEL == "gen_ai.request.model" assert OtelAttr.CHAT_COMPLETION_OPERATION == "chat" assert OtelAttr.TOOL_EXECUTION_OPERATION == "execute_tool" assert OtelAttr.AGENT_INVOKE_OPERATION == "invoke_agent" @@ -213,7 +212,7 @@ async def test_chat_client_observability(mock_chat_client, span_exporter: InMemo span = spans[0] assert span.name == "chat Test" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test" assert span.attributes[OtelAttr.INPUT_TOKENS] == 10 assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 20 if enable_sensitive_data: @@ -243,7 +242,7 @@ async def test_chat_client_streaming_observability( span = spans[0] assert span.name == "chat Test" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test" if enable_sensitive_data: assert span.attributes[OtelAttr.INPUT_MESSAGES] is not None assert span.attributes[OtelAttr.OUTPUT_MESSAGES] is not None @@ -392,7 +391,7 @@ async def test_chat_client_without_model_id_observability(mock_chat_client, span assert span.name == "chat unknown" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown" async def test_chat_client_streaming_without_model_id_observability( @@ -416,7 +415,7 @@ async def test_chat_client_streaming_without_model_id_observability( span = spans[0] assert span.name == "chat unknown" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown" def test_prepend_user_agent_with_none_value(): @@ -491,7 +490,7 @@ async def test_agent_instrumentation_enabled( assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id" assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent" assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description" - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel" assert span.attributes[OtelAttr.INPUT_TOKENS] == 15 assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 25 if enable_sensitive_data: @@ -521,7 +520,7 @@ async def test_agent_streaming_response_with_diagnostics_enabled( assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id" assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent" assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description" - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel" if enable_sensitive_data: assert span.attributes.get(OtelAttr.OUTPUT_MESSAGES) is not None # Streaming, so no usage yet @@ -1381,8 +1380,6 @@ def test_get_response_attributes_with_model_id(): """Test _get_response_attributes includes model_id.""" from unittest.mock import Mock - from opentelemetry.semconv_ai import SpanAttributes - from agent_framework.observability import _get_response_attributes response = Mock() @@ -1395,7 +1392,7 @@ def test_get_response_attributes_with_model_id(): attrs = {} result = _get_response_attributes(attrs, response) - assert result[SpanAttributes.LLM_RESPONSE_MODEL] == "gpt-4" + assert result[OtelAttr.RESPONSE_MODEL] == "gpt-4" def test_get_response_attributes_with_usage(): From 5810f656a0cb27c9ff00931f3d8c3344f5ff544d Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 10:48:15 +0100 Subject: [PATCH 3/5] Flatten text-only message content to string for all roles Extend the system/developer fix to all message roles. Text-only content lists are now post-processed into plain strings, while multimodal content (text + images/audio) remains as a list. This fixes compatibility with OpenAI-like endpoints that cannot deserialize list content (e.g. Foundry Local's Neutron backend). Partially fixes https://github.com/microsoft/agent-framework/issues/4084 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/openai/_chat_client.py | 9 ++++ .../tests/openai/test_openai_chat_client.py | 52 ++++++++++++++++--- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index 4ee9abb181..a947e4c01d 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -588,6 +588,15 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: args["content"].append(self._prepare_content_for_openai(content)) # type: ignore if "content" in args or "tool_calls" in args: all_messages.append(args) + + # Flatten text-only content lists to plain strings for broader + # compatibility with OpenAI-like endpoints (e.g. Foundry Local). + # See https://github.com/microsoft/agent-framework/issues/4084 + for msg in all_messages: + content = msg.get("content") + if isinstance(content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in content): + msg["content"] = "\n".join(c.get("text", "") for c in content) + return all_messages def _prepare_content_for_openai(self, content: Content) -> dict[str, Any]: diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py index 9b6baadaea..d2ed4b0a74 100644 --- a/python/packages/core/tests/openai/test_openai_chat_client.py +++ b/python/packages/core/tests/openai/test_openai_chat_client.py @@ -642,9 +642,8 @@ def test_prepare_message_with_text_reasoning_content(openai_unit_test_env: dict[ assert len(prepared) == 1 assert "reasoning_details" in prepared[0] assert prepared[0]["reasoning_details"] == mock_reasoning_data - # Should also have the text content - assert prepared[0]["content"][0]["type"] == "text" - assert prepared[0]["content"][0]["text"] == "The answer is 42." + # Should also have the text content (flattened to string for text-only) + assert prepared[0]["content"] == "The answer is 42." def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_env: dict[str, str]) -> None: @@ -690,8 +689,7 @@ def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_en ) prepared_mixed = client._prepare_message_for_openai(mixed_message) assert len(prepared_mixed) == 1 # Only text content should remain - assert prepared_mixed[0]["content"][0]["type"] == "text" - assert prepared_mixed[0]["content"][0]["text"] == "I need approval for this action." + assert prepared_mixed[0]["content"] == "I need approval for this action." def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str]) -> None: @@ -903,8 +901,12 @@ def test_prepare_system_message_multiple_text_contents_joined(openai_unit_test_e assert prepared[0]["content"] == "You are a helpful assistant.\nBe concise." -def test_prepare_user_message_content_remains_list(openai_unit_test_env: dict[str, str]) -> None: - """Test that user message content remains a list to support multimodal content.""" +def test_prepare_user_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that text-only user message content is flattened to a plain string. + + Some OpenAI-compatible endpoints (e.g. Foundry Local) cannot deserialize + the list format. See https://github.com/microsoft/agent-framework/issues/4084 + """ client = OpenAIChatClient() message = Message(role="user", contents=[Content.from_text(text="Hello")]) @@ -913,7 +915,41 @@ def test_prepare_user_message_content_remains_list(openai_unit_test_env: dict[st assert len(prepared) == 1 assert prepared[0]["role"] == "user" - assert isinstance(prepared[0]["content"], list) + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Hello" + + +def test_prepare_user_message_multimodal_content_remains_list(openai_unit_test_env: dict[str, str]) -> None: + """Test that multimodal user message content remains a list.""" + client = OpenAIChatClient() + + message = Message( + role="user", + contents=[ + Content.from_text(text="What's in this image?"), + Content.from_uri(uri="https://example.com/image.png", media_type="image/png"), + ], + ) + + prepared = client._prepare_message_for_openai(message) + + # Multimodal content must stay as list for the API + has_list_content = any(isinstance(m.get("content"), list) for m in prepared) + assert has_list_content + + +def test_prepare_assistant_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that text-only assistant message content is flattened to a plain string.""" + client = OpenAIChatClient() + + message = Message(role="assistant", contents=[Content.from_text(text="Sure, I can help.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "assistant" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Sure, I can help." def test_tool_choice_required_with_function_name(openai_unit_test_env: dict[str, str]) -> None: From 0788805d2d401bf36d071827f448dfb3cc0108aa Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 10:50:23 +0100 Subject: [PATCH 4/5] Fix streaming text lost when usage data in same chunk Some providers (e.g. Gemini) include both usage data and text content in the same streaming chunk. The early return on chunk.usage caused text and tool call parsing to be skipped entirely. Remove the early return and process usage alongside text/tool calls. Fixes https://github.com/microsoft/agent-framework/issues/3434 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/openai/_chat_client.py | 23 +++++------- .../tests/openai/test_openai_chat_client.py | 37 +++++++++++++++++++ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index a947e4c01d..7fdaf0ddb6 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -404,21 +404,18 @@ def _parse_response_update_from_openai( ) -> ChatResponseUpdate: """Parse a streaming response update from OpenAI.""" chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk) - if chunk.usage: - return ChatResponseUpdate( - role="assistant", - contents=[ - Content.from_usage( - usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk - ) - ], - model_id=chunk.model, - additional_properties=chunk_metadata, - response_id=chunk.id, - message_id=chunk.id, - ) contents: list[Content] = [] finish_reason: FinishReason | None = None + + # Process usage data (may coexist with text/tool content in providers like Gemini). + # See https://github.com/microsoft/agent-framework/issues/3434 + if chunk.usage: + contents.append( + Content.from_usage( + usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk + ) + ) + for choice in chunk.choices: chunk_metadata.update(self._get_metadata_from_chat_choice(choice)) contents.extend(self._parse_tool_calls_from_openai(choice)) diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py index d2ed4b0a74..8aa2c1f890 100644 --- a/python/packages/core/tests/openai/test_openai_chat_client.py +++ b/python/packages/core/tests/openai/test_openai_chat_client.py @@ -728,6 +728,43 @@ def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str assert usage_content.usage_details["total_token_count"] == 150 +def test_streaming_chunk_with_usage_and_text(openai_unit_test_env: dict[str, str]) -> None: + """Test that text content is not lost when usage data is in the same chunk. + + Some providers (e.g. Gemini) include both usage and text content in the + same streaming chunk. See https://github.com/microsoft/agent-framework/issues/3434 + """ + from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta + from openai.types.completion_usage import CompletionUsage + + client = OpenAIChatClient() + + mock_chunk = ChatCompletionChunk( + id="test-chunk", + object="chat.completion.chunk", + created=1234567890, + model="gemini-2.0-flash-lite", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content="Hello world", role="assistant"), + finish_reason=None, + ) + ], + usage=CompletionUsage(prompt_tokens=18, completion_tokens=5, total_tokens=23), + ) + + update = client._parse_response_update_from_openai(mock_chunk) + + # Should have BOTH text and usage content + content_types = [c.type for c in update.contents] + assert "text" in content_types, "Text content should not be lost when usage is present" + assert "usage" in content_types, "Usage content should still be present" + + text_content = next(c for c in update.contents if c.type == "text") + assert text_content.text == "Hello world" + + def test_parse_text_with_refusal(openai_unit_test_env: dict[str, str]) -> None: """Test that refusal content is parsed correctly.""" from openai.types.chat.chat_completion import ChatCompletion, Choice From 0c8b612e227db6622eb698439e8ffbd2bad3245a Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 10:56:54 +0100 Subject: [PATCH 5/5] Fix mypy errors in _chat_client.py Rename shadowed variable 'args' in system/developer branch to 'sys_args' and rename loop variable 'content' to 'msg_content' to avoid type conflict. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../core/agent_framework/openai/_chat_client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index 7fdaf0ddb6..5d6f66491c 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -543,10 +543,10 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: if message.role in ("system", "developer"): texts = [content.text for content in message.contents if content.type == "text" and content.text] if texts: - args: dict[str, Any] = {"role": message.role, "content": "\n".join(texts)} + sys_args: dict[str, Any] = {"role": message.role, "content": "\n".join(texts)} if message.author_name: - args["name"] = message.author_name - return [args] + sys_args["name"] = message.author_name + return [sys_args] return [] all_messages: list[dict[str, Any]] = [] @@ -590,9 +590,9 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: # compatibility with OpenAI-like endpoints (e.g. Foundry Local). # See https://github.com/microsoft/agent-framework/issues/4084 for msg in all_messages: - content = msg.get("content") - if isinstance(content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in content): - msg["content"] = "\n".join(c.get("text", "") for c in content) + msg_content: Any = msg.get("content") + if isinstance(msg_content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in msg_content): + msg["content"] = "\n".join(c.get("text", "") for c in msg_content) return all_messages