diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py index 8f581a605d..338e4c6ab7 100644 --- a/python/packages/core/agent_framework/observability.py +++ b/python/packages/core/agent_framework/observability.py @@ -203,6 +203,14 @@ class OtelAttr(str, Enum): INPUT_MESSAGES = "gen_ai.input.messages" OUTPUT_MESSAGES = "gen_ai.output.messages" SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + # Attributes previously from opentelemetry-semantic-conventions-ai SpanAttributes, + # removed in v0.4.14. Defined here for forward compatibility. + SYSTEM = "gen_ai.system" + REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" + REQUEST_TEMPERATURE = "gen_ai.request.temperature" + REQUEST_TOP_P = "gen_ai.request.top_p" + REQUEST_MODEL = "gen_ai.request.model" + RESPONSE_MODEL = "gen_ai.response.model" # Workflow attributes WORKFLOW_ID = "workflow.id" @@ -1167,7 +1175,7 @@ def get_response( # in a different async context than creation — using use_span() would # cause "Failed to detach context" errors from OpenTelemetry. operation = attributes.get(OtelAttr.OPERATION, "operation") - span_name = attributes.get(SpanAttributes.LLM_REQUEST_MODEL, "unknown") + span_name = attributes.get(OtelAttr.REQUEST_MODEL, "unknown") span = get_tracer().start_span(f"{operation} {span_name}") span.set_attributes(attributes) if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages: @@ -1229,7 +1237,7 @@ async def _finalize_stream() -> None: return wrapped_stream async def _get_response() -> ChatResponse: - with _get_span(attributes=attributes, span_name_attribute=SpanAttributes.LLM_REQUEST_MODEL) as span: + with _get_span(attributes=attributes, span_name_attribute=OtelAttr.REQUEST_MODEL) as span: if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages: _capture_messages( span=span, @@ -1539,16 +1547,16 @@ def _get_instructions_from_options(options: Any) -> str | None: OTEL_ATTR_MAP: dict[str | tuple[str, ...], tuple[str, Callable[[Any], Any] | None, bool, Any]] = { "choice_count": (OtelAttr.CHOICE_COUNT, None, False, 1), "operation_name": (OtelAttr.OPERATION, None, False, None), - "system_name": (SpanAttributes.LLM_SYSTEM, None, False, None), + "system_name": (OtelAttr.SYSTEM, None, False, None), "provider_name": (OtelAttr.PROVIDER_NAME, None, False, None), "service_url": (OtelAttr.ADDRESS, None, False, None), "conversation_id": (OtelAttr.CONVERSATION_ID, None, True, None), "seed": (OtelAttr.SEED, None, True, None), "frequency_penalty": (OtelAttr.FREQUENCY_PENALTY, None, True, None), - "max_tokens": (SpanAttributes.LLM_REQUEST_MAX_TOKENS, None, True, None), + "max_tokens": (OtelAttr.REQUEST_MAX_TOKENS, None, True, None), "stop": (OtelAttr.STOP_SEQUENCES, None, True, None), - "temperature": (SpanAttributes.LLM_REQUEST_TEMPERATURE, None, True, None), - "top_p": (SpanAttributes.LLM_REQUEST_TOP_P, None, True, None), + "temperature": (OtelAttr.REQUEST_TEMPERATURE, None, True, None), + "top_p": (OtelAttr.REQUEST_TOP_P, None, True, None), "presence_penalty": (OtelAttr.PRESENCE_PENALTY, None, True, None), "top_k": (OtelAttr.TOP_K, None, True, None), "encoding_formats": ( @@ -1561,7 +1569,7 @@ def _get_instructions_from_options(options: Any) -> str | None: "agent_name": (OtelAttr.AGENT_NAME, None, False, None), "agent_description": (OtelAttr.AGENT_DESCRIPTION, None, False, None), # Multiple source keys - checks model_id in options, then model in kwargs, then model_id in kwargs - ("model_id", "model"): (SpanAttributes.LLM_REQUEST_MODEL, None, True, None), + ("model_id", "model"): (OtelAttr.REQUEST_MODEL, None, True, None), # Tools with validation - returns None if no valid tools "tools": ( OtelAttr.TOOL_DEFINITIONS, @@ -1718,7 +1726,7 @@ def _get_response_attributes( if finish_reason: attributes[OtelAttr.FINISH_REASONS] = json.dumps([finish_reason]) if model_id := getattr(response, "model_id", None): - attributes[SpanAttributes.LLM_RESPONSE_MODEL] = model_id + attributes[OtelAttr.RESPONSE_MODEL] = model_id if capture_usage and (usage := response.usage_details): if usage.get("input_token_count"): attributes[OtelAttr.INPUT_TOKENS] = usage["input_token_count"] @@ -1730,8 +1738,8 @@ def _get_response_attributes( GEN_AI_METRIC_ATTRIBUTES = ( OtelAttr.OPERATION, OtelAttr.PROVIDER_NAME, - SpanAttributes.LLM_REQUEST_MODEL, - SpanAttributes.LLM_RESPONSE_MODEL, + OtelAttr.REQUEST_MODEL, + OtelAttr.RESPONSE_MODEL, OtelAttr.ADDRESS, OtelAttr.PORT, ) @@ -1749,10 +1757,10 @@ def _capture_response( attrs: dict[str, Any] = {k: v for k, v in attributes.items() if k in GEN_AI_METRIC_ATTRIBUTES} if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)): token_usage_histogram.record( - input_tokens, attributes={**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_INPUT} + input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT} ) if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)): - token_usage_histogram.record(output_tokens, {**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_OUTPUT}) + token_usage_histogram.record(output_tokens, {**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_OUTPUT}) if operation_duration_histogram and duration is not None: if OtelAttr.ERROR_TYPE in attributes: attrs[OtelAttr.ERROR_TYPE] = attributes[OtelAttr.ERROR_TYPE] diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index 60e0daaf2b..5d6f66491c 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -5,7 +5,14 @@ import json import logging import sys -from collections.abc import AsyncIterable, Awaitable, Callable, Mapping, MutableMapping, Sequence +from collections.abc import ( + AsyncIterable, + Awaitable, + Callable, + Mapping, + MutableMapping, + Sequence, +) from datetime import datetime, timezone from itertools import chain from typing import Any, Generic, Literal @@ -16,7 +23,9 @@ from openai.types.chat.chat_completion import ChatCompletion, Choice from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice -from openai.types.chat.chat_completion_message_custom_tool_call import ChatCompletionMessageCustomToolCall +from openai.types.chat.chat_completion_message_custom_tool_call import ( + ChatCompletionMessageCustomToolCall, +) from openai.types.chat.completion_create_params import WebSearchOptions from pydantic import BaseModel @@ -395,21 +404,18 @@ def _parse_response_update_from_openai( ) -> ChatResponseUpdate: """Parse a streaming response update from OpenAI.""" chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk) - if chunk.usage: - return ChatResponseUpdate( - role="assistant", - contents=[ - Content.from_usage( - usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk - ) - ], - model_id=chunk.model, - additional_properties=chunk_metadata, - response_id=chunk.id, - message_id=chunk.id, - ) contents: list[Content] = [] finish_reason: FinishReason | None = None + + # Process usage data (may coexist with text/tool content in providers like Gemini). + # See https://github.com/microsoft/agent-framework/issues/3434 + if chunk.usage: + contents.append( + Content.from_usage( + usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk + ) + ) + for choice in chunk.choices: chunk_metadata.update(self._get_metadata_from_chat_choice(choice)) contents.extend(self._parse_tool_calls_from_openai(choice)) @@ -532,6 +538,17 @@ def _prepare_messages_for_openai( def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: """Prepare a chat message for OpenAI.""" + # System/developer messages must use plain string content because some + # OpenAI-compatible endpoints reject list content for non-user roles. + if message.role in ("system", "developer"): + texts = [content.text for content in message.contents if content.type == "text" and content.text] + if texts: + sys_args: dict[str, Any] = {"role": message.role, "content": "\n".join(texts)} + if message.author_name: + sys_args["name"] = message.author_name + return [sys_args] + return [] + all_messages: list[dict[str, Any]] = [] for content in message.contents: # Skip approval content - it's internal framework state, not for the LLM @@ -568,6 +585,15 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: args["content"].append(self._prepare_content_for_openai(content)) # type: ignore if "content" in args or "tool_calls" in args: all_messages.append(args) + + # Flatten text-only content lists to plain strings for broader + # compatibility with OpenAI-like endpoints (e.g. Foundry Local). + # See https://github.com/microsoft/agent-framework/issues/4084 + for msg in all_messages: + msg_content: Any = msg.get("content") + if isinstance(msg_content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in msg_content): + msg["content"] = "\n".join(c.get("text", "") for c in msg_content) + return all_messages def _prepare_content_for_openai(self, content: Content) -> dict[str, Any]: diff --git a/python/packages/core/tests/core/test_observability.py b/python/packages/core/tests/core/test_observability.py index fccaf2f9f1..0e81b7580c 100644 --- a/python/packages/core/tests/core/test_observability.py +++ b/python/packages/core/tests/core/test_observability.py @@ -7,7 +7,6 @@ import pytest from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from opentelemetry.semconv_ai import SpanAttributes from opentelemetry.trace import StatusCode from agent_framework import ( @@ -48,8 +47,8 @@ def test_role_event_map(): def test_enum_values(): """Test that OtelAttr enum has expected values.""" assert OtelAttr.OPERATION == "gen_ai.operation.name" - assert SpanAttributes.LLM_SYSTEM == "gen_ai.system" - assert SpanAttributes.LLM_REQUEST_MODEL == "gen_ai.request.model" + assert OtelAttr.SYSTEM == "gen_ai.system" + assert OtelAttr.REQUEST_MODEL == "gen_ai.request.model" assert OtelAttr.CHAT_COMPLETION_OPERATION == "chat" assert OtelAttr.TOOL_EXECUTION_OPERATION == "execute_tool" assert OtelAttr.AGENT_INVOKE_OPERATION == "invoke_agent" @@ -213,7 +212,7 @@ async def test_chat_client_observability(mock_chat_client, span_exporter: InMemo span = spans[0] assert span.name == "chat Test" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test" assert span.attributes[OtelAttr.INPUT_TOKENS] == 10 assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 20 if enable_sensitive_data: @@ -243,7 +242,7 @@ async def test_chat_client_streaming_observability( span = spans[0] assert span.name == "chat Test" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test" if enable_sensitive_data: assert span.attributes[OtelAttr.INPUT_MESSAGES] is not None assert span.attributes[OtelAttr.OUTPUT_MESSAGES] is not None @@ -392,7 +391,7 @@ async def test_chat_client_without_model_id_observability(mock_chat_client, span assert span.name == "chat unknown" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown" async def test_chat_client_streaming_without_model_id_observability( @@ -416,7 +415,7 @@ async def test_chat_client_streaming_without_model_id_observability( span = spans[0] assert span.name == "chat unknown" assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown" def test_prepend_user_agent_with_none_value(): @@ -491,7 +490,7 @@ async def test_agent_instrumentation_enabled( assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id" assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent" assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description" - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel" assert span.attributes[OtelAttr.INPUT_TOKENS] == 15 assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 25 if enable_sensitive_data: @@ -521,7 +520,7 @@ async def test_agent_streaming_response_with_diagnostics_enabled( assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id" assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent" assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description" - assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel" + assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel" if enable_sensitive_data: assert span.attributes.get(OtelAttr.OUTPUT_MESSAGES) is not None # Streaming, so no usage yet @@ -1381,8 +1380,6 @@ def test_get_response_attributes_with_model_id(): """Test _get_response_attributes includes model_id.""" from unittest.mock import Mock - from opentelemetry.semconv_ai import SpanAttributes - from agent_framework.observability import _get_response_attributes response = Mock() @@ -1395,7 +1392,7 @@ def test_get_response_attributes_with_model_id(): attrs = {} result = _get_response_attributes(attrs, response) - assert result[SpanAttributes.LLM_RESPONSE_MODEL] == "gpt-4" + assert result[OtelAttr.RESPONSE_MODEL] == "gpt-4" def test_get_response_attributes_with_usage(): diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py index d2d027fcb1..8aa2c1f890 100644 --- a/python/packages/core/tests/openai/test_openai_chat_client.py +++ b/python/packages/core/tests/openai/test_openai_chat_client.py @@ -642,9 +642,8 @@ def test_prepare_message_with_text_reasoning_content(openai_unit_test_env: dict[ assert len(prepared) == 1 assert "reasoning_details" in prepared[0] assert prepared[0]["reasoning_details"] == mock_reasoning_data - # Should also have the text content - assert prepared[0]["content"][0]["type"] == "text" - assert prepared[0]["content"][0]["text"] == "The answer is 42." + # Should also have the text content (flattened to string for text-only) + assert prepared[0]["content"] == "The answer is 42." def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_env: dict[str, str]) -> None: @@ -690,8 +689,7 @@ def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_en ) prepared_mixed = client._prepare_message_for_openai(mixed_message) assert len(prepared_mixed) == 1 # Only text content should remain - assert prepared_mixed[0]["content"][0]["type"] == "text" - assert prepared_mixed[0]["content"][0]["text"] == "I need approval for this action." + assert prepared_mixed[0]["content"] == "I need approval for this action." def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str]) -> None: @@ -730,6 +728,43 @@ def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str assert usage_content.usage_details["total_token_count"] == 150 +def test_streaming_chunk_with_usage_and_text(openai_unit_test_env: dict[str, str]) -> None: + """Test that text content is not lost when usage data is in the same chunk. + + Some providers (e.g. Gemini) include both usage and text content in the + same streaming chunk. See https://github.com/microsoft/agent-framework/issues/3434 + """ + from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta + from openai.types.completion_usage import CompletionUsage + + client = OpenAIChatClient() + + mock_chunk = ChatCompletionChunk( + id="test-chunk", + object="chat.completion.chunk", + created=1234567890, + model="gemini-2.0-flash-lite", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content="Hello world", role="assistant"), + finish_reason=None, + ) + ], + usage=CompletionUsage(prompt_tokens=18, completion_tokens=5, total_tokens=23), + ) + + update = client._parse_response_update_from_openai(mock_chunk) + + # Should have BOTH text and usage content + content_types = [c.type for c in update.contents] + assert "text" in content_types, "Text content should not be lost when usage is present" + assert "usage" in content_types, "Usage content should still be present" + + text_content = next(c for c in update.contents if c.type == "text") + assert text_content.text == "Hello world" + + def test_parse_text_with_refusal(openai_unit_test_env: dict[str, str]) -> None: """Test that refusal content is parsed correctly.""" from openai.types.chat.chat_completion import ChatCompletion, Choice @@ -814,7 +849,7 @@ def test_prepare_options_with_instructions(openai_unit_test_env: dict[str, str]) assert "messages" in prepared_options assert len(prepared_options["messages"]) == 2 assert prepared_options["messages"][0]["role"] == "system" - assert prepared_options["messages"][0]["content"][0]["text"] == "You are a helpful assistant." + assert prepared_options["messages"][0]["content"] == "You are a helpful assistant." def test_prepare_message_with_author_name(openai_unit_test_env: dict[str, str]) -> None: @@ -851,6 +886,109 @@ def test_prepare_message_with_tool_result_author_name(openai_unit_test_env: dict assert "name" not in prepared[0] +def test_prepare_system_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that system message content is a plain string, not a list. + + Some OpenAI-compatible endpoints (e.g. NVIDIA NIM) reject system messages + with list content. See https://github.com/microsoft/agent-framework/issues/1407 + """ + client = OpenAIChatClient() + + message = Message(role="system", contents=[Content.from_text(text="You are a helpful assistant.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "system" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "You are a helpful assistant." + + +def test_prepare_developer_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that developer message content is a plain string, not a list.""" + client = OpenAIChatClient() + + message = Message(role="developer", contents=[Content.from_text(text="Follow these rules.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "developer" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Follow these rules." + + +def test_prepare_system_message_multiple_text_contents_joined(openai_unit_test_env: dict[str, str]) -> None: + """Test that system messages with multiple text contents are joined into a single string.""" + client = OpenAIChatClient() + + message = Message( + role="system", + contents=[ + Content.from_text(text="You are a helpful assistant."), + Content.from_text(text="Be concise."), + ], + ) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "system" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "You are a helpful assistant.\nBe concise." + + +def test_prepare_user_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that text-only user message content is flattened to a plain string. + + Some OpenAI-compatible endpoints (e.g. Foundry Local) cannot deserialize + the list format. See https://github.com/microsoft/agent-framework/issues/4084 + """ + client = OpenAIChatClient() + + message = Message(role="user", contents=[Content.from_text(text="Hello")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "user" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Hello" + + +def test_prepare_user_message_multimodal_content_remains_list(openai_unit_test_env: dict[str, str]) -> None: + """Test that multimodal user message content remains a list.""" + client = OpenAIChatClient() + + message = Message( + role="user", + contents=[ + Content.from_text(text="What's in this image?"), + Content.from_uri(uri="https://example.com/image.png", media_type="image/png"), + ], + ) + + prepared = client._prepare_message_for_openai(message) + + # Multimodal content must stay as list for the API + has_list_content = any(isinstance(m.get("content"), list) for m in prepared) + assert has_list_content + + +def test_prepare_assistant_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: + """Test that text-only assistant message content is flattened to a plain string.""" + client = OpenAIChatClient() + + message = Message(role="assistant", contents=[Content.from_text(text="Sure, I can help.")]) + + prepared = client._prepare_message_for_openai(message) + + assert len(prepared) == 1 + assert prepared[0]["role"] == "assistant" + assert isinstance(prepared[0]["content"], str) + assert prepared[0]["content"] == "Sure, I can help." + + def test_tool_choice_required_with_function_name(openai_unit_test_env: dict[str, str]) -> None: """Test that tool_choice with required mode and function name is correctly prepared.""" client = OpenAIChatClient()