Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions python/packages/core/agent_framework/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ class OtelAttr(str, Enum):
INPUT_MESSAGES = "gen_ai.input.messages"
OUTPUT_MESSAGES = "gen_ai.output.messages"
SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
# Attributes previously provided by SpanAttributes in opentelemetry-semantic-conventions-ai,
# which removed them in v0.4.14. Defined here for forward compatibility.
SYSTEM = "gen_ai.system"
REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
REQUEST_TEMPERATURE = "gen_ai.request.temperature"
REQUEST_TOP_P = "gen_ai.request.top_p"
REQUEST_MODEL = "gen_ai.request.model"
RESPONSE_MODEL = "gen_ai.response.model"

# Workflow attributes
WORKFLOW_ID = "workflow.id"
Expand Down Expand Up @@ -1167,7 +1175,7 @@ def get_response(
# in a different async context than creation — using use_span() would
# cause "Failed to detach context" errors from OpenTelemetry.
operation = attributes.get(OtelAttr.OPERATION, "operation")
span_name = attributes.get(SpanAttributes.LLM_REQUEST_MODEL, "unknown")
span_name = attributes.get(OtelAttr.REQUEST_MODEL, "unknown")
span = get_tracer().start_span(f"{operation} {span_name}")
span.set_attributes(attributes)
if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages:
Expand Down Expand Up @@ -1229,7 +1237,7 @@ async def _finalize_stream() -> None:
return wrapped_stream

async def _get_response() -> ChatResponse:
with _get_span(attributes=attributes, span_name_attribute=SpanAttributes.LLM_REQUEST_MODEL) as span:
with _get_span(attributes=attributes, span_name_attribute=OtelAttr.REQUEST_MODEL) as span:
if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages:
_capture_messages(
span=span,
Expand Down Expand Up @@ -1539,16 +1547,16 @@ def _get_instructions_from_options(options: Any) -> str | None:
OTEL_ATTR_MAP: dict[str | tuple[str, ...], tuple[str, Callable[[Any], Any] | None, bool, Any]] = {
"choice_count": (OtelAttr.CHOICE_COUNT, None, False, 1),
"operation_name": (OtelAttr.OPERATION, None, False, None),
"system_name": (SpanAttributes.LLM_SYSTEM, None, False, None),
"system_name": (OtelAttr.SYSTEM, None, False, None),
"provider_name": (OtelAttr.PROVIDER_NAME, None, False, None),
"service_url": (OtelAttr.ADDRESS, None, False, None),
"conversation_id": (OtelAttr.CONVERSATION_ID, None, True, None),
"seed": (OtelAttr.SEED, None, True, None),
"frequency_penalty": (OtelAttr.FREQUENCY_PENALTY, None, True, None),
"max_tokens": (SpanAttributes.LLM_REQUEST_MAX_TOKENS, None, True, None),
"max_tokens": (OtelAttr.REQUEST_MAX_TOKENS, None, True, None),
"stop": (OtelAttr.STOP_SEQUENCES, None, True, None),
"temperature": (SpanAttributes.LLM_REQUEST_TEMPERATURE, None, True, None),
"top_p": (SpanAttributes.LLM_REQUEST_TOP_P, None, True, None),
"temperature": (OtelAttr.REQUEST_TEMPERATURE, None, True, None),
"top_p": (OtelAttr.REQUEST_TOP_P, None, True, None),
"presence_penalty": (OtelAttr.PRESENCE_PENALTY, None, True, None),
"top_k": (OtelAttr.TOP_K, None, True, None),
"encoding_formats": (
Expand All @@ -1561,7 +1569,7 @@ def _get_instructions_from_options(options: Any) -> str | None:
"agent_name": (OtelAttr.AGENT_NAME, None, False, None),
"agent_description": (OtelAttr.AGENT_DESCRIPTION, None, False, None),
# Multiple source keys - checks model_id in options, then model in kwargs, then model_id in kwargs
("model_id", "model"): (SpanAttributes.LLM_REQUEST_MODEL, None, True, None),
("model_id", "model"): (OtelAttr.REQUEST_MODEL, None, True, None),
# Tools with validation - returns None if no valid tools
"tools": (
OtelAttr.TOOL_DEFINITIONS,
Expand Down Expand Up @@ -1718,7 +1726,7 @@ def _get_response_attributes(
if finish_reason:
attributes[OtelAttr.FINISH_REASONS] = json.dumps([finish_reason])
if model_id := getattr(response, "model_id", None):
attributes[SpanAttributes.LLM_RESPONSE_MODEL] = model_id
attributes[OtelAttr.RESPONSE_MODEL] = model_id
if capture_usage and (usage := response.usage_details):
if usage.get("input_token_count"):
attributes[OtelAttr.INPUT_TOKENS] = usage["input_token_count"]
Expand All @@ -1730,8 +1738,8 @@ def _get_response_attributes(
GEN_AI_METRIC_ATTRIBUTES = (
OtelAttr.OPERATION,
OtelAttr.PROVIDER_NAME,
SpanAttributes.LLM_REQUEST_MODEL,
SpanAttributes.LLM_RESPONSE_MODEL,
OtelAttr.REQUEST_MODEL,
OtelAttr.RESPONSE_MODEL,
OtelAttr.ADDRESS,
OtelAttr.PORT,
)
Expand All @@ -1749,10 +1757,10 @@ def _capture_response(
attrs: dict[str, Any] = {k: v for k, v in attributes.items() if k in GEN_AI_METRIC_ATTRIBUTES}
if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)):
token_usage_histogram.record(
input_tokens, attributes={**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_INPUT}
input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT}
)
if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)):
token_usage_histogram.record(output_tokens, {**attrs, SpanAttributes.LLM_TOKEN_TYPE: OtelAttr.T_TYPE_OUTPUT})
token_usage_histogram.record(output_tokens, {**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_OUTPUT})
if operation_duration_histogram and duration is not None:
if OtelAttr.ERROR_TYPE in attributes:
attrs[OtelAttr.ERROR_TYPE] = attributes[OtelAttr.ERROR_TYPE]
Expand Down
56 changes: 41 additions & 15 deletions python/packages/core/agent_framework/openai/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
import json
import logging
import sys
from collections.abc import AsyncIterable, Awaitable, Callable, Mapping, MutableMapping, Sequence
from collections.abc import (
AsyncIterable,
Awaitable,
Callable,
Mapping,
MutableMapping,
Sequence,
)
from datetime import datetime, timezone
from itertools import chain
from typing import Any, Generic, Literal
Expand All @@ -16,7 +23,9 @@
from openai.types.chat.chat_completion import ChatCompletion, Choice
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_message_custom_tool_call import ChatCompletionMessageCustomToolCall
from openai.types.chat.chat_completion_message_custom_tool_call import (
ChatCompletionMessageCustomToolCall,
)
from openai.types.chat.completion_create_params import WebSearchOptions
from pydantic import BaseModel

Expand Down Expand Up @@ -395,21 +404,18 @@ def _parse_response_update_from_openai(
) -> ChatResponseUpdate:
"""Parse a streaming response update from OpenAI."""
chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk)
if chunk.usage:
return ChatResponseUpdate(
role="assistant",
contents=[
Content.from_usage(
usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
)
],
model_id=chunk.model,
additional_properties=chunk_metadata,
response_id=chunk.id,
message_id=chunk.id,
)
contents: list[Content] = []
finish_reason: FinishReason | None = None

# Process usage data (may coexist with text/tool content in providers like Gemini).
# See https://github.com/microsoft/agent-framework/issues/3434
if chunk.usage:
contents.append(
Content.from_usage(
usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
)
)

for choice in chunk.choices:
chunk_metadata.update(self._get_metadata_from_chat_choice(choice))
contents.extend(self._parse_tool_calls_from_openai(choice))
Expand Down Expand Up @@ -532,6 +538,17 @@ def _prepare_messages_for_openai(

def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]:
"""Prepare a chat message for OpenAI."""
# System/developer messages must use plain string content because some
# OpenAI-compatible endpoints reject list content for non-user roles.
if message.role in ("system", "developer"):
texts = [content.text for content in message.contents if content.type == "text" and content.text]
if texts:
sys_args: dict[str, Any] = {"role": message.role, "content": "\n".join(texts)}
if message.author_name:
sys_args["name"] = message.author_name
return [sys_args]
return []

all_messages: list[dict[str, Any]] = []
for content in message.contents:
# Skip approval content - it's internal framework state, not for the LLM
Expand Down Expand Up @@ -568,6 +585,15 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]:
args["content"].append(self._prepare_content_for_openai(content)) # type: ignore
if "content" in args or "tool_calls" in args:
all_messages.append(args)

# Flatten text-only content lists to plain strings for broader
# compatibility with OpenAI-like endpoints (e.g. Foundry Local).
# See https://github.com/microsoft/agent-framework/issues/4084
for msg in all_messages:
msg_content: Any = msg.get("content")
if isinstance(msg_content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in msg_content):
msg["content"] = "\n".join(c.get("text", "") for c in msg_content)

return all_messages

def _prepare_content_for_openai(self, content: Content) -> dict[str, Any]:
Expand Down
21 changes: 9 additions & 12 deletions python/packages/core/tests/core/test_observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import pytest
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.semconv_ai import SpanAttributes
from opentelemetry.trace import StatusCode

from agent_framework import (
Expand Down Expand Up @@ -48,8 +47,8 @@ def test_role_event_map():
def test_enum_values():
"""Test that OtelAttr enum has expected values."""
assert OtelAttr.OPERATION == "gen_ai.operation.name"
assert SpanAttributes.LLM_SYSTEM == "gen_ai.system"
assert SpanAttributes.LLM_REQUEST_MODEL == "gen_ai.request.model"
assert OtelAttr.SYSTEM == "gen_ai.system"
assert OtelAttr.REQUEST_MODEL == "gen_ai.request.model"
assert OtelAttr.CHAT_COMPLETION_OPERATION == "chat"
assert OtelAttr.TOOL_EXECUTION_OPERATION == "execute_tool"
assert OtelAttr.AGENT_INVOKE_OPERATION == "invoke_agent"
Expand Down Expand Up @@ -213,7 +212,7 @@ async def test_chat_client_observability(mock_chat_client, span_exporter: InMemo
span = spans[0]
assert span.name == "chat Test"
assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test"
assert span.attributes[OtelAttr.INPUT_TOKENS] == 10
assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 20
if enable_sensitive_data:
Expand Down Expand Up @@ -243,7 +242,7 @@ async def test_chat_client_streaming_observability(
span = spans[0]
assert span.name == "chat Test"
assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "Test"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "Test"
if enable_sensitive_data:
assert span.attributes[OtelAttr.INPUT_MESSAGES] is not None
assert span.attributes[OtelAttr.OUTPUT_MESSAGES] is not None
Expand Down Expand Up @@ -392,7 +391,7 @@ async def test_chat_client_without_model_id_observability(mock_chat_client, span

assert span.name == "chat unknown"
assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown"


async def test_chat_client_streaming_without_model_id_observability(
Expand All @@ -416,7 +415,7 @@ async def test_chat_client_streaming_without_model_id_observability(
span = spans[0]
assert span.name == "chat unknown"
assert span.attributes[OtelAttr.OPERATION.value] == OtelAttr.CHAT_COMPLETION_OPERATION
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "unknown"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "unknown"


def test_prepend_user_agent_with_none_value():
Expand Down Expand Up @@ -491,7 +490,7 @@ async def test_agent_instrumentation_enabled(
assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id"
assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent"
assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description"
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel"
assert span.attributes[OtelAttr.INPUT_TOKENS] == 15
assert span.attributes[OtelAttr.OUTPUT_TOKENS] == 25
if enable_sensitive_data:
Expand Down Expand Up @@ -521,7 +520,7 @@ async def test_agent_streaming_response_with_diagnostics_enabled(
assert span.attributes[OtelAttr.AGENT_ID] == "test_agent_id"
assert span.attributes[OtelAttr.AGENT_NAME] == "test_agent"
assert span.attributes[OtelAttr.AGENT_DESCRIPTION] == "Test agent description"
assert span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "TestModel"
assert span.attributes[OtelAttr.REQUEST_MODEL] == "TestModel"
if enable_sensitive_data:
assert span.attributes.get(OtelAttr.OUTPUT_MESSAGES) is not None # Streaming, so no usage yet

Expand Down Expand Up @@ -1381,8 +1380,6 @@ def test_get_response_attributes_with_model_id():
"""Test _get_response_attributes includes model_id."""
from unittest.mock import Mock

from opentelemetry.semconv_ai import SpanAttributes

from agent_framework.observability import _get_response_attributes

response = Mock()
Expand All @@ -1395,7 +1392,7 @@ def test_get_response_attributes_with_model_id():
attrs = {}
result = _get_response_attributes(attrs, response)

assert result[SpanAttributes.LLM_RESPONSE_MODEL] == "gpt-4"
assert result[OtelAttr.RESPONSE_MODEL] == "gpt-4"


def test_get_response_attributes_with_usage():
Expand Down
Loading