diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py
index 6362433892..0f19305c35 100644
--- a/python/packages/core/agent_framework/_tools.py
+++ b/python/packages/core/agent_framework/_tools.py
@@ -475,9 +475,10 @@ async def invoke(
                 }
             }
             attributes.update({
+                # model_dump_json() keeps non-ASCII text unescaped by default, so no ensure_ascii flag is needed.
                 OtelAttr.TOOL_ARGUMENTS: arguments.model_dump_json()
                 if arguments
-                else json.dumps(serializable_kwargs, default=str)
+                else json.dumps(serializable_kwargs, default=str, ensure_ascii=False)
                 if serializable_kwargs
                 else "None"
             })
diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py
index 64ceefe673..36d32007e3 100644
--- a/python/packages/core/agent_framework/observability.py
+++ b/python/packages/core/agent_framework/observability.py
@@ -1557,7 +1557,7 @@ def _get_instructions_from_options(options: Any) -> str | None:
     "tools": (
         OtelAttr.TOOL_DEFINITIONS,
         lambda tools: (
-            json.dumps(tools_dict)
+            json.dumps(tools_dict, ensure_ascii=False)
             if (tools_dict := __import__("agent_framework._tools", fromlist=["_tools_to_dict"])._tools_to_dict(tools))
             else None
         ),
@@ -1639,12 +1639,14 @@ def _capture_messages(
         )
     if finish_reason:
         otel_messages[-1]["finish_reason"] = FINISH_REASON_MAP[finish_reason]
-    span.set_attribute(OtelAttr.OUTPUT_MESSAGES if output else OtelAttr.INPUT_MESSAGES, json.dumps(otel_messages))
+    span.set_attribute(
+        OtelAttr.OUTPUT_MESSAGES if output else OtelAttr.INPUT_MESSAGES, json.dumps(otel_messages, ensure_ascii=False)
+    )
     if system_instructions:
         if not isinstance(system_instructions, list):
             system_instructions = [system_instructions]
         otel_sys_instructions = [{"type": "text", "content": instruction} for instruction in system_instructions]
-        span.set_attribute(OtelAttr.SYSTEM_INSTRUCTIONS, json.dumps(otel_sys_instructions))
+        span.set_attribute(OtelAttr.SYSTEM_INSTRUCTIONS, json.dumps(otel_sys_instructions, ensure_ascii=False))
 
 
 def _to_otel_message(message: Message) -> dict[str, Any]:
diff --git a/python/packages/core/tests/core/test_observability.py b/python/packages/core/tests/core/test_observability.py
index 77b88a873e..0d51a1d5a6 100644
--- a/python/packages/core/tests/core/test_observability.py
+++ b/python/packages/core/tests/core/test_observability.py
@@ -30,6 +30,7 @@
     ChatTelemetryLayer,
     MessageListTimestampFilter,
     OtelAttr,
+    _capture_messages,
     get_function_span,
 )
 
@@ -2263,3 +2264,176 @@ async def _get() -> ChatResponse:
 
     # Third span: second chat (LLM call with function result)
     assert sorted_spans[2].name.startswith("chat"), f"Third span should be 'chat', got '{sorted_spans[2].name}'"
+
+
+# region Test non-ASCII character handling in JSON serialization
+
+
+@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
+async def test_capture_messages_preserves_non_ascii_characters(mock_chat_client, span_exporter: InMemorySpanExporter):
+    """Test that non-ASCII characters (e.g., Japanese) are preserved in span attributes."""
+    import json
+
+    japanese_text = "こんにちは世界"  # "Hello World" in Japanese
+
+    class ClientWithJapanese(mock_chat_client):
+        async def _inner_get_response(self, *, messages, options, **kwargs):
+            return ChatResponse(
+                messages=[Message(role="assistant", text=japanese_text)],
+                usage_details=UsageDetails(input_token_count=5, output_token_count=10),
+            )
+
+    client = ClientWithJapanese()
+    messages = [Message(role="user", text=japanese_text)]
+
+    span_exporter.clear()
+    response = await client.get_response(messages=messages, model_id="Test")
+
+    assert response is not None
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+
+    # Verify input messages preserve Japanese characters
+    input_messages_json = span.attributes[OtelAttr.INPUT_MESSAGES]
+    assert japanese_text in input_messages_json
+    # Ensure it's not escaped to Unicode
+    assert "\\u" not in input_messages_json
+
+    # Verify output messages preserve Japanese characters
+    output_messages_json = span.attributes[OtelAttr.OUTPUT_MESSAGES]
+    assert japanese_text in output_messages_json
+    assert "\\u" not in output_messages_json
+
+    # Verify JSON is valid and contains the text
+    input_messages = json.loads(input_messages_json)
+    assert input_messages[0]["parts"][0]["content"] == japanese_text
+    output_messages = json.loads(output_messages_json)
+    assert output_messages[0]["parts"][0]["content"] == japanese_text
+
+
+@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
+async def test_system_instructions_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
+    """Test that non-ASCII characters are preserved in system instructions span attribute."""
+    import json
+
+    from opentelemetry import trace
+
+    chinese_text = "你好世界"  # "Hello World" in Chinese
+
+    tracer = trace.get_tracer("test")
+    span_exporter.clear()
+
+    with tracer.start_as_current_span("test_span") as span:
+        _capture_messages(
+            span=span,
+            provider_name="test_provider",
+            messages=[Message(role="user", text="Test")],
+            system_instructions=chinese_text,
+        )
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+
+    # Verify system instructions preserve Chinese characters
+    system_instructions_json = span.attributes[OtelAttr.SYSTEM_INSTRUCTIONS]
+    assert chinese_text in system_instructions_json
+    assert "\\u" not in system_instructions_json
+
+    # Verify JSON is valid and contains the text
+    system_instructions = json.loads(system_instructions_json)
+    assert system_instructions[0]["content"] == chinese_text
+
+
+@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
+async def test_tool_arguments_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
+    """Test that non-ASCII characters are preserved in tool arguments span attribute."""
+    import json
+
+    korean_text = "안녕하세요"  # "Hello" in Korean
+
+    @tool
+    def greet(message: str) -> str:
+        """Greet with a message."""
+        return f"Greeted: {message}"
+
+    span_exporter.clear()
+    await greet.invoke(message=korean_text)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+
+    # Verify tool arguments preserve Korean characters
+    tool_arguments_json = span.attributes[OtelAttr.TOOL_ARGUMENTS]
+    assert korean_text in tool_arguments_json
+    assert "\\u" not in tool_arguments_json
+
+    # Verify JSON is valid and contains the text
+    tool_arguments = json.loads(tool_arguments_json)
+    assert tool_arguments["message"] == korean_text
+
+
+@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
+async def test_tool_result_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
+    """Test that non-ASCII characters are preserved in tool result span attribute."""
+    arabic_text = "مرحبا بالعالم"  # "Hello World" in Arabic
+
+    @tool
+    def echo(text: str) -> str:
+        """Echo the text back."""
+        return text
+
+    span_exporter.clear()
+    result = await echo.invoke(text=arabic_text)
+
+    assert result == arabic_text
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+
+    # Verify tool result preserves Arabic characters
+    tool_result = span.attributes[OtelAttr.TOOL_RESULT]
+    assert arabic_text in tool_result
+
+
+@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
+async def test_tool_arguments_pydantic_preserves_non_ascii_characters(
+    span_exporter: InMemorySpanExporter,
+) -> None:
+    """Test that non-ASCII characters are preserved in tool arguments when using a Pydantic model."""
+    import json
+
+    from pydantic import BaseModel
+
+    japanese_text = "こんにちは"  # "Hello" in Japanese
+
+    class Greeting(BaseModel):
+        message: str
+
+    @tool
+    def greet_with_model(greeting: Greeting) -> str:
+        """Greet with a message contained in a Pydantic model."""
+        # When invoked via the tool's input_model, greeting is passed as a dict
+        if isinstance(greeting, dict):
+            return f"Greeted: {greeting['message']}"
+        return f"Greeted: {greeting.message}"
+
+    span_exporter.clear()
+    # Use the tool's input_model to properly pass the Pydantic model argument
+    input_model = greet_with_model.input_model
+    await greet_with_model.invoke(arguments=input_model(greeting=Greeting(message=japanese_text)))
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+
+    # Verify tool arguments preserve Japanese characters
+    tool_arguments_json = span.attributes[OtelAttr.TOOL_ARGUMENTS]
+    assert japanese_text in tool_arguments_json
+    assert "\\u" not in tool_arguments_json
+
+    # Verify JSON is valid and contains the text
+    tool_arguments = json.loads(tool_arguments_json)
+    assert tool_arguments["greeting"]["message"] == japanese_text