From 238f87cf9d7b79893a690545ee1a7357a8bddc88 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 13:31:54 +0100 Subject: [PATCH 01/58] fix(ai): Keep single content input message --- sentry_sdk/ai/utils.py | 63 +--------- .../integrations/anthropic/test_anthropic.py | 114 ++++++++---------- .../google_genai/test_google_genai.py | 5 +- .../integrations/langgraph/test_langgraph.py | 10 +- tests/integrations/litellm/test_litellm.py | 11 +- .../openai_agents/test_openai_agents.py | 78 +++++------- .../pydantic_ai/test_pydantic_ai.py | 75 ------------ 7 files changed, 93 insertions(+), 263 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index a4ebe96d99..51a75b1706 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -15,7 +15,6 @@ import sentry_sdk from sentry_sdk.utils import logger -MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB # Maximum characters when only a single message is left after bytes truncation MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000 @@ -550,22 +549,6 @@ def _truncate_single_message_content_if_present( return message -def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int: - """ - Find the index of the first message that would exceed the max bytes limit. - Compute the individual message sizes, and return the index of the first message from the back - of the list that would exceed the max bytes limit. - """ - running_sum = 0 - for idx in range(len(messages) - 1, -1, -1): - size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8")) - running_sum += size - if running_sum > max_bytes: - return idx + 1 - - return 0 - - def redact_blob_message_parts( messages: "List[Dict[str, Any]]", ) -> "List[Dict[str, Any]]": @@ -645,55 +628,21 @@ def redact_blob_message_parts( return messages_copy -def truncate_messages_by_size( - messages: "List[Dict[str, Any]]", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, - max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, -) -> "Tuple[List[Dict[str, Any]], int]": - """ - Returns a truncated messages list, consisting of - - the last message, with its content truncated to `max_single_message_chars` characters, - if the last message's size exceeds `max_bytes` bytes; otherwise, - - the maximum number of messages, starting from the end of the `messages` list, whose total - serialized size does not exceed `max_bytes` bytes. - - In the single message case, the serialized message size may exceed `max_bytes`, because - truncation is based only on character count in that case. - """ - serialized_json = json.dumps(messages, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size <= max_bytes: - return messages, 0 - - truncation_index = _find_truncation_index(messages, max_bytes) - if truncation_index < len(messages): - truncated_messages = messages[truncation_index:] - else: - truncation_index = len(messages) - 1 - truncated_messages = messages[-1:] - - if len(truncated_messages) == 1: - truncated_messages[0] = _truncate_single_message_content_if_present( - deepcopy(truncated_messages[0]), max_chars=max_single_message_chars - ) - - return truncated_messages, truncation_index - - def truncate_and_annotate_messages( messages: "Optional[List[Dict[str, Any]]]", span: "Any", scope: "Any", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, + max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": if not messages: return None messages = redact_blob_message_parts(messages) - truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) - if removed_count > 0: + truncated_message = _truncate_single_message_content_if_present( + deepcopy(messages[-1]), max_chars=max_single_message_chars + ) + if len(messages) > 1: scope._gen_ai_original_message_count[span.span_id] = len(messages) - return truncated_messages + return [truncated_message] diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e8bc4648b6..942ba00537 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -899,7 +899,25 @@ def test_set_output_data_with_input_json_delta(sentry_init): assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30 -def test_anthropic_message_role_mapping(sentry_init, capture_events): +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_anthropic_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], @@ -924,13 +942,7 @@ def mock_messages_create(*args, **kwargs): client.messages._post = mock.Mock(return_value=mock_messages_create()) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" - test_messages = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] + test_messages = [test_message] with start_transaction(name="anthropic tx"): client.messages.create( @@ -948,22 +960,7 @@ def mock_messages_create(*args, **kwargs): # Parse the stored messages stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" - - # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" - - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages] - assert "ai" not in roles + assert stored_messages[0]["role"] == expected_role def test_anthropic_message_truncation(sentry_init, capture_events): @@ -1010,9 +1007,8 @@ def test_anthropic_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1076,13 +1072,9 @@ def test_nonstreaming_create_message_with_system_prompt( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -1155,13 +1147,9 @@ async def test_nonstreaming_create_message_with_system_prompt_async( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -1266,13 +1254,9 @@ def test_streaming_create_message_with_system_prompt( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: @@ -1381,13 +1365,9 @@ async def test_streaming_create_message_with_system_prompt_async( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: @@ -1411,17 +1391,17 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - # System prompt as list of text blocks system_prompt = [ {"type": "text", "text": "You are a helpful assistant."}, - {"type": "text", "text": "Be concise and clear."}, ] + # User prompt as list of text blocks messages = [ { "role": "user", "content": "Hello", - } + }, + {"type": "user", "text": "World!"}, ] with start_transaction(name="anthropic"): @@ -1441,17 +1421,17 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) # Should have system message first, then user message - assert len(stored_messages) == 2 - assert stored_messages[0]["role"] == "system" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" # System content should be a list of text blocks assert isinstance(stored_messages[0]["content"], list) - assert len(stored_messages[0]["content"]) == 2 - assert stored_messages[0]["content"][0]["type"] == "text" - assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." - assert stored_messages[0]["content"][1]["type"] == "text" - assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello" + assert len(stored_messages[0]["content"]) == 1 + # assert stored_messages[0]["content"][0]["type"] == "text" + # assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." + # assert stored_messages[0]["content"][1]["type"] == "text" + # assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello" # Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ad89b878ea..3655e32e70 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -234,9 +234,8 @@ def test_generate_content_with_system_instruction( messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] # Parse the JSON string to verify content messages = json.loads(messages_str) - assert len(messages) == 2 - assert messages[0] == {"role": "system", "content": "You are a helpful assistant"} - assert messages[1] == {"role": "user", "content": "What is 2+2?"} + assert len(messages) == 1 + assert messages[0] == {"role": "user", "content": "What is 2+2?"} def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 99ab216957..2a385d8a78 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -270,9 +270,8 @@ def original_invoke(self, *args, **kwargs): import json request_messages = json.loads(request_messages) - assert len(request_messages) == 2 - assert request_messages[0]["content"] == "Hello, can you help me?" - assert request_messages[1]["content"] == "Of course! How can I assist you?" + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response @@ -1383,7 +1382,6 @@ def original_invoke(self, *args, **kwargs): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8849ab0372..0c2b349cff 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -344,11 +344,7 @@ def test_embeddings_create_with_list_input( assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert json.loads(embeddings_input) == ["Third text"] def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): @@ -752,9 +748,8 @@ def test_litellm_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9d463f8de5..dba73cc027 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -189,12 +189,6 @@ async def test_agent_invocation_span( assert invoke_agent_span["description"] == "invoke_agent test_agent" assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "content": [ - {"text": "You are a helpful test assistant.", "type": "text"} - ], - "role": "system", - }, {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, ] ) @@ -617,12 +611,12 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, + # { + # "role": "system", + # "content": [ + # {"type": "text", "text": "You are a helpful test assistant."} + # ], + # }, { "role": "user", "content": [ @@ -982,12 +976,6 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent assert "gen_ai.request.messages" in ai_client_span["data"] request_messages = safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) @@ -1366,7 +1354,25 @@ async def run(): assert txn3["transaction"] == "test_agent workflow" -def test_openai_agents_message_role_mapping(sentry_init, capture_events): +# Test input messages with mixed roles including "ai" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_openai_agents_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[OpenAIAgentsIntegration()], @@ -1374,15 +1380,7 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): send_default_pii=True, ) - # Test input messages with mixed roles including "ai" - test_input = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] - - get_response_kwargs = {"input": test_input} + get_response_kwargs = {"input": [test_message]} from sentry_sdk.integrations.openai_agents.utils import _set_input_data from sentry_sdk import start_span @@ -1393,23 +1391,10 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): # Verify that messages were processed and roles were mapped from sentry_sdk.consts import SPANDATA - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data: - import json - - stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - # Verify roles were properly mapped - found_assistant_roles = 0 - for message in stored_messages: - if message["role"] == "assistant": - found_assistant_roles += 1 - - # Should have 2 assistant roles (1 from original "assistant", 1 from mapped "ai") - assert found_assistant_roles == 2 + stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify no "ai" roles remain in any message - for message in stored_messages: - assert message["role"] != "ai" + # Verify roles were properly mapped + assert stored_messages[0]["role"] == expected_role @pytest.mark.asyncio @@ -2120,6 +2105,5 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 575eae35cc..86e524dc7b 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -513,43 +513,6 @@ async def test_model_settings(sentry_init, capture_events, test_agent_with_setti assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 -@pytest.mark.asyncio -async def test_system_prompt_in_messages(sentry_init, capture_events): - """ - Test that system prompts are included as the first message. - """ - agent = Agent( - "test", - name="test_system", - system_prompt="You are a helpful assistant specialized in testing.", - ) - - sentry_init( - integrations=[PydanticAIIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - events = capture_events() - - await agent.run("Hello") - - (transaction,) = events - spans = transaction["spans"] - - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 - - chat_span = chat_spans[0] - messages_str = chat_span["data"]["gen_ai.request.messages"] - - # Messages is serialized as a string - # Should contain system role and helpful assistant text - assert "system" in messages_str - assert "helpful assistant" in messages_str - - @pytest.mark.asyncio async def test_error_handling(sentry_init, capture_events): """ @@ -1183,44 +1146,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): assert "Second part" in messages_str -@pytest.mark.asyncio -async def test_invoke_agent_with_instructions(sentry_init, capture_events): - """ - Test that invoke_agent span handles instructions correctly. - """ - from pydantic_ai import Agent - - # Create agent with instructions (can be string or list) - agent = Agent( - "test", - name="test_instructions", - ) - - # Add instructions via _instructions attribute (internal API) - agent._instructions = ["Instruction 1", "Instruction 2"] - agent._system_prompts = ["System prompt"] - - sentry_init( - integrations=[PydanticAIIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - events = capture_events() - - await agent.run("Test input") - - (transaction,) = events - - # Check that the invoke_agent transaction has messages data - if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: - messages_str = transaction["contexts"]["trace"]["data"][ - "gen_ai.request.messages" - ] - # Should contain both instructions and system prompts - assert "Instruction" in messages_str or "System prompt" in messages_str - - @pytest.mark.asyncio async def test_model_name_extraction_with_callable(sentry_init, capture_events): """ From de7996d4609abbd2027a0f23ea2d5ecbac6e94a6 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 13:52:13 +0100 Subject: [PATCH 02/58] remove system prompt anthropic test --- .../integrations/anthropic/test_anthropic.py | 54 ------------------- 1 file changed, 54 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 942ba00537..e197475d5b 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1380,60 +1380,6 @@ async def test_streaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): - """Test that complex system prompt structures (list of text blocks) are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - system_prompt = [ - {"type": "text", "text": "You are a helpful assistant."}, - ] - - # User prompt as list of text blocks - messages = [ - { - "role": "user", - "content": "Hello", - }, - {"type": "user", "text": "World!"}, - ] - - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model", system=system_prompt - ) - - assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - - assert len(event["spans"]) == 1 - (span,) = event["spans"] - - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - # Should have system message first, then user message - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - # System content should be a list of text blocks - assert isinstance(stored_messages[0]["content"], list) - assert len(stored_messages[0]["content"]) == 1 - # assert stored_messages[0]["content"][0]["type"] == "text" - # assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." - # assert stored_messages[0]["content"][1]["type"] == "text" - # assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello" - - # Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions From 7aedbdefa8540b341af70bbdd75fc5520c4c547a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 13:56:34 +0100 Subject: [PATCH 03/58] update openai agents test --- .../openai_agents/test_openai_agents.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index dba73cc027..b9380ba1b7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -682,30 +682,6 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - { - "role": "assistant", - "content": [ - { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - } - ], - }, { "role": "tool", "content": [ From 67b768aafb7ca9597adbdca674d7ba3f8b2bc665 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 13:58:59 +0100 Subject: [PATCH 04/58] openai tests --- tests/integrations/openai/test_openai.py | 64 +++++++++++------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..4f6eb37ec8 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -44,7 +44,6 @@ OpenAIIntegration, _calculate_token_usage, ) -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize @@ -1458,7 +1457,25 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] -def test_openai_message_role_mapping(sentry_init, capture_events): +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_openai_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -1470,13 +1487,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" - test_messages = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] + + test_messages = [test_message] with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) @@ -1491,22 +1503,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" - - # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" - - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages] - assert "ai" not in roles + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == expected_role def test_openai_message_truncation(sentry_init, capture_events): @@ -1548,14 +1546,8 @@ def test_openai_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) - if "_meta" in event and len(parsed_messages) < len(large_messages): - meta_path = event["_meta"] - if ( - "spans" in meta_path - and "0" in meta_path["spans"] - and "data" in meta_path["spans"]["0"] - ): - span_meta = meta_path["spans"]["0"]["data"] - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "len" in messages_meta.get("", {}) + meta_path = event["_meta"] + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) From 3e1cb35dc911b759b1607f91b7b553e56d056208 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 14:07:53 +0100 Subject: [PATCH 05/58] ai monitoring tests --- tests/test_ai_monitoring.py | 180 +++++++++--------------------------- 1 file changed, 46 insertions(+), 134 deletions(-) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index f6852d54bb..767d79b747 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -11,12 +11,9 @@ ) from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( - MAX_GEN_AI_MESSAGE_BYTES, MAX_SINGLE_MESSAGE_CONTENT_CHARS, set_data_normalized, truncate_and_annotate_messages, - truncate_messages_by_size, - _find_truncation_index, parse_data_uri, redact_blob_message_parts, get_modality_from_mime_type, @@ -222,127 +219,7 @@ def large_messages(): ] -class TestTruncateMessagesBySize: - def test_no_truncation_needed(self, sample_messages): - """Test that messages under the limit are not truncated""" - result, truncation_index = truncate_messages_by_size( - sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES - ) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert truncation_index == 0 - - def test_truncation_removes_oldest_first(self, large_messages): - """Test that oldest messages are removed first during truncation""" - small_limit = 3000 - result, truncation_index = truncate_messages_by_size( - large_messages, max_bytes=small_limit - ) - assert len(result) < len(large_messages) - - assert result[-1] == large_messages[-1] - assert truncation_index == len(large_messages) - len(result) - - def test_empty_messages_list(self): - """Test handling of empty messages list""" - result, truncation_index = truncate_messages_by_size( - [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 - ) - assert result == [] - assert truncation_index == 0 - - def test_find_truncation_index( - self, - ): - """Test that the truncation index is found correctly""" - # when represented in JSON, these are each 7 bytes long - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 20) - assert truncation_index == 3 - assert messages[truncation_index:] == ["D" * 5, "E" * 5] - - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 40) - assert truncation_index == 0 - assert messages[truncation_index:] == [ - "A" * 5, - "B" * 5, - "C" * 5, - "D" * 5, - "E" * 5, - ] - - def test_progressive_truncation(self, large_messages): - """Test that truncation works progressively with different limits""" - limits = [ - MAX_GEN_AI_MESSAGE_BYTES // 5, - MAX_GEN_AI_MESSAGE_BYTES // 10, - MAX_GEN_AI_MESSAGE_BYTES // 25, - MAX_GEN_AI_MESSAGE_BYTES // 100, - MAX_GEN_AI_MESSAGE_BYTES // 500, - ] - prev_count = len(large_messages) - - for limit in limits: - result = truncate_messages_by_size(large_messages, max_bytes=limit) - current_count = len(result) - - assert current_count <= prev_count - assert current_count >= 1 - prev_count = current_count - - def test_single_message_truncation(self): - large_content = "This is a very long message. " * 10_000 - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": large_content}, - ] - - result, truncation_index = truncate_messages_by_size( - messages, max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - assert len(result) == 1 - assert ( - len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - # If the last message is too large, the system message is not present - system_msgs = [m for m in result if m.get("role") == "system"] - assert len(system_msgs) == 0 - - # Confirm the user message is truncated with '...' - user_msgs = [m for m in result if m.get("role") == "user"] - assert len(user_msgs) == 1 - assert user_msgs[0]["content"].endswith("...") - assert len(user_msgs[0]["content"]) < len(large_content) - - class TestTruncateAndAnnotateMessages: - def test_no_truncation_returns_list(self, sample_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - span = MockSpan() - scope = MockScope() - result = truncate_and_annotate_messages(sample_messages, span, scope) - - assert isinstance(result, list) - assert not isinstance(result, AnnotatedValue) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert span.span_id not in scope._gen_ai_original_message_count - def test_truncation_sets_metadata_on_scope(self, large_messages): class MockSpan: def __init__(self): @@ -361,7 +238,7 @@ def __init__(self): scope = MockScope() original_count = len(large_messages) result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -388,7 +265,7 @@ def __init__(self): scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert scope._gen_ai_original_message_count[span.span_id] == original_count @@ -415,6 +292,47 @@ def __init__(self): result = truncate_and_annotate_messages(None, span, scope) assert result is None + def test_single_message_truncation(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_original_message_count = {} + + large_content = "This is a very long message. " * 10_000 + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + ] + + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages( + messages, + span, + scope, + max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS, + ) + assert result is not None + + assert len(result) == 1 + assert ( + len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + # Confirm the user message is truncated with '...' + user_msgs = [m for m in result if m.get("role") == "user"] + assert len(user_msgs) == 1 + assert user_msgs[0]["content"].endswith("...") + assert len(user_msgs[0]["content"]) < len(large_content) + def test_truncated_messages_newest_first(self, large_messages): class MockSpan: def __init__(self): @@ -432,7 +350,7 @@ def __init__(self): span = MockSpan() scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -500,15 +418,12 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - small_limit = 3000 span = MockSpan() scope = MockScope() original_count = len(large_messages) # Simulate what integrations do - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit - ) + truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) # Verify metadata was set on scope @@ -557,14 +472,11 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - small_limit = 3000 span = MockSpan() scope = MockScope() original_message_count = len(large_messages) - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit - ) + truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) assert len(truncated_messages) < original_message_count From 49bbcd864fb81e5776092c6cc8508e41dcf0ed3d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 14:36:16 +0100 Subject: [PATCH 06/58] google_genai tests --- .../google_genai/test_google_genai.py | 91 ++----------------- 1 file changed, 10 insertions(+), 81 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3655e32e70..ba296871d5 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1458,60 +1458,6 @@ def test_generate_content_with_content_object( ] -def test_generate_content_with_conversation_history( - sentry_init, capture_events, mock_genai_client -): - """Test generate_content with list of Content objects (conversation history).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Create conversation history - contents = [ - genai_types.Content( - role="user", parts=[genai_types.Part(text="What is the capital of France?")] - ), - genai_types.Content( - role="model", - parts=[genai_types.Part(text="The capital of France is Paris.")], - ), - genai_types.Content( - role="user", parts=[genai_types.Part(text="What about Germany?")] - ), - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "What is the capital of France?", "type": "text"} - ] - assert ( - messages[1]["role"] == "assistant" - ) # "model" should be normalized to "assistant" - assert messages[1]["content"] == [ - {"text": "The capital of France is Paris.", "type": "text"} - ] - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "What about Germany?", "type": "text"}] - - def test_generate_content_with_dict_format( sentry_init, capture_events, mock_genai_client ): @@ -1681,17 +1627,12 @@ def test_generate_content_with_function_response( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 2 + assert len(messages) == 1 # First message is user message - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "What's the weather in Paris?", "type": "text"} - ] - # Second message is tool message - assert messages[1]["role"] == "tool" - assert messages[1]["content"]["toolCallId"] == "call_123" - assert messages[1]["content"]["toolName"] == "get_weather" - assert messages[1]["content"]["output"] == '"Sunny, 72F"' + assert messages[0]["role"] == "tool" + assert messages[0]["content"]["toolCallId"] == "call_123" + assert messages[0]["content"]["toolName"] == "get_weather" + assert messages[0]["content"]["output"] == '"Sunny, 72F"' def test_generate_content_with_mixed_string_and_content( @@ -1732,18 +1673,10 @@ def test_generate_content_with_mixed_string_and_content( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 - # String becomes user message - assert messages[0]["role"] == "user" - assert messages[0]["content"] == "Hello, this is a string message" - # Model role normalized to assistant - assert messages[1]["role"] == "assistant" - assert messages[1]["content"] == [ - {"text": "Hi! How can I help you?", "type": "text"} - ] + assert len(messages) == 1 # User message - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "Tell me a joke", "type": "text"}] + assert messages[0]["role"] == "user" + assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}] def test_generate_content_with_part_object_directly( @@ -1811,13 +1744,9 @@ def test_generate_content_with_list_of_dicts( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 + assert len(messages) == 1 assert messages[0]["role"] == "user" - assert messages[0]["content"] == [{"text": "First user message", "type": "text"}] - assert messages[1]["role"] == "assistant" - assert messages[1]["content"] == [{"text": "First model response", "type": "text"}] - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "Second user message", "type": "text"}] + assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( From 9d192b47045164a0357fb8fae1de5cdd3f955ae6 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 14:42:04 +0100 Subject: [PATCH 07/58] pydantic-ai tests --- tests/integrations/langchain/test_langchain.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 6f5f9f14a1..e03c4db940 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -218,15 +218,14 @@ def test_langchain_agent( if send_default_pii and include_prompts: assert ( - "You are very powerful" + "How many letters in the word" in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) assert ( - "You are very powerful" - in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + "get_word_length" in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1035,9 +1034,8 @@ def test_langchain_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From 708b55a271ecd37e533f9feac6b428244f59faed Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 14:47:53 +0100 Subject: [PATCH 08/58] remove comment --- tests/integrations/openai_agents/test_openai_agents.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index b9380ba1b7..59c5b7edb7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -611,12 +611,6 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( [ - # { - # "role": "system", - # "content": [ - # {"type": "text", "text": "You are a helpful test assistant."} - # ], - # }, { "role": "user", "content": [ From 95d15f6590861b907f0bb7ee52463bec0c49a53c Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 15:19:43 +0100 Subject: [PATCH 09/58] remove if statement in test --- tests/integrations/openai/test_openai.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4f6eb37ec8..f6ffb24f80 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1548,6 +1548,5 @@ def test_openai_message_truncation(sentry_init, capture_events): meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "len" in messages_meta.get("", {}) + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) From 01d061934cf40255165be0620932d2dacf4610d0 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 20 Jan 2026 15:47:32 +0100 Subject: [PATCH 10/58] deduplicate --- tests/conftest.py | 24 +++++++++++++++++++ .../integrations/anthropic/test_anthropic.py | 22 ++++------------- tests/integrations/openai/test_openai.py | 19 ++------------- .../openai_agents/test_openai_agents.py | 20 +++------------- 4 files changed, 34 insertions(+), 51 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index dea36f8bda..08175be527 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -592,6 +592,30 @@ def suppress_deprecation_warnings(): yield +@pytest.fixture( + params=[ + pytest.param( + ({"role": "system", "content": "You are helpful."}, "system"), + id="system", + ), + pytest.param( + ({"role": "user", "content": "Hello"}, "user"), + id="user", + ), + pytest.param( + ({"role": "ai", "content": "Hi there!"}, "assistant"), + id="ai", + ), + pytest.param( + ({"role": "assistant", "content": "How can I help?"}, "assistant"), + id="assistant", + ), + ] +) +def input_ai_message_and_expected_role(request): + return request.param + + class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e197475d5b..7955b07497 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -899,26 +899,14 @@ def test_set_output_data_with_input_json_delta(sentry_init): assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30 -# Test messages with mixed roles including "ai" that should be mapped to "assistant" -@pytest.mark.parametrize( - "test_message,expected_role", - [ - ({"role": "system", "content": "You are helpful."}, "system"), - ({"role": "user", "content": "Hello"}, "user"), - ( - {"role": "ai", "content": "Hi there!"}, - "assistant", - ), # Should be mapped to "assistant" - ( - {"role": "assistant", "content": "How can I help?"}, - "assistant", - ), # Should stay "assistant" - ], -) def test_anthropic_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, + capture_events, + input_ai_message_and_expected_role, ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" + test_message, expected_role = input_ai_message_and_expected_role + sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index f6ffb24f80..e1ce07f944 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1457,26 +1457,11 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] -# Test messages with mixed roles including "ai" that should be mapped to "assistant" -@pytest.mark.parametrize( - "test_message,expected_role", - [ - ({"role": "system", "content": "You are helpful."}, "system"), - ({"role": "user", "content": "Hello"}, "user"), - ( - {"role": "ai", "content": "Hi there!"}, - "assistant", - ), # Should be mapped to "assistant" - ( - {"role": "assistant", "content": "How can I help?"}, - "assistant", - ), # Should stay "assistant" - ], -) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_events, input_ai_message_and_expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" + test_message, expected_role = input_ai_message_and_expected_role sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 59c5b7edb7..7ac1d7fe3b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1324,26 +1324,12 @@ async def run(): assert txn3["transaction"] == "test_agent workflow" -# Test input messages with mixed roles including "ai" -@pytest.mark.parametrize( - "test_message,expected_role", - [ - ({"role": "system", "content": "You are helpful."}, "system"), - ({"role": "user", "content": "Hello"}, "user"), - ( - {"role": "ai", "content": "Hi there!"}, - "assistant", - ), # Should be mapped to "assistant" - ( - {"role": "assistant", "content": "How can I help?"}, - "assistant", - ), # Should stay "assistant" - ], -) def test_openai_agents_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_events, input_ai_message_and_expected_role ): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" + test_message, expected_role = input_ai_message_and_expected_role + sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, From 9cf56434ca1b8d0ceb1230e977ed756871ac01cb Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 09:12:11 +0100 Subject: [PATCH 11/58] Revert "deduplicate" This reverts commit 01d061934cf40255165be0620932d2dacf4610d0. --- tests/conftest.py | 24 ------------------- .../integrations/anthropic/test_anthropic.py | 22 +++++++++++++---- tests/integrations/openai/test_openai.py | 19 +++++++++++++-- .../openai_agents/test_openai_agents.py | 20 +++++++++++++--- 4 files changed, 51 insertions(+), 34 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 08175be527..dea36f8bda 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -592,30 +592,6 @@ def suppress_deprecation_warnings(): yield -@pytest.fixture( - params=[ - pytest.param( - ({"role": "system", "content": "You are helpful."}, "system"), - id="system", - ), - pytest.param( - ({"role": "user", "content": "Hello"}, "user"), - id="user", - ), - pytest.param( - ({"role": "ai", "content": "Hi there!"}, "assistant"), - id="ai", - ), - pytest.param( - ({"role": "assistant", "content": "How can I help?"}, "assistant"), - id="assistant", - ), - ] -) -def input_ai_message_and_expected_role(request): - return request.param - - class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 7955b07497..e197475d5b 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -899,14 +899,26 @@ def test_set_output_data_with_input_json_delta(sentry_init): assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30 +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) def test_anthropic_message_role_mapping( - sentry_init, - capture_events, - input_ai_message_and_expected_role, + sentry_init, capture_events, test_message, expected_role ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" - test_message, expected_role = input_ai_message_and_expected_role - sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index e1ce07f944..f6ffb24f80 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1457,11 +1457,26 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) def test_openai_message_role_mapping( - sentry_init, capture_events, input_ai_message_and_expected_role + sentry_init, capture_events, test_message, expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" - test_message, expected_role = input_ai_message_and_expected_role sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 7ac1d7fe3b..59c5b7edb7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1324,12 +1324,26 @@ async def run(): assert txn3["transaction"] == "test_agent workflow" +# Test input messages with mixed roles including "ai" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) def test_openai_agents_message_role_mapping( - sentry_init, capture_events, input_ai_message_and_expected_role + sentry_init, capture_events, test_message, expected_role ): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" - test_message, expected_role = input_ai_message_and_expected_role - sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, From fe5d1e1f1503038e1cc7e4d62ceee6edf47bb056 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 09:15:06 +0100 Subject: [PATCH 12/58] remove if condition in test --- tests/integrations/openai_agents/test_openai_agents.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 59c5b7edb7..da4af4ebdd 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -2063,11 +2063,10 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): with start_span(op="gen_ai.chat") as span: scope = sentry_sdk.get_current_scope() _set_input_data(span, get_response_kwargs) - if hasattr(scope, "_gen_ai_original_message_count"): - truncated_count = scope._gen_ai_original_message_count.get(span.span_id) - assert truncated_count == 5, ( - f"Expected 5 original messages, got {truncated_count}" - ) + truncated_count = scope._gen_ai_original_message_count.get(span.span_id) + assert truncated_count == 5, ( + f"Expected 5 original messages, got {truncated_count}" + ) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data messages_data = span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES] From a7990adc4b2ab3e589643f57d434fc6d20e2123e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 17:56:25 +0100 Subject: [PATCH 13/58] ref(openai): Separate input handling to improve typing --- sentry_sdk/integrations/openai.py | 146 ++++++++++++++++++++++++++++-- 1 file changed, 136 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 66dc4a1c48..55ec5e4085 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -23,9 +23,20 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator + from typing import ( + Any, + Iterable, + List, + Optional, + Callable, + AsyncIterator, + Iterator, + Union, + ) from sentry_sdk.tracing import Span + from openai.types.responses import ResponseInputParam + try: try: from openai import NotGiven @@ -182,6 +193,124 @@ def _calculate_token_usage( ) +def _get_input_messages(kwargs: "dict[str, Any]") -> "Optional[list[Any] | list[str]]": + # Input messages (the prompt or data sent to the model) + messages = kwargs.get("messages") + if messages is None: + messages = kwargs.get("input") + + if isinstance(messages, str): + messages = [messages] + + return messages + + +def _commmon_set_input_data( + span: "Span", + kwargs: "dict[str, Any]", +): + # Input attributes: Common + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") + + # Input attributes: Optional + kwargs_keys_to_attributes = { + "model": SPANDATA.GEN_AI_REQUEST_MODEL, + "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + } + for key, attribute in kwargs_keys_to_attributes.items(): + value = kwargs.get(key) + + if value is not None and _is_given(value): + set_data_normalized(span, attribute, value) + + # Input attributes: Tools + tools = kwargs.get("tools") + if tools is not None and _is_given(tools) and len(tools) > 0: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) + ) + + +def _set_responses_api_input_data( + span: "Span", + kwargs: "dict[str, Any]", + integration: "OpenAIIntegration", +): + messages: "Optional[ResponseInputParam | list[str]]" = _get_input_messages(kwargs) + + if ( + messages is not None + and len(messages) > 0 + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles(messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) + + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + + +def _set_completions_api_input_data( + span: "Span", + kwargs: "dict[str, Any]", + integration: "OpenAIIntegration", +): + messages: "Optional[ChatCompletionMessageParam]" = _get_input_messages(kwargs) + + if ( + messages is not None + and len(messages) > 0 + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles(messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) + + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + _commmon_set_input_data(span, kwargs) + + +def _set_embeddings_input_data( + span: "Span", + kwargs: "dict[str, Any]", + integration: "OpenAIIntegration", +): + messages = _get_input_messages(kwargs) + + if ( + messages is not None + and len(messages) > 0 + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles(messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False + ) + + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") + _commmon_set_input_data(span, kwargs) + + def _set_input_data( span: "Span", kwargs: "dict[str, Any]", @@ -454,16 +583,15 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any return f(*args, **kwargs) model = kwargs.get("model") - operation = "chat" span = sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, - name=f"{operation} {model}", + name=f"chat {model}", origin=OpenAIIntegration.origin, ) span.__enter__() - _set_input_data(span, kwargs, operation, integration) + _set_completions_api_input_data(span, kwargs, integration) response = yield f, args, kwargs @@ -546,14 +674,13 @@ def _new_embeddings_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A return f(*args, **kwargs) model = kwargs.get("model") - operation = "embeddings" with sentry_sdk.start_span( op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"{operation} {model}", + name=f"embeddings {model}", origin=OpenAIIntegration.origin, ) as span: - _set_input_data(span, kwargs, operation, integration) + _set_embeddings_input_data(span, kwargs, integration) response = yield f, args, kwargs @@ -634,16 +761,15 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An return f(*args, **kwargs) model = kwargs.get("model") - operation = "responses" span = sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, - name=f"{operation} {model}", + name=f"responses {model}", origin=OpenAIIntegration.origin, ) span.__enter__() - _set_input_data(span, kwargs, operation, integration) + _set_responses_api_input_data(span, kwargs, integration) response = yield f, args, kwargs From c6ebc0f353992afb6e37d77082c8e4804238b7e6 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 17:57:12 +0100 Subject: [PATCH 14/58] remove old func --- sentry_sdk/integrations/openai.py | 62 ------------------------------- 1 file changed, 62 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 55ec5e4085..a9bccec5ea 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -311,68 +311,6 @@ def _set_embeddings_input_data( _commmon_set_input_data(span, kwargs) -def _set_input_data( - span: "Span", - kwargs: "dict[str, Any]", - operation: str, - integration: "OpenAIIntegration", -) -> None: - # Input messages (the prompt or data sent to the model) - messages = kwargs.get("messages") - if messages is None: - messages = kwargs.get("input") - - if isinstance(messages, str): - messages = [messages] - - if ( - messages is not None - and len(messages) > 0 - and should_send_default_pii() - and integration.include_prompts - ): - normalized_messages = normalize_message_roles(messages) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) - if messages_data is not None: - # Use appropriate field based on operation type - if operation == "embeddings": - set_data_normalized( - span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False - ) - else: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) - - # Input attributes: Common - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) - - # Input attributes: Optional - kwargs_keys_to_attributes = { - "model": SPANDATA.GEN_AI_REQUEST_MODEL, - "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, - "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, - "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, - "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, - "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, - "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, - } - for key, attribute in kwargs_keys_to_attributes.items(): - value = kwargs.get(key) - - if value is not None and _is_given(value): - set_data_normalized(span, attribute, value) - - # Input attributes: Tools - tools = kwargs.get("tools") - if tools is not None and _is_given(tools) and len(tools) > 0: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) - ) - - def _set_output_data( span: "Span", response: "Any", From 8d9fa37482e14f41b55aeaca7ef3f0c5b638ccef Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:04:43 +0100 Subject: [PATCH 15/58] . --- sentry_sdk/integrations/openai.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index a9bccec5ea..9c0321d3cd 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -193,7 +193,9 @@ def _calculate_token_usage( ) -def _get_input_messages(kwargs: "dict[str, Any]") -> "Optional[list[Any] | list[str]]": +def _get_input_messages( + kwargs: "dict[str, Any]", +) -> "Optional[Iterable[Any] | list[str]]": # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: @@ -241,7 +243,7 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ): - messages: "Optional[ResponseInputParam | list[str]]" = _get_input_messages(kwargs) + messages: "Optional[ResponseInputParam | list[str]]" = _get_input_messages(kwargs) # type: ignore if ( messages is not None @@ -249,7 +251,7 @@ def _set_responses_api_input_data( and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) + normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: @@ -266,15 +268,17 @@ def _set_completions_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ): - messages: "Optional[ChatCompletionMessageParam]" = _get_input_messages(kwargs) + messages: "Optional[Iterable[ChatCompletionMessageParam]]" = _get_input_messages( + kwargs + ) if ( messages is not None - and len(messages) > 0 + and len(messages) > 0 # type: ignore and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) + normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: From ab41f1eee58d20e38110237e00aa6088caae0440 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:19:45 +0100 Subject: [PATCH 16/58] expand type --- sentry_sdk/integrations/openai.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 9c0321d3cd..c21b1f8cad 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,6 +9,7 @@ normalize_message_roles, truncate_and_annotate_messages, ) +from sentry_sdk.ai._openai_completions_api import _get_system_instructions from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -268,8 +269,8 @@ def _set_completions_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ): - messages: "Optional[Iterable[ChatCompletionMessageParam]]" = _get_input_messages( - kwargs + messages: "Optional[Iterable[ChatCompletionMessageParam] | list[str]]" = ( + _get_input_messages(kwargs) ) if ( From b3117ac9a0c95cb05a09f737f1b60a064cc836db Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:22:31 +0100 Subject: [PATCH 17/58] add ignores --- sentry_sdk/integrations/openai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c21b1f8cad..459d782763 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -300,11 +300,11 @@ def _set_embeddings_input_data( if ( messages is not None - and len(messages) > 0 + and len(messages) > 0 # type: ignore and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) + normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: From 847d4b253d37fc75a7029fd84d569c1b8c1f0332 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:23:56 +0100 Subject: [PATCH 18/58] revert unrelated change --- sentry_sdk/integrations/openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 459d782763..553d93d195 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,7 +9,6 @@ normalize_message_roles, truncate_and_annotate_messages, ) -from sentry_sdk.ai._openai_completions_api import _get_system_instructions from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii From 4f25a56b1cb514a493c4eb6a278b2ff66cefe8e3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:40:02 +0100 Subject: [PATCH 19/58] feat(openai): Set system instruction attribute --- sentry_sdk/ai/_openai_completions_api.py | 20 +++++++++++ sentry_sdk/consts.py | 6 ++++ sentry_sdk/integrations/openai.py | 42 +++++++++++++++++++++++- 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 sentry_sdk/ai/_openai_completions_api.py diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py new file mode 100644 index 0000000000..6697f285c6 --- /dev/null +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -0,0 +1,20 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openai.types.chat import ( + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ) + from typing import Iterable, Union + + +def _get_system_instructions( + messages: "Iterable[Union[ChatCompletionMessageParam, str]]", +) -> "list[ChatCompletionSystemMessageParam]": + system_messages = [] + + for message in messages: + if isinstance(message, dict) and message.get("role") == "system": + system_messages.append(message) + + return system_messages diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 93fca6ba3e..4b61a317fb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -542,6 +542,12 @@ class SPANDATA: Example: 2048 """ + GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + """ + The system instructions passed to the model. + Example: [{"type": "text", "text": "You are a helpful assistant."},{"type": "text", "text": "Be concise and clear."}] + """ + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ The messages passed to the model. The "content" can be a string or an array of objects. diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 553d93d195..6a269c40c7 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,6 +9,9 @@ normalize_message_roles, truncate_and_annotate_messages, ) +from sentry_sdk.ai._openai_completions_api import ( + _get_system_instructions as _get_system_instructions_completions, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -35,7 +38,7 @@ ) from sentry_sdk.tracing import Span - from openai.types.responses import ResponseInputParam + from openai.types.responses import ResponseInputParam, ResponseInputItemParam try: try: @@ -193,6 +196,25 @@ def _calculate_token_usage( ) +def _get_system_instructions_responses( + input_items: "Union[ResponseInputParam, list[str]]", +) -> "list[ResponseInputItemParam]": + if isinstance(input_items, str): + return [] + + system_messages = [] + + for item in input_items: + if ( + isinstance(item, dict) + and item.get("type") == "message" + and item.get("role") == "system" + ): + system_messages.append(item) + + return system_messages + + def _get_input_messages( kwargs: "dict[str, Any]", ) -> "Optional[Iterable[Any] | list[str]]": @@ -245,6 +267,15 @@ def _set_responses_api_input_data( ): messages: "Optional[ResponseInputParam | list[str]]" = _get_input_messages(kwargs) # type: ignore + if messages is not None: + system_instructions = _get_system_instructions_responses(messages) + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + system_instructions, + unpack=False, + ) + if ( messages is not None and len(messages) > 0 @@ -272,6 +303,15 @@ def _set_completions_api_input_data( _get_input_messages(kwargs) ) + if messages is not None: + system_instructions = _get_system_instructions_completions(messages) + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + system_instructions, + unpack=False, + ) + if ( messages is not None and len(messages) > 0 # type: ignore From 903bbbdb2c1a5dd466cd72a866ba988cbfc45235 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 18:55:00 +0100 Subject: [PATCH 20/58] use union --- sentry_sdk/integrations/openai.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 553d93d195..75218ff803 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -195,7 +195,7 @@ def _calculate_token_usage( def _get_input_messages( kwargs: "dict[str, Any]", -) -> "Optional[Iterable[Any] | list[str]]": +) -> "Optional[Union[Iterable[Any], list[str]]]": # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: @@ -243,7 +243,9 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ): - messages: "Optional[ResponseInputParam | list[str]]" = _get_input_messages(kwargs) # type: ignore + messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages( + kwargs + ) # type: ignore if ( messages is not None @@ -268,7 +270,7 @@ def _set_completions_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ): - messages: "Optional[Iterable[ChatCompletionMessageParam] | list[str]]" = ( + messages: "Optional[Union[Iterable[ChatCompletionMessageParam], list[str]]]" = ( _get_input_messages(kwargs) ) From f304bece93d306af1966c4e052b908cac0e67f90 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 19:00:54 +0100 Subject: [PATCH 21/58] mypy --- sentry_sdk/integrations/openai.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 75218ff803..a7af385d12 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -210,7 +210,7 @@ def _get_input_messages( def _commmon_set_input_data( span: "Span", kwargs: "dict[str, Any]", -): +) -> None: # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") @@ -242,10 +242,10 @@ def _set_responses_api_input_data( span: "Span", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", -): +) -> None: messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages( kwargs - ) # type: ignore + ) if ( messages is not None @@ -269,7 +269,7 @@ def _set_completions_api_input_data( span: "Span", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", -): +) -> None: messages: "Optional[Union[Iterable[ChatCompletionMessageParam], list[str]]]" = ( _get_input_messages(kwargs) ) @@ -296,7 +296,7 @@ def _set_embeddings_input_data( span: "Span", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", -): +) -> None: messages = _get_input_messages(kwargs) if ( From 9f5d8015f5ab0e222c784f1bee3ea05ef26b9f7e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 19:06:03 +0100 Subject: [PATCH 22/58] . --- sentry_sdk/ai/_openai_completions_api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py index 6697f285c6..8309c76c31 100644 --- a/sentry_sdk/ai/_openai_completions_api.py +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -5,12 +5,14 @@ ChatCompletionMessageParam, ChatCompletionSystemMessageParam, ) - from typing import Iterable, Union + from typing import Iterable, Union, TypeVar + + T = TypeVar("T") def _get_system_instructions( - messages: "Iterable[Union[ChatCompletionMessageParam, str]]", -) -> "list[ChatCompletionSystemMessageParam]": + messages: "Iterable[Union[T, str]]", +) -> "list[T]": system_messages = [] for message in messages: From d66ffe12aee0f979d22140b3e4c05d4782127e37 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 21 Jan 2026 19:20:08 +0100 Subject: [PATCH 23/58] use specific openai types --- sentry_sdk/ai/_openai_completions_api.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py index 8309c76c31..6697f285c6 100644 --- a/sentry_sdk/ai/_openai_completions_api.py +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -5,14 +5,12 @@ ChatCompletionMessageParam, ChatCompletionSystemMessageParam, ) - from typing import Iterable, Union, TypeVar - - T = TypeVar("T") + from typing import Iterable, Union def _get_system_instructions( - messages: "Iterable[Union[T, str]]", -) -> "list[T]": + messages: "Iterable[Union[ChatCompletionMessageParam, str]]", +) -> "list[ChatCompletionSystemMessageParam]": system_messages = [] for message in messages: From ac3ce00e26e6e86b5cec7ecb5170fad729f9a31d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 10:43:32 +0100 Subject: [PATCH 24/58] wip --- sentry_sdk/integrations/openai.py | 107 +++++++++++++++++------ tests/integrations/openai/test_openai.py | 73 +++++++++++++++- 2 files changed, 147 insertions(+), 33 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b747596a61..cace34ebf1 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -11,6 +11,7 @@ ) from sentry_sdk.ai._openai_completions_api import ( _get_system_instructions as _get_system_instructions_completions, + _is_system_instruction as _is_system_instruction_completions, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -196,23 +197,27 @@ def _calculate_token_usage( ) +def _is_system_instruction_responses(message: "ResponseInputItemParam"): + return ( + isinstance(message, dict) + and message.get("type") == "message" + and message.get("role") == "system" + ) + + def _get_system_instructions_responses( - input_items: "Union[ResponseInputParam, list[str]]", + messages: "Union[str, ResponseInputParam]", ) -> "list[ResponseInputItemParam]": - if isinstance(input_items, str): + if isinstance(messages, str): return [] - system_messages = [] + system_instructions = [] - for item in input_items: - if ( - isinstance(item, dict) - and item.get("type") == "message" - and item.get("role") == "system" - ): - system_messages.append(item) + for message in messages: + if _is_system_instruction_responses(message): + system_instructions.append(message) - return system_messages + return system_instructions def _get_input_messages( @@ -265,12 +270,19 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: - messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages( - kwargs - ) + messages: "Optional[Union[str, ResponseInputParam]]" = kwargs.get("input") + + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return - if messages is not None: - system_instructions = _get_system_instructions_responses(messages) + system_instructions = _get_system_instructions_responses(messages) + if ( + len(system_instructions) > 0 + and should_send_default_pii() + and integration.include_prompts + ): set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, @@ -279,12 +291,11 @@ def _set_responses_api_input_data( ) if ( - messages is not None - and len(messages) > 0 + isinstance(messages, str) and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) # type: ignore + normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: @@ -292,6 +303,23 @@ def _set_responses_api_input_data( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) + elif should_send_default_pii() and integration.include_prompts: + non_system_messages = [ + message + for message in messages + if not _is_system_instruction_responses(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") _commmon_set_input_data(span, kwargs) @@ -301,12 +329,21 @@ def _set_completions_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: - messages: "Optional[Union[Iterable[ChatCompletionMessageParam], list[str]]]" = ( - _get_input_messages(kwargs) + messages: "Optional[Union[str, Iterable[ChatCompletionMessageParam]]]" = kwargs.get( + "messages" ) - if messages is not None: - system_instructions = _get_system_instructions_completions(messages) + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + _commmon_set_input_data(span, kwargs) + return + + system_instructions = _get_system_instructions_completions(messages) + if ( + len(system_instructions) > 0 + and should_send_default_pii() + and integration.include_prompts + ): set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, @@ -315,21 +352,33 @@ def _set_completions_api_input_data( ) if ( - messages is not None - and len(messages) > 0 # type: ignore + isinstance(messages, str) and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) # type: ignore + normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) - - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") - _commmon_set_input_data(span, kwargs) + elif should_send_default_pii() and integration.include_prompts: + non_system_messages = [ + message + for message in messages + if not _is_system_instruction_completions(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) def _set_embeddings_input_data( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..c09ca61cb3 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -147,7 +147,11 @@ def test_nonstreaming_chat_completion( with start_transaction(name="openai tx"): response = ( client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) .choices[0] .message.content @@ -160,9 +164,17 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.chat" if send_default_pii and include_prompts: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "content": "You are a helpful assistant.", + "role": "system", + } + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -191,7 +203,11 @@ async def test_nonstreaming_chat_completion_async( with start_transaction(name="openai tx"): response = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) response = response.choices[0].message.content @@ -202,9 +218,17 @@ async def test_nonstreaming_chat_completion_async( assert span["op"] == "gen_ai.chat" if send_default_pii and include_prompts: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "content": "You are a helpful assistant.", + "role": "system", + } + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -283,7 +307,11 @@ def test_streaming_chat_completion( client.chat.completions._post = mock.Mock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -298,6 +326,7 @@ def test_streaming_chat_completion( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -377,7 +406,11 @@ async def test_streaming_chat_completion_async( client.chat.completions._post = AsyncMock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) response_string = "" @@ -394,6 +427,7 @@ async def test_streaming_chat_completion_async( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -1427,6 +1461,37 @@ async def test_streaming_responses_api_async( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.skipif( + OPENAI_VERSION <= (1, 1, 0), + reason="OpenAI versions <=1.1.0 do not support the tools parameter.", +) +@pytest.mark.parametrize( + "tools", + [[], None, NOT_GIVEN, omit], +) +def test_chat_completion_with_system_instruction(sentry_init, capture_events, tools): + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + tools=tools, + ) + + (event,) = events + span = event["spans"][0] + + assert "gen_ai.request.available_tools" not in span["data"] + + @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the tools parameter.", From ef9fe6fee5c70cf2091effcbd190def07fbcc985 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 10:44:10 +0100 Subject: [PATCH 25/58] . --- sentry_sdk/ai/_openai_completions_api.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py index 6697f285c6..d1c1c7d23b 100644 --- a/sentry_sdk/ai/_openai_completions_api.py +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -5,16 +5,20 @@ ChatCompletionMessageParam, ChatCompletionSystemMessageParam, ) - from typing import Iterable, Union + from typing import Iterable + + +def _is_system_instruction(message: "ChatCompletionMessageParam"): + return isinstance(message, dict) and message.get("role") == "system" def _get_system_instructions( - messages: "Iterable[Union[ChatCompletionMessageParam, str]]", + messages: "Iterable[ChatCompletionMessageParam]", ) -> "list[ChatCompletionSystemMessageParam]": - system_messages = [] + system_instructions = [] for message in messages: - if isinstance(message, dict) and message.get("role") == "system": - system_messages.append(message) + if _is_system_instruction(message): + system_instructions.append(message) - return system_messages + return system_instructions From ce84a29356a29c4313b83bf4717e0dbbb64c9b32 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 13:13:40 +0100 Subject: [PATCH 26/58] . --- sentry_sdk/_types.py | 4 + sentry_sdk/ai/_openai_completions_api.py | 26 +++ sentry_sdk/integrations/openai.py | 22 +- tests/integrations/openai/test_openai.py | 262 +++++++++++++++++++---- 4 files changed, 267 insertions(+), 47 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 7043bbc2ee..ecb8abcd10 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -359,3 +359,7 @@ class SDKInfo(TypedDict): ) HttpStatusCodeRange = Union[int, Container[int]] + + class TextPart(TypedDict): + type: Literal["text"] + content: str diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py index d1c1c7d23b..3bb4c82448 100644 --- a/sentry_sdk/ai/_openai_completions_api.py +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -7,6 +7,8 @@ ) from typing import Iterable + from sentry_sdk._types import TextPart + def _is_system_instruction(message: "ChatCompletionMessageParam"): return isinstance(message, dict) and message.get("role") == "system" @@ -22,3 +24,27 @@ def _get_system_instructions( system_instructions.append(message) return system_instructions + + +def _transform_system_instructions( + system_instructions: "list[ChatCompletionSystemMessageParam]", +) -> "list[TextPart]": + instruction_text_parts = [] + + for instruction in system_instructions: + if not isinstance(instruction, dict): + continue + + content = instruction.get("content") + + if isinstance(content, str): + instruction_text_parts.append({"type": "text", "content": content}) + + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + instruction_text_parts.append({"type": "text", "content": text}) + + return instruction_text_parts diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index cace34ebf1..1e587b3a3e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -12,6 +12,7 @@ from sentry_sdk.ai._openai_completions_api import ( _get_system_instructions as _get_system_instructions_completions, _is_system_instruction as _is_system_instruction_completions, + _transform_system_instructions, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -40,6 +41,7 @@ from sentry_sdk.tracing import Span from openai.types.responses import ResponseInputParam, ResponseInputItemParam + from openai import Omit try: try: @@ -277,16 +279,28 @@ def _set_responses_api_input_data( _commmon_set_input_data(span, kwargs) return + explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") system_instructions = _get_system_instructions_responses(messages) if ( - len(system_instructions) > 0 + (_is_given(explicit_instructions) or len(system_instructions) > 0) and should_send_default_pii() and integration.include_prompts ): + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. + instructions_text_parts = _transform_system_instructions(system_instructions) # type: ignore + if _is_given(explicit_instructions): + instructions_text_parts.append( + { + "type": "text", + "content": explicit_instructions, + } + ) + set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - system_instructions, + instructions_text_parts, unpack=False, ) @@ -310,7 +324,7 @@ def _set_responses_api_input_data( if not _is_system_instruction_responses(message) ] if len(non_system_messages) > 0: - normalized_messages = normalize_message_roles(messages) # type: ignore + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages( normalized_messages, span, scope @@ -347,7 +361,7 @@ def _set_completions_api_input_data( set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - system_instructions, + _transform_system_instructions(system_instructions), unpack=False, ) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c09ca61cb3..a9bfa40fef 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -166,8 +166,8 @@ def test_nonstreaming_chat_completion( if send_default_pii and include_prompts: assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { + "type": "text", "content": "You are a helpful assistant.", - "role": "system", } ] @@ -220,8 +220,8 @@ async def test_nonstreaming_chat_completion_async( if send_default_pii and include_prompts: assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { + "type": "text", "content": "You are a helpful assistant.", - "role": "system", } ] @@ -251,8 +251,38 @@ def tiktoken_encoding_if_installed(): "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, input, request ): sentry_init( integrations=[ @@ -308,10 +338,7 @@ def test_streaming_chat_completion( with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], + messages=input, ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -322,7 +349,27 @@ def test_streaming_chat_completion( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" + param_id = request.node.callspec.id if send_default_pii and include_prompts: + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: @@ -333,9 +380,14 @@ def test_streaming_chat_completion( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 1 - assert span["data"]["gen_ai.usage.total_tokens"] == 3 + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 1 + assert span["data"]["gen_ai.usage.total_tokens"] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -346,8 +398,38 @@ def test_streaming_chat_completion( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) async def test_streaming_chat_completion_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, input, request ): sentry_init( integrations=[ @@ -407,10 +489,7 @@ async def test_streaming_chat_completion_async( with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], + messages=input, ) response_string = "" @@ -423,7 +502,27 @@ async def test_streaming_chat_completion_async( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" + param_id = request.node.callspec.id if send_default_pii and include_prompts: + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: @@ -434,9 +533,15 @@ async def test_streaming_chat_completion_async( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 1 - assert span["data"]["gen_ai.usage.total_tokens"] == 3 + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 1 + assert span["data"]["gen_ai.usage.total_tokens"] == 3 + except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1068,12 +1173,46 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "thread.name": mock.ANY, } + assert "gen_ai.system_instructions" not in spans[0]["data"] assert "gen_ai.request.messages" not in spans[0]["data"] assert "gen_ai.response.text" not in spans[0]["data"] +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api(sentry_init, capture_events): +def test_ai_client_span_responses_api(sentry_init, capture_events, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1088,7 +1227,7 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): client.responses.create( model="gpt-4o", instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + input=input, ) (transaction,) = events @@ -1097,21 +1236,59 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.system": "openai", - "gen_ai.response.model": "response-model-id", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + + param_id = request.node.callspec.id + if param_id == "string": + assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", + "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', + "gen_ai.response.model": "response-model-id", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + elif param_id == "blocks": + assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.messages": '[{"type": "message", "role": "user", "content": "hello"}]', + "gen_ai.request.model": "gpt-4o", + "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', + "gen_ai.response.model": "response-model-id", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.messages": '[{"type": "message", "role": "user", "content": "hello"}]', + "gen_ai.request.model": "gpt-4o", + "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, {"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', + "gen_ai.response.model": "response-model-id", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -1183,6 +1360,7 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1230,6 +1408,7 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.response.model": "response-model-id", "gen_ai.response.streaming": True, "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1537,7 +1716,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ - {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello"}, {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" @@ -1557,17 +1735,15 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" + assert len(stored_messages) == 3 assert ( - stored_messages[2]["role"] == "assistant" + stored_messages[1]["role"] == "assistant" ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + assert stored_messages[2]["role"] == "assistant" # should stay "assistant" # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" + assert stored_messages[1]["content"] == "Hi there!" + assert stored_messages[2]["content"] == "How can I help?" # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] From dee993063e181fef4359036e3f3ea3bf73740fca Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 13:14:42 +0100 Subject: [PATCH 27/58] remove test --- tests/integrations/openai/test_openai.py | 31 ------------------------ 1 file changed, 31 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a9bfa40fef..49fc7baf1b 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1640,37 +1640,6 @@ async def test_streaming_responses_api_async( assert span["data"]["gen_ai.usage.total_tokens"] == 30 -@pytest.mark.skipif( - OPENAI_VERSION <= (1, 1, 0), - reason="OpenAI versions <=1.1.0 do not support the tools parameter.", -) -@pytest.mark.parametrize( - "tools", - [[], None, NOT_GIVEN, omit], -) -def test_chat_completion_with_system_instruction(sentry_init, capture_events, tools): - sentry_init( - integrations=[OpenAIIntegration()], - traces_sample_rate=1.0, - ) - events = capture_events() - - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=[{"role": "system", "content": "hello"}], - tools=tools, - ) - - (event,) = events - span = event["spans"][0] - - assert "gen_ai.request.available_tools" not in span["data"] - - @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the tools parameter.", From cb00ab3acd79431612b01f4edd1465b096ec8d75 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 13:18:52 +0100 Subject: [PATCH 28/58] . --- sentry_sdk/ai/_openai_completions_api.py | 4 ++-- sentry_sdk/integrations/openai.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py index 3bb4c82448..c77fdb82dc 100644 --- a/sentry_sdk/ai/_openai_completions_api.py +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -10,7 +10,7 @@ from sentry_sdk._types import TextPart -def _is_system_instruction(message: "ChatCompletionMessageParam"): +def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: return isinstance(message, dict) and message.get("role") == "system" @@ -29,7 +29,7 @@ def _get_system_instructions( def _transform_system_instructions( system_instructions: "list[ChatCompletionSystemMessageParam]", ) -> "list[TextPart]": - instruction_text_parts = [] + instruction_text_parts: "list[TextPart]" = [] for instruction in system_instructions: if not isinstance(instruction, dict): diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 1e587b3a3e..538db1a603 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -199,7 +199,7 @@ def _calculate_token_usage( ) -def _is_system_instruction_responses(message: "ResponseInputItemParam"): +def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: return ( isinstance(message, dict) and message.get("type") == "message" @@ -288,12 +288,12 @@ def _set_responses_api_input_data( ): # Deliberate use of function accepting completions API type because # of shared structure FOR THIS PURPOSE ONLY. - instructions_text_parts = _transform_system_instructions(system_instructions) # type: ignore + instructions_text_parts = _transform_system_instructions(system_instructions) if _is_given(explicit_instructions): instructions_text_parts.append( { "type": "text", - "content": explicit_instructions, + "content": explicit_instructions, # type: ignore } ) From 26b932bad88285d50a59c06a436d47ab2cb3d5c8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 13:40:58 +0100 Subject: [PATCH 29/58] . --- tests/integrations/openai/test_openai.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 49fc7baf1b..33d476bb53 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -257,25 +257,23 @@ def tiktoken_encoding_if_installed(): pytest.param( [ { - "type": "message", "role": "system", "content": "You are a helpful assistant.", }, - {"type": "message", "role": "user", "content": "hello"}, + {"role": "user", "content": "hello"}, ], id="blocks", ), pytest.param( [ { - "type": "message", "role": "system", "content": [ {"type": "text", "text": "You are a helpful assistant."}, {"type": "text", "text": "Be concise and clear."}, ], }, - {"type": "message", "role": "user", "content": "hello"}, + {"role": "user", "content": "hello"}, ], id="parts", ), @@ -404,25 +402,23 @@ def test_streaming_chat_completion( pytest.param( [ { - "type": "message", "role": "system", "content": "You are a helpful assistant.", }, - {"type": "message", "role": "user", "content": "hello"}, + {"role": "user", "content": "hello"}, ], id="blocks", ), pytest.param( [ { - "type": "message", "role": "system", "content": [ {"type": "text", "text": "You are a helpful assistant."}, {"type": "text", "text": "Be concise and clear."}, ], }, - {"type": "message", "role": "user", "content": "hello"}, + {"role": "user", "content": "hello"}, ], id="parts", ), From 04dc92c064feffdacf708f2b229a4a48852c26c0 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 14:40:10 +0100 Subject: [PATCH 30/58] . --- sentry_sdk/integrations/openai.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 538db1a603..2d6daf8822 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -282,7 +282,10 @@ def _set_responses_api_input_data( explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") system_instructions = _get_system_instructions_responses(messages) if ( - (_is_given(explicit_instructions) or len(system_instructions) > 0) + ( + (explicit_instructions is not None and _is_given(explicit_instructions)) + or len(system_instructions) > 0 + ) and should_send_default_pii() and integration.include_prompts ): @@ -394,6 +397,9 @@ def _set_completions_api_input_data( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + _commmon_set_input_data(span, kwargs) + def _set_embeddings_input_data( span: "Span", From c7263ea349822c20960d53058ca57392440ed007 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 15:45:59 +0100 Subject: [PATCH 31/58] edge case --- sentry_sdk/integrations/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 2d6daf8822..4520301df5 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -292,7 +292,7 @@ def _set_responses_api_input_data( # Deliberate use of function accepting completions API type because # of shared structure FOR THIS PURPOSE ONLY. instructions_text_parts = _transform_system_instructions(system_instructions) - if _is_given(explicit_instructions): + if explicit_instructions is not None and _is_given(explicit_instructions): instructions_text_parts.append( { "type": "text", From d947899bc3bab05dfce5bdecdabbae623930a548 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 17:36:07 +0100 Subject: [PATCH 32/58] full responses api tests --- sentry_sdk/integrations/openai.py | 10 +- tests/integrations/openai/test_openai.py | 310 ++++++++++++++++++----- 2 files changed, 256 insertions(+), 64 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 4520301df5..676c8c8612 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -39,6 +39,7 @@ Union, ) from sentry_sdk.tracing import Span + from sentry_sdk._types import TextPart from openai.types.responses import ResponseInputParam, ResponseInputItemParam from openai import Omit @@ -289,16 +290,17 @@ def _set_responses_api_input_data( and should_send_default_pii() and integration.include_prompts ): - # Deliberate use of function accepting completions API type because - # of shared structure FOR THIS PURPOSE ONLY. - instructions_text_parts = _transform_system_instructions(system_instructions) + instructions_text_parts: "list[TextPart]" = [] if explicit_instructions is not None and _is_given(explicit_instructions): instructions_text_parts.append( { "type": "text", - "content": explicit_instructions, # type: ignore + "content": explicit_instructions, } ) + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. + instructions_text_parts += _transform_system_instructions(system_instructions) set_data_normalized( span, diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 33d476bb53..35a0b04f96 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -9,8 +9,10 @@ NOT_GIVEN = None try: from openai import omit + from openai import Omit except ImportError: omit = None + Omit = None from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding @@ -47,6 +49,7 @@ from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize +from sentry_sdk.utils import safe_serialize from unittest import mock # python 3.3 and above @@ -1174,6 +1177,13 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): assert "gen_ai.response.text" not in spans[0]["data"] +@pytest.mark.parametrize( + "instructions", + ( + omit, + "You are a coding assistant that talks like a pirate.", + ), +) @pytest.mark.parametrize( "input", [ @@ -1208,7 +1218,9 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ], ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api(sentry_init, capture_events, input, request): +def test_ai_client_span_responses_api( + sentry_init, capture_events, instructions, input, request +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1222,7 +1234,7 @@ def test_ai_client_span_responses_api(sentry_init, capture_events, input, reques with start_transaction(name="openai tx"): client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", + instructions=instructions, input=input, ) @@ -1233,58 +1245,109 @@ def test_ai_client_span_responses_api(sentry_init, capture_events, input, reques assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.response.streaming": True, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + param_id = request.node.callspec.id - if param_id == "string": - assert spans[0]["data"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.system": "openai", - "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', - "gen_ai.response.model": "response-model-id", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - elif param_id == "blocks": - assert spans[0]["data"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '[{"type": "message", "role": "user", "content": "hello"}]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.system": "openai", - "gen_ai.system_instructions": '[{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', - "gen_ai.response.model": "response-model-id", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + if "string" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) else: - assert spans[0]["data"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '[{"type": "message", "role": "user", "content": "hello"}]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.system": "openai", - "gen_ai.system_instructions": '[{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, {"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', - "gen_ai.response.model": "response-model-id", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -1369,9 +1432,49 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): @pytest.mark.asyncio +@pytest.mark.parametrize( + "instructions", + ( + omit, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( - sentry_init, capture_events + sentry_init, capture_events, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -1386,8 +1489,8 @@ async def test_ai_client_span_streaming_responses_async_api( with start_transaction(name="openai tx"): await client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, stream=True, ) @@ -1397,24 +1500,111 @@ async def test_ai_client_span_streaming_responses_async_api( assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", "gen_ai.response.streaming": True, "gen_ai.system": "openai", - "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', + "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } + param_id = request.node.callspec.id + if "string" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") From 8cbeac119f9618475de35b0b04e78ffa1d4bd720 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 17:41:22 +0100 Subject: [PATCH 33/58] remove sentry_sdk/ai/_openai_completions_api.py --- sentry_sdk/ai/_openai_completions_api.py | 50 ---------------------- sentry_sdk/integrations/openai.py | 53 ++++++++++++++++++++---- 2 files changed, 46 insertions(+), 57 deletions(-) delete mode 100644 sentry_sdk/ai/_openai_completions_api.py diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py deleted file mode 100644 index c77fdb82dc..0000000000 --- a/sentry_sdk/ai/_openai_completions_api.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from openai.types.chat import ( - ChatCompletionMessageParam, - ChatCompletionSystemMessageParam, - ) - from typing import Iterable - - from sentry_sdk._types import TextPart - - -def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: - return isinstance(message, dict) and message.get("role") == "system" - - -def _get_system_instructions( - messages: "Iterable[ChatCompletionMessageParam]", -) -> "list[ChatCompletionSystemMessageParam]": - system_instructions = [] - - for message in messages: - if _is_system_instruction(message): - system_instructions.append(message) - - return system_instructions - - -def _transform_system_instructions( - system_instructions: "list[ChatCompletionSystemMessageParam]", -) -> "list[TextPart]": - instruction_text_parts: "list[TextPart]" = [] - - for instruction in system_instructions: - if not isinstance(instruction, dict): - continue - - content = instruction.get("content") - - if isinstance(content, str): - instruction_text_parts.append({"type": "text", "content": content}) - - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - text = part.get("text", "") - if text: - instruction_text_parts.append({"type": "text", "content": text}) - - return instruction_text_parts diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 676c8c8612..7a5d449e23 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,11 +9,6 @@ normalize_message_roles, truncate_and_annotate_messages, ) -from sentry_sdk.ai._openai_completions_api import ( - _get_system_instructions as _get_system_instructions_completions, - _is_system_instruction as _is_system_instruction_completions, - _transform_system_instructions, -) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -30,13 +25,13 @@ if TYPE_CHECKING: from typing import ( Any, - Iterable, List, Optional, Callable, AsyncIterator, Iterator, Union, + Iterable, ) from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart @@ -59,7 +54,11 @@ from openai.resources import Embeddings, AsyncEmbeddings if TYPE_CHECKING: - from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk + from openai.types.chat import ( + ChatCompletionMessageParam, + ChatCompletionChunk, + ChatCompletionSystemMessageParam, + ) except ImportError: raise DidNotEnable("OpenAI not installed") @@ -200,6 +199,22 @@ def _calculate_token_usage( ) +def _is_system_instruction_completions(message: "ChatCompletionMessageParam") -> bool: + return isinstance(message, dict) and message.get("role") == "system" + + +def _get_system_instructions_completions( + messages: "Iterable[ChatCompletionMessageParam]", +) -> "list[ChatCompletionSystemMessageParam]": + system_instructions = [] + + for message in messages: + if _is_system_instruction_completions(message): + system_instructions.append(message) + + return system_instructions + + def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: return ( isinstance(message, dict) @@ -223,6 +238,30 @@ def _get_system_instructions_responses( return system_instructions +def _transform_system_instructions( + system_instructions: "list[ChatCompletionSystemMessageParam]", +) -> "list[TextPart]": + instruction_text_parts: "list[TextPart]" = [] + + for instruction in system_instructions: + if not isinstance(instruction, dict): + continue + + content = instruction.get("content") + + if isinstance(content, str): + instruction_text_parts.append({"type": "text", "content": content}) + + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + instruction_text_parts.append({"type": "text", "content": text}) + + return instruction_text_parts + + def _get_input_messages( kwargs: "dict[str, Any]", ) -> "Optional[Union[Iterable[Any], list[str]]]": From bcebcc84857503fd0365dc44c0e282be2a2ab7ab Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 22 Jan 2026 17:44:28 +0100 Subject: [PATCH 34/58] fix test --- tests/integrations/openai/test_openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 35a0b04f96..2deb1f6d5c 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1247,7 +1247,6 @@ def test_ai_client_span_responses_api( expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.response.streaming": True, "gen_ai.system": "openai", "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, From 4aed172c4d7ddaaa461300a43000d5d1cb729fcc Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 13:32:26 +0100 Subject: [PATCH 35/58] handle embeddings separately --- sentry_sdk/ai/utils.py | 71 ++++++++++++++++++++++ sentry_sdk/integrations/litellm.py | 5 +- sentry_sdk/integrations/openai.py | 5 +- tests/integrations/litellm/test_litellm.py | 6 +- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 51a75b1706..57f08a4382 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -15,6 +15,7 @@ import sentry_sdk from sentry_sdk.utils import logger +MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB # Maximum characters when only a single message is left after bytes truncation MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000 @@ -549,6 +550,22 @@ def _truncate_single_message_content_if_present( return message +def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int: + """ + Find the index of the first message that would exceed the max bytes limit. + Compute the individual message sizes, and return the index of the first message from the back + of the list that would exceed the max bytes limit. + """ + running_sum = 0 + for idx in range(len(messages) - 1, -1, -1): + size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8")) + running_sum += size + if running_sum > max_bytes: + return idx + 1 + + return 0 + + def redact_blob_message_parts( messages: "List[Dict[str, Any]]", ) -> "List[Dict[str, Any]]": @@ -628,6 +645,42 @@ def redact_blob_message_parts( return messages_copy +def truncate_messages_by_size( + messages: "List[Dict[str, Any]]", + max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, + max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, +) -> "Tuple[List[Dict[str, Any]], int]": + """ + Returns a truncated messages list, consisting of + - the last message, with its content truncated to `max_single_message_chars` characters, + if the last message's size exceeds `max_bytes` bytes; otherwise, + - the maximum number of messages, starting from the end of the `messages` list, whose total + serialized size does not exceed `max_bytes` bytes. + + In the single message case, the serialized message size may exceed `max_bytes`, because + truncation is based only on character count in that case. + """ + serialized_json = json.dumps(messages, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size <= max_bytes: + return messages, 0 + + truncation_index = _find_truncation_index(messages, max_bytes) + if truncation_index < len(messages): + truncated_messages = messages[truncation_index:] + else: + truncation_index = len(messages) - 1 + truncated_messages = messages[-1:] + + if len(truncated_messages) == 1: + truncated_messages[0] = _truncate_single_message_content_if_present( + deepcopy(truncated_messages[0]), max_chars=max_single_message_chars + ) + + return truncated_messages, truncation_index + + def truncate_and_annotate_messages( messages: "Optional[List[Dict[str, Any]]]", span: "Any", @@ -646,3 +699,21 @@ def truncate_and_annotate_messages( scope._gen_ai_original_message_count[span.span_id] = len(messages) return [truncated_message] + + +def truncate_and_annotate_embedding_inputs( + messages: "Optional[List[Dict[str, Any]]]", + span: "Any", + scope: "Any", + max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, +) -> "Optional[List[Dict[str, Any]]]": + if not messages: + return None + + messages = redact_blob_message_parts(messages) + + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) + if removed_count > 0: + scope._gen_ai_original_message_count[span.span_id] = len(messages) + + return truncated_messages diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 5ec079367e..28bcc34d3e 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -9,6 +9,7 @@ set_data_normalized, truncate_and_annotate_messages, transform_openai_content_part, + truncate_and_annotate_embedding_inputs, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -118,7 +119,9 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: if isinstance(embedding_input, list) else [embedding_input] ) - messages_data = truncate_and_annotate_messages(input_list, span, scope) + messages_data = truncate_and_annotate_embedding_inputs( + input_list, span, scope + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 7a5d449e23..df3addbdf4 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -8,6 +8,7 @@ set_data_normalized, normalize_message_roles, truncate_and_annotate_messages, + truncate_and_annotate_embedding_inputs, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -457,7 +458,9 @@ def _set_embeddings_input_data( ): normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 0c2b349cff..ef129c6cfd 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -344,7 +344,11 @@ def test_embeddings_create_with_list_input( assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Third text"] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): From 085b4966f980b9ef731007d3ed7c55fa42d768e1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 15:14:09 +0100 Subject: [PATCH 36/58] feat(openai-agents): Set system instruction attribute on gen_ai.chat spans --- sentry_sdk/ai/_opanai_completions_api.py | 51 +++++ sentry_sdk/ai/_openai_responses_api.py | 29 +++ sentry_sdk/integrations/openai.py | 74 +------- .../integrations/openai_agents/utils.py | 44 ++++- .../openai_agents/test_openai_agents.py | 174 +++++++++++++++--- 5 files changed, 274 insertions(+), 98 deletions(-) create mode 100644 sentry_sdk/ai/_opanai_completions_api.py create mode 100644 sentry_sdk/ai/_openai_responses_api.py diff --git a/sentry_sdk/ai/_opanai_completions_api.py b/sentry_sdk/ai/_opanai_completions_api.py new file mode 100644 index 0000000000..7bf16a53ee --- /dev/null +++ b/sentry_sdk/ai/_opanai_completions_api.py @@ -0,0 +1,51 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Iterable + + from sentry_sdk._types import TextPart + + from openai.types.chat import ( + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ) + + +def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: + return isinstance(message, dict) and message.get("role") == "system" + + +def _get_system_instructions( + messages: "Iterable[ChatCompletionMessageParam]", +) -> "list[ChatCompletionSystemMessageParam]": + system_instructions = [] + + for message in messages: + if _is_system_instruction(message): + system_instructions.append(message) + + return system_instructions + + +def _transform_system_instructions( + system_instructions: "list[ChatCompletionSystemMessageParam]", +) -> "list[TextPart]": + instruction_text_parts: "list[TextPart]" = [] + + for instruction in system_instructions: + if not isinstance(instruction, dict): + continue + + content = instruction.get("content") + + if isinstance(content, str): + instruction_text_parts.append({"type": "text", "content": content}) + + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + instruction_text_parts.append({"type": "text", "content": text}) + + return instruction_text_parts diff --git a/sentry_sdk/ai/_openai_responses_api.py b/sentry_sdk/ai/_openai_responses_api.py new file mode 100644 index 0000000000..d766ac9869 --- /dev/null +++ b/sentry_sdk/ai/_openai_responses_api.py @@ -0,0 +1,29 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Union + + from openai.types.responses import ResponseInputParam, ResponseInputItemParam + + +def _is_system_instruction(message: "ResponseInputItemParam") -> bool: + return ( + isinstance(message, dict) + and message.get("type") == "message" + and message.get("role") == "system" + ) + + +def _get_system_instructions( + messages: "Union[str, ResponseInputParam]", +) -> "list[ResponseInputItemParam]": + if isinstance(messages, str): + return [] + + system_instructions = [] + + for message in messages: + if _is_system_instruction(message): + system_instructions.append(message) + + return system_instructions diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 7a5d449e23..215655ee39 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,6 +9,15 @@ normalize_message_roles, truncate_and_annotate_messages, ) +from sentry_sdk.ai._opanai_completions_api import ( + _is_system_instruction as _is_system_instruction_completions, + _get_system_instructions as _get_system_instructions_completions, + _transform_system_instructions, +) +from sentry_sdk.ai._openai_responses_api import ( + _is_system_instruction as _is_system_instruction_responses, + _get_system_instructions as _get_system_instructions_responses, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -36,7 +45,7 @@ from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart - from openai.types.responses import ResponseInputParam, ResponseInputItemParam + from openai.types.responses import ResponseInputParam from openai import Omit try: @@ -199,69 +208,6 @@ def _calculate_token_usage( ) -def _is_system_instruction_completions(message: "ChatCompletionMessageParam") -> bool: - return isinstance(message, dict) and message.get("role") == "system" - - -def _get_system_instructions_completions( - messages: "Iterable[ChatCompletionMessageParam]", -) -> "list[ChatCompletionSystemMessageParam]": - system_instructions = [] - - for message in messages: - if _is_system_instruction_completions(message): - system_instructions.append(message) - - return system_instructions - - -def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: - return ( - isinstance(message, dict) - and message.get("type") == "message" - and message.get("role") == "system" - ) - - -def _get_system_instructions_responses( - messages: "Union[str, ResponseInputParam]", -) -> "list[ResponseInputItemParam]": - if isinstance(messages, str): - return [] - - system_instructions = [] - - for message in messages: - if _is_system_instruction_responses(message): - system_instructions.append(message) - - return system_instructions - - -def _transform_system_instructions( - system_instructions: "list[ChatCompletionSystemMessageParam]", -) -> "list[TextPart]": - instruction_text_parts: "list[TextPart]" = [] - - for instruction in system_instructions: - if not isinstance(instruction, dict): - continue - - content = instruction.get("content") - - if isinstance(content, str): - instruction_text_parts.append({"type": "text", "content": content}) - - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - text = part.get("text", "") - if text: - instruction_text_parts.append({"type": "text", "content": text}) - - return instruction_text_parts - - def _get_input_messages( kwargs: "dict[str, Any]", ) -> "Optional[Union[Iterable[Any], list[str]]]": diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index a24d0e909d..e9494fd5da 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -11,14 +11,20 @@ from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing_utils import set_span_errored from sentry_sdk.utils import event_from_exception, safe_serialize +from sentry_sdk.ai._opanai_completions_api import _transform_system_instructions +from sentry_sdk.ai._openai_responses_api import ( + _is_system_instruction, + _get_system_instructions, +) from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Any - from agents import Usage + from agents import Usage, TResponseInputItem from sentry_sdk.tracing import Span + from sentry_sdk._types import TextPart try: import agents @@ -115,16 +121,36 @@ def _set_input_data( return request_messages = [] - system_instructions = get_response_kwargs.get("system_instructions") - if system_instructions: - request_messages.append( - { - "role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM, - "content": [{"type": "text", "text": system_instructions}], - } + messages: "str | list[TResponseInputItem]" = get_response_kwargs.get("input", []) + + explicit_instructions = get_response_kwargs.get("system_instructions") + system_instructions = _get_system_instructions(messages) + + if system_instructions is not None or len(system_instructions) > 0: + instructions_text_parts: "list[TextPart]" = [] + if explicit_instructions is not None: + instructions_text_parts.append( + { + "type": "text", + "content": explicit_instructions, + } + ) + + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. + instructions_text_parts += _transform_system_instructions(system_instructions) + + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + instructions_text_parts, + unpack=False, ) - for message in get_response_kwargs.get("input", []): + non_system_messages = [ + message for message in messages if not _is_system_instruction(message) + ] + for message in non_system_messages: if "role" in message: normalized_role = normalize_message_role(message.get("role")) content = message.get("content") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index a3ae50d5f1..f09a52bbea 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -498,7 +498,43 @@ async def test_max_turns_before_handoff_span(sentry_init, capture_events, mock_u @pytest.mark.asyncio -async def test_tool_execution_span(sentry_init, capture_events, test_agent): +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "Please use the simple test tool", + id="string", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are very powerful assistant, but don't know current events", + }, + {"role": "user", "content": "Please use the simple test tool"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "Please use the simple test tool"}, + ], + id="parts", + ), + ], +) +async def test_tool_execution_span( + sentry_init, capture_events, test_agent, input, request +): """ Test tool execution span creation. """ @@ -569,7 +605,7 @@ def simple_test_tool(message: str) -> str: await agents.Runner.run( agent_with_tool, - "Please use the simple test tool", + input, run_config=test_run_config, ) @@ -625,20 +661,39 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + + param_id = request.node.callspec.id + if "string" in param_id: + assert ai_client_span1["data"]["gen_ai.system_instructions"] == safe_serialize( + [{"type": "text", "content": "You are a helpful test assistant."}] + ) + elif "blocks" in param_id: + assert ai_client_span1["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + }, + ] + ) + else: + assert ai_client_span1["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, { "role": "user", "content": [ {"type": "text", "text": "Please use the simple test tool"} ], - }, + } ] ) assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" @@ -696,14 +751,31 @@ def simple_test_tool(message: str) -> str: == available_tools ) assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + if "string" in param_id: + assert ai_client_span2["data"]["gen_ai.system_instructions"] == safe_serialize( + [{"type": "text", "content": "You are a helpful test assistant."}] + ) + elif "blocks" in param_id: + assert ai_client_span1["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + }, + ] + ) + else: + assert ai_client_span1["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, { "role": "user", "content": [ @@ -950,7 +1022,43 @@ async def test_error_handling(sentry_init, capture_events, test_agent): @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, capture_events, test_agent): +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "Test input", + id="string", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are very powerful assistant, but don't know current events", + }, + {"role": "user", "content": "Test input"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "Test input"}, + ], + id="parts", + ), + ], +) +async def test_error_captures_input_data( + sentry_init, capture_events, test_agent, input, request +): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -970,9 +1078,7 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent events = capture_events() with pytest.raises(Exception, match="API Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + await agents.Runner.run(test_agent, input, run_config=test_run_config) ( error_event, @@ -989,15 +1095,33 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent assert ai_client_span["status"] == "internal_error" assert ai_client_span["tags"]["status"] == "internal_error" + param_id = request.node.callspec.id + if "string" in param_id: + assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + [{"type": "text", "content": "You are a helpful test assistant."}] + ) + elif "blocks" in param_id: + assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + }, + ] + ) + else: + assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful test assistant."}, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + assert "gen_ai.request.messages" in ai_client_span["data"] request_messages = safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) From 48c7fbeee331266c5a97faf26dc7e5b0f445aa93 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 15:18:18 +0100 Subject: [PATCH 37/58] add type ignores --- sentry_sdk/integrations/openai_agents/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index e9494fd5da..6048a7d1d9 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -152,8 +152,8 @@ def _set_input_data( ] for message in non_system_messages: if "role" in message: - normalized_role = normalize_message_role(message.get("role")) - content = message.get("content") + normalized_role = normalize_message_role(message.get("role")) # type: ignore + content = message.get("content") # type: ignore request_messages.append( { "role": normalized_role, @@ -165,14 +165,14 @@ def _set_input_data( } ) else: - if message.get("type") == "function_call": + if message.get("type") == "function_call": # type: ignore request_messages.append( { "role": GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT, "content": [message], } ) - elif message.get("type") == "function_call_output": + elif message.get("type") == "function_call_output": # type: ignore request_messages.append( { "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, From 3f5ad1152d9c1d94a30b7384dad216e4ab645bbd Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 15:34:21 +0100 Subject: [PATCH 38/58] more defensive checks in case input is not iterable --- sentry_sdk/integrations/openai.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 7a5d449e23..f9a4dcef38 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,5 +1,6 @@ import sys from functools import wraps +from collections.abc import Iterable import sentry_sdk from sentry_sdk import consts @@ -31,7 +32,6 @@ AsyncIterator, Iterator, Union, - Iterable, ) from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart @@ -205,14 +205,13 @@ def _is_system_instruction_completions(message: "ChatCompletionMessageParam") -> def _get_system_instructions_completions( messages: "Iterable[ChatCompletionMessageParam]", -) -> "list[ChatCompletionSystemMessageParam]": - system_instructions = [] - - for message in messages: - if _is_system_instruction_completions(message): - system_instructions.append(message) +) -> "list[ChatCompletionMessageParam]": + if not isinstance(messages, Iterable): + return [] - return system_instructions + return [ + message for message in messages if _is_system_instruction_completions(message) + ] def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: @@ -226,20 +225,16 @@ def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: def _get_system_instructions_responses( messages: "Union[str, ResponseInputParam]", ) -> "list[ResponseInputItemParam]": - if isinstance(messages, str): + if not isinstance(messages, list): return [] - system_instructions = [] - - for message in messages: - if _is_system_instruction_responses(message): - system_instructions.append(message) - - return system_instructions + return [ + message for message in messages if _is_system_instruction_responses(message) + ] def _transform_system_instructions( - system_instructions: "list[ChatCompletionSystemMessageParam]", + system_instructions: "list[ChatCompletionMessageParam]", ) -> "list[TextPart]": instruction_text_parts: "list[TextPart]" = [] From fc9f1faebf2c6b00eaf3b035c49c23f14abff81f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 15:39:18 +0100 Subject: [PATCH 39/58] pick up changes to extraction functions --- sentry_sdk/ai/_opanai_completions_api.py | 13 +++++-------- sentry_sdk/ai/_openai_responses_api.py | 12 +++--------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/sentry_sdk/ai/_opanai_completions_api.py b/sentry_sdk/ai/_opanai_completions_api.py index 7bf16a53ee..6e8c33378b 100644 --- a/sentry_sdk/ai/_opanai_completions_api.py +++ b/sentry_sdk/ai/_opanai_completions_api.py @@ -15,16 +15,13 @@ def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: return isinstance(message, dict) and message.get("role") == "system" -def _get_system_instructions( +def _get_system_instructions_completions( messages: "Iterable[ChatCompletionMessageParam]", -) -> "list[ChatCompletionSystemMessageParam]": - system_instructions = [] +) -> "list[ChatCompletionMessageParam]": + if not isinstance(messages, Iterable): + return [] - for message in messages: - if _is_system_instruction(message): - system_instructions.append(message) - - return system_instructions + return [message for message in messages if _is_system_instruction(message)] def _transform_system_instructions( diff --git a/sentry_sdk/ai/_openai_responses_api.py b/sentry_sdk/ai/_openai_responses_api.py index d766ac9869..2fb35cda77 100644 --- a/sentry_sdk/ai/_openai_responses_api.py +++ b/sentry_sdk/ai/_openai_responses_api.py @@ -14,16 +14,10 @@ def _is_system_instruction(message: "ResponseInputItemParam") -> bool: ) -def _get_system_instructions( +def _get_system_instructions_responses( messages: "Union[str, ResponseInputParam]", ) -> "list[ResponseInputItemParam]": - if isinstance(messages, str): + if not isinstance(messages, list): return [] - system_instructions = [] - - for message in messages: - if _is_system_instruction(message): - system_instructions.append(message) - - return system_instructions + return [message for message in messages if _is_system_instruction(message)] From bcdd87c86cd432157bf4bbda0fafedf2ac56af31 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 15:41:48 +0100 Subject: [PATCH 40/58] fix func name --- sentry_sdk/ai/_opanai_completions_api.py | 2 +- sentry_sdk/ai/_openai_responses_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/_opanai_completions_api.py b/sentry_sdk/ai/_opanai_completions_api.py index 6e8c33378b..fc60f8bf00 100644 --- a/sentry_sdk/ai/_opanai_completions_api.py +++ b/sentry_sdk/ai/_opanai_completions_api.py @@ -15,7 +15,7 @@ def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: return isinstance(message, dict) and message.get("role") == "system" -def _get_system_instructions_completions( +def _get_system_instructions( messages: "Iterable[ChatCompletionMessageParam]", ) -> "list[ChatCompletionMessageParam]": if not isinstance(messages, Iterable): diff --git a/sentry_sdk/ai/_openai_responses_api.py b/sentry_sdk/ai/_openai_responses_api.py index 2fb35cda77..b0cd8f768f 100644 --- a/sentry_sdk/ai/_openai_responses_api.py +++ b/sentry_sdk/ai/_openai_responses_api.py @@ -14,7 +14,7 @@ def _is_system_instruction(message: "ResponseInputItemParam") -> bool: ) -def _get_system_instructions_responses( +def _get_system_instructions( messages: "Union[str, ResponseInputParam]", ) -> "list[ResponseInputItemParam]": if not isinstance(messages, list): From 68b853fff94d9f2f4f3801cebf74312a1af51299 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 16:00:38 +0100 Subject: [PATCH 41/58] fix Iterable import --- sentry_sdk/ai/_opanai_completions_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/_opanai_completions_api.py b/sentry_sdk/ai/_opanai_completions_api.py index fc60f8bf00..a0f6e16a40 100644 --- a/sentry_sdk/ai/_opanai_completions_api.py +++ b/sentry_sdk/ai/_opanai_completions_api.py @@ -1,8 +1,8 @@ +from collections.abc import Iterable + from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Iterable - from sentry_sdk._types import TextPart from openai.types.chat import ( From 5ee52745a154def9a3a07ea51d68be45cdb60074 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 23 Jan 2026 16:07:18 +0100 Subject: [PATCH 42/58] remove runtime import --- sentry_sdk/integrations/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index dd2a6078bd..215655ee39 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,6 +1,5 @@ import sys from functools import wraps -from collections.abc import Iterable import sentry_sdk from sentry_sdk import consts @@ -41,6 +40,7 @@ AsyncIterator, Iterator, Union, + Iterable, ) from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart From 1f0f98dc17a8671b3c2cd7bcb6309e7fd20ba470 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 08:49:05 +0100 Subject: [PATCH 43/58] feat(ai): Add original input length meta attribute --- sentry_sdk/ai/utils.py | 3 + .../integrations/anthropic/test_anthropic.py | 54 ++++++++ .../google_genai/test_google_genai.py | 120 +++++++++++++++++- .../integrations/langchain/test_langchain.py | 2 + .../integrations/langgraph/test_langgraph.py | 2 + tests/integrations/litellm/test_litellm.py | 2 + 6 files changed, 180 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 57f08a4382..8e98b546e4 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -14,6 +14,7 @@ import sentry_sdk from sentry_sdk.utils import logger +from sentry_sdk.consts import SPANDATA MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB # Maximum characters when only a single message is left after bytes truncation @@ -698,6 +699,8 @@ def truncate_and_annotate_messages( if len(messages) > 1: scope._gen_ai_original_message_count[span.span_id] = len(messages) + span.set_data(SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH, len(messages)) + return [truncated_message] diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 61ba913e60..84d773e129 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1009,6 +1009,60 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 + assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +@pytest.mark.asyncio +async def test_anthropic_message_truncation_async(sentry_init, capture_events): + """Test that large messages are truncated properly in Anthropic integration.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncAnthropic(api_key="z") + client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + messages = [ + {"role": "user", "content": "small message 1"}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": "small message 4"}, + {"role": "user", "content": "small message 5"}, + ] + + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index c643537a05..afaa284047 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -978,10 +978,123 @@ def test_google_genai_message_truncation( ) (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + _, chat_span = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" + assert small_content in parsed_messages[0]["content"] + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 2 + assert ( + event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2 + ) + + +def test_google_genai_message_truncation_stream( + sentry_init, capture_events, mock_genai_client +): + """Test that large messages are truncated properly in Google GenAI integration.""" + sentry_init( + integrations=[GoogleGenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + small_content = "This is a small user message" + + # Create streaming chunks - simulating a multi-chunk response + # Chunk 1: First part of text with partial usage metadata + chunk1_json = { + "candidates": [ + { + "content": { + "role": "model", + "parts": [{"text": "Hello! "}], + }, + # No finishReason in intermediate chunks + } + ], + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 2, + "totalTokenCount": 12, # Not set in intermediate chunks + }, + "responseId": "response-id-stream-123", + "modelVersion": "gemini-1.5-flash", + } + + # Chunk 2: Second part of text with more usage metadata + chunk2_json = { + "candidates": [ + { + "content": { + "role": "model", + "parts": [{"text": "How can I "}], + }, + } + ], + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 3, + "totalTokenCount": 13, + }, + } + + # Chunk 3: Final part with finish reason and complete usage metadata + chunk3_json = { + "candidates": [ + { + "content": { + "role": "model", + "parts": [{"text": "help you today?"}], + }, + "finishReason": "STOP", + } + ], + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 7, + "totalTokenCount": 25, + "cachedContentTokenCount": 5, + "thoughtsTokenCount": 3, + }, + } + + # Create streaming mock responses + stream_chunks = [chunk1_json, chunk2_json, chunk3_json] + mock_stream = create_mock_streaming_responses(stream_chunks) + + with mock.patch.object( + mock_genai_client._api_client, "request_streamed", return_value=mock_stream + ): + with start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", + contents=[large_content, small_content], + config=config, + ) + + # Consume the stream (this is what users do with the integration wrapper) + list(stream) + + (event,) = events + _, chat_span = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -990,6 +1103,7 @@ def test_google_genai_message_truncation( assert parsed_messages[0]["role"] == "user" assert small_content in parsed_messages[0]["content"] + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 2 assert ( event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2 ) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 8a8d646113..58cc16cdd7 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1070,6 +1070,8 @@ def test_langchain_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + assert llm_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 2a385d8a78..9ccd84309f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -1384,4 +1384,6 @@ def original_invoke(self, *args, **kwargs): assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + assert invoke_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index ef129c6cfd..06772342ab 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -754,6 +754,8 @@ def test_litellm_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From a8840d51858adc70edf60d462c927a4f0db905ec Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 08:55:43 +0100 Subject: [PATCH 44/58] more early returns --- sentry_sdk/integrations/openai.py | 159 +-- tests/integrations/openai/test_openai.py | 1496 ++++++++++++++++------ 2 files changed, 1200 insertions(+), 455 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index f9a4dcef38..b8f94be995 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,6 +1,5 @@ import sys from functools import wraps -from collections.abc import Iterable import sentry_sdk from sentry_sdk import consts @@ -9,6 +8,7 @@ set_data_normalized, normalize_message_roles, truncate_and_annotate_messages, + truncate_and_annotate_embedding_inputs, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -32,6 +32,7 @@ AsyncIterator, Iterator, Union, + Iterable, ) from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart @@ -215,11 +216,10 @@ def _get_system_instructions_completions( def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: - return ( - isinstance(message, dict) - and message.get("type") == "message" - and message.get("role") == "system" - ) + if not isinstance(message, dict) or not message.get("role") == "system": + return False + + return "type" not in message or message["type"] == "message" def _get_system_instructions_responses( @@ -234,7 +234,7 @@ def _get_system_instructions_responses( def _transform_system_instructions( - system_instructions: "list[ChatCompletionMessageParam]", + system_instructions: "list[ChatCompletionSystemMessageParam]", ) -> "list[TextPart]": instruction_text_parts: "list[TextPart]" = [] @@ -307,35 +307,53 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: + explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") messages: "Optional[Union[str, ResponseInputParam]]" = kwargs.get("input") - if messages is None: + if not should_send_default_pii() or not integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") _commmon_set_input_data(span, kwargs) return - explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") - system_instructions = _get_system_instructions_responses(messages) if ( - ( - (explicit_instructions is not None and _is_given(explicit_instructions)) - or len(system_instructions) > 0 - ) - and should_send_default_pii() - and integration.include_prompts + messages is None + and explicit_instructions is not None + and _is_given(explicit_instructions) ): - instructions_text_parts: "list[TextPart]" = [] - if explicit_instructions is not None and _is_given(explicit_instructions): - instructions_text_parts.append( - { - "type": "text", - "content": explicit_instructions, - } - ) - # Deliberate use of function accepting completions API type because - # of shared structure FOR THIS PURPOSE ONLY. - instructions_text_parts += _transform_system_instructions(system_instructions) + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + { + "type": "text", + "content": explicit_instructions, + }, + unpack=False, + ) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + instructions_text_parts: "list[TextPart]" = [] + if explicit_instructions is not None and _is_given(explicit_instructions): + instructions_text_parts.append( + { + "type": "text", + "content": explicit_instructions, + } + ) + + system_instructions = _get_system_instructions_responses(messages) + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. + instructions_text_parts += _transform_system_instructions(system_instructions) + + if len(instructions_text_parts) > 0: set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, @@ -343,11 +361,7 @@ def _set_responses_api_input_data( unpack=False, ) - if ( - isinstance(messages, str) - and should_send_default_pii() - and integration.include_prompts - ): + if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) @@ -356,22 +370,21 @@ def _set_responses_api_input_data( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) - elif should_send_default_pii() and integration.include_prompts: - non_system_messages = [ - message - for message in messages - if not _is_system_instruction_responses(message) - ] - if len(non_system_messages) > 0: - normalized_messages = normalize_message_roles(non_system_messages) # type: ignore - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + non_system_messages = [ + message for message in messages if not _is_system_instruction_responses(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") _commmon_set_input_data(span, kwargs) @@ -386,17 +399,18 @@ def _set_completions_api_input_data( "messages" ) + if not should_send_default_pii() or not integration.include_prompts: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + if messages is None: set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") _commmon_set_input_data(span, kwargs) return system_instructions = _get_system_instructions_completions(messages) - if ( - len(system_instructions) > 0 - and should_send_default_pii() - and integration.include_prompts - ): + if len(system_instructions) > 0: set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, @@ -404,11 +418,7 @@ def _set_completions_api_input_data( unpack=False, ) - if ( - isinstance(messages, str) - and should_send_default_pii() - and integration.include_prompts - ): + if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) @@ -416,22 +426,23 @@ def _set_completions_api_input_data( set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) - elif should_send_default_pii() and integration.include_prompts: - non_system_messages = [ - message - for message in messages - if not _is_system_instruction_completions(message) - ] - if len(non_system_messages) > 0: - normalized_messages = normalize_message_roles(non_system_messages) # type: ignore - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + _commmon_set_input_data(span, kwargs) + return + + non_system_messages = [ + message + for message in messages + if not _is_system_instruction_completions(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") _commmon_set_input_data(span, kwargs) @@ -452,7 +463,9 @@ def _set_embeddings_input_data( ): normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 2deb1f6d5c..4d15283ea9 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -46,7 +46,6 @@ OpenAIIntegration, _calculate_token_usage, ) -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -132,9 +131,13 @@ async def async_iterator(values): @pytest.mark.parametrize( "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], + [ + (True, False), + (False, True), + (False, False), + ], ) -def test_nonstreaming_chat_completion( +def test_nonstreaming_chat_completion_no_prompts( sentry_init, capture_events, send_default_pii, include_prompts ): sentry_init( @@ -166,20 +169,92 @@ def test_nonstreaming_chat_completion( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" - if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.parametrize( + "messages", + [ + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) +def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=messages, + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + param_id = request.node.callspec.id + if "blocks" in param_id: assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] - - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert span["data"]["gen_ai.usage.output_tokens"] == 10 assert span["data"]["gen_ai.usage.input_tokens"] == 20 @@ -189,9 +264,13 @@ def test_nonstreaming_chat_completion( @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], + [ + (True, False), + (False, True), + (False, False), + ], ) -async def test_nonstreaming_chat_completion_async( +async def test_nonstreaming_chat_completion_async_no_prompts( sentry_init, capture_events, send_default_pii, include_prompts ): sentry_init( @@ -202,7 +281,7 @@ async def test_nonstreaming_chat_completion_async( events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) with start_transaction(name="openai tx"): response = await client.chat.completions.create( @@ -220,42 +299,18 @@ async def test_nonstreaming_chat_completion_async( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" - if send_default_pii and include_prompts: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] - - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] assert span["data"]["gen_ai.usage.output_tokens"] == 10 assert span["data"]["gen_ai.usage.input_tokens"] == 20 assert span["data"]["gen_ai.usage.total_tokens"] == 30 -def tiktoken_encoding_if_installed(): - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import - - return "cl100k_base" - except ImportError: - return None - - -# noinspection PyTypeChecker -@pytest.mark.parametrize( - "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], -) +@pytest.mark.asyncio @pytest.mark.parametrize( - "input", + "messages", [ pytest.param( [ @@ -282,8 +337,80 @@ def tiktoken_encoding_if_installed(): ), ], ) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, input, request +async def test_nonstreaming_chat_completion_async( + sentry_init, capture_events, messages, request +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=messages, + ) + response = response.choices[0].message.content + + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + + +def tiktoken_encoding_if_installed(): + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + return "cl100k_base" + except ImportError: + return None + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_streaming_chat_completion_no_prompts( + sentry_init, capture_events, send_default_pii, include_prompts ): sentry_init( integrations=[ @@ -339,7 +466,10 @@ def test_streaming_chat_completion( with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", - messages=input, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -350,57 +480,23 @@ def test_streaming_chat_completion( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" - param_id = request.node.callspec.id - if send_default_pii and include_prompts: - if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] - else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] - - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 - else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 1 - assert span["data"]["gen_ai.usage.total_tokens"] == 3 + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly # noinspection PyTypeChecker -@pytest.mark.asyncio -@pytest.mark.parametrize( - "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], -) @pytest.mark.parametrize( - "input", + "messages", [ pytest.param( [ @@ -427,74 +523,66 @@ def test_streaming_chat_completion( ), ], ) -async def test_streaming_chat_completion_async( - sentry_init, capture_events, send_default_pii, include_prompts, input, request -): +def test_streaming_chat_completion(sentry_init, capture_events, messages, request): sentry_init( integrations=[ OpenAIIntegration( - include_prompts=include_prompts, + include_prompts=True, tiktoken_encoding_name=tiktoken_encoding_if_installed(), ) ], traces_sample_rate=1.0, - send_default_pii=send_default_pii, + send_default_pii=True, ) events = capture_events() - client = AsyncOpenAI(api_key="z") - returned_stream = AsyncStream(cast_to=None, response=None, client=client) - returned_stream._iterator = async_iterator( - [ - ChatCompletionChunk( - id="1", - choices=[ - DeltaChoice( - index=0, delta=ChoiceDelta(content="hel"), finish_reason=None - ) - ], - created=100000, - model="model-id", - object="chat.completion.chunk", - ), - ChatCompletionChunk( - id="1", - choices=[ - DeltaChoice( - index=1, delta=ChoiceDelta(content="lo "), finish_reason=None - ) - ], - created=100000, - model="model-id", - object="chat.completion.chunk", - ), - ChatCompletionChunk( - id="1", - choices=[ - DeltaChoice( - index=2, - delta=ChoiceDelta(content="world"), - finish_reason="stop", - ) - ], - created=100000, - model="model-id", - object="chat.completion.chunk", - ), - ] - ) - - client.chat.completions._post = AsyncMock(return_value=returned_stream) - with start_transaction(name="openai tx"): - response_stream = await client.chat.completions.create( - model="some-model", - messages=input, - ) - - response_string = "" - async for x in response_stream: - response_string += x.choices[0].delta.content + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=messages, + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) assert response_string == "hello world" tx = events[0] assert tx["type"] == "transaction" @@ -502,32 +590,27 @@ async def test_streaming_chat_completion_async( assert span["op"] == "gen_ai.chat" param_id = request.node.callspec.id - if send_default_pii and include_prompts: - if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] - else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] - - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import @@ -540,127 +623,371 @@ async def test_streaming_chat_completion_async( assert span["data"]["gen_ai.usage.output_tokens"] == 2 assert span["data"]["gen_ai.usage.input_tokens"] == 1 assert span["data"]["gen_ai.usage.total_tokens"] == 3 - except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - - (event,) = events - assert event["level"] == "error" - - -def test_span_status_error(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() - - with start_transaction(name="test"): - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" - - +# noinspection PyTypeChecker @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - - (event,) = events - assert event["level"] == "error" - - @pytest.mark.parametrize( "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], + [ + (True, False), + (False, True), + (False, False), + ], ) -def test_embeddings_create( +async def test_streaming_chat_completion_async_no_prompts( sentry_init, capture_events, send_default_pii, include_prompts ): sentry_init( - integrations=[OpenAIIntegration(include_prompts=include_prompts)], + integrations=[ + OpenAIIntegration( + include_prompts=include_prompts, + tiktoken_encoding_name=tiktoken_encoding_if_installed(), + ) + ], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) events = capture_events() - client = OpenAI(api_key="z") - - returned_embedding = CreateEmbeddingResponse( - data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], - model="some-model", - object="list", - usage=EmbeddingTokenUsage( - prompt_tokens=20, - total_tokens=30, - ), + client = AsyncOpenAI(api_key="z") + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = async_iterator( + [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, + delta=ChoiceDelta(content="world"), + finish_reason="stop", + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] ) - client.embeddings._post = mock.Mock(return_value=returned_embedding) + client.chat.completions._post = AsyncMock(return_value=returned_stream) with start_transaction(name="openai tx"): - response = client.embeddings.create( - input="hello", model="text-embedding-3-large" + response_stream = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) - assert len(response.data[0].embedding) == 3 + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + assert response_string == "hello world" tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly +# noinspection PyTypeChecker @pytest.mark.asyncio @pytest.mark.parametrize( - "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], + "messages", + [ + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts", + ), + ], ) -async def test_embeddings_create_async( - sentry_init, capture_events, send_default_pii, include_prompts +async def test_streaming_chat_completion_async( + sentry_init, capture_events, messages, request ): sentry_init( - integrations=[OpenAIIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) + integrations=[ + OpenAIIntegration( + include_prompts=True, + tiktoken_encoding_name=tiktoken_encoding_if_installed(), + ) + ], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = async_iterator( + [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, + delta=ChoiceDelta(content="world"), + finish_reason="stop", + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + ) + + client.chat.completions._post = AsyncMock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=messages, + ) + + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 1 + assert span["data"]["gen_ai.usage.total_tokens"] == 3 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +def test_span_status_error(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + with start_transaction(name="test"): + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" + + +@pytest.mark.asyncio +async def test_bad_chat_completion_async(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + client.chat.completions._post = AsyncMock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + if send_default_pii and include_prompts: + assert "hello" in span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +async def test_embeddings_create_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) events = capture_events() client = AsyncOpenAI(api_key="z") @@ -1181,6 +1508,311 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "instructions", ( omit, + None, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) +@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") +def test_ai_client_span_responses_api( + sentry_init, capture_events, instructions, input, request +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + ) + + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + + +@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") +def test_error_in_responses_api(sentry_init, capture_events): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.responses._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + + with start_transaction(name="openai tx"): + with pytest.raises(OpenAIError): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + (error_event, transaction_event) = events + + assert transaction_event["type"] == "transaction" + # make sure the span where the error occurred is captured + assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + assert ( + error_event["contexts"]["trace"]["trace_id"] + == transaction_event["contexts"]["trace"]["trace_id"] + ) + + +@pytest.mark.asyncio +@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") +@pytest.mark.parametrize( + "instructions", + ( + omit, + None, "You are a coding assistant that talks like a pirate.", ), ) @@ -1190,6 +1822,16 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): pytest.param( "How do I check if a Python object is an instance of a class?", id="string" ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), pytest.param( [ { @@ -1201,6 +1843,19 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ], id="blocks", ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), pytest.param( [ { @@ -1217,8 +1872,7 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ), ], ) -@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api( +async def test_ai_client_span_responses_async_api( sentry_init, capture_events, instructions, input, request ): sentry_init( @@ -1228,11 +1882,11 @@ def test_ai_client_span_responses_api( ) events = capture_events() - client = OpenAI(api_key="z") - client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) + client = AsyncOpenAI(api_key="z") + client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) with start_transaction(name="openai tx"): - client.responses.create( + await client.responses.create( model="gpt-4o", instructions=instructions, input=input, @@ -1247,21 +1901,25 @@ def test_ai_client_span_responses_api( expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.system": "openai", + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, - "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } param_id = request.node.callspec.id - if "string" in param_id and isinstance(instructions, Omit): # type: ignore + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore expected_data.update( { "gen_ai.request.messages": safe_serialize( @@ -1285,7 +1943,39 @@ def test_ai_client_span_responses_api( ), } ) - elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore expected_data.update( { "gen_ai.system_instructions": safe_serialize( @@ -1313,7 +2003,41 @@ def test_ai_client_span_responses_api( ), } ) - elif isinstance(instructions, Omit): # type: ignore + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore expected_data.update( { "gen_ai.system_instructions": safe_serialize( @@ -1349,92 +2073,12 @@ def test_ai_client_span_responses_api( assert spans[0]["data"] == expected_data -@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_events): - sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - client = OpenAI(api_key="z") - client.responses._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - - with start_transaction(name="openai tx"): - with pytest.raises(OpenAIError): - client.responses.create( - model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", - ) - - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" - # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" - - assert error_event["level"] == "error" - assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - - assert ( - error_event["contexts"]["trace"]["trace_id"] - == transaction_event["contexts"]["trace"]["trace_id"] - ) - - -@pytest.mark.asyncio -@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_ai_client_span_responses_async_api(sentry_init, capture_events): - sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - - with start_transaction(name="openai tx"): - await client.responses.create( - model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", - ) - - (transaction,) = events - spans = transaction["spans"] - - assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", - "gen_ai.system": "openai", - "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", ( omit, + None, "You are a coding assistant that talks like a pirate.", ), ) @@ -1444,6 +2088,16 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): pytest.param( "How do I check if a Python object is an instance of a class?", id="string" ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), pytest.param( [ { @@ -1455,6 +2109,19 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): ], id="blocks", ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), pytest.param( [ { @@ -1512,12 +2179,15 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } param_id = request.node.callspec.id - if "string" in param_id and isinstance(instructions, Omit): # type: ignore + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore expected_data.update( { "gen_ai.request.messages": safe_serialize( @@ -1541,7 +2211,39 @@ async def test_ai_client_span_streaming_responses_async_api( ), } ) - elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore expected_data.update( { "gen_ai.system_instructions": safe_serialize( @@ -1569,7 +2271,41 @@ async def test_ai_client_span_streaming_responses_async_api( ), } ) - elif isinstance(instructions, Omit): # type: ignore + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore expected_data.update( { "gen_ai.system_instructions": safe_serialize( @@ -1856,7 +2592,24 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] -def test_openai_message_role_mapping(sentry_init, capture_events): +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_openai_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -1868,12 +2621,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" - test_messages = [ - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] + + test_messages = [test_message] with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) @@ -1888,20 +2637,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 3 - assert ( - stored_messages[1]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[2]["role"] == "assistant" # should stay "assistant" - - # Verify content is preserved - assert stored_messages[1]["content"] == "Hi there!" - assert stored_messages[2]["content"] == "How can I help?" - - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages] - assert "ai" not in roles + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == expected_role def test_openai_message_truncation(sentry_init, capture_events): @@ -1943,14 +2680,9 @@ def test_openai_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) - if "_meta" in event and len(parsed_messages) < len(large_messages): - meta_path = event["_meta"] - if ( - "spans" in meta_path - and "0" in meta_path["spans"] - and "data" in meta_path["spans"]["0"] - ): - span_meta = meta_path["spans"]["0"]["data"] - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "len" in messages_meta.get("", {}) + meta_path = event["_meta"] + span_meta = meta_path["spans"]["0"]["data"] + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 3 + assert "len" in messages_meta.get("", {}) From 382e933e51305b122e86244fd6a0d66b5e559ad3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 08:58:09 +0100 Subject: [PATCH 45/58] revert unrelated tests --- tests/integrations/openai/test_openai.py | 63 +++++++++++++----------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4d15283ea9..85acbb1486 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2592,24 +2592,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] -# Test messages with mixed roles including "ai" that should be mapped to "assistant" -@pytest.mark.parametrize( - "test_message,expected_role", - [ - ({"role": "user", "content": "Hello"}, "user"), - ( - {"role": "ai", "content": "Hi there!"}, - "assistant", - ), # Should be mapped to "assistant" - ( - {"role": "assistant", "content": "How can I help?"}, - "assistant", - ), # Should stay "assistant" - ], -) -def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role -): +def test_openai_message_role_mapping(sentry_init, capture_events): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2621,8 +2604,13 @@ def test_openai_message_role_mapping( client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - - test_messages = [test_message] + # Test messages with mixed roles including "ai" that should be mapped to "assistant" + test_messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" + {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" + ] with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) @@ -2637,8 +2625,22 @@ def test_openai_message_role_mapping( stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == expected_role + # Verify that "ai" role was mapped to "assistant" + assert len(stored_messages) == 4 + assert stored_messages[0]["role"] == "system" + assert stored_messages[1]["role"] == "user" + assert ( + stored_messages[2]["role"] == "assistant" + ) # "ai" should be mapped to "assistant" + assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + + # Verify content is preserved + assert stored_messages[2]["content"] == "Hi there!" + assert stored_messages[3]["content"] == "How can I help?" + + # Verify no "ai" roles remain + roles = [msg["role"] for msg in stored_messages] + assert "ai" not in roles def test_openai_message_truncation(sentry_init, capture_events): @@ -2680,9 +2682,14 @@ def test_openai_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) - meta_path = event["_meta"] - span_meta = meta_path["spans"]["0"]["data"] - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 3 - assert "len" in messages_meta.get("", {}) + if "_meta" in event and len(parsed_messages) < len(large_messages): + meta_path = event["_meta"] + if ( + "spans" in meta_path + and "0" in meta_path["spans"] + and "data" in meta_path["spans"]["0"] + ): + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) From 179d59efd34e98bd553506482d0c888e094f67a9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 09:13:46 +0100 Subject: [PATCH 46/58] revert unrelated change --- sentry_sdk/integrations/openai.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b8f94be995..bd84f3e63e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -8,7 +8,6 @@ set_data_normalized, normalize_message_roles, truncate_and_annotate_messages, - truncate_and_annotate_embedding_inputs, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -463,9 +462,7 @@ def _set_embeddings_input_data( ): normalized_messages = normalize_message_roles(messages) # type: ignore scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope - ) + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False From 12cb21926816beb17537222e440d438543735b5b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 09:21:52 +0100 Subject: [PATCH 47/58] address comment --- sentry_sdk/integrations/openai.py | 12 +++++++----- tests/integrations/openai/test_openai.py | 17 ++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index bd84f3e63e..69d8d069fd 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,5 +1,6 @@ import sys from functools import wraps +from collections.abc import Iterable import sentry_sdk from sentry_sdk import consts @@ -31,7 +32,6 @@ AsyncIterator, Iterator, Union, - Iterable, ) from sentry_sdk.tracing import Span from sentry_sdk._types import TextPart @@ -322,10 +322,12 @@ def _set_responses_api_input_data( set_data_normalized( span, SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - { - "type": "text", - "content": explicit_instructions, - }, + [ + { + "type": "text", + "content": explicit_instructions, + } + ], unpack=False, ) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 85acbb1486..813c2ab771 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1607,7 +1607,6 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", - "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1911,7 +1910,6 @@ async def test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", - "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2179,7 +2177,6 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", - "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2606,7 +2603,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ - {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello"}, {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" @@ -2626,17 +2622,16 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" + assert len(stored_messages) == 3 + assert stored_messages[0]["role"] == "user" assert ( - stored_messages[2]["role"] == "assistant" + stored_messages[1]["role"] == "assistant" ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + assert stored_messages[2]["role"] == "assistant" # should stay "assistant" # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" + assert stored_messages[1]["content"] == "Hi there!" + assert stored_messages[2]["content"] == "How can I help?" # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] From a6152fe7809e24a70e401a830189a66dd9cba5b5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 09:24:49 +0100 Subject: [PATCH 48/58] remove unused type ignore --- sentry_sdk/integrations/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 69d8d069fd..bdc60a0ce8 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -379,7 +379,7 @@ def _set_responses_api_input_data( message for message in messages if not _is_system_instruction_responses(message) ] if len(non_system_messages) > 0: - normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + normalized_messages = normalize_message_roles(non_system_messages) scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: From eda980cfcaf828be520b4d10dd9de9c0c55b12de Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 10:55:35 +0100 Subject: [PATCH 49/58] fix typo in filename --- .../{_opanai_completions_api.py => _openai_completions_api.py} | 0 sentry_sdk/integrations/openai.py | 2 +- sentry_sdk/integrations/openai_agents/utils.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename sentry_sdk/ai/{_opanai_completions_api.py => _openai_completions_api.py} (100%) diff --git a/sentry_sdk/ai/_opanai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py similarity index 100% rename from sentry_sdk/ai/_opanai_completions_api.py rename to sentry_sdk/ai/_openai_completions_api.py diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index dc3973d31f..c9a112e242 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,7 +9,7 @@ normalize_message_roles, truncate_and_annotate_messages, ) -from sentry_sdk.ai._opanai_completions_api import ( +from sentry_sdk.ai._openai_completions_api import ( _is_system_instruction as _is_system_instruction_completions, _get_system_instructions as _get_system_instructions_completions, _transform_system_instructions, diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 45b00a1366..4936982e95 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -11,7 +11,7 @@ from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing_utils import set_span_errored from sentry_sdk.utils import event_from_exception, safe_serialize -from sentry_sdk.ai._opanai_completions_api import _transform_system_instructions +from sentry_sdk.ai._openai_completions_api import _transform_system_instructions from sentry_sdk.ai._openai_responses_api import ( _is_system_instruction, _get_system_instructions, From 5825835ff005cd28cf121c9a7a97406e1385bc4a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 11:31:33 +0100 Subject: [PATCH 50/58] remove unused import --- sentry_sdk/integrations/openai_agents/spans/invoke_agent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 4f7e744ec3..c3a3a04dc9 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -18,8 +18,6 @@ import agents from typing import Any, Optional - from sentry_sdk._types import TextPart - def invoke_agent_span( context: "agents.RunContextWrapper", agent: "agents.Agent", kwargs: "dict[str, Any]" From 753b5c0a35ef2d78ffd785e9a1b6a6a6af9e4639 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:13:29 +0100 Subject: [PATCH 51/58] add anthropic test case --- .../integrations/anthropic/test_anthropic.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 61ba913e60..84d773e129 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1009,6 +1009,60 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 + assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +@pytest.mark.asyncio +async def test_anthropic_message_truncation_async(sentry_init, capture_events): + """Test that large messages are truncated properly in Anthropic integration.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncAnthropic(api_key="z") + client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + messages = [ + {"role": "user", "content": "small message 1"}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": "small message 4"}, + {"role": "user", "content": "small message 5"}, + ] + + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 5 assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From 7400f9d23d78fd0b6c52139f33367e44608a4f6e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:14:46 +0100 Subject: [PATCH 52/58] remove parameter in test case --- tests/integrations/openai/test_openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 3033b7b75c..884b47b709 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2593,7 +2593,6 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): @pytest.mark.parametrize( "test_message,expected_role", [ - ({"role": "system", "content": "You are helpful."}, "system"), ({"role": "user", "content": "Hello"}, "user"), ( {"role": "ai", "content": "Hi there!"}, From c625e1219185dedf242202fa1f86efb36c48c1bf Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:20:47 +0100 Subject: [PATCH 53/58] revert test_ai_monitoring tests --- tests/test_ai_monitoring.py | 180 +++++++++++++++++++++++++++--------- 1 file changed, 134 insertions(+), 46 deletions(-) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 767d79b747..f6852d54bb 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -11,9 +11,12 @@ ) from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( + MAX_GEN_AI_MESSAGE_BYTES, MAX_SINGLE_MESSAGE_CONTENT_CHARS, set_data_normalized, truncate_and_annotate_messages, + truncate_messages_by_size, + _find_truncation_index, parse_data_uri, redact_blob_message_parts, get_modality_from_mime_type, @@ -219,8 +222,105 @@ def large_messages(): ] +class TestTruncateMessagesBySize: + def test_no_truncation_needed(self, sample_messages): + """Test that messages under the limit are not truncated""" + result, truncation_index = truncate_messages_by_size( + sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES + ) + assert len(result) == len(sample_messages) + assert result == sample_messages + assert truncation_index == 0 + + def test_truncation_removes_oldest_first(self, large_messages): + """Test that oldest messages are removed first during truncation""" + small_limit = 3000 + result, truncation_index = truncate_messages_by_size( + large_messages, max_bytes=small_limit + ) + assert len(result) < len(large_messages) + + assert result[-1] == large_messages[-1] + assert truncation_index == len(large_messages) - len(result) + + def test_empty_messages_list(self): + """Test handling of empty messages list""" + result, truncation_index = truncate_messages_by_size( + [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) + assert result == [] + assert truncation_index == 0 + + def test_find_truncation_index( + self, + ): + """Test that the truncation index is found correctly""" + # when represented in JSON, these are each 7 bytes long + messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] + truncation_index = _find_truncation_index(messages, 20) + assert truncation_index == 3 + assert messages[truncation_index:] == ["D" * 5, "E" * 5] + + messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] + truncation_index = _find_truncation_index(messages, 40) + assert truncation_index == 0 + assert messages[truncation_index:] == [ + "A" * 5, + "B" * 5, + "C" * 5, + "D" * 5, + "E" * 5, + ] + + def test_progressive_truncation(self, large_messages): + """Test that truncation works progressively with different limits""" + limits = [ + MAX_GEN_AI_MESSAGE_BYTES // 5, + MAX_GEN_AI_MESSAGE_BYTES // 10, + MAX_GEN_AI_MESSAGE_BYTES // 25, + MAX_GEN_AI_MESSAGE_BYTES // 100, + MAX_GEN_AI_MESSAGE_BYTES // 500, + ] + prev_count = len(large_messages) + + for limit in limits: + result = truncate_messages_by_size(large_messages, max_bytes=limit) + current_count = len(result) + + assert current_count <= prev_count + assert current_count >= 1 + prev_count = current_count + + def test_single_message_truncation(self): + large_content = "This is a very long message. " * 10_000 + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + ] + + result, truncation_index = truncate_messages_by_size( + messages, max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + assert len(result) == 1 + assert ( + len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + # If the last message is too large, the system message is not present + system_msgs = [m for m in result if m.get("role") == "system"] + assert len(system_msgs) == 0 + + # Confirm the user message is truncated with '...' + user_msgs = [m for m in result if m.get("role") == "user"] + assert len(user_msgs) == 1 + assert user_msgs[0]["content"].endswith("...") + assert len(user_msgs[0]["content"]) < len(large_content) + + class TestTruncateAndAnnotateMessages: - def test_truncation_sets_metadata_on_scope(self, large_messages): + def test_no_truncation_returns_list(self, sample_messages): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -233,20 +333,17 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - small_limit = 3000 span = MockSpan() scope = MockScope() - original_count = len(large_messages) - result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) + result = truncate_and_annotate_messages(sample_messages, span, scope) assert isinstance(result, list) assert not isinstance(result, AnnotatedValue) - assert len(result) < len(large_messages) - assert scope._gen_ai_original_message_count[span.span_id] == original_count + assert len(result) == len(sample_messages) + assert result == sample_messages + assert span.span_id not in scope._gen_ai_original_message_count - def test_scope_tracks_original_message_count(self, large_messages): + def test_truncation_sets_metadata_on_scope(self, large_messages): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -260,18 +357,19 @@ def __init__(self): self._gen_ai_original_message_count = {} small_limit = 3000 - original_count = len(large_messages) span = MockSpan() scope = MockScope() - + original_count = len(large_messages) result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit + large_messages, span, scope, max_bytes=small_limit ) + assert isinstance(result, list) + assert not isinstance(result, AnnotatedValue) + assert len(result) < len(large_messages) assert scope._gen_ai_original_message_count[span.span_id] == original_count - assert len(result) == 1 - def test_empty_messages_returns_none(self): + def test_scope_tracks_original_message_count(self, large_messages): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -284,15 +382,19 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} + small_limit = 3000 + original_count = len(large_messages) span = MockSpan() scope = MockScope() - result = truncate_and_annotate_messages([], span, scope) - assert result is None - result = truncate_and_annotate_messages(None, span, scope) - assert result is None + result = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + + assert scope._gen_ai_original_message_count[span.span_id] == original_count + assert len(result) == 1 - def test_single_message_truncation(self, large_messages): + def test_empty_messages_returns_none(self): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -305,33 +407,13 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - large_content = "This is a very long message. " * 10_000 - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": large_content}, - ] - span = MockSpan() scope = MockScope() - result = truncate_and_annotate_messages( - messages, - span, - scope, - max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS, - ) - assert result is not None - - assert len(result) == 1 - assert ( - len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) + result = truncate_and_annotate_messages([], span, scope) + assert result is None - # Confirm the user message is truncated with '...' - user_msgs = [m for m in result if m.get("role") == "user"] - assert len(user_msgs) == 1 - assert user_msgs[0]["content"].endswith("...") - assert len(user_msgs[0]["content"]) < len(large_content) + result = truncate_and_annotate_messages(None, span, scope) + assert result is None def test_truncated_messages_newest_first(self, large_messages): class MockSpan: @@ -350,7 +432,7 @@ def __init__(self): span = MockSpan() scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit + large_messages, span, scope, max_bytes=small_limit ) assert isinstance(result, list) @@ -418,12 +500,15 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} + small_limit = 3000 span = MockSpan() scope = MockScope() original_count = len(large_messages) # Simulate what integrations do - truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) + truncated_messages = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) # Verify metadata was set on scope @@ -472,11 +557,14 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} + small_limit = 3000 span = MockSpan() scope = MockScope() original_message_count = len(large_messages) - truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) + truncated_messages = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) assert len(truncated_messages) < original_message_count From 2a80a8d1ca0954eec06897671806f9ecbfc3917e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:26:58 +0100 Subject: [PATCH 54/58] restore test change --- tests/integrations/google_genai/test_google_genai.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 558291d8b3..37ba50420f 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -978,11 +978,10 @@ def test_google_genai_message_truncation( ) (event,) = events - _, chat_span = event["spans"] - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -991,7 +990,7 @@ def test_google_genai_message_truncation( assert parsed_messages[0]["role"] == "user" assert small_content in parsed_messages[0]["content"] - assert chat_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 2 + assert invoke_span["data"][SPANDATA.META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH] == 2 assert ( event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2 ) From 0628d92a10672488c3adf82d3bb5a46d1504c518 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:31:48 +0100 Subject: [PATCH 55/58] update ai_monitoring tests --- tests/test_ai_monitoring.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index f6852d54bb..969d14658d 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -320,7 +320,7 @@ def test_single_message_truncation(self): class TestTruncateAndAnnotateMessages: - def test_no_truncation_returns_list(self, sample_messages): + def test_only_keeps_last_message(self, sample_messages): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -339,9 +339,8 @@ def __init__(self): assert isinstance(result, list) assert not isinstance(result, AnnotatedValue) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert span.span_id not in scope._gen_ai_original_message_count + assert len(result) == 1 + assert result[0] == sample_messages[-1] def test_truncation_sets_metadata_on_scope(self, large_messages): class MockSpan: @@ -361,7 +360,7 @@ def __init__(self): scope = MockScope() original_count = len(large_messages) result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -388,7 +387,7 @@ def __init__(self): scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert scope._gen_ai_original_message_count[span.span_id] == original_count @@ -432,7 +431,7 @@ def __init__(self): span = MockSpan() scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -507,7 +506,7 @@ def __init__(self): # Simulate what integrations do truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) @@ -563,7 +562,7 @@ def __init__(self): original_message_count = len(large_messages) truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert len(truncated_messages) < original_message_count From 6dc02f968ddd5580c0e580b604388a1d9c5b0010 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:32:43 +0100 Subject: [PATCH 56/58] add const file --- sentry_sdk/consts.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 4b61a317fb..4b21350ec5 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -863,6 +863,14 @@ class SPANDATA: Example: "a1b2c3d4e5f6" """ + META_GEN_AI_ORIGINAL_INPUT_MESSAGES_LENGTH = ( + "sentry.sdk_meta.gen_ai.input.messages.original_length" + ) + """ + The original number of input non-system instruction messages, before SDK trimming. + Example: 4 + """ + class SPANSTATUS: """ From d1511193becab1f332ffba0190eadc0a6444c797 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 26 Jan 2026 14:43:16 +0100 Subject: [PATCH 57/58] openai tests --- tests/integrations/openai/test_openai.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 884b47b709..aa83a3c5f2 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1607,6 +1607,7 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1910,6 +1911,7 @@ async def test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2177,6 +2179,7 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, } From 9ce59bb3d0b36d8f581e21bea89ed92b29468226 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 28 Jan 2026 10:28:15 +0100 Subject: [PATCH 58/58] resolving openai test conflicts --- tests/integrations/openai/test_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a4d79ab978..3581a14bd7 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1901,7 +1901,6 @@ async def test_ai_client_span_responses_async_api( expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.response.streaming": True, "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", @@ -2180,6 +2179,7 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.sdk_meta.gen_ai.input.messages.original_length": 1, "thread.id": mock.ANY, "thread.name": mock.ANY, }