88 changes: 88 additions & 0 deletions sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

@@ -38,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role


def parse_data_uri(url):
# type: (str) -> Tuple[str, str]
"""
Parse a data URI and return (mime_type, content).

Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>

Examples:
data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
data:text/plain,Hello → ("text/plain", "Hello")
data:;base64,SGVsbG8= → ("", "SGVsbG8=")

Raises:
ValueError: If the URL is not a valid data URI (missing comma separator)
"""
if "," not in url:
raise ValueError("Invalid data URI: missing comma separator")

header, content = url.split(",", 1)

# Extract mime type from header
# Format: "data:<mime>[;param1][;param2]..." e.g. "data:image/jpeg;base64"
# Remove "data:" prefix, then take everything before the first semicolon
if header.startswith("data:"):
mime_part = header[5:] # Remove "data:" prefix
else:
mime_part = header

mime_type = mime_part.split(";")[0]

return mime_type, content


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
@@ -141,6 +176,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
Redact blob message parts in the messages by replacing each blob part's
"content" value with SENSITIVE_DATA_SUBSTITUTE ("[Filtered]").
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

for message in messages:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "blob":
item["content"] = SENSITIVE_DATA_SUBSTITUTE
return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +272,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
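Taken together, the two new helpers behave as in this short sketch (illustrative only; it assumes a build that includes this change, and the expected values follow the docstrings above):

    from sentry_sdk.ai.utils import parse_data_uri, redact_blob_message_parts

    mime_type, payload = parse_data_uri("data:image/jpeg;base64,/9j/4AAQ")
    # mime_type == "image/jpeg", payload == "/9j/4AAQ"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "How many ponies do you see in the image?"},
                {"type": "blob", "modality": "image", "mime_type": mime_type, "content": payload},
            ],
        }
    ]
    redact_blob_message_parts(messages)
    # The blob part's "content" is now "[Filtered]"; the text part is untouched.
    # Note that the list is modified in place and also returned.
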
79 changes: 78 additions & 1 deletion sentry_sdk/integrations/openai.py
@@ -6,6 +6,7 @@
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
parse_data_uri,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
@@ -18,7 +19,7 @@
safe_serialize,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict

if TYPE_CHECKING:
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
@@ -177,6 +178,80 @@ def _calculate_token_usage(
)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
url = image_url.get("url", "")
if url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"uri": url,
}
else:
return {
"type": "uri",
"uri": url,
}
Review comment (Medium Severity): Missing modality field for non-data image URLs

When handling image_url content parts with regular HTTP URLs (not data: URIs), the returned dictionary is missing the "modality": "image" field. This is inconsistent with the other code paths in the same function (lines 226-231 and 234-238 both include "modality": "image") and with the equivalent implementation in openai_agents/utils.py (lines 87-92), which includes "modality": "image" for non-data URLs. As a result, image metadata is lost for HTTP image URLs, potentially affecting downstream processing that relies on the modality field.
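
One possible shape of the fix (a sketch, not a committed change; the helper name _map_image_url_item is hypothetical): carry "modality": "image" on the plain-URL branch as well, so every return shape of the image_url handling includes it.

    def _map_image_url_item(item):
        # Hypothetical standalone version of the image_url branch, for illustration only.
        image_url = item.get("image_url") or {}
        url = image_url.get("url", "")
        if url.startswith("data:"):
            try:
                mime_type, content = parse_data_uri(url)
                return {
                    "type": "blob",
                    "modality": "image",
                    "mime_type": mime_type,
                    "content": content,
                }
            except ValueError:
                pass  # not a parseable data URI; fall through and report it as a URI
        return {
            "type": "uri",
            "modality": "image",  # present on every branch, matching openai_agents/utils.py
            "uri": url,
        }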

return item
Review comment (Low Severity): Missing type check causes crash on non-dict content items

The _map_item function calls item.get("type") without first checking that item is a dict. If the content list contains non-dict items (such as strings), this raises an AttributeError. The equivalent function _transform_openai_agents_content_part in openai_agents/utils.py has a defensive check (if not isinstance(content_part, dict): return content_part) at lines 43-44, but this function is missing that guard.
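
A sketch of the suggested guard (illustrative only), mirroring the check in _transform_openai_agents_content_part:

    def _map_item(item):
        # Defensive guard: content lists may contain plain strings or other non-dict parts;
        # pass them through unchanged instead of assuming .get() is available on them.
        if not isinstance(item, dict):
            return item
        if item.get("type") == "image_url":
            ...  # existing image_url handling from the diff above, unchanged
        return item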


for message in messages:
if not isinstance(message, dict):
continue
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _set_input_data(
span: "Span",
kwargs: "dict[str, Any]",
@@ -198,6 +273,8 @@ def _set_input_data(
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
normalized_messages = _convert_message_parts(normalized_messages)

scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
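End to end, an OpenAI image message now flows through the conversion and redaction steps roughly like this (illustrative sketch; _convert_message_parts is module-private and the example payload is made up):

    openai_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "How many ponies do you see in the image?"},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ", "detail": "high"}},
        ],
    }

    converted = _convert_message_parts([openai_message])
    # The image part becomes:
    #   {"type": "blob", "modality": "image", "mime_type": "image/jpeg", "content": "/9j/4AAQ"}

    # truncate_and_annotate_messages() then calls redact_blob_message_parts(), so the
    # blob's "content" is replaced with "[Filtered]" before the messages reach the span.
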
52 changes: 40 additions & 12 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,14 +3,19 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
normalize_message_role,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import safe_serialize

from ..consts import SPAN_ORIGIN
from ..utils import _set_agent_data, _set_usage_data
from ..utils import (
_set_agent_data,
_set_usage_data,
_transform_openai_agents_message_content,
)

from typing import TYPE_CHECKING

@@ -49,17 +54,40 @@ def invoke_agent_span(

original_input = kwargs.get("original_input")
if original_input is not None:
message = (
original_input
if isinstance(original_input, str)
else safe_serialize(original_input)
)
messages.append(
{
"content": [{"text": message, "type": "text"}],
"role": "user",
}
)
if isinstance(original_input, str):
# String input: wrap in text block
messages.append(
{
"content": [{"text": original_input, "type": "text"}],
"role": "user",
}
)
elif isinstance(original_input, list) and len(original_input) > 0:
# Check if list contains message objects (with type="message")
# or content parts (input_text, input_image, etc.)
first_item = original_input[0]
if isinstance(first_item, dict) and first_item.get("type") == "message":
# List of message objects - process each individually
for msg in original_input:
if isinstance(msg, dict) and msg.get("type") == "message":
role = normalize_message_role(msg.get("role", "user"))
content = msg.get("content")
transformed = _transform_openai_agents_message_content(
content
)
if isinstance(transformed, str):
transformed = [{"text": transformed, "type": "text"}]
elif not isinstance(transformed, list):
transformed = [
{"text": str(transformed), "type": "text"}
]
messages.append({"content": transformed, "role": role})
else:
# List of content parts - transform and wrap as user message
content = _transform_openai_agents_message_content(original_input)
if not isinstance(content, list):
content = [{"text": str(content), "type": "text"}]
messages.append({"content": content, "role": "user"})

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
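
For reference, the new branching in invoke_agent_span distinguishes two list shapes for original_input. A sketch of representative inputs (illustrative values; the input_text/input_image part names follow the OpenAI Agents conventions this code assumes):

    # Shape 1: a list of message objects; each one becomes its own gen_ai message.
    original_input = [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Describe this image"}],
        }
    ]

    # Shape 2: a bare list of content parts; the whole list is wrapped as one user message.
    original_input = [
        {"type": "input_text", "text": "Describe this image"},
        {"type": "input_image", "image_url": "data:image/png;base64,iVBORw0KGgo="},
    ]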