88 changes: 88 additions & 0 deletions sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

@@ -38,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role


def parse_data_uri(url):
# type: (str) -> Tuple[str, str]
"""
Parse a data URI and return (mime_type, content).

Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>

Examples:
data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
data:text/plain,Hello → ("text/plain", "Hello")
data:;base64,SGVsbG8= → ("", "SGVsbG8=")

Raises:
ValueError: If the URL is not a valid data URI (missing comma separator)
"""
if "," not in url:
raise ValueError("Invalid data URI: missing comma separator")

header, content = url.split(",", 1)

# Extract mime type from header
# Format: "data:<mime>[;param1][;param2]..." e.g. "data:image/jpeg;base64"
# Remove "data:" prefix, then take everything before the first semicolon
if header.startswith("data:"):
mime_part = header[5:] # Remove "data:" prefix
else:
mime_part = header

mime_type = mime_part.split(";")[0]

return mime_type, content


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
@@ -141,6 +176,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
Redact blob message parts in the messages by replacing each blob part's
"content" value with SENSITIVE_DATA_SUBSTITUTE ("[Filtered]").
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

for message in messages:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "blob":
item["content"] = SENSITIVE_DATA_SUBSTITUTE
return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +272,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
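Taken together, the two new helpers behave as in this short sketch (illustrative only; it assumes a build that includes this change, and the expected values follow the docstrings above):

    from sentry_sdk.ai.utils import parse_data_uri, redact_blob_message_parts

    mime_type, payload = parse_data_uri("data:image/jpeg;base64,/9j/4AAQ")
    # mime_type == "image/jpeg", payload == "/9j/4AAQ"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "How many ponies do you see in the image?"},
                {"type": "blob", "modality": "image", "mime_type": mime_type, "content": payload},
            ],
        }
    ]
    redact_blob_message_parts(messages)
    # The blob part's "content" is now "[Filtered]"; the text part is untouched.
    # Note that the list is modified in place and also returned.
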
79 changes: 78 additions & 1 deletion sentry_sdk/integrations/openai.py
@@ -6,6 +6,7 @@
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
parse_data_uri,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
@@ -18,7 +19,7 @@
safe_serialize,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict

if TYPE_CHECKING:
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
@@ -177,6 +178,80 @@ def _calculate_token_usage(
)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
url = image_url.get("url", "")
if url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"uri": url,
}
else:
return {
"type": "uri",
"uri": url,
}
Review comment (Medium Severity): Missing modality field for non-data image URLs

When handling image_url content parts with regular HTTP URLs (not data: URIs), the returned dictionary is missing the "modality": "image" field. This is inconsistent with the other code paths in the same function (lines 226-231 and 234-238 both include "modality": "image") and with the equivalent implementation in openai_agents/utils.py (lines 87-92), which includes "modality": "image" for non-data URLs. As a result, image metadata is lost for HTTP image URLs, potentially affecting downstream processing that relies on the modality field.
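
One possible shape of the fix (a sketch, not a committed change; the helper name _map_image_url_item is hypothetical): carry "modality": "image" on the plain-URL branch as well, so every return shape of the image_url handling includes it.

    def _map_image_url_item(item):
        # Hypothetical standalone version of the image_url branch, for illustration only.
        image_url = item.get("image_url") or {}
        url = image_url.get("url", "")
        if url.startswith("data:"):
            try:
                mime_type, content = parse_data_uri(url)
                return {
                    "type": "blob",
                    "modality": "image",
                    "mime_type": mime_type,
                    "content": content,
                }
            except ValueError:
                pass  # not a parseable data URI; fall through and report it as a URI
        return {
            "type": "uri",
            "modality": "image",  # present on every branch, matching openai_agents/utils.py
            "uri": url,
        }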

return item
Review comment (Low Severity): Missing type check causes crash on non-dict content items

The _map_item function calls item.get("type") without first checking that item is a dict. If the content list contains non-dict items (such as strings), this raises an AttributeError. The equivalent function _transform_openai_agents_content_part in openai_agents/utils.py has a defensive check (if not isinstance(content_part, dict): return content_part) at lines 43-44, but this function is missing that guard.
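
A sketch of the suggested guard (illustrative only), mirroring the check in _transform_openai_agents_content_part:

    def _map_item(item):
        # Defensive guard: content lists may contain plain strings or other non-dict parts;
        # pass them through unchanged instead of assuming .get() is available on them.
        if not isinstance(item, dict):
            return item
        if item.get("type") == "image_url":
            ...  # existing image_url handling from the diff above, unchanged
        return item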


for message in messages:
if not isinstance(message, dict):
continue
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _set_input_data(
span: "Span",
kwargs: "dict[str, Any]",
@@ -198,6 +273,8 @@ def _set_input_data(
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
normalized_messages = _convert_message_parts(normalized_messages)

scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
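End to end, an OpenAI image message now flows through the conversion and redaction steps roughly like this (illustrative sketch; _convert_message_parts is module-private and the example payload is made up):

    openai_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "How many ponies do you see in the image?"},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ", "detail": "high"}},
        ],
    }

    converted = _convert_message_parts([openai_message])
    # The image part becomes:
    #   {"type": "blob", "modality": "image", "mime_type": "image/jpeg", "content": "/9j/4AAQ"}

    # truncate_and_annotate_messages() then calls redact_blob_message_parts(), so the
    # blob's "content" is replaced with "[Filtered]" before the messages reach the span.
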
52 changes: 40 additions & 12 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,14 +3,19 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
normalize_message_role,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import safe_serialize

from ..consts import SPAN_ORIGIN
from ..utils import _set_agent_data, _set_usage_data
from ..utils import (
_set_agent_data,
_set_usage_data,
_transform_openai_agents_message_content,
)

from typing import TYPE_CHECKING

@@ -49,17 +54,40 @@ def invoke_agent_span(

original_input = kwargs.get("original_input")
if original_input is not None:
message = (
original_input
if isinstance(original_input, str)
else safe_serialize(original_input)
)
messages.append(
{
"content": [{"text": message, "type": "text"}],
"role": "user",
}
)
if isinstance(original_input, str):
# String input: wrap in text block
messages.append(
{
"content": [{"text": original_input, "type": "text"}],
"role": "user",
}
)
elif isinstance(original_input, list) and len(original_input) > 0:
# Check if list contains message objects (with type="message")
# or content parts (input_text, input_image, etc.)
first_item = original_input[0]
if isinstance(first_item, dict) and first_item.get("type") == "message":
# List of message objects - process each individually
for msg in original_input:
if isinstance(msg, dict) and msg.get("type") == "message":
role = normalize_message_role(msg.get("role", "user"))
content = msg.get("content")
transformed = _transform_openai_agents_message_content(
content
)
if isinstance(transformed, str):
transformed = [{"text": transformed, "type": "text"}]
elif not isinstance(transformed, list):
transformed = [
{"text": str(transformed), "type": "text"}
]
messages.append({"content": transformed, "role": role})
else:
# List of content parts - transform and wrap as user message
content = _transform_openai_agents_message_content(original_input)
if not isinstance(content, list):
content = [{"text": str(content), "type": "text"}]
messages.append({"content": content, "role": "user"})

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
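
For reference, the new branching in invoke_agent_span distinguishes two list shapes for original_input. A sketch of representative inputs (illustrative values; the input_text/input_image part names follow the OpenAI Agents conventions this code assumes):

    # Shape 1: a list of message objects; each one becomes its own gen_ai message.
    original_input = [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Describe this image"}],
        }
    ]

    # Shape 2: a bare list of content parts; the whole list is wrapped as one user message.
    original_input = [
        {"type": "input_text", "text": "Describe this image"},
        {"type": "input_image", "image_url": "data:image/png;base64,iVBORw0KGgo="},
    ]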