From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 10:45:45 +0100
Subject: [PATCH 1/8] fix(ai): redact message parts content of type blob

---
 sentry_sdk/ai/utils.py      |  51 +++++++++++++++++
 tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++-
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 1d2b4483c9..73155b0305 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
 from sys import getsizeof
 from typing import TYPE_CHECKING
 
+from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
+
 if TYPE_CHECKING:
     from typing import Any, Callable, Dict, List, Optional, Tuple
 
@@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
     return 0
 
 
+def redact_blob_message_parts(messages):
+    # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]
+    """
+    Redact blob message parts from the messages, by removing the "content" key.
+    e.g:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": "image/jpeg",
+                "content": "data:image/jpeg;base64,..."
+            }
+        ]
+    }
+    becomes:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": "image/jpeg",
+                "content": "[Filtered]"
+            }
+        ]
+    }
+    """
+
+    for message in messages:
+        content = message.get("content")
+        if isinstance(content, list):
+            for item in content:
+                if item.get("type") == "blob":
+                    item["content"] = SENSITIVE_DATA_SUBSTITUTE
+    return messages
+
+
 def truncate_messages_by_size(
     messages: "List[Dict[str, Any]]",
     max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +235,8 @@ def truncate_and_annotate_messages(
     if not messages:
         return None
 
+    messages = redact_blob_message_parts(messages)
+
     truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
     if removed_count > 0:
         scope._gen_ai_original_message_count[span.span_id] = len(messages)
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
index 8d3d4ba204..e9f3712cd3 100644
--- a/tests/test_ai_monitoring.py
+++ b/tests/test_ai_monitoring.py
@@ -4,7 +4,7 @@
 import pytest
 
 import sentry_sdk
-from sentry_sdk._types import AnnotatedValue
+from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
 from sentry_sdk.ai.monitoring import ai_track
 from sentry_sdk.ai.utils import (
     MAX_GEN_AI_MESSAGE_BYTES,
@@ -13,6 +13,7 @@
     truncate_and_annotate_messages,
     truncate_messages_by_size,
     _find_truncation_index,
+    redact_blob_message_parts,
 )
 from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import safe_serialize
@@ -542,3 +543,106 @@ def __init__(self):
         assert isinstance(messages_value, AnnotatedValue)
         assert messages_value.metadata["len"] == stored_original_length
         assert len(messages_value.value) == len(truncated_messages)
+
+
+class TestRedactBlobMessageParts:
+    def test_redacts_single_blob_content(self):
+        """Test that blob content is redacted in a message with a single blob part"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "How many ponies do you see in the image?",
+                        "type": "text",
+                    },
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/jpeg",
+                        "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+                    },
+                ],
+            }
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages  # Returns the same list
+        assert (
+            messages[0]["content"][0]["text"]
+            == "How many ponies do you see in the image?"
+        )
+        assert messages[0]["content"][0]["type"] == "text"
+        assert messages[0]["content"][1]["type"] == "blob"
+        assert messages[0]["content"][1]["modality"] == "image"
+        assert messages[0]["content"][1]["mime_type"] == "image/jpeg"
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+
+    def test_redacts_multiple_blob_parts(self):
+        """Test that multiple blob parts in a single message are all redacted"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Compare these images", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/jpeg",
+                        "content": "data:image/jpeg;base64,first_image",
+                    },
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/png",
+                        "content": "data:image/png;base64,second_image",
+                    },
+                ],
+            }
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages
+        assert messages[0]["content"][0]["text"] == "Compare these images"
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+        assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE
+
+    def test_redacts_blobs_in_multiple_messages(self):
+        """Test that blob parts are redacted across multiple messages"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"text": "First message", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "content": "data:image/jpeg;base64,first",
+                    },
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": "I see the image.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Second message", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "content": "data:image/jpeg;base64,second",
+                    },
+                ],
+            },
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+        assert messages[1]["content"] == "I see the image."  # Unchanged
+        assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:05:04 +0100
Subject: [PATCH 2/8] fix(ai): skip non dict messages

---
 sentry_sdk/ai/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 73155b0305..ae507e898b 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -182,6 +182,9 @@ def redact_blob_message_parts(messages):
     """
 
     for message in messages:
+        if not isinstance(message, dict):
+            continue
+
         content = message.get("content")
         if isinstance(content, list):
             for item in content:

From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:21:43 +0100
Subject: [PATCH 3/8] fix(ai): typing

---
 sentry_sdk/ai/utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index ae507e898b..1b61c7a113 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
     return 0
 
 
-def redact_blob_message_parts(messages):
-    # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]
+def redact_blob_message_parts(
+    messages: "List[Dict[str, Any]]",
+) -> "List[Dict[str, Any]]":
     """
-    Redact blob message parts from the messages, by removing the "content" key.
+    Redact blob message parts by replacing their "content" with a placeholder.
     e.g:

From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:37:12 +0100
Subject: [PATCH 4/8] fix(ai): content items may not be dicts

---
 sentry_sdk/ai/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 1b61c7a113..78a64ab737 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -189,7 +189,7 @@ def redact_blob_message_parts(
         content = message.get("content")
         if isinstance(content, list):
             for item in content:
-                if item.get("type") == "blob":
+                if isinstance(item, dict) and item.get("type") == "blob":
                     item["content"] = SENSITIVE_DATA_SUBSTITUTE
     return messages
 

From c606b66f1dbe62f3235f0b501c9250ba2b54632a Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Mon, 5 Jan 2026 20:15:27 +0100
Subject: [PATCH 5/8] fix(integrations): langchain add multimodal content
 transformation functions for images, audio, video, and files

---
 sentry_sdk/integrations/langchain.py          | 122 ++++++++-
 .../integrations/langchain/test_langchain.py  | 242 ++++++++++++++++++
 2 files changed, 363 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 950f437d4c..51cce8942d 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -116,6 +116,124 @@
     "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
 }
 
+# Map LangChain content types to Sentry modalities
+LANGCHAIN_TYPE_TO_MODALITY = {
+    "image": "image",
+    "image_url": "image",
+    "audio": "audio",
+    "video": "video",
+    "file": "document",
+}
+
+
+def _transform_langchain_content_block(
+    content_block: "Dict[str, Any]",
+) -> "Dict[str, Any]":
+    """
+    Transform a LangChain content block to Sentry-compatible format.
+
+    Handles multimodal content (images, audio, video, documents) by converting them
+    to the standardized format:
+    - base64 encoded data -> type: "blob"
+    - URL references -> type: "uri"
+    - file_id references -> type: "file"
+    """
+    if not isinstance(content_block, dict):
+        return content_block
+
+    block_type = content_block.get("type")
+
+    # Handle standard multimodal content types (image, audio, video, file)
+    if block_type in ("image", "audio", "video", "file"):
+        modality = LANGCHAIN_TYPE_TO_MODALITY.get(block_type, block_type)
+        mime_type = content_block.get("mime_type", "")
+
+        # Check for base64 encoded content
+        if "base64" in content_block:
+            return {
+                "type": "blob",
+                "modality": modality,
+                "mime_type": mime_type,
+                "content": content_block.get("base64", ""),
+            }
+        # Check for URL reference
+        elif "url" in content_block:
+            return {
+                "type": "uri",
+                "modality": modality,
+                "mime_type": mime_type,
+                "uri": content_block.get("url", ""),
+            }
+        # Check for file_id reference
+        elif "file_id" in content_block:
+            return {
+                "type": "file",
+                "modality": modality,
+                "mime_type": mime_type,
+                "file_id": content_block.get("file_id", ""),
+            }
+
+    # Handle legacy image_url format (OpenAI style)
+    elif block_type == "image_url":
+        image_url_data = content_block.get("image_url", {})
+        if isinstance(image_url_data, dict):
+            url = image_url_data.get("url", "")
+        else:
+            url = str(image_url_data)
+
+        # Check if it's a data URI (base64 encoded)
+        if url.startswith("data:"):
+            # Parse data URI: data:mime_type;base64,content
+            try:
+                # Format: data:image/jpeg;base64,/9j/4AAQ...
+                header, content = url.split(",", 1)
+                mime_type = header.split(":")[1].split(";")[0] if ":" in header else ""
+                return {
+                    "type": "blob",
+                    "modality": "image",
+                    "mime_type": mime_type,
+                    "content": content,
+                }
+            except (ValueError, IndexError):
+                # If parsing fails, return as URI
+                return {
+                    "type": "uri",
+                    "modality": "image",
+                    "mime_type": "",
+                    "uri": url,
+                }
+        else:
+            # Regular URL
+            return {
+                "type": "uri",
+                "modality": "image",
+                "mime_type": "",
+                "uri": url,
+            }
+
+    # For text blocks and other types, return as-is
+    return content_block
+
+
+def _transform_langchain_message_content(content: "Any") -> "Any":
+    """
+    Transform LangChain message content, handling both string content and
+    list of content blocks.
+    """
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, (list, tuple)):
+        transformed = []
+        for block in content:
+            if isinstance(block, dict):
+                transformed.append(_transform_langchain_content_block(block))
+            else:
+                transformed.append(block)
+        return transformed
+
+    return content
+
 
 # Contextvar to track agent names in a stack for re-entrant agent support
 _agent_stack: "contextvars.ContextVar[Optional[List[Optional[str]]]]" = (
@@ -234,7 +352,9 @@ def _handle_error(self, run_id: "UUID", error: "Any") -> None:
             del self.span_map[run_id]
 
     def _normalize_langchain_message(self, message: "BaseMessage") -> "Any":
-        parsed = {"role": message.type, "content": message.content}
+        # Transform content to handle multimodal data (images, audio, video, files)
+        transformed_content = _transform_langchain_message_content(message.content)
+        parsed = {"role": message.type, "content": transformed_content}
         parsed.update(message.additional_kwargs)
         return parsed
 
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 114e819bfb..07a37f2382 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -25,6 +25,8 @@
 from sentry_sdk.integrations.langchain import (
     LangchainIntegration,
     SentryLangchainCallback,
+    _transform_langchain_content_block,
+    _transform_langchain_message_content,
 )
 
 try:
@@ -1747,3 +1749,243 @@ def test_langchain_response_model_extraction(
         assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model
     else:
         assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {})
+
+
+# Tests for multimodal content transformation functions
+
+
+class TestTransformLangchainContentBlock:
+    """Tests for _transform_langchain_content_block function."""
+
+    def test_transform_image_base64(self):
+        """Test transformation of base64-encoded image content."""
+        content_block = {
+            "type": "image",
+            "base64": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+            "mime_type": "image/jpeg",
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+        }
+
+    def test_transform_image_url(self):
+        """Test transformation of URL-referenced image content."""
+        content_block = {
+            "type": "image",
+            "url": "https://example.com/image.jpg",
+            "mime_type": "image/jpeg",
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "uri": "https://example.com/image.jpg",
+        }
+
+    def test_transform_image_file_id(self):
+        """Test transformation of file_id-referenced image content."""
+        content_block = {
+            "type": "image",
+            "file_id": "file-abc123",
+            "mime_type": "image/png",
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "file",
+            "modality": "image",
+            "mime_type": "image/png",
+            "file_id": "file-abc123",
+        }
+
+    def test_transform_image_url_legacy_with_data_uri(self):
+        """Test transformation of legacy image_url format with data: URI (base64)."""
+        content_block = {
+            "type": "image_url",
+            "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD"},
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRgABAQAAAQABAAD",
+        }
+
+    def test_transform_image_url_legacy_with_http_url(self):
+        """Test transformation of legacy image_url format with HTTP URL."""
+        content_block = {
+            "type": "image_url",
+            "image_url": {"url": "https://example.com/image.png"},
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "https://example.com/image.png",
+        }
+
+    def test_transform_image_url_legacy_string_url(self):
+        """Test transformation of legacy image_url format with string URL."""
+        content_block = {
+            "type": "image_url",
+            "image_url": "https://example.com/image.gif",
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "https://example.com/image.gif",
+        }
+
+    def test_transform_image_url_legacy_data_uri_png(self):
+        """Test transformation of legacy image_url format with PNG data URI."""
+        content_block = {
+            "type": "image_url",
+            "image_url": {
+                "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
+            },
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/png",
+            "content": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
+        }
+
+    def test_transform_missing_mime_type(self):
+        """Test transformation when mime_type is not provided."""
+        content_block = {
+            "type": "image",
+            "base64": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "",
+            "content": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+        }
+
+
+class TestTransformLangchainMessageContent:
+    """Tests for _transform_langchain_message_content function."""
+
+    def test_transform_string_content(self):
+        """Test that string content is returned unchanged."""
+        result = _transform_langchain_message_content("Hello, world!")
+        assert result == "Hello, world!"
+
+    def test_transform_list_with_text_blocks(self):
+        """Test transformation of list with text blocks (unchanged)."""
+        content = [
+            {"type": "text", "text": "First message"},
+            {"type": "text", "text": "Second message"},
+        ]
+        result = _transform_langchain_message_content(content)
+        assert result == content
+
+    def test_transform_list_with_image_blocks(self):
+        """Test transformation of list containing image blocks."""
+        content = [
+            {"type": "text", "text": "Check out this image:"},
+            {
+                "type": "image",
+                "base64": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+                "mime_type": "image/jpeg",
+            },
+        ]
+        result = _transform_langchain_message_content(content)
+        assert len(result) == 2
+        assert result[0] == {"type": "text", "text": "Check out this image:"}
+        assert result[1] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+        }
+
+    def test_transform_list_with_mixed_content(self):
+        """Test transformation of list with mixed content types."""
+        content = [
+            {"type": "text", "text": "Here are some files:"},
+            {
+                "type": "image",
+                "url": "https://example.com/image.jpg",
+                "mime_type": "image/jpeg",
+            },
+            {
+                "type": "file",
+                "file_id": "doc-123",
+                "mime_type": "application/pdf",
+            },
+            {"type": "audio", "base64": "audio_data...", "mime_type": "audio/mp3"},
+        ]
+        result = _transform_langchain_message_content(content)
+        assert len(result) == 4
+        assert result[0] == {"type": "text", "text": "Here are some files:"}
+        assert result[1] == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "uri": "https://example.com/image.jpg",
+        }
+        assert result[2] == {
+            "type": "file",
+            "modality": "document",
+            "mime_type": "application/pdf",
+            "file_id": "doc-123",
+        }
+        assert result[3] == {
+            "type": "blob",
+            "modality": "audio",
+            "mime_type": "audio/mp3",
+            "content": "audio_data...",
+        }
+
+    def test_transform_list_with_non_dict_items(self):
+        """Test transformation handles non-dict items in list."""
+        content = ["plain string", {"type": "text", "text": "dict text"}]
+        result = _transform_langchain_message_content(content)
+        assert result == ["plain string", {"type": "text", "text": "dict text"}]
+
+    def test_transform_tuple_content(self):
+        """Test transformation of tuple content."""
+        content = (
+            {"type": "text", "text": "Message"},
+            {"type": "image", "base64": "data...", "mime_type": "image/png"},
+        )
+        result = _transform_langchain_message_content(content)
+        assert len(result) == 2
+        assert result[1] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/png",
+            "content": "data...",
+        }
+
+    def test_transform_list_with_legacy_image_url(self):
+        """Test transformation of list containing legacy image_url blocks."""
+        content = [
+            {"type": "text", "text": "Check this:"},
+            {
+                "type": "image_url",
+                "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ..."},
+            },
+        ]
+        result = _transform_langchain_message_content(content)
+        assert len(result) == 2
+        assert result[0] == {"type": "text", "text": "Check this:"}
+        assert result[1] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQ...",
+        }

From c650799f0b7de741cd77811732644aaa2d722686 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 8 Jan 2026 14:22:59 +0100
Subject: [PATCH 6/8] fix(integrations): ensure URL check for data URIs handles
 empty strings

---
 sentry_sdk/integrations/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 51cce8942d..1b9389c23a 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -182,7 +182,7 @@ def _transform_langchain_content_block(
             url = str(image_url_data)
 
         # Check if it's a data URI (base64 encoded)
-        if url.startswith("data:"):
+        if url and url.startswith("data:"):
             # Parse data URI: data:mime_type;base64,content
             try:
                 # Format: data:image/jpeg;base64,/9j/4AAQ...

From 510e2ed206be5a01667bdd03719fa2ee7be45876 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 14 Jan 2026 14:22:00 +0100
Subject: [PATCH 7/8] fix(integrations): Langchain: Handle Anthropic and Google
 provider-native content formats

---
 sentry_sdk/integrations/langchain.py          | 49 +++++++++++
 .../integrations/langchain/test_langchain.py  | 86 +++++++++++++++++++
 2 files changed, 135 insertions(+)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 1b9389c23a..68f5d0ad95 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -137,6 +137,12 @@ def _transform_langchain_content_block(
     - base64 encoded data -> type: "blob"
     - URL references -> type: "uri"
     - file_id references -> type: "file"
+
+    Supports multiple content block formats:
+    - LangChain standard: type + base64/url/file_id fields
+    - OpenAI legacy: image_url with nested url field
+    - Anthropic: type + source dict with type/media_type/data or url
+    - Google: inline_data or file_data dicts
     """
     if not isinstance(content_block, dict):
         return content_block
@@ -172,6 +178,27 @@ def _transform_langchain_content_block(
                 "mime_type": mime_type,
                 "file_id": content_block.get("file_id", ""),
             }
+        # Handle Anthropic-style format with nested "source" dict
+        elif "source" in content_block:
+            source = content_block.get("source", {})
+            if isinstance(source, dict):
+                source_type = source.get("type")
+                media_type = source.get("media_type", "") or mime_type
+
+                if source_type == "base64":
+                    return {
+                        "type": "blob",
+                        "modality": modality,
+                        "mime_type": media_type,
+                        "content": source.get("data", ""),
+                    }
+                elif source_type == "url":
+                    return {
+                        "type": "uri",
+                        "modality": modality,
+                        "mime_type": media_type,
+                        "uri": source.get("url", ""),
+                    }
 
     # Handle legacy image_url format (OpenAI style)
     elif block_type == "image_url":
@@ -211,6 +238,28 @@ def _transform_langchain_content_block(
                 "uri": url,
             }
 
+    # Handle Google-style inline_data format
+    if "inline_data" in content_block:
+        inline_data = content_block.get("inline_data", {})
+        if isinstance(inline_data, dict):
+            return {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": inline_data.get("mime_type", ""),
+                "content": inline_data.get("data", ""),
+            }
+
+    # Handle Google-style file_data format
+    if "file_data" in content_block:
+        file_data = content_block.get("file_data", {})
+        if isinstance(file_data, dict):
+            return {
+                "type": "uri",
+                "modality": "image",
+                "mime_type": file_data.get("mime_type", ""),
+                "uri": file_data.get("file_uri", ""),
+            }
+
     # For text blocks and other types, return as-is
     return content_block
 
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 07a37f2382..de5f5841ca 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -1874,6 +1874,92 @@ def test_transform_missing_mime_type(self):
             "content": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
         }
 
+    def test_transform_anthropic_source_base64(self):
+        """Test transformation of Anthropic-style image with base64 source."""
+        content_block = {
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/png",
+                "data": "iVBORw0KGgoAAAANSUhEUgAAAAE...",
+            },
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/png",
+            "content": "iVBORw0KGgoAAAANSUhEUgAAAAE...",
+        }
+
+    def test_transform_anthropic_source_url(self):
+        """Test transformation of Anthropic-style image with URL source."""
+        content_block = {
+            "type": "image",
+            "source": {
+                "type": "url",
+                "media_type": "image/jpeg",
+                "url": "https://example.com/image.jpg",
+            },
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "uri": "https://example.com/image.jpg",
+        }
+
+    def test_transform_anthropic_source_without_media_type(self):
+        """Test that an Anthropic-style image without media_type falls back to mime_type."""
+        content_block = {
+            "type": "image",
+            "mime_type": "image/webp",
+            "source": {
+                "type": "base64",
+                "data": "UklGRh4AAABXRUJQVlA4IBIAAAAwAQCdASoBAAEAAQAcJYgCdAEO",
+            },
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/webp",
+            "content": "UklGRh4AAABXRUJQVlA4IBIAAAAwAQCdASoBAAEAAQAcJYgCdAEO",
+        }
+
+    def test_transform_google_inline_data(self):
+        """Test transformation of Google-style inline_data format."""
+        content_block = {
+            "inline_data": {
+                "mime_type": "image/jpeg",
+                "data": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+            }
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRgABAQAAAQABAAD...",
+        }
+
+    def test_transform_google_file_data(self):
+        """Test transformation of Google-style file_data format."""
+        content_block = {
+            "file_data": {
+                "mime_type": "image/png",
+                "file_uri": "gs://bucket/path/to/image.png",
+            }
+        }
+        result = _transform_langchain_content_block(content_block)
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "image/png",
+            "uri": "gs://bucket/path/to/image.png",
+        }
+
 
 class TestTransformLangchainMessageContent:
     """Tests for _transform_langchain_message_content function."""

From 1764e571247a12963148b7bbecec37a6b23bfb4e Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 14 Jan 2026 16:51:10 +0100
Subject: [PATCH 8/8] fix(integrations): Use correct modality for Google-style
 content formats and use common function for data URI parsing

---
 sentry_sdk/integrations/langchain.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 68f5d0ad95..f29dfbe870 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -12,6 +12,7 @@
     GEN_AI_ALLOWED_MESSAGE_ROLES,
     get_start_span_function,
     normalize_message_roles,
+    parse_data_uri,
     set_data_normalized,
     truncate_and_annotate_messages,
 )
@@ -199,6 +200,26 @@ def _transform_langchain_content_block(
                         "mime_type": media_type,
                         "uri": source.get("url", ""),
                     }
+        # Handle Google-style inline_data format with standard type
+        elif "inline_data" in content_block:
+            inline_data = content_block.get("inline_data", {})
+            if isinstance(inline_data, dict):
+                return {
+                    "type": "blob",
+                    "modality": modality,
+                    "mime_type": inline_data.get("mime_type", "") or mime_type,
+                    "content": inline_data.get("data", ""),
+                }
+        # Handle Google-style file_data format with standard type
+        elif "file_data" in content_block:
+            file_data = content_block.get("file_data", {})
+            if isinstance(file_data, dict):
+                return {
+                    "type": "uri",
+                    "modality": modality,
+                    "mime_type": file_data.get("mime_type", "") or mime_type,
+                    "uri": file_data.get("file_uri", ""),
+                }
 
     # Handle legacy image_url format (OpenAI style)
     elif block_type == "image_url":
@@ -210,18 +231,15 @@ def _transform_langchain_content_block(
 
         # Check if it's a data URI (base64 encoded)
         if url and url.startswith("data:"):
-            # Parse data URI: data:mime_type;base64,content
             try:
-                # Format: data:image/jpeg;base64,/9j/4AAQ...
-                header, content = url.split(",", 1)
-                mime_type = header.split(":")[1].split(";")[0] if ":" in header else ""
+                mime_type, content = parse_data_uri(url)
                 return {
                     "type": "blob",
                     "modality": "image",
                     "mime_type": mime_type,
                     "content": content,
                 }
-            except (ValueError, IndexError):
+            except ValueError:
                 # If parsing fails, return as URI
                 return {
                     "type": "uri",