Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 188 additions & 1 deletion sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
GEN_AI_ALLOWED_MESSAGE_ROLES,
get_start_span_function,
normalize_message_roles,
parse_data_uri,
set_data_normalized,
truncate_and_annotate_messages,
)
Expand Down Expand Up @@ -116,6 +117,190 @@
"top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
}

# Map LangChain content types to Sentry modalities
LANGCHAIN_TYPE_TO_MODALITY = {
"image": "image",
"image_url": "image",
"audio": "audio",
"video": "video",
"file": "document",
}


def _transform_langchain_content_block(
content_block: "Dict[str, Any]",
) -> "Dict[str, Any]":
"""
Transform a LangChain content block to Sentry-compatible format.

Handles multimodal content (images, audio, video, documents) by converting them
to the standardized format:
- base64 encoded data -> type: "blob"
- URL references -> type: "uri"
- file_id references -> type: "file"

Supports multiple content block formats:
- LangChain standard: type + base64/url/file_id fields
- OpenAI legacy: image_url with nested url field
- Anthropic: type + source dict with type/media_type/data or url
- Google: inline_data or file_data dicts
"""
if not isinstance(content_block, dict):
return content_block

block_type = content_block.get("type")

# Handle standard multimodal content types (image, audio, video, file)
if block_type in ("image", "audio", "video", "file"):
modality = LANGCHAIN_TYPE_TO_MODALITY.get(block_type, block_type)
mime_type = content_block.get("mime_type", "")

# Check for base64 encoded content
if "base64" in content_block:
return {
"type": "blob",
"modality": modality,
"mime_type": mime_type,
"content": content_block.get("base64", ""),
}
# Check for URL reference
elif "url" in content_block:
return {
"type": "uri",
"modality": modality,
"mime_type": mime_type,
"uri": content_block.get("url", ""),
}
# Check for file_id reference
elif "file_id" in content_block:
return {
"type": "file",
"modality": modality,
"mime_type": mime_type,
"file_id": content_block.get("file_id", ""),
}
# Handle Anthropic-style format with nested "source" dict
elif "source" in content_block:
source = content_block.get("source", {})
if isinstance(source, dict):
source_type = source.get("type")
media_type = source.get("media_type", "") or mime_type

if source_type == "base64":
return {
"type": "blob",
"modality": modality,
"mime_type": media_type,
"content": source.get("data", ""),
}
elif source_type == "url":
return {
"type": "uri",
"modality": modality,
"mime_type": media_type,
"uri": source.get("url", ""),
}
# Handle Google-style inline_data format with standard type
elif "inline_data" in content_block:
inline_data = content_block.get("inline_data", {})
if isinstance(inline_data, dict):
return {
"type": "blob",
"modality": modality,
"mime_type": inline_data.get("mime_type", "") or mime_type,
"content": inline_data.get("data", ""),
}
# Handle Google-style file_data format with standard type
elif "file_data" in content_block:
file_data = content_block.get("file_data", {})
if isinstance(file_data, dict):
return {
"type": "uri",
"modality": modality,
"mime_type": file_data.get("mime_type", "") or mime_type,
"uri": file_data.get("file_uri", ""),
}

# Handle legacy image_url format (OpenAI style)
elif block_type == "image_url":
image_url_data = content_block.get("image_url", {})
if isinstance(image_url_data, dict):
url = image_url_data.get("url", "")
else:
url = str(image_url_data)

# Check if it's a data URI (base64 encoded)
if url and url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}
else:
# Regular URL
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}

# Handle Google-style inline_data format
if "inline_data" in content_block:
inline_data = content_block.get("inline_data", {})
if isinstance(inline_data, dict):
return {
"type": "blob",
Comment on lines +263 to +264
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The code hardcodes "modality": "image" for Google-style inline_data or file_data when a type field is absent, ignoring the mime_type which could indicate audio or video.
Severity: HIGH

Suggested Fix

Infer the modality from the mime_type when the type field is not present in a Google-style content block. Create a helper function that maps MIME types (e.g., "audio/mp3", "video/mp4") to the correct modality ("audio", "video", etc.). Use "image" as a default only if the MIME type is missing or unrecognized.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/langchain.py#L263-L264

Potential issue: When processing Google-style content blocks (`inline_data` or
`file_data`) that lack a `type` field, the function `_format_content_block` hardcodes
the modality as `"image"`. This occurs even when the `mime_type` field indicates other
content types like audio, video, or documents, which are supported by Google's Gemini
API. The `mime_type` is extracted but not used to infer the correct modality. This will
lead to incorrect data categorization in Sentry, where non-image content from the
LangChain integration will be mislabeled as an image.

Did we get this right? 👍 / 👎 to inform future reviews.

"modality": "image",
"mime_type": inline_data.get("mime_type", ""),
"content": inline_data.get("data", ""),
}

Comment on lines +263 to +269
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Google-style content blocks (inline_data, file_data) without an explicit type are always assigned modality: "image", ignoring the actual mime_type for audio or video.
Severity: HIGH

Suggested Fix

Instead of hardcoding modality: "image", derive the modality from the mime_type present in the inline_data or file_data dictionary. A helper function could map MIME type prefixes (e.g., 'audio/', 'video/') to the correct modality ('audio', 'video'), with 'image' as a default.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/langchain.py#L263-L269

Potential issue: The fallback handlers for Google-style content blocks, specifically for
`inline_data` and `file_data`, incorrectly hardcode the `modality` as `"image"`.
According to Google Gemini API documentation, content blocks may not have a `type` field
and rely solely on `mime_type` to determine the content. This means if a block with a
`mime_type` like `"audio/mpeg"` is processed, it will fall through to the handler at
line 260 and be incorrectly categorized with `modality: "image"`. This leads to
incorrect data categorization in Sentry for non-image content like audio or video.

Did we get this right? 👍 / 👎 to inform future reviews.

# Handle Google-style file_data format
if "file_data" in content_block:
file_data = content_block.get("file_data", {})
if isinstance(file_data, dict):
return {
"type": "uri",
"modality": "image",
"mime_type": file_data.get("mime_type", ""),
"uri": file_data.get("file_uri", ""),
}

# For text blocks and other types, return as-is
return content_block


def _transform_langchain_message_content(content: "Any") -> "Any":
"""
Transform LangChain message content, handling both string content and
list of content blocks.
"""
if isinstance(content, str):
return content

if isinstance(content, (list, tuple)):
transformed = []
for block in content:
if isinstance(block, dict):
transformed.append(_transform_langchain_content_block(block))
else:
transformed.append(block)
return transformed

return content


# Contextvar to track agent names in a stack for re-entrant agent support
_agent_stack: "contextvars.ContextVar[Optional[List[Optional[str]]]]" = (
Expand Down Expand Up @@ -234,7 +419,9 @@ def _handle_error(self, run_id: "UUID", error: "Any") -> None:
del self.span_map[run_id]

def _normalize_langchain_message(self, message: "BaseMessage") -> "Any":
    """
    Convert a LangChain BaseMessage into a plain dict with "role" and
    "content" keys, then merge in the message's additional_kwargs.

    Multimodal content blocks (images, audio, video, files) are rewritten
    into the Sentry format via _transform_langchain_message_content.
    """
    normalized = {
        "role": message.type,
        "content": _transform_langchain_message_content(message.content),
    }
    normalized.update(message.additional_kwargs)
    return normalized

Expand Down
Loading
Loading