Skip to content

Commit bd78165

Browse files
committed
feat(ai): Add shared content transformation functions for multimodal AI messages
Add transform_content_part() and transform_message_content() functions to standardize content part handling across all AI integrations. These functions transform various SDK-specific formats (OpenAI, Anthropic, Google, LangChain) into a unified format with three part types: blob (base64-encoded binary data), uri (URL references, including file URIs), and file (file ID references). Also adds the get_modality_from_mime_type() helper to infer content modality (image/audio/video/document) from MIME types.
1 parent a47ef34 commit bd78165

File tree

2 files changed

+721
-0
lines changed

2 files changed

+721
-0
lines changed

sentry_sdk/ai/utils.py

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
7272
return mime_type, content
7373

7474

75+
def get_modality_from_mime_type(mime_type: str) -> str:
    """
    Map a MIME type string onto one of Sentry's content modalities.

    The match is case-insensitive and looks only at the top-level type
    prefix (the part up to and including the slash).

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")

    Returns:
        One of: "image", "audio", "video", or "document".
        "application/*" and "text/*" both map to "document".
        Empty or unrecognized MIME types fall back to "image".

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    """
    fallback = "image"
    if not mime_type:
        return fallback

    # Checked in order; the first matching prefix decides the modality.
    prefix_table = (
        ("image/", "image"),
        ("audio/", "audio"),
        ("video/", "video"),
        ("application/", "document"),
        ("text/", "document"),
    )

    normalized = mime_type.lower()
    for prefix, modality in prefix_table:
        if normalized.startswith(prefix):
            return modality

    # Unknown top-level type
    return fallback
107+
108+
109+
def transform_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a content part from various AI SDK formats to Sentry's standardized format.

    Supported input formats:
    - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}}
    - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}}
    - Google: {"inline_data": {...}} or {"file_data": {...}}
    - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is unrecognized or transformation fails. An "image_url" part whose URL
        is missing, empty, or not a string is treated as unrecognized.
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")

    # Handle OpenAI/LiteLLM image_url format
    # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."}
    if block_type == "image_url":
        image_url_data = content_part.get("image_url")
        if isinstance(image_url_data, dict):
            url = image_url_data.get("url", "")
        else:
            url = image_url_data

        # Only a non-empty string is a usable URL. Rejecting everything else
        # here (None, numbers, nested containers, ...) keeps the str-only
        # calls below from raising on malformed input.
        if not isinstance(url, str) or not url:
            return None

        # Check if it's a data URI (base64 encoded)
        if url.startswith("data:"):
            try:
                mime_type, content = parse_data_uri(url)
            except ValueError:
                # Unparseable data URI: fall through and report it as a URI
                pass
            else:
                return {
                    "type": "blob",
                    "modality": get_modality_from_mime_type(mime_type),
                    "mime_type": mime_type,
                    "content": content,
                }

        # Regular URL (or a data URI that could not be parsed)
        return {
            "type": "uri",
            "modality": "image",
            "mime_type": "",
            "uri": url,
        }

    # Handle Anthropic format with source dict
    # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}}
    if block_type in ("image", "document") and "source" in content_part:
        source = content_part.get("source")
        if not isinstance(source, dict):
            return None

        source_type = source.get("type")
        media_type = source.get("media_type", "")
        # A "document" block is always a document; for "image" blocks the
        # modality is derived from the declared media type.
        modality = (
            "document"
            if block_type == "document"
            else get_modality_from_mime_type(media_type)
        )

        if source_type == "base64":
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": media_type,
                "content": source.get("data", ""),
            }
        elif source_type == "url":
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": media_type,
                "uri": source.get("url", ""),
            }
        elif source_type == "file":
            return {
                "type": "file",
                "modality": modality,
                "mime_type": media_type,
                "file_id": source.get("file_id", ""),
            }
        return None

    # Handle Google inline_data format
    # {"inline_data": {"mime_type": "...", "data": "..."}}
    if "inline_data" in content_part:
        inline_data = content_part.get("inline_data")
        if isinstance(inline_data, dict):
            mime_type = inline_data.get("mime_type", "")
            return {
                "type": "blob",
                "modality": get_modality_from_mime_type(mime_type),
                "mime_type": mime_type,
                "content": inline_data.get("data", ""),
            }
        return None

    # Handle Google file_data format
    # {"file_data": {"mime_type": "...", "file_uri": "..."}}
    if "file_data" in content_part:
        file_data = content_part.get("file_data")
        if isinstance(file_data, dict):
            mime_type = file_data.get("mime_type", "")
            return {
                "type": "uri",
                "modality": get_modality_from_mime_type(mime_type),
                "mime_type": mime_type,
                "uri": file_data.get("file_uri", ""),
            }
        return None

    # Handle generic format with direct fields (LangChain style)
    # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}
    if block_type in ("image", "audio", "video", "file"):
        mime_type = content_part.get("mime_type", "")
        # The block type doubles as the modality, except "file" -> "document"
        modality = block_type if block_type != "file" else "document"

        # Precedence when several payload keys are present: base64, url, file_id
        if "base64" in content_part:
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": mime_type,
                "content": content_part.get("base64", ""),
            }
        elif "url" in content_part:
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": mime_type,
                "uri": content_part.get("url", ""),
            }
        elif "file_id" in content_part:
            return {
                "type": "file",
                "modality": modality,
                "mime_type": mime_type,
                "file_id": content_part.get("file_id", ""),
            }

    # Unrecognized format
    return None
278+
279+
280+
def transform_message_content(content: "Any") -> "Any":
    """
    Normalize message content into Sentry's standardized content-part format.

    Only list/tuple content is rewritten. Each dict item is passed through
    transform_content_part(); when that yields None (unrecognized format) the
    original item is kept. Non-dict items are copied over untouched.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - String content: returned as-is
        - List or tuple content: a new list with each transformable item
          converted to standardized format
        - Other: returned as-is
    """
    # Strings and any other non-sequence values pass straight through.
    if not isinstance(content, (list, tuple)):
        return content

    def _convert(part: "Any") -> "Any":
        if not isinstance(part, dict):
            return part
        converted = transform_content_part(part)
        # Keep the original part when it could not be transformed
        return part if converted is None else converted

    return [_convert(part) for part in content]
310+
311+
75312
def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
76313
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
77314
if hasattr(data, "model_dump"):

0 commit comments

Comments
 (0)