Skip to content

Commit c45da0b

Browse files
committed
Merge shared content transformation functions
2 parents 5ca2770 + bd78165 commit c45da0b

File tree

17 files changed

+1109
-102
lines changed

17 files changed

+1109
-102
lines changed

scripts/find_raise_from_none.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import ast
2+
import pathlib
3+
from collections import defaultdict
4+
5+
6+
class RaiseFromNoneVisitor(ast.NodeVisitor):
    """AST visitor that records the line of every ``raise ... from None``.

    Findings are appended to the class-level ``line_numbers`` mapping under
    the filename given to the constructor, so all instances contribute to
    the same mapping.
    """

    # filename -> line numbers of ``raise ... from None`` statements.
    # Class attribute: shared by every instance of the visitor.
    line_numbers = defaultdict(list)

    def __init__(self, filename):
        """Remember which file the visited tree was parsed from."""
        self.filename = filename

    def visit_Raise(self, node: ast.Raise):
        """Record ``node`` when its cause is the literal ``None``, then descend."""
        cause = node.cause
        # A raise without a ``from`` clause has ``cause is None`` (no AST node),
        # which fails the isinstance check and is therefore not recorded.
        suppresses_chain = isinstance(cause, ast.Constant) and cause.value is None
        if suppresses_chain:
            RaiseFromNoneVisitor.line_numbers[self.filename].append(node.lineno)
        self.generic_visit(node)
17+
18+
19+
def scan_file(module_path: pathlib.Path):
    """Parse one module and record its ``raise ... from None`` statements.

    The file is read as UTF-8, parsed into an AST and walked with a
    ``RaiseFromNoneVisitor`` created for ``module_path``.

    Args:
        module_path: Path of the Python source file to scan.
    """
    text = pathlib.Path(module_path).read_text(encoding="utf-8")
    visitor = RaiseFromNoneVisitor(module_path)
    visitor.visit(ast.parse(text, filename=module_path))
24+
25+
26+
def walk_package_modules(package_root="sentry_sdk"):
    """Yield every ``*.py`` file found recursively below ``package_root``.

    Args:
        package_root: Directory to search. A relative path is resolved
            against the current working directory. Defaults to
            ``"sentry_sdk"``.

    Yields:
        ``pathlib.Path`` objects, each prefixed with ``package_root``, in
        sorted order.
    """
    # rglob() order depends on the filesystem; sorting keeps the scan order,
    # and anything derived from it, reproducible between runs.
    yield from sorted(pathlib.Path(package_root).rglob("*.py"))
29+
30+
31+
def format_detected_raises(line_numbers) -> str:
    """Render the findings as one summary line per file.

    Args:
        line_numbers: Mapping of file path to the line numbers recorded for
            that file.

    Returns:
        The per-file summaries joined by newlines, in the mapping's iteration
        order. Line numbers within a file are listed in ascending order. An
        empty mapping gives an empty string.
    """

    def describe(filepath, found):
        locations = ", ".join(f"line {ln}" for ln in sorted(found))
        return f"{filepath}: {len(found)} occurrence(s) at {locations}"

    return "\n".join(
        describe(filepath, found) for filepath, found in line_numbers.items()
    )
39+
40+
41+
def main():
    """Scan the package and fail if ``raise ... from None`` usage is unexpected.

    Every module yielded by ``walk_package_modules()`` is scanned, and the
    number of findings per file is compared with a fixed allow-list.

    Raises:
        Exception: If the per-file counts differ from the allow-list in any
            way. The exception carries notes explaining the check and
            listing every detected occurrence.
    """
    for module_path in walk_package_modules():
        scan_file(module_path)

    detected = RaiseFromNoneVisitor.line_numbers

    # TODO: Investigate why we suppress exception chains here.
    ignored_raises = {
        pathlib.Path("sentry_sdk/integrations/asgi.py"): 2,
        pathlib.Path("sentry_sdk/integrations/asyncio.py"): 1,
    }

    raise_from_none_count = {}
    for file, occurrences in detected.items():
        raise_from_none_count[file] = len(occurrences)

    # Nothing to report when the findings match the allow-list exactly.
    if raise_from_none_count == ignored_raises:
        return

    exc = Exception("Detected unexpected raise ... from None.")
    exc.add_note(
        "Raise ... from None suppresses chained exceptions, removing valuable context."
    )
    exc.add_note(format_detected_raises(detected))
    raise exc


if __name__ == "__main__":
    main()

scripts/populate_tox/tox.jinja

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,4 @@ commands =
230230
ruff check tests sentry_sdk
231231
ruff format --check tests sentry_sdk
232232
mypy sentry_sdk
233+
python scripts/find_raise_from_none.py

sentry_sdk/ai/monitoring.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import inspect
2+
import sys
23
from functools import wraps
34

45
from sentry_sdk.consts import SPANDATA
56
import sentry_sdk.utils
67
from sentry_sdk import start_span
78
from sentry_sdk.tracing import Span
8-
from sentry_sdk.utils import ContextVar
9+
from sentry_sdk.utils import ContextVar, reraise, capture_internal_exceptions
910

1011
from typing import TYPE_CHECKING
1112

@@ -44,13 +45,15 @@ def sync_wrapped(*args: "Any", **kwargs: "Any") -> "Any":
4445
try:
4546
res = f(*args, **kwargs)
4647
except Exception as e:
47-
event, hint = sentry_sdk.utils.event_from_exception(
48-
e,
49-
client_options=sentry_sdk.get_client().options,
50-
mechanism={"type": "ai_monitoring", "handled": False},
51-
)
52-
sentry_sdk.capture_event(event, hint=hint)
53-
raise e from None
48+
exc_info = sys.exc_info()
49+
with capture_internal_exceptions():
50+
event, hint = sentry_sdk.utils.event_from_exception(
51+
e,
52+
client_options=sentry_sdk.get_client().options,
53+
mechanism={"type": "ai_monitoring", "handled": False},
54+
)
55+
sentry_sdk.capture_event(event, hint=hint)
56+
reraise(*exc_info)
5457
finally:
5558
_ai_pipeline_name.set(None)
5659
return res
@@ -72,13 +75,15 @@ async def async_wrapped(*args: "Any", **kwargs: "Any") -> "Any":
7275
try:
7376
res = await f(*args, **kwargs)
7477
except Exception as e:
75-
event, hint = sentry_sdk.utils.event_from_exception(
76-
e,
77-
client_options=sentry_sdk.get_client().options,
78-
mechanism={"type": "ai_monitoring", "handled": False},
79-
)
80-
sentry_sdk.capture_event(event, hint=hint)
81-
raise e from None
78+
exc_info = sys.exc_info()
79+
with capture_internal_exceptions():
80+
event, hint = sentry_sdk.utils.event_from_exception(
81+
e,
82+
client_options=sentry_sdk.get_client().options,
83+
mechanism={"type": "ai_monitoring", "handled": False},
84+
)
85+
sentry_sdk.capture_event(event, hint=hint)
86+
reraise(*exc_info)
8287
finally:
8388
_ai_pipeline_name.set(None)
8489
return res

sentry_sdk/ai/utils.py

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
7272
return mime_type, content
7373

7474

75+
def get_modality_from_mime_type(mime_type: str) -> str:
    """
    Map a MIME type string to the content modality it represents.

    The comparison is case-insensitive and looks only at the prefix up to
    and including the slash.

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")

    Returns:
        "image", "audio" or "video" for the matching top-level type,
        "document" for "application/..." and "text/...", and "image" for
        empty or unrecognized values.

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    """
    fallback = "image"
    if not mime_type:
        return fallback

    normalized = mime_type.lower()

    # For these top-level types the modality name equals the type itself.
    for modality in ("image", "audio", "video"):
        if normalized.startswith(modality + "/"):
            return modality

    if normalized.startswith(("application/", "text/")):
        return "document"

    return fallback
107+
108+
109+
def transform_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a content part from various AI SDK formats to Sentry's standardized format.

    Supported input formats:
    - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}}
    - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}}
    - Google: {"inline_data": {...}} or {"file_data": {...}}
    - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    The formats are tried in the order listed above; the first one whose
    marker ("type" value or top-level key) is present decides the result.

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is unrecognized or transformation fails (including a missing, empty
        or non-string image URL).
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")

    # Handle OpenAI/LiteLLM image_url format
    # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."}
    if block_type == "image_url":
        image_url_data = content_part.get("image_url")
        if isinstance(image_url_data, dict):
            url = image_url_data.get("url", "")
        else:
            url = image_url_data

        # Only a non-empty string can be inspected below. Reject everything
        # else (None, numbers, bytes, ...) instead of failing on startswith().
        if not isinstance(url, str) or not url:
            return None

        # A data URI carries the (base64 encoded) payload inline.
        if url.startswith("data:"):
            try:
                mime_type, content = parse_data_uri(url)
            except ValueError:
                # Unparseable data URI: fall through and report it as a URI.
                pass
            else:
                return {
                    "type": "blob",
                    "modality": get_modality_from_mime_type(mime_type),
                    "mime_type": mime_type,
                    "content": content,
                }

        # Regular URL, or a data URI that could not be parsed.
        return {
            "type": "uri",
            "modality": "image",
            "mime_type": "",
            "uri": url,
        }

    # Handle Anthropic format with source dict
    # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}}
    if block_type in ("image", "document") and "source" in content_part:
        source = content_part.get("source")
        if not isinstance(source, dict):
            return None

        source_type = source.get("type")
        media_type = source.get("media_type", "")
        # A document block is always a document; for image blocks the
        # modality is derived from the media type.
        modality = (
            "document"
            if block_type == "document"
            else get_modality_from_mime_type(media_type)
        )

        if source_type == "base64":
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": media_type,
                "content": source.get("data", ""),
            }
        if source_type == "url":
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": media_type,
                "uri": source.get("url", ""),
            }
        if source_type == "file":
            return {
                "type": "file",
                "modality": modality,
                "mime_type": media_type,
                "file_id": source.get("file_id", ""),
            }
        return None

    # Handle Google inline_data format
    # {"inline_data": {"mime_type": "...", "data": "..."}}
    if "inline_data" in content_part:
        inline_data = content_part.get("inline_data")
        if not isinstance(inline_data, dict):
            return None
        mime_type = inline_data.get("mime_type", "")
        return {
            "type": "blob",
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            "content": inline_data.get("data", ""),
        }

    # Handle Google file_data format
    # {"file_data": {"mime_type": "...", "file_uri": "..."}}
    if "file_data" in content_part:
        file_data = content_part.get("file_data")
        if not isinstance(file_data, dict):
            return None
        mime_type = file_data.get("mime_type", "")
        return {
            "type": "uri",
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            "uri": file_data.get("file_uri", ""),
        }

    # Handle generic format with direct fields (LangChain style)
    # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}
    if block_type in ("image", "audio", "video", "file"):
        mime_type = content_part.get("mime_type", "")
        modality = block_type if block_type != "file" else "document"

        # Precedence when several payload keys are present: base64, url, file_id.
        if "base64" in content_part:
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": mime_type,
                "content": content_part.get("base64", ""),
            }
        if "url" in content_part:
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": mime_type,
                "uri": content_part.get("url", ""),
            }
        if "file_id" in content_part:
            return {
                "type": "file",
                "modality": modality,
                "mime_type": mime_type,
                "file_id": content_part.get("file_id", ""),
            }

    # Unrecognized format
    return None
278+
279+
280+
def transform_message_content(content: "Any") -> "Any":
    """
    Standardize the content blocks of a message.

    Lists and tuples are copied into a new list in which every dict item
    that transform_content_part() can convert is replaced by its converted
    form. Dict items it returns None for, and all non-dict items, are kept
    unchanged.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - List or tuple content: a new list with transformable items converted
        - Anything else (including strings): returned as-is
    """
    if not isinstance(content, (list, tuple)):
        return content

    parts = list(content)
    for index, part in enumerate(parts):
        if not isinstance(part, dict):
            continue
        replacement = transform_content_part(part)
        # None means "not transformable": leave the original item in place.
        if replacement is not None:
            parts[index] = replacement
    return parts
310+
311+
75312
def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
76313
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
77314
if hasattr(data, "model_dump"):

0 commit comments

Comments
 (0)