Skip to content
76 changes: 75 additions & 1 deletion sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
from typing import TYPE_CHECKING

import sentry_sdk
Expand All @@ -14,7 +15,7 @@
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, List
from datetime import datetime

try:
Expand All @@ -36,6 +37,78 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
return metadata


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
    """
    Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.

    Only content parts of type ``image_url`` inside a list-valued ``content``
    are rewritten; every other message and part is passed through as-is.
    The input is deep-copied first, so the caller's messages are never mutated.

    * A base64 data URL (``data:<mime>;base64,<payload>``) becomes a ``blob``
      part whose ``content`` is the payload that follows ``;base64,``.
    * Any other non-empty URL string becomes a ``uri`` part.
    * Parts without a usable URL (``image_url`` missing/``None``, ``url``
      missing, empty, or not a string) are returned unchanged.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": "data:image/jpeg;base64,...",
                    "detail": "high"
                }
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "..."
            }
        ]
    }

    :param messages: Chat messages as passed to the completion call.
    :returns: A converted deep copy of ``messages``.
    """
    # Deep copy to avoid mutating original messages from kwargs
    messages = copy.deepcopy(messages)

    def _map_item(item: "Any") -> "Any":
        # Only dict parts tagged as image_url are candidates for conversion.
        if not isinstance(item, dict) or item.get("type") != "image_url":
            return item

        image_url = item.get("image_url")
        if isinstance(image_url, dict):
            url = image_url.get("url")
        else:
            # Also tolerate the shorthand where `image_url` is the URL string
            # itself; any other shape is rejected by the check below.
            url = image_url

        # Nothing usable to convert: leave the part untouched instead of
        # raising from inside the instrumentation callback.
        if not isinstance(url, str) or not url:
            return item

        if url.startswith("data:"):
            header, marker, payload = url.partition(";base64,")
            if marker:
                return {
                    "type": "blob",
                    "modality": "image",
                    # Remove "data:" prefix (5 chars) to get proper MIME type
                    "mime_type": header[5:],
                    "content": payload,
                }

        # Regular URLs and data URLs that are not base64 encoded.
        return {
            "type": "uri",
            "uri": url,
        }

    for message in messages:
        if not isinstance(message, dict):
            continue
        content = message.get("content")
        if isinstance(content, list):
            message["content"] = [_map_item(item) for item in content]
    return messages


def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
Expand Down Expand Up @@ -102,6 +175,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
Expand Down
239 changes: 239 additions & 0 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json
import pytest
import time
Expand All @@ -21,8 +22,10 @@ async def __call__(self, *args, **kwargs):
import sentry_sdk
from sentry_sdk import start_transaction
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from sentry_sdk.integrations.litellm import (
LiteLLMIntegration,
_convert_message_parts,
_input_callback,
_success_callback,
_failure_callback,
Expand Down Expand Up @@ -753,3 +756,239 @@ def test_litellm_message_truncation(sentry_init, capture_events):
assert "small message 4" in str(parsed_messages[0])
assert "small message 5" in str(parsed_messages[1])
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5


# Shared fixtures for the binary-content tests below: raw fake image bytes,
# their base64 text form, and a PNG data URI embedding that base64 text.
IMAGE_DATA = b"fake_image_data_12345"
IMAGE_B64 = str(base64.b64encode(IMAGE_DATA), "utf-8")
IMAGE_DATA_URI = "data:image/png;base64," + IMAGE_B64


def test_binary_content_encoding_image_url(sentry_init, capture_events):
    """A base64 data-URI image part is recorded on the span as a ``blob`` item."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    text_part = {"type": "text", "text": "Look at this image:"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": IMAGE_DATA_URI, "detail": "high"},
    }
    messages = [{"role": "user", "content": [text_part, image_part]}]
    mock_response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
        _input_callback(kwargs)
        _success_callback(kwargs, mock_response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Locate the first recorded content part that was converted to a blob.
    blob_item = None
    for recorded in messages_data:
        if "content" not in recorded:
            continue
        for part in recorded["content"]:
            if part.get("type") == "blob":
                blob_item = part
                break
        if blob_item is not None:
            break

    assert blob_item is not None
    assert blob_item["modality"] == "image"
    assert blob_item["mime_type"] == "image/png"
    # The payload is either kept verbatim or replaced by the substitute marker.
    payload = blob_item["content"]
    assert IMAGE_B64 in payload or payload == BLOB_DATA_SUBSTITUTE


def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
    """Text parts and converted image parts coexist in the recorded messages."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    user_content = [
        {"type": "text", "text": "Here is an image:"},
        {
            "type": "image_url",
            "image_url": {"url": IMAGE_DATA_URI},
        },
        {"type": "text", "text": "What do you see?"},
    ]
    messages = [{"role": "user", "content": user_content}]
    mock_response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
        _input_callback(kwargs)
        _success_callback(kwargs, mock_response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Flatten every content part of every recorded message that has content.
    content_items = []
    for recorded in messages_data:
        if "content" in recorded:
            content_items.extend(recorded["content"])

    assert any(part.get("type") == "text" for part in content_items)
    assert any(part.get("type") == "blob" for part in content_items)


def test_binary_content_encoding_uri_type(sentry_init, capture_events):
    """A plain https image URL is recorded on the span as a ``uri`` item."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    image_part = {
        "type": "image_url",
        "image_url": {"url": "https://example.com/image.jpg"},
    }
    messages = [{"role": "user", "content": [image_part]}]
    mock_response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
        _input_callback(kwargs)
        _success_callback(kwargs, mock_response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Locate the first recorded content part that was converted to a uri.
    uri_item = None
    for recorded in messages_data:
        if "content" not in recorded:
            continue
        for part in recorded["content"]:
            if part.get("type") == "uri":
                uri_item = part
                break
        if uri_item is not None:
            break

    assert uri_item is not None
    assert uri_item["uri"] == "https://example.com/image.jpg"


def test_convert_message_parts_direct():
    """Calling the converter directly turns a base64 data URI into a blob part."""
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Hello"},
            {
                "type": "image_url",
                "image_url": {"url": IMAGE_DATA_URI},
            },
        ],
    }
    converted = _convert_message_parts([user_message])

    # next() without a default: a missing blob part fails the test loudly.
    blob_item = next(
        filter(lambda part: part.get("type") == "blob", converted[0]["content"])
    )
    assert blob_item["modality"] == "image"
    assert blob_item["mime_type"] == "image/png"
    assert IMAGE_B64 in blob_item["content"]


def test_convert_message_parts_does_not_mutate_original():
    """The caller's message list must be left untouched by the conversion."""
    original_url = IMAGE_DATA_URI
    image_part = {
        "type": "image_url",
        "image_url": {"url": original_url},
    }
    messages = [{"role": "user", "content": [image_part]}]

    _convert_message_parts(messages)

    # Inspect the input through `messages` to confirm it was not rewritten.
    first_part = messages[0]["content"][0]
    assert first_part["type"] == "image_url"
    assert first_part["image_url"]["url"] == original_url


def test_convert_message_parts_data_url_without_base64():
    """A data URL lacking the ``;base64,`` marker is handled like a regular URI."""
    raw_data_url = {
        "type": "image_url",
        "image_url": {"url": "data:image/png,rawdata"},
    }
    messages = [{"role": "user", "content": [raw_data_url]}]

    converted = _convert_message_parts(messages)

    # No base64 encoding present, so the part must become a uri, not a blob.
    (uri_item,) = converted[0]["content"][:1]
    assert uri_item["type"] == "uri"
    assert uri_item["uri"] == "data:image/png,rawdata"


def test_convert_message_parts_image_url_none():
    """A part whose ``image_url`` is None is converted without raising."""
    part_without_image = {
        "type": "image_url",
        "image_url": None,
    }
    messages = [{"role": "user", "content": [part_without_image]}]

    converted = _convert_message_parts(messages)

    # The part is expected to come back with its original type.
    first_part = converted[0]["content"][0]
    assert first_part["type"] == "image_url"


def test_convert_message_parts_image_url_missing_url():
    """A part whose ``image_url`` dict has no ``url`` key is converted without raising."""
    part_without_url = {
        "type": "image_url",
        "image_url": {"detail": "high"},
    }
    messages = [{"role": "user", "content": [part_without_url]}]

    converted = _convert_message_parts(messages)

    # The part is expected to come back with its original type.
    first_part = converted[0]["content"][0]
    assert first_part["type"] == "image_url"
Loading