Skip to content

Commit ead491d

Browse files
fix(litellm): fix gen_ai.request.messages to be as expected (#5255)
Issues: closes https://linear.app/getsentry/issue/TET-1635/redact-images-litellm
1 parent 141eaaa commit ead491d

File tree

2 files changed

+272
-1
lines changed

2 files changed

+272
-1
lines changed

sentry_sdk/integrations/litellm.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
from typing import TYPE_CHECKING
23

34
import sentry_sdk
@@ -7,14 +8,15 @@
78
get_start_span_function,
89
set_data_normalized,
910
truncate_and_annotate_messages,
11+
transform_openai_content_part,
1012
)
1113
from sentry_sdk.consts import SPANDATA
1214
from sentry_sdk.integrations import DidNotEnable, Integration
1315
from sentry_sdk.scope import should_send_default_pii
1416
from sentry_sdk.utils import event_from_exception
1517

1618
if TYPE_CHECKING:
17-
from typing import Any, Dict
19+
from typing import Any, Dict, List
1820
from datetime import datetime
1921

2022
try:
@@ -36,6 +38,33 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
3638
return metadata
3739

3840

41+
def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
42+
"""
43+
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format
44+
using the OpenAI-specific transformer (LiteLLM uses OpenAI's message format).
45+
46+
Deep copies messages to avoid mutating original kwargs.
47+
"""
48+
# Deep copy to avoid mutating original messages from kwargs
49+
messages = copy.deepcopy(messages)
50+
51+
for message in messages:
52+
if not isinstance(message, dict):
53+
continue
54+
content = message.get("content")
55+
if isinstance(content, (list, tuple)):
56+
transformed = []
57+
for item in content:
58+
if isinstance(item, dict):
59+
result = transform_openai_content_part(item)
60+
# If transformation succeeded, use the result; otherwise keep original
61+
transformed.append(result if result is not None else item)
62+
else:
63+
transformed.append(item)
64+
message["content"] = transformed
65+
return messages
66+
67+
3968
def _input_callback(kwargs: "Dict[str, Any]") -> None:
4069
"""Handle the start of a request."""
4170
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
@@ -102,6 +131,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
102131
messages = kwargs.get("messages", [])
103132
if messages:
104133
scope = sentry_sdk.get_current_scope()
134+
messages = _convert_message_parts(messages)
105135
messages_data = truncate_and_annotate_messages(messages, span, scope)
106136
if messages_data is not None:
107137
set_data_normalized(

tests/integrations/litellm/test_litellm.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import base64
12
import json
23
import pytest
34
import time
@@ -21,8 +22,10 @@ async def __call__(self, *args, **kwargs):
2122
import sentry_sdk
2223
from sentry_sdk import start_transaction
2324
from sentry_sdk.consts import OP, SPANDATA
25+
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
2426
from sentry_sdk.integrations.litellm import (
2527
LiteLLMIntegration,
28+
_convert_message_parts,
2629
_input_callback,
2730
_success_callback,
2831
_failure_callback,
@@ -753,3 +756,241 @@ def test_litellm_message_truncation(sentry_init, capture_events):
753756
assert "small message 4" in str(parsed_messages[0])
754757
assert "small message 5" in str(parsed_messages[1])
755758
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
759+
760+
761+
# Placeholder image bytes used by the binary-content tests, plus their base64
# text form and the matching PNG data URI.
IMAGE_DATA = b"fake_image_data_12345"
IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8")
IMAGE_DATA_URI = "data:image/png;base64," + IMAGE_B64
764+
765+
766+
def test_binary_content_encoding_image_url(sentry_init, capture_events):
    """A base64 data-URI image part must be recorded on the span as an image/png blob."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    text_part = {"type": "text", "text": "Look at this image:"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": IMAGE_DATA_URI, "detail": "high"},
    }
    request_messages = [{"role": "user", "content": [text_part, image_part]}]
    response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": request_messages}
        _input_callback(kwargs)
        _success_callback(kwargs, response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    recorded = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Find the first recorded content part that was converted into a blob.
    blob_item = None
    for message in recorded:
        if "content" not in message:
            continue
        for part in message["content"]:
            if part.get("type") == "blob":
                blob_item = part
                break
        if blob_item is not None:
            break

    assert blob_item is not None
    assert blob_item["modality"] == "image"
    assert blob_item["mime_type"] == "image/png"
    # The payload is either still present or replaced by the substitute marker.
    blob_content = blob_item["content"]
    assert IMAGE_B64 in blob_content or blob_content == BLOB_DATA_SUBSTITUTE
814+
815+
816+
def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
    """Recorded messages must contain both a text part and a blob part for mixed input."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Here is an image:"},
            {"type": "image_url", "image_url": {"url": IMAGE_DATA_URI}},
            {"type": "text", "text": "What do you see?"},
        ],
    }
    response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": [user_message]}
        _input_callback(kwargs)
        _success_callback(kwargs, response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    recorded = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Collect the "type" of every content part across all recorded messages.
    part_types = []
    for message in recorded:
        if "content" in message:
            part_types.extend(part.get("type") for part in message["content"])

    assert "text" in part_types
    assert "blob" in part_types
853+
854+
855+
def test_binary_content_encoding_uri_type(sentry_init, capture_events):
    """An image part pointing at an https URL must be recorded as a "uri" part."""
    sentry_init(
        integrations=[LiteLLMIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    image_part = {
        "type": "image_url",
        "image_url": {"url": "https://example.com/image.jpg"},
    }
    request_messages = [{"role": "user", "content": [image_part]}]
    response = MockCompletionResponse()

    with start_transaction(name="litellm test"):
        kwargs = {"model": "gpt-4-vision-preview", "messages": request_messages}
        _input_callback(kwargs)
        _success_callback(kwargs, response, datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    recorded = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # Find the first recorded content part whose type is "uri".
    uri_item = None
    for message in recorded:
        if "content" not in message:
            continue
        for part in message["content"]:
            if part.get("type") == "uri":
                uri_item = part
                break
        if uri_item is not None:
            break

    assert uri_item is not None
    assert uri_item["uri"] == "https://example.com/image.jpg"
897+
898+
899+
def test_convert_message_parts_direct():
    """Calling the converter directly turns a data-URI image part into a blob part."""
    text_part = {"type": "text", "text": "Hello"}
    image_part = {"type": "image_url", "image_url": {"url": IMAGE_DATA_URI}}
    messages = [{"role": "user", "content": [text_part, image_part]}]

    converted_parts = _convert_message_parts(messages)[0]["content"]
    blob_item = next(part for part in converted_parts if part.get("type") == "blob")

    assert blob_item["modality"] == "image"
    assert blob_item["mime_type"] == "image/png"
    assert IMAGE_B64 in blob_item["content"]
919+
920+
921+
def test_convert_message_parts_does_not_mutate_original():
    """The caller's message list must be left untouched by _convert_message_parts."""
    original_url = IMAGE_DATA_URI
    image_part = {"type": "image_url", "image_url": {"url": original_url}}
    messages = [{"role": "user", "content": [image_part]}]

    _convert_message_parts(messages)

    # The input structure still holds the untransformed image_url part.
    first_part = messages[0]["content"][0]
    assert first_part["type"] == "image_url"
    assert first_part["image_url"]["url"] == original_url
939+
940+
941+
def test_convert_message_parts_data_url_without_base64():
    """A data URL lacking the ;base64, marker is still inline data and becomes a blob."""
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/png,rawdata"},
    }
    messages = [{"role": "user", "content": [image_part]}]

    converted = _convert_message_parts(messages)
    (first_message, *_) = converted
    blob_item = first_message["content"][0]

    # Inline data URIs are expected as blobs whether or not they are base64-encoded.
    assert blob_item["type"] == "blob"
    assert blob_item["modality"] == "image"
    assert blob_item["mime_type"] == "image/png"
    assert blob_item["content"] == "rawdata"
961+
962+
963+
def test_convert_message_parts_image_url_none():
    """A part whose image_url is None must not crash the converter."""
    broken_part = {"type": "image_url", "image_url": None}
    messages = [{"role": "user", "content": [broken_part]}]

    converted = _convert_message_parts(messages)

    # The part is expected to come back still typed as image_url.
    result_part = converted[0]["content"][0]
    assert result_part["type"] == "image_url"
979+
980+
981+
def test_convert_message_parts_image_url_missing_url():
    """A part whose image_url dict has no "url" key must not crash the converter."""
    incomplete_part = {"type": "image_url", "image_url": {"detail": "high"}}
    messages = [{"role": "user", "content": [incomplete_part]}]

    converted = _convert_message_parts(messages)

    # The part is expected to come back still typed as image_url.
    result_part = converted[0]["content"][0]
    assert result_part["type"] == "image_url"

0 commit comments

Comments
 (0)