|
12 | 12 | GEN_AI_ALLOWED_MESSAGE_ROLES, |
13 | 13 | get_start_span_function, |
14 | 14 | normalize_message_roles, |
15 | | - parse_data_uri, |
16 | 15 | set_data_normalized, |
17 | 16 | truncate_and_annotate_messages, |
| 17 | + transform_content_part, |
18 | 18 | ) |
19 | 19 | from sentry_sdk.consts import OP, SPANDATA |
20 | 20 | from sentry_sdk.integrations import DidNotEnable, Integration |
|
117 | 117 | "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, |
118 | 118 | } |
119 | 119 |
|
120 | | -# Map LangChain content types to Sentry modalities |
121 | | -LANGCHAIN_TYPE_TO_MODALITY = { |
122 | | - "image": "image", |
123 | | - "image_url": "image", |
124 | | - "audio": "audio", |
125 | | - "video": "video", |
126 | | - "file": "document", |
127 | | -} |
128 | | - |
129 | | - |
130 | | -def _get_modality_from_mime_type(mime_type: str) -> str: |
131 | | - """Infer the content modality from a MIME type string.""" |
132 | | - if not mime_type: |
133 | | - return "image" # Default fallback |
134 | | - |
135 | | - mime_lower = mime_type.lower() |
136 | | - if mime_lower.startswith("image/"): |
137 | | - return "image" |
138 | | - elif mime_lower.startswith("audio/"): |
139 | | - return "audio" |
140 | | - elif mime_lower.startswith("video/"): |
141 | | - return "video" |
142 | | - elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): |
143 | | - return "document" |
144 | | - else: |
145 | | - return "image" # Default fallback for unknown types |
146 | | - |
147 | 120 |
|
148 | 121 | def _transform_langchain_content_block( |
149 | 122 | content_block: "Dict[str, Any]", |
150 | 123 | ) -> "Dict[str, Any]": |
151 | 124 | """ |
152 | | - Transform a LangChain content block to Sentry-compatible format. |
153 | | -
|
154 | | - Handles multimodal content (images, audio, video, documents) by converting them |
155 | | - to the standardized format: |
156 | | - - base64 encoded data -> type: "blob" |
157 | | - - URL references -> type: "uri" |
158 | | - - file_id references -> type: "file" |
159 | | -
|
160 | | - Supports multiple content block formats: |
161 | | - - LangChain standard: type + base64/url/file_id fields |
162 | | - - OpenAI legacy: image_url with nested url field |
163 | | - - Anthropic: type + source dict with type/media_type/data or url |
164 | | - - Google: inline_data or file_data dicts |
165 | | - """ |
166 | | - if not isinstance(content_block, dict): |
167 | | - return content_block |
168 | | - |
169 | | - block_type = content_block.get("type") |
170 | | - |
171 | | - # Handle standard multimodal content types (image, audio, video, file) |
172 | | - if block_type in ("image", "audio", "video", "file"): |
173 | | - modality = LANGCHAIN_TYPE_TO_MODALITY.get(block_type, block_type) |
174 | | - mime_type = content_block.get("mime_type", "") |
175 | | - |
176 | | - # Check for base64 encoded content |
177 | | - if "base64" in content_block: |
178 | | - return { |
179 | | - "type": "blob", |
180 | | - "modality": modality, |
181 | | - "mime_type": mime_type, |
182 | | - "content": content_block.get("base64", ""), |
183 | | - } |
184 | | - # Check for URL reference |
185 | | - elif "url" in content_block: |
186 | | - return { |
187 | | - "type": "uri", |
188 | | - "modality": modality, |
189 | | - "mime_type": mime_type, |
190 | | - "uri": content_block.get("url", ""), |
191 | | - } |
192 | | - # Check for file_id reference |
193 | | - elif "file_id" in content_block: |
194 | | - return { |
195 | | - "type": "file", |
196 | | - "modality": modality, |
197 | | - "mime_type": mime_type, |
198 | | - "file_id": content_block.get("file_id", ""), |
199 | | - } |
200 | | - # Handle Anthropic-style format with nested "source" dict |
201 | | - elif "source" in content_block: |
202 | | - source = content_block.get("source", {}) |
203 | | - if isinstance(source, dict): |
204 | | - source_type = source.get("type") |
205 | | - media_type = source.get("media_type", "") or mime_type |
206 | | - |
207 | | - if source_type == "base64": |
208 | | - return { |
209 | | - "type": "blob", |
210 | | - "modality": modality, |
211 | | - "mime_type": media_type, |
212 | | - "content": source.get("data", ""), |
213 | | - } |
214 | | - elif source_type == "url": |
215 | | - return { |
216 | | - "type": "uri", |
217 | | - "modality": modality, |
218 | | - "mime_type": media_type, |
219 | | - "uri": source.get("url", ""), |
220 | | - } |
221 | | - # Handle Google-style inline_data format with standard type |
222 | | - elif "inline_data" in content_block: |
223 | | - inline_data = content_block.get("inline_data", {}) |
224 | | - if isinstance(inline_data, dict): |
225 | | - return { |
226 | | - "type": "blob", |
227 | | - "modality": modality, |
228 | | - "mime_type": inline_data.get("mime_type", "") or mime_type, |
229 | | - "content": inline_data.get("data", ""), |
230 | | - } |
231 | | - # Handle Google-style file_data format with standard type |
232 | | - elif "file_data" in content_block: |
233 | | - file_data = content_block.get("file_data", {}) |
234 | | - if isinstance(file_data, dict): |
235 | | - return { |
236 | | - "type": "uri", |
237 | | - "modality": modality, |
238 | | - "mime_type": file_data.get("mime_type", "") or mime_type, |
239 | | - "uri": file_data.get("file_uri", ""), |
240 | | - } |
241 | | - |
242 | | - # Handle legacy image_url format (OpenAI style) |
243 | | - elif block_type == "image_url": |
244 | | - image_url_data = content_block.get("image_url", {}) |
245 | | - if isinstance(image_url_data, dict): |
246 | | - url = image_url_data.get("url", "") |
247 | | - else: |
248 | | - url = str(image_url_data) |
| 125 | + Transform a LangChain content block using the shared transform_content_part function. |
249 | 126 |
|
250 | | - # Check if it's a data URI (base64 encoded) |
251 | | - if url and url.startswith("data:"): |
252 | | - try: |
253 | | - mime_type, content = parse_data_uri(url) |
254 | | - return { |
255 | | - "type": "blob", |
256 | | - "modality": "image", |
257 | | - "mime_type": mime_type, |
258 | | - "content": content, |
259 | | - } |
260 | | - except ValueError: |
261 | | - # If parsing fails, return as URI |
262 | | - return { |
263 | | - "type": "uri", |
264 | | - "modality": "image", |
265 | | - "mime_type": "", |
266 | | - "uri": url, |
267 | | - } |
268 | | - else: |
269 | | - # Regular URL |
270 | | - return { |
271 | | - "type": "uri", |
272 | | - "modality": "image", |
273 | | - "mime_type": "", |
274 | | - "uri": url, |
275 | | - } |
276 | | - |
277 | | - # Handle Google-style inline_data format |
278 | | - if "inline_data" in content_block: |
279 | | - inline_data = content_block.get("inline_data", {}) |
280 | | - if isinstance(inline_data, dict): |
281 | | - mime_type = inline_data.get("mime_type", "") |
282 | | - return { |
283 | | - "type": "blob", |
284 | | - "modality": _get_modality_from_mime_type(mime_type), |
285 | | - "mime_type": mime_type, |
286 | | - "content": inline_data.get("data", ""), |
287 | | - } |
288 | | - |
289 | | - # Handle Google-style file_data format |
290 | | - if "file_data" in content_block: |
291 | | - file_data = content_block.get("file_data", {}) |
292 | | - if isinstance(file_data, dict): |
293 | | - mime_type = file_data.get("mime_type", "") |
294 | | - return { |
295 | | - "type": "uri", |
296 | | - "modality": _get_modality_from_mime_type(mime_type), |
297 | | - "mime_type": mime_type, |
298 | | - "uri": file_data.get("file_uri", ""), |
299 | | - } |
300 | | - |
301 | | - # For text blocks and other types, return as-is |
302 | | - return content_block |
| 127 | + Returns the original content block if transformation is not applicable |
| 128 | + (e.g., for text blocks or unrecognized formats). |
| 129 | + """ |
| 130 | + result = transform_content_part(content_block) |
| 131 | + return result if result is not None else content_block |
303 | 132 |
|
304 | 133 |
|
305 | 134 | def _transform_langchain_message_content(content: "Any") -> "Any": |
|
0 commit comments