diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 7043bbc2ee..ecb8abcd10 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -359,3 +359,7 @@ class SDKInfo(TypedDict): ) HttpStatusCodeRange = Union[int, Container[int]] + + class TextPart(TypedDict): + type: Literal["text"] + content: str diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 93fca6ba3e..4b61a317fb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -542,6 +542,12 @@ class SPANDATA: Example: 2048 """ + GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + """ + The system instructions passed to the model. + Example: [{"type": "text", "text": "You are a helpful assistant."},{"type": "text", "text": "Be concise and clear."}] + """ + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ The messages passed to the model. The "content" can be a string or an array of objects. diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 8a9bc5167c..1cccfb0fff 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -36,6 +36,7 @@ from uuid import UUID from sentry_sdk.tracing import Span + from sentry_sdk._types import TextPart try: @@ -189,6 +190,29 @@ def _get_current_agent() -> "Optional[str]": return None +def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[TextPart]": + system_instructions = [] + + for list_ in messages: + for message in list_: + if message.type == "system": + system_instructions.append(message) + + return system_instructions + + +def _transform_system_instructions( + system_instructions: "List[BaseMessage]", +) -> "List[TextPart]": + return [ + { + "type": "text", + "content": instruction.content, + } + for instruction in system_instructions + ] + + class LangchainIntegration(Integration): identifier = "langchain" origin = f"auto.ai.{identifier}" @@ -430,9 +454,21 @@ def on_chat_model_start( _set_tools_on_span(span, all_params.get("tools")) if should_send_default_pii() and self.include_prompts: + system_instructions = _get_system_instructions(messages) + if len(system_instructions) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + _transform_system_instructions(system_instructions), + unpack=False, + ) + normalized_messages = [] for list_ in messages: for message in list_: + if message.type == "system": + continue + normalized_messages.append( self._normalize_langchain_message(message) ) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 6f5f9f14a1..54c4664f3e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -217,17 +217,21 @@ def test_langchain_agent( assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 if send_default_pii and include_prompts: - assert ( - "You are very powerful" - in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) - assert ( - "You are very powerful" - in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[1]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled @@ -243,8 +247,10 @@ def test_langchain_agent( tool_call_str = str(tool_calls_data) assert "get_word_length" in tool_call_str else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})