From 561688eda2e04ffcc08651bef07e725ac957f4e9 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Thu, 19 Mar 2026 12:06:18 +0000 Subject: [PATCH 1/2] feat(langchain): Broaden AI provider detection beyond OpenAI and Anthropic Extract _get_ai_system() to generically detect AI providers from LangChain's _type field instead of hardcoding only "anthropic" and "openai". The function splits on "-" and skips non-provider segments (cloud prefixes like "azure" and descriptors like "chat"/"llm") to return the actual provider name. This adds support for Cohere, Ollama, Mistral, Fireworks, HuggingFace, Groq, NVIDIA, xAI, DeepSeek, Google, and any future LangChain providers. Co-Authored-By: Claude Opus 4.6 (1M context) --- sentry_sdk/integrations/langchain.py | 41 ++++++--- .../integrations/langchain/test_langchain.py | 88 +++++++++++++++++++ 2 files changed, 119 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index d19d9bbdd5..1e8113872f 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -108,6 +108,31 @@ OllamaEmbeddings = None +_NON_PROVIDER_PARTS = frozenset({"azure", "aws", "gcp", "vertex", "chat", "llm"}) + + +def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]": + """Extract the AI provider from the ``_type`` field in LangChain params. + + Splits on ``-`` and skips generic segments (cloud prefixes and model-type + descriptors like ``chat`` / ``llm``) to return the actual provider name. + """ + ai_type = all_params.get("_type") + + if not ai_type or not isinstance(ai_type, str): + return None + + parts = [p.strip().lower() for p in ai_type.split("-") if p.strip()] + if not parts: + return None + + for part in parts: + if part not in _NON_PROVIDER_PARTS: + return part + + return parts[0] + + DATA_FIELDS = { "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, "function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, @@ -381,11 +406,9 @@ def on_llm_start( model, ) - ai_type = all_params.get("_type", "") - if "anthropic" in ai_type: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic") - elif "openai" in ai_type: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + ai_system = _get_ai_system(all_params) + if ai_system: + span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system) for key, attribute in DATA_FIELDS.items(): if key in all_params and all_params[key] is not None: @@ -449,11 +472,9 @@ def on_chat_model_start( if model: span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) - ai_type = all_params.get("_type", "") - if "anthropic" in ai_type: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic") - elif "openai" in ai_type: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + ai_system = _get_ai_system(all_params) + if ai_system: + span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system) agent_name = _get_current_agent() if agent_name: diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 132da0a9a0..b4554d4b9b 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2000,6 +2000,94 @@ def test_transform_google_file_data(self): } +@pytest.mark.parametrize( + "ai_type,expected_system", + [ + # Real LangChain _type values (from _llm_type properties) + # OpenAI + ("openai-chat", "openai"), + ("openai", "openai"), + # Azure OpenAI + ("azure-openai-chat", "openai"), + ("azure", "azure"), + # Anthropic + ("anthropic-chat", "anthropic"), + # Google + ("vertexai", "vertexai"), + ("chat-google-generative-ai", "google"), + ("google_gemini", "google_gemini"), + # AWS Bedrock (underscore-separated, no split) + ("amazon_bedrock_chat", "amazon_bedrock_chat"), + ("amazon_bedrock", "amazon_bedrock"), + # Cohere + ("cohere-chat", "cohere"), + # Ollama + ("chat-ollama", "ollama"), + ("ollama-llm", "ollama"), + # Mistral + ("mistralai-chat", "mistralai"), + # Fireworks + ("fireworks-chat", "fireworks"), + ("fireworks", "fireworks"), + # HuggingFace + ("huggingface-chat-wrapper", "huggingface"), + # Groq + ("groq-chat", "groq"), + # NVIDIA + ("chat-nvidia-ai-playground", "nvidia"), + # xAI + ("xai-chat", "xai"), + # DeepSeek + ("chat-deepseek", "deepseek"), + # Edge cases + ("", None), + (None, None), + ], +) +def test_langchain_ai_system_detection( + sentry_init, capture_events, ai_type, expected_system +): + sentry_init( + integrations=[LangchainIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "test-ai-system-uuid" + serialized = {"_type": ai_type} if ai_type is not None else {} + prompts = ["Test prompt"] + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"_type": ai_type, "model": "test-model"}, + ) + + generation = Mock(text="Test response", message=None) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" + ] + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + if expected_system is not None: + assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + else: + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) + + class TestTransformLangchainMessageContent: """Tests for _transform_langchain_message_content function.""" From 1f88b70f8c5e73055f09c13bee2f5b7e26fbe31e Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Fri, 20 Mar 2026 10:24:55 +0000 Subject: [PATCH 2/2] ref(langchain): Simplify _get_ai_system to pass through _type value as-is Remove string splitting and filtering logic from _get_ai_system. The function now returns the LangChain _type value directly without attempting to extract a provider name from it. Co-Authored-By: Claude Opus 4.6 (1M context) --- sentry_sdk/integrations/langchain.py | 18 +---------- .../integrations/langchain/test_langchain.py | 30 +++++++++---------- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 1e8113872f..98357a32ae 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -108,29 +108,13 @@ OllamaEmbeddings = None -_NON_PROVIDER_PARTS = frozenset({"azure", "aws", "gcp", "vertex", "chat", "llm"}) - - def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]": - """Extract the AI provider from the ``_type`` field in LangChain params. - - Splits on ``-`` and skips generic segments (cloud prefixes and model-type - descriptors like ``chat`` / ``llm``) to return the actual provider name. - """ ai_type = all_params.get("_type") if not ai_type or not isinstance(ai_type, str): return None - parts = [p.strip().lower() for p in ai_type.split("-") if p.strip()] - if not parts: - return None - - for part in parts: - if part not in _NON_PROVIDER_PARTS: - return part - - return parts[0] + return ai_type DATA_FIELDS = { diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b4554d4b9b..a440a3b0ae 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2005,40 +2005,40 @@ def test_transform_google_file_data(self): [ # Real LangChain _type values (from _llm_type properties) # OpenAI - ("openai-chat", "openai"), + ("openai-chat", "openai-chat"), ("openai", "openai"), # Azure OpenAI - ("azure-openai-chat", "openai"), + ("azure-openai-chat", "azure-openai-chat"), ("azure", "azure"), # Anthropic - ("anthropic-chat", "anthropic"), + ("anthropic-chat", "anthropic-chat"), # Google ("vertexai", "vertexai"), - ("chat-google-generative-ai", "google"), + ("chat-google-generative-ai", "chat-google-generative-ai"), ("google_gemini", "google_gemini"), - # AWS Bedrock (underscore-separated, no split) + # AWS Bedrock ("amazon_bedrock_chat", "amazon_bedrock_chat"), ("amazon_bedrock", "amazon_bedrock"), # Cohere - ("cohere-chat", "cohere"), + ("cohere-chat", "cohere-chat"), # Ollama - ("chat-ollama", "ollama"), - ("ollama-llm", "ollama"), + ("chat-ollama", "chat-ollama"), + ("ollama-llm", "ollama-llm"), # Mistral - ("mistralai-chat", "mistralai"), + ("mistralai-chat", "mistralai-chat"), # Fireworks - ("fireworks-chat", "fireworks"), + ("fireworks-chat", "fireworks-chat"), ("fireworks", "fireworks"), # HuggingFace - ("huggingface-chat-wrapper", "huggingface"), + ("huggingface-chat-wrapper", "huggingface-chat-wrapper"), # Groq - ("groq-chat", "groq"), + ("groq-chat", "groq-chat"), # NVIDIA - ("chat-nvidia-ai-playground", "nvidia"), + ("chat-nvidia-ai-playground", "chat-nvidia-ai-playground"), # xAI - ("xai-chat", "xai"), + ("xai-chat", "xai-chat"), # DeepSeek - ("chat-deepseek", "deepseek"), + ("chat-deepseek", "chat-deepseek"), # Edge cases ("", None), (None, None),