From 561688eda2e04ffcc08651bef07e725ac957f4e9 Mon Sep 17 00:00:00 2001
From: Erica Pisani <pisani.erica@gmail.com>
Date: Thu, 19 Mar 2026 12:06:18 +0000
Subject: [PATCH 1/2] feat(langchain): Broaden AI provider detection beyond
 OpenAI and Anthropic

Extract _get_ai_system() to generically detect AI providers from LangChain's
_type field instead of hardcoding only "anthropic" and "openai". The function
splits on "-" and skips non-provider segments (cloud prefixes like "azure" and
descriptors like "chat"/"llm") to return the actual provider name.

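This adds support for Cohere, Ollama, Mistral, Fireworks, HuggingFace, Groq,
NVIDIA, xAI, DeepSeek, Google, and any other LangChain provider whose _type
is hyphen-separated. Underscore-separated values (e.g. "amazon_bedrock_chat")
are not split and are returned whole.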

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 sentry_sdk/integrations/langchain.py          | 41 ++++++---
 .../integrations/langchain/test_langchain.py  | 88 +++++++++++++++++++
 2 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index d19d9bbdd5..1e8113872f 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -108,6 +108,31 @@
     OllamaEmbeddings = None
 
 
+_NON_PROVIDER_PARTS = frozenset({"azure", "aws", "gcp", "vertex", "chat", "llm"})
+
+
+def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]":
+    """Extract the AI provider from the ``_type`` field in LangChain params.
+
+    Splits on ``-`` and skips generic segments (cloud prefixes and model-type
+    descriptors like ``chat`` / ``llm``) to return the actual provider name.
+    """
+    ai_type = all_params.get("_type")
+
+    if not ai_type or not isinstance(ai_type, str):
+        return None
+
+    parts = [p.strip().lower() for p in ai_type.split("-") if p.strip()]
+    if not parts:
+        return None
+
+    for part in parts:
+        if part not in _NON_PROVIDER_PARTS:
+            return part
+
+    return parts[0]
+
+
 DATA_FIELDS = {
     "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
     "function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
@@ -381,11 +406,9 @@ def on_llm_start(
                     model,
                 )
 
-            ai_type = all_params.get("_type", "")
-            if "anthropic" in ai_type:
-                span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
-            elif "openai" in ai_type:
-                span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
+            ai_system = _get_ai_system(all_params)
+            if ai_system:
+                span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)
 
             for key, attribute in DATA_FIELDS.items():
                 if key in all_params and all_params[key] is not None:
@@ -449,11 +472,9 @@ def on_chat_model_start(
             if model:
                 span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)
 
-            ai_type = all_params.get("_type", "")
-            if "anthropic" in ai_type:
-                span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
-            elif "openai" in ai_type:
-                span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
+            ai_system = _get_ai_system(all_params)
+            if ai_system:
+                span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)
 
             agent_name = _get_current_agent()
             if agent_name:
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 132da0a9a0..b4554d4b9b 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -2000,6 +2000,94 @@ def test_transform_google_file_data(self):
         }
 
 
+@pytest.mark.parametrize(
+    "ai_type,expected_system",
+    [
+        # Real LangChain _type values (from _llm_type properties)
+        # OpenAI
+        ("openai-chat", "openai"),
+        ("openai", "openai"),
+        # Azure OpenAI
+        ("azure-openai-chat", "openai"),
+        ("azure", "azure"),
+        # Anthropic
+        ("anthropic-chat", "anthropic"),
+        # Google
+        ("vertexai", "vertexai"),
+        ("chat-google-generative-ai", "google"),
+        ("google_gemini", "google_gemini"),
+        # AWS Bedrock (underscore-separated, no split)
+        ("amazon_bedrock_chat", "amazon_bedrock_chat"),
+        ("amazon_bedrock", "amazon_bedrock"),
+        # Cohere
+        ("cohere-chat", "cohere"),
+        # Ollama
+        ("chat-ollama", "ollama"),
+        ("ollama-llm", "ollama"),
+        # Mistral
+        ("mistralai-chat", "mistralai"),
+        # Fireworks
+        ("fireworks-chat", "fireworks"),
+        ("fireworks", "fireworks"),
+        # HuggingFace
+        ("huggingface-chat-wrapper", "huggingface"),
+        # Groq
+        ("groq-chat", "groq"),
+        # NVIDIA
+        ("chat-nvidia-ai-playground", "nvidia"),
+        # xAI
+        ("xai-chat", "xai"),
+        # DeepSeek
+        ("chat-deepseek", "deepseek"),
+        # Edge cases
+        ("", None),
+        (None, None),
+    ],
+)
+def test_langchain_ai_system_detection(
+    sentry_init, capture_events, ai_type, expected_system
+):
+    sentry_init(
+        integrations=[LangchainIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True)
+
+    run_id = "test-ai-system-uuid"
+    serialized = {"_type": ai_type} if ai_type is not None else {}
+    prompts = ["Test prompt"]
+
+    with start_transaction():
+        callback.on_llm_start(
+            serialized=serialized,
+            prompts=prompts,
+            run_id=run_id,
+            invocation_params={"_type": ai_type, "model": "test-model"},
+        )
+
+        generation = Mock(text="Test response", message=None)
+        response = Mock(generations=[[generation]])
+        callback.on_llm_end(response=response, run_id=run_id)
+
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
+
+    llm_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline"
+    ]
+    assert len(llm_spans) > 0
+
+    llm_span = llm_spans[0]
+
+    if expected_system is not None:
+        assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system
+    else:
+        assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {})
+
+
 class TestTransformLangchainMessageContent:
     """Tests for _transform_langchain_message_content function."""
 

From 1f88b70f8c5e73055f09c13bee2f5b7e26fbe31e Mon Sep 17 00:00:00 2001
From: Erica Pisani <pisani.erica@gmail.com>
Date: Fri, 20 Mar 2026 10:24:55 +0000
Subject: [PATCH 2/2] ref(langchain): Simplify _get_ai_system to pass through
 _type value as-is

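Remove string splitting and filtering logic from _get_ai_system. The
function now returns the LangChain _type value directly without
attempting to extract a provider name from it.

As a result, the GEN_AI_SYSTEM span data now carries the raw _type
(e.g. "openai-chat", "anthropic-chat") rather than a normalized provider
name such as "openai" or "anthropic". Empty or non-string _type values
still yield None, in which case the attribute is left unset.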

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 sentry_sdk/integrations/langchain.py          | 18 +----------
 .../integrations/langchain/test_langchain.py  | 30 +++++++++----------
 2 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 1e8113872f..98357a32ae 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -108,29 +108,13 @@
     OllamaEmbeddings = None
 
 
-_NON_PROVIDER_PARTS = frozenset({"azure", "aws", "gcp", "vertex", "chat", "llm"})
-
-
 def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]":
-    """Extract the AI provider from the ``_type`` field in LangChain params.
-
-    Splits on ``-`` and skips generic segments (cloud prefixes and model-type
-    descriptors like ``chat`` / ``llm``) to return the actual provider name.
-    """
     ai_type = all_params.get("_type")
 
     if not ai_type or not isinstance(ai_type, str):
         return None
 
-    parts = [p.strip().lower() for p in ai_type.split("-") if p.strip()]
-    if not parts:
-        return None
-
-    for part in parts:
-        if part not in _NON_PROVIDER_PARTS:
-            return part
-
-    return parts[0]
+    return ai_type
 
 
 DATA_FIELDS = {
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index b4554d4b9b..a440a3b0ae 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -2005,40 +2005,40 @@ def test_transform_google_file_data(self):
     [
         # Real LangChain _type values (from _llm_type properties)
         # OpenAI
-        ("openai-chat", "openai"),
+        ("openai-chat", "openai-chat"),
         ("openai", "openai"),
         # Azure OpenAI
-        ("azure-openai-chat", "openai"),
+        ("azure-openai-chat", "azure-openai-chat"),
         ("azure", "azure"),
         # Anthropic
-        ("anthropic-chat", "anthropic"),
+        ("anthropic-chat", "anthropic-chat"),
         # Google
         ("vertexai", "vertexai"),
-        ("chat-google-generative-ai", "google"),
+        ("chat-google-generative-ai", "chat-google-generative-ai"),
         ("google_gemini", "google_gemini"),
-        # AWS Bedrock (underscore-separated, no split)
+        # AWS Bedrock
         ("amazon_bedrock_chat", "amazon_bedrock_chat"),
         ("amazon_bedrock", "amazon_bedrock"),
         # Cohere
-        ("cohere-chat", "cohere"),
+        ("cohere-chat", "cohere-chat"),
         # Ollama
-        ("chat-ollama", "ollama"),
-        ("ollama-llm", "ollama"),
+        ("chat-ollama", "chat-ollama"),
+        ("ollama-llm", "ollama-llm"),
         # Mistral
-        ("mistralai-chat", "mistralai"),
+        ("mistralai-chat", "mistralai-chat"),
         # Fireworks
-        ("fireworks-chat", "fireworks"),
+        ("fireworks-chat", "fireworks-chat"),
         ("fireworks", "fireworks"),
         # HuggingFace
-        ("huggingface-chat-wrapper", "huggingface"),
+        ("huggingface-chat-wrapper", "huggingface-chat-wrapper"),
         # Groq
-        ("groq-chat", "groq"),
+        ("groq-chat", "groq-chat"),
         # NVIDIA
-        ("chat-nvidia-ai-playground", "nvidia"),
+        ("chat-nvidia-ai-playground", "chat-nvidia-ai-playground"),
         # xAI
-        ("xai-chat", "xai"),
+        ("xai-chat", "xai-chat"),
         # DeepSeek
-        ("chat-deepseek", "deepseek"),
+        ("chat-deepseek", "chat-deepseek"),
         # Edge cases
         ("", None),
         (None, None),