fix: ground Kuma with workspace plan context (baserow#5227)

silvestrid · web-flow · commit 6b4d7f48e70d · 2026-04-28T14:54:30.000Z
* fix: ground Kuma with workspace plan context

Inject the current workspace plan tier into Kuma's runtime context and add a docs-first guardrail so feature and plan questions are grounded instead of guessed.

* fix: tighten Kuma plan tier grounding

Handle missing and unknown license tiers explicitly, log unexpected plan lookup failures, and align the prompt with the exact lower-case plan tokens injected into assistant context.

* address feedback
diff --git a/changelog/entries/unreleased/bug/5210_kuma_plan_context_and_hallucination_guardrail.json b/changelog/entries/unreleased/bug/5210_kuma_plan_context_and_hallucination_guardrail.json
@@ -0,0 +1,9 @@
+{
+    "type": "bug",
+    "message": "Give Kuma the current license tier in its context and steer uncertain feature or plan questions to docs search.",
+    "issue_origin": "github",
+    "issue_number": 5210,
+    "domain": "core",
+    "bullet_points": [],
+    "created_at": "2026-04-17"
+}
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/agents.py b/enterprise/backend/src/baserow_enterprise/assistant/agents.py
@@ -5,6 +5,17 @@
 from baserow_enterprise.assistant.prompts import AGENT_SYSTEM_PROMPT
 from baserow_enterprise.assistant.tools.toolset import tool_manifest_line_compact
 
+FREE_LICENSE_TIER = "free"  # Token reported when no other tier applies.
+_CANONICAL_LICENSE_TIERS = {  # Tier tokens allowed to reach the prompt.
+    FREE_LICENSE_TIER,
+    "premium",
+    "advanced",
+    "enterprise",
+}
+_LICENSE_TIER_ALIASES = {  # License types reported under another tier's token.
+    "enterprise_without_support": "enterprise",
+}
+
 main_agent: Agent[AssistantDeps, str] = Agent(
     deps_type=AssistantDeps,
     output_type=str,
@@ -14,6 +25,17 @@
 )
 
 
+def _canonical_license_tier(license_tier: str) -> str:
+    """
+    Translate a license type into the public tier token used in the prompt.
+    """
+
+    tier = _LICENSE_TIER_ALIASES.get(license_tier, license_tier)
+    # Anything outside the canonical set is reported as the free tier.
+    is_public = tier in _CANONICAL_LICENSE_TIERS
+    return tier if is_public else FREE_LICENSE_TIER
+
+
 @main_agent.instructions
 def dynamic_ui_context(ctx) -> str:
     """Inject the UI context into the system prompt dynamically."""
@@ -31,6 +53,20 @@ def dynamic_mode(ctx) -> str:
     return f"\n<mode>{ctx.deps.mode.value}</mode>"
 
 
+@main_agent.instructions
+def dynamic_license_tier(ctx) -> str:
+    """Add the workspace's license tier, plus its features, to the prompt."""
+
+    license_type = ctx.deps.license_tier
+    # Without a resolved license only the free tier tag is emitted.
+    if license_type is None:
+        return f"\n<license_tier>{FREE_LICENSE_TIER}</license_tier>"
+
+    flags = ",".join(sorted(license_type.features))
+    tier = _canonical_license_tier(license_type.type)
+    return f"\n<license_tier>{tier}</license_tier>\n<features>{flags}</features>"
+
+
 @main_agent.instructions
 def dynamic_current_task(ctx) -> str:
     """Pin the original user request as immutable context."""
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/assistant.py b/enterprise/backend/src/baserow_enterprise/assistant/assistant.py
@@ -1,6 +1,7 @@
 import asyncio
 from typing import Any, AsyncGenerator
 
+from django.contrib.auth.models import AbstractUser
 from django.core.cache import cache
 from django.utils import translation
 
@@ -22,6 +23,7 @@
 from pydantic_ai.usage import UsageLimits
 
 from baserow.api.sessions import get_client_undo_redo_action_group_id
+from baserow.core.models import Workspace
 from baserow_enterprise.assistant.agents import main_agent, title_agent
 from baserow_enterprise.assistant.deps import (
     AgentMode,
@@ -46,6 +48,8 @@
 )
 from baserow_enterprise.assistant.tools.navigation.utils import unsafe_navigate_to
 from baserow_enterprise.assistant.tools.registries import assistant_tool_registry
+from baserow_premium.api.user.user_data_types import ActiveLicensesDataType
+from baserow_premium.license.registries import LicenseType, license_type_registry
 
 from .models import AssistantChat, AssistantChatMessage, AssistantChatPrediction
 from .types import (
@@ -101,6 +105,37 @@ def set_assistant_cancellation_key(
     cache.set(get_assistant_cancellation_key(chat_uuid), True, timeout=timeout)
 
 
+def _get_workspace_license_type(
+    user: AbstractUser, workspace: Workspace
+) -> LicenseType | None:
+    """
+    Find the active ``LicenseType`` with the highest ``order`` for this user.
+
+    Candidate names come from the ``instance_wide`` entry and the workspace's
+    ``per_workspace`` entry of ``ActiveLicensesDataType``. Lookup errors are logged
+    and reported as ``None``.
+
+    :param user: The user for whom to get the license type.
+    :param workspace: The workspace for which to get the license type.
+    :return: The matching LicenseType with the highest order, or None if no license
+        is active or the lookup failed.
+    """
+
+    try:
+        licenses = ActiveLicensesDataType().get_user_data(user, None)
+        # Names from both scopes are merged before matching registry entries.
+        instance_names = set(licenses["instance_wide"])
+        workspace_names = licenses["per_workspace"].get(workspace.id, {})
+        wanted = instance_names.union(workspace_names)
+        matches = [t for t in license_type_registry.get_all() if t.type in wanted]
+        return max(matches, key=lambda t: t.order, default=None)
+    except Exception:
+        logger.exception(
+            "Failed to determine workspace license type for assistant context."
+        )
+        return None
+
+
 def _extract_tool_thought(event: FunctionToolCallEvent) -> str | None:
     """Extract the chain-of-thought ``thought`` argument from a tool call
     event, if present and non-empty."""
@@ -134,6 +169,7 @@ def __init__(self, chat: AssistantChat):
             user=self._user,
             workspace=self._workspace,
             tool_helpers=self._tool_helpers,
+            license_tier=_get_workspace_license_type(self._user, self._workspace),
         )
         self._toolset, db_m, app_m, auto_m, explain_m = (
             assistant_tool_registry.build_toolset(
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/deps.py b/enterprise/backend/src/baserow_enterprise/assistant/deps.py
@@ -13,6 +13,7 @@
     from baserow_enterprise.assistant.tools.navigation.types import (
         AnyNavigationRequestType,
     )
+    from baserow_premium.license.registries import LicenseType
 
 
 class AgentMode(str, Enum):
@@ -120,6 +121,7 @@ class AssistantDeps:
     workspace: "Workspace"
     tool_helpers: ToolHelpers
     mode: AgentMode = AgentMode.DATABASE
+    license_tier: "LicenseType | None" = None
     sources: list[str] = field(default_factory=list)
     dynamic_tools: list[Tool] = field(default_factory=list)
     database_manifest: str = ""
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/prompts.py b/enterprise/backend/src/baserow_enterprise/assistant/prompts.py
@@ -41,6 +41,15 @@
 </baserow_knowledge>
 """
 
+GROUNDING = """\
+<grounding>
+If you are not sure whether a Baserow feature, plan, limit, setting, or UI behavior exists, do not guess. Use `search_user_docs` first.
+If the docs do not confirm it, say you don't know. Never invent plan names, feature names, pricing, upgrade advice, or UI paths.
+The canonical plan names are Free, Premium, Advanced, and Enterprise. `<license_tier>` uses the lowercase equivalents (`free`, `premium`, `advanced`, `enterprise`); treat them as exact matches.
+`<features>` is the exhaustive list of paid feature flags the current workspace has; if `<features>` is missing or empty, the workspace has none. Never claim a paid feature is available if it is not in `<features>`. Use `search_user_docs` to explain what each feature does.
+</grounding>
+"""  # Tag names must match what agents.dynamic_license_tier emits.
+
 LIMITATIONS_AND_SOURCES = f"""\
 <limitations>
 Cannot create/modify/delete: user accounts, workspaces, dashboards, widgets, snapshots, webhooks, integrations, roles, permissions.
@@ -53,5 +62,6 @@
     + RULES
     + HANDLING_AMBIGUITY
     + BASEROW_KNOWLEDGE
+    + GROUNDING
     + LIMITATIONS_AND_SOURCES
 )
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/telemetry.py b/enterprise/backend/src/baserow_enterprise/assistant/telemetry.py
@@ -36,6 +36,7 @@
 from uuid import uuid4
 
 from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
+from opentelemetry.sdk.trace.sampling import ALWAYS_ON
 from opentelemetry.trace import SpanKind
 
 from baserow.core.posthog import get_posthog_client
@@ -461,7 +462,8 @@ def setup_instrumentation():
 
     from pydantic_ai import Agent, InstrumentationSettings
 
-    tracer_provider = TracerProvider()
+    # Prevent environment OTEL_TRACES_SAMPLER config from dropping assistant traces.
+    tracer_provider = TracerProvider(sampler=ALWAYS_ON)
     tracer_provider.add_span_processor(PosthogSpanProcessor())
 
     Agent.instrument_all(
diff --git a/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py b/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py
@@ -170,6 +170,9 @@ async def _search_user_docs_impl(
 ) -> dict[str, Any]:
     """Inner implementation of search_user_docs, separated for error handling."""
 
+    from baserow_enterprise.assistant.model_profiles import get_model_string
+    from baserow_enterprise.assistant.retrying_model import _resolve_model
+
     @sync_to_async
     def _search(question: str) -> list[KnowledgeBaseChunk]:
         chunks = KnowledgeBaseHandler().search(question, 15)
@@ -198,41 +201,35 @@ def _search(question: str) -> list[KnowledgeBaseChunk]:
         f"Question: {question}\n\n"
         f"Documentation context (source URL -> content):\n{context}"
     )
-    from baserow_enterprise.assistant.model_profiles import get_model_string
-    from baserow_enterprise.assistant.retrying_model import _resolve_model
 
     agent_result = await search_docs_agent.run(
         prompt, model=_resolve_model(get_model_string())
     )
     prediction = agent_result.output
 
+    # Force reliability to 0 if model says nothing was found.
+    nothing_found = "nothing found" in prediction.answer.lower()
+    reliability = 0.0 if nothing_found else prediction.reliability
+
     sources = []
     available_urls = {chunk.source_document.source_url for chunk in relevant_chunks}
-    for url in prediction.sources:
-        # somehow LLMs sometimes return sources as objects
-        if isinstance(url, dict) and "url" in url:
-            url = url["url"]
-
-        if not isinstance(url, str):
-            continue
-
-        if url in available_urls and url not in sources:
-            sources.append(url)
-            if len(sources) >= 3:
-                break
-
-    # Only fallback to available URLs if reliability is high AND we have a
-    # real answer. Don't populate sources if the model indicated no relevant
-    # docs were found.
-    nothing_found = "nothing found" in prediction.answer.lower()
-    if not sources and prediction.reliability > 0.8 and not nothing_found:
-        sources = list(available_urls)[:3]
+    if not nothing_found:
+        for url in prediction.sources:
+            # somehow LLMs sometimes return sources as objects
+            if isinstance(url, dict) and "url" in url:
+                url = url["url"]
 
-    # Override reliability to 0 if the model explicitly said nothing was
-    # found. The model sometimes returns high reliability for "nothing
-    # found" answers, which is semantically incorrect - we want reliability
-    # to reflect whether we actually found useful information.
-    reliability = 0.0 if nothing_found else prediction.reliability
+            if not isinstance(url, str):
+                continue
+
+            if url in available_urls and url not in sources:
+                sources.append(url)
+                if len(sources) >= 3:
+                    break
+
+        # Fallback to available URLs if the model didn't cite sources.
+        if not sources:
+            sources = list(available_urls)[:3]
 
     if reliability >= 0.7:
         reliability_note = (
@@ -242,7 +239,8 @@ def _search(question: str) -> list[KnowledgeBaseChunk]:
         reliability_note = (
             "PARTIAL MATCH: Some relevant information was found, but the "
             "documentation may not fully cover this topic. Supplement with "
-            "general knowledge but warn the user that details may be incomplete."
+            "general knowledge if you're confident it is accurate and up to date, "
+            "but warn the user that details may be incomplete."
         )
     else:
         reliability_note = (
diff --git a/enterprise/backend/tests/baserow_enterprise_tests/assistant/evals/eval_utils.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/evals/eval_utils.py
@@ -15,6 +15,7 @@
 from pydantic_ai.usage import UsageLimits
 
 from baserow_enterprise.assistant.agents import main_agent
+from baserow_enterprise.assistant.assistant import _get_workspace_license_type
 from baserow_enterprise.assistant.deps import AssistantDeps, ToolHelpers
 from baserow_enterprise.assistant.tools.registries import assistant_tool_registry
 from baserow_enterprise.assistant.types import (
@@ -209,6 +210,7 @@ def create_eval_assistant(user, workspace, max_iters=15, model=None):
         user=user,
         workspace=workspace,
         tool_helpers=tool_helpers,
+        license_tier=_get_workspace_license_type(user, workspace),
     )
 
     # Build the single-agent toolset (navigation + core + database + automation)
diff --git a/enterprise/backend/tests/baserow_enterprise_tests/assistant/evals/test_eval_search_user_docs.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/evals/test_eval_search_user_docs.py
@@ -111,6 +111,21 @@ def _require_knowledge_base(synced_knowledge_base):
         ["permission", "field", "read", "lock"],
         id="field-permissions",
     ),
+    pytest.param(
+        "Which Baserow plan unlocks field-level permissions for a workspace?",
+        ["field-level-permissions", "permissions"],
+        ["plan", "field-level permissions", "field permissions", "enterprise"],
+        id="plan-for-field-level-permissions",
+    ),
+    pytest.param(
+        (
+            "I can't find the conditional options toggle for my single select field. "
+            "Should I upgrade, or is there another requirement?"
+        ),
+        ["single-select", "select-option", "fields"],
+        ["conditional", "single select", "plan", "upgrade"],
+        id="conditional-options-plan-question",
+    ),
     pytest.param(
         (
             "How can I create a calendar that shows my tasks, but only the ones assigned to me."
@@ -121,9 +136,8 @@ def _require_knowledge_base(synced_knowledge_base):
     ),
     pytest.param(
         (
-            "I'm trying to combine the first name and last name columns "
-            "into one, but I want to make sure it's uppercase. Can you tell me how to "
-            "write that formula?"
+            "What would a formula look like that combines a first name and last name field "
+            "into a full name field?"
         ),
         ["formula", "understanding-formulas"],
         ["concat", "upper", "formula"],
@@ -270,7 +284,7 @@ def test_search_user_docs(
             hint=f"tools called: {[e.get('tool_name') for e in history if e.get('tool_name')]}",
         )
         checks.check(
-            f"returned at least one source URL for user docs",
+            "returned at least one source URL for user docs",
             len(sources) >= 1,
             hint=f"tools called: {[e.get('tool_name') for e in history if e.get('tool_name')]}",
         )
diff --git a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py
diff --git a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_search_docs_tools.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_search_docs_tools.py