diff --git a/.env.docker-dev.example b/.env.docker-dev.example index db83356691..b7b271ba12 100644 --- a/.env.docker-dev.example +++ b/.env.docker-dev.example @@ -11,7 +11,7 @@ # See: https://docs.docker.com/compose/how-tos/profiles/ COMPOSE_PROFILES=optional -SECRET_KEY=baserow +SECRET_KEY=baserow # CHANGE THIS IN PRODUCTION! DATABASE_PASSWORD=baserow REDIS_PASSWORD=baserow diff --git a/.env.local.example b/.env.local-dev.example similarity index 93% rename from .env.local.example rename to .env.local-dev.example index cca0cf5c7f..e4a5af113c 100644 --- a/.env.local.example +++ b/.env.local-dev.example @@ -12,19 +12,14 @@ DJANGO_SETTINGS_MODULE=baserow.config.settings.dev # DJANGO_SETTINGS_MODULE=baserow.config.settings.test # ============================================================================= -# Security (use simple values for local dev only!) +# Security (use same values as in env.docker-dev for local dev only!) # ============================================================================= -SECRET_KEY=baserow -BASEROW_JWT_SIGNING_KEY=baserow +SECRET_KEY=baserow # CHANGE THIS IN PRODUCTION! # ============================================================================= # Database (local PostgreSQL) # ============================================================================= DATABASE_HOST=localhost -DATABASE_PORT=5432 -DATABASE_NAME=baserow -DATABASE_USER=baserow -DATABASE_PASSWORD=baserow # speed up operations and tests (must be single line, quoted for bash sourcing) POSTGRES_DEV_EXTRA_ARGS="-c shared_buffers=512MB -c fsync=off -c full_page_writes=off -c synchronous_commit=off -c max_locks_per_transaction=512 -c logging_collector=off -c log_statement=none -c log_duration=off -c log_min_duration_statement=-1 -c log_checkpoints=off -c log_connections=off -c log_disconnections=off -c log_lock_waits=off -c log_temp_files=-1 -c checkpoint_timeout=1h -c max_wal_size=10GB -c min_wal_size=1GB -c wal_level=minimal -c max_wal_senders=0 -c autovacuum=off -c random_page_cost=1.0 -c effective_io_concurrency=200 -c work_mem=256MB -c maintenance_work_mem=512MB" @@ -35,7 +30,6 @@ POSTGRES_DEV_EXTRA_ARGS="-c shared_buffers=512MB -c fsync=off -c full_page_write # Redis (local Redis) # ============================================================================= REDIS_HOST=localhost -REDIS_PORT=6379 REDIS_PASSWORD=baserow # ============================================================================= @@ -71,4 +65,4 @@ MIGRATE_ON_STARTUP=false # Media files (path relative to the backend folder) # ============================================================================= MEDIA_ROOT=media -MEDIA_URL=http://localhost:8000/media/ +MEDIA_URL=http://localhost:4000/media/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea35709bd9..8d875d91da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1195,6 +1195,127 @@ jobs: echo "Publishing $SOURCE → $TARGET" docker buildx imagetools create -t $TARGET $SOURCE + # ========================================================================== + # CI STATUS - Single required check for branch protection + # ========================================================================== + # This job aggregates the results of all CI jobs and provides a single + # status check for branch protection rules. + # + # To configure optional checks: + # 1. Go to Settings → Secrets and variables → Actions → Variables + # 2. Create a repository variable named OPTIONAL_CHECKS + # 3. 
Set its value to a comma-separated list of job names that should be optional + # Example: "test-e2e,test-zapier,helm-chart-lint" + # + # Jobs listed in OPTIONAL_CHECKS can fail without blocking the PR. + # Jobs NOT listed are required and must pass (or be skipped due to path filtering). + # ========================================================================== + + ci-status: + name: CI Status + runs-on: ubuntu-latest + if: always() + needs: + # Lint jobs (can be skipped based on path changes) + - backend-lint + - frontend-lint + - dockerfile-lint + - helm-chart-lint + # Test jobs (can be skipped based on path changes) + - backend-check-startup + - test-backend + - test-frontend + - test-zapier + - check-mjml-compiled + # E2E tests (can be skipped based on path changes) + - test-e2e + - collect-e2e-reports + # Coverage (depends on test-backend) + - collect-coverage + steps: + - name: Evaluate CI results + env: + OPTIONAL_CHECKS: ${{ vars.OPTIONAL_CHECKS || '' }} + RESULTS: | + backend-lint=${{ needs.backend-lint.result }} + frontend-lint=${{ needs.frontend-lint.result }} + dockerfile-lint=${{ needs.dockerfile-lint.result }} + helm-chart-lint=${{ needs.helm-chart-lint.result }} + backend-check-startup=${{ needs.backend-check-startup.result }} + test-backend=${{ needs.test-backend.result }} + test-frontend=${{ needs.test-frontend.result }} + test-zapier=${{ needs.test-zapier.result }} + check-mjml-compiled=${{ needs.check-mjml-compiled.result }} + test-e2e=${{ needs.test-e2e.result }} + collect-e2e-reports=${{ needs.collect-e2e-reports.result }} + collect-coverage=${{ needs.collect-coverage.result }} + run: | + echo "==================================" + echo "CI Status Check" + echo "==================================" + echo "" + echo "Optional checks (from OPTIONAL_CHECKS variable):" + echo " ${OPTIONAL_CHECKS:-"(none configured)"}" + echo "" + echo "Job results:" + echo "$RESULTS" | grep -v '^$' + echo "" + echo "==================================" + + # Convert OPTIONAL_CHECKS to an array for easier lookup + IFS=',' read -ra OPTIONAL_ARRAY <<< "$OPTIONAL_CHECKS" + + is_optional() { + local job_name="$1" + for optional in "${OPTIONAL_ARRAY[@]}"; do + # Trim whitespace + optional=$(echo "$optional" | xargs) + if [[ "$job_name" == "$optional" ]]; then + return 0 + fi + done + return 1 + } + + has_failure=false + + while IFS='=' read -r job_name result; do + # Skip empty lines + [[ -z "$job_name" ]] && continue + + # Trim whitespace + job_name=$(echo "$job_name" | xargs) + result=$(echo "$result" | xargs) + + if is_optional "$job_name"; then + if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then + echo "⚠️ $job_name: $result (optional - ignored)" + else + echo "✅ $job_name: $result (optional)" + fi + else + if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then + echo "❌ $job_name: $result (required)" + has_failure=true + elif [[ "$result" == "skipped" ]]; then + echo "⏭️ $job_name: $result (skipped due to path filter)" + else + echo "✅ $job_name: $result" + fi + fi + done <<< "$RESULTS" + + echo "" + echo "==================================" + + if [[ "$has_failure" == "true" ]]; then + echo "❌ CI failed: one or more required checks failed" + exit 1 + else + echo "✅ CI passed: all required checks passed (or were skipped)" + exit 0 + fi + trigger-saas-build: name: Trigger SaaS GitLab Pipeline runs-on: ubuntu-latest diff --git a/backend/Dockerfile b/backend/Dockerfile index 67546e2fb4..b2c3b78f20 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -207,6 
+207,7 @@ RUN mkdir -p /baserow/backend/reports /baserow/premium/backend /baserow/enterpri # Copy the virtual environment and source code with tests COPY --chown=$UID:$GID --from=builder-ci /baserow /baserow +COPY --chown=$UID:$GID ./docs /baserow/docs/ COPY --chown=$UID:$GID deploy/plugins/*.sh /baserow/plugins/ USER $UID:$GID @@ -355,6 +356,7 @@ COPY --chown=$UID:$GID LICENSE /baserow/LICENSE COPY --chown=$UID:$GID --from=builder-prod /baserow/backend /baserow/backend/ COPY --chown=$UID:$GID --from=builder-prod /baserow/premium /baserow/premium/ COPY --chown=$UID:$GID --from=builder-prod /baserow/enterprise /baserow/enterprise/ +COPY --chown=$UID:$GID ./docs /baserow/docs/ COPY --chown=$UID:$GID deploy/plugins/*.sh /baserow/plugins/ diff --git a/backend/justfile b/backend/justfile index e367a73cd6..fe2769e410 100644 --- a/backend/justfile +++ b/backend/justfile @@ -120,13 +120,13 @@ init: _setup-env _create-venv @echo " just run " @echo " uv run " -# Copy .env.local.example to .env.local if it doesn't exist (in project root) +# Copy .env.local-dev.example to .env.local if it doesn't exist (in project root) _setup-env: #!/usr/bin/env bash set -euo pipefail - if [ ! -f ../.env.local ] && [ -f ../.env.local.example ]; then - echo "Creating .env.local from .env.local.example..." - cp ../.env.local.example ../.env.local + if [ ! -f ../.env.local ] && [ -f ../.env.local-dev.example ]; then + echo "Creating .env.local from .env.local-dev.example..." + cp ../.env.local-dev.example ../.env.local echo "Please review and edit .env.local as needed." fi diff --git a/changelog/entries/unreleased/bug/improve_docs_search_accuracy_for_the_ai_assistant.json b/changelog/entries/unreleased/bug/improve_docs_search_accuracy_for_the_ai_assistant.json new file mode 100644 index 0000000000..5a72f6a5c2 --- /dev/null +++ b/changelog/entries/unreleased/bug/improve_docs_search_accuracy_for_the_ai_assistant.json @@ -0,0 +1,9 @@ +{ + "type": "bug", + "message": "Improve docs search accuracy for the AI Assistant", + "issue_origin": "github", + "issue_number": null, + "domain": "core", + "bullet_points": [], + "created_at": "2026-01-14" +} \ No newline at end of file diff --git a/changelog/entries/unreleased/bug/type_error_formula_array.json b/changelog/entries/unreleased/bug/type_error_formula_array.json new file mode 100644 index 0000000000..60cc48314f --- /dev/null +++ b/changelog/entries/unreleased/bug/type_error_formula_array.json @@ -0,0 +1,9 @@ +{ + "type": "bug", + "message": "Fix Create row modal inside linked row field fails if the primary field is of type formula array", + "issue_origin": "github", + "issue_number": 4547, + "domain": "database", + "bullet_points": [], + "created_at": "2026-01-15" +} diff --git a/docs/development/running-the-dev-env-locally.md b/docs/development/running-the-dev-env-locally.md index 7d997c57d8..ab912b07fa 100644 --- a/docs/development/running-the-dev-env-locally.md +++ b/docs/development/running-the-dev-env-locally.md @@ -121,7 +121,7 @@ All processes log to `/tmp/`: ### The .env.local File -The `just init` command creates `.env.local` in the project root with sensible defaults, taken from `.env.local.example`: +The `just init` command creates `.env.local` in the project root with sensible defaults, taken from `.env.local-dev.example`: ```bash # Key settings in .env.local diff --git a/enterprise/backend/src/baserow_enterprise/assistant/assistant.py b/enterprise/backend/src/baserow_enterprise/assistant/assistant.py index 2404526461..a7679b4805 100644 --- 
a/enterprise/backend/src/baserow_enterprise/assistant/assistant.py +++ b/enterprise/backend/src/baserow_enterprise/assistant/assistant.py @@ -5,7 +5,6 @@ from django.conf import settings from django.core.cache import cache from django.utils import translation -from django.utils.translation import gettext as _ import udspy from udspy.callback import BaseCallback @@ -21,7 +20,7 @@ from baserow_enterprise.assistant.tools.registries import assistant_tool_registry from .models import AssistantChat, AssistantChatMessage, AssistantChatPrediction -from .signatures import ChatSignature, RequestRouter +from .signatures import ChatSignature from .types import ( AiMessage, AiMessageChunk, @@ -200,33 +199,10 @@ def _init_assistant(self): "temperature": settings.BASEROW_ENTERPRISE_ASSISTANT_LLM_TEMPERATURE, "response_format": {"type": "json_object"}, } - self.search_user_docs_tool = self._get_search_user_docs_tool(tools) - self.agent_tools = tools - self._request_router = udspy.ChainOfThought(RequestRouter, **module_kwargs) self._assistant = udspy.ReAct( - ChatSignature, tools=self.agent_tools, max_iters=20, **module_kwargs + ChatSignature, tools=tools, max_iters=20, **module_kwargs ) - def _get_search_user_docs_tool( - self, tools: list[udspy.Tool | Callable] - ) -> udspy.Tool | None: - """ - Retrieves the search_user_docs tool from the list of tools if available. - - :param tools: The list of tools to search through. - :return: The search_user_docs as udspy.Tool or None if not found. - """ - - search_user_docs_tool = next( - (tool for tool in tools if tool.name == "search_user_docs"), None - ) - if search_user_docs_tool is None or isinstance( - search_user_docs_tool, udspy.Tool - ): - return search_user_docs_tool - - return udspy.Tool(search_user_docs_tool) - async def acreate_chat_message( self, role: AssistantChatMessage.Role, @@ -300,14 +276,14 @@ def list_chat_messages( ) return list(reversed(messages)) - async def afetch_chat_history(self, limit=30): + async def afetch_chat_history(self, limit: int = 50) -> udspy.History: """ Loads the chat history into a udspy.History object. It only loads complete message pairs (human + AI). The history will be in chronological order and must respect the module signature (question, answer). :param limit: The maximum number of message pairs to load. - :return: None + :return: A udspy.History instance containing the chat history. """ history = udspy.History() @@ -425,82 +401,6 @@ def _check_cancellation(self, cache_key: str, message_id: str) -> None: cache.delete(cache_key) raise AssistantMessageCancelled(message_id=message_id) - async def get_router_stream( - self, message: HumanMessage - ) -> AsyncGenerator[Any, None]: - """ - Returns an async generator that streams the router's response to a user - - :param message: The current user message that needs context from history. - :return: An async generator that yields stream events. - """ - - self.history = await self.afetch_chat_history() - - return self._request_router.astream( - question=message.content, - conversation_history=RequestRouter.format_conversation_history( - self.history - ), - ) - - async def _process_router_stream( - self, - event: Any, - human_msg: AssistantChatMessage, - ) -> Tuple[list[AssistantMessageUnion], bool, udspy.Prediction | None]: - """ - Process a single event from the smart router output stream. - - :param event: The event to process. - :param human_msg: The human message instance. - :return: a tuple of (messages_to_yield, prediction). 
- """ - - messages = [] - prediction = None - - if isinstance(event, (AiThinkingMessage, AiNavigationMessage)): - messages.append(event) - return messages, prediction - - # Stream the final answer - if isinstance(event, udspy.OutputStreamChunk): - if event.field_name == "answer" and event.content.strip(): - messages.append( - AiMessageChunk( - content=event.content, - sources=self._assistant_callbacks.sources, - ) - ) - - elif isinstance(event, udspy.Prediction): - if hasattr(event, "routing_decision"): - prediction = event - - if getattr(event, "routing_decision", None) == "delegate_to_agent": - messages.append(AiThinkingMessage(content=_("Thinking..."))) - elif getattr(event, "routing_decision", None) == "search_user_docs": - if self.search_user_docs_tool is not None: - await self.search_user_docs_tool(question=event.search_query) - else: - messages.append( - AiMessage( - content=_( - "I wanted to search the documentation for you, " - "but the search tool isn't currently available.\n\n" - "To enable documentation search, you'll need to set up " - "the local knowledge base. \n\n" - "You can find setup instructions at: https://baserow.io/user-docs" - ), - ) - ) - elif getattr(event, "answer", None): - ai_msg = await self._acreate_ai_message_response(human_msg, event) - messages.append(ai_msg) - - return messages, prediction - async def _process_agent_stream( self, event: Any, @@ -547,7 +447,7 @@ async def _process_agent_stream( return messages, prediction def get_agent_stream( - self, message: HumanMessage, extracted_context: str + self, message: HumanMessage, conversation_history: udspy.History | None = None ) -> AsyncGenerator[Any, None]: """ Returns an async generator that streams the ReAct agent's response to a user @@ -557,12 +457,19 @@ def get_agent_stream( :return: An async generator that yields stream events. 
""" - ui_context = message.ui_context.format() if message.ui_context else None + formatted_history = ( + ChatSignature.format_conversation_history(conversation_history) + if conversation_history + else [] + ) + formatted_ui_context = ( + message.ui_context.format() if message.ui_context else None + ) return self._assistant.astream( question=message.content, - context=extracted_context, - ui_context=ui_context, + conversation_history=formatted_history, + ui_context=formatted_ui_context, ) async def _process_stream( @@ -618,31 +525,18 @@ async def astream_messages( message_id = str(human_msg.id) yield AiStartedMessage(message_id=message_id) - router_stream = await self.get_router_stream(message) - routing_decision, extracted_context = None, "" + history = await self.afetch_chat_history(limit=30) - async for msg, prediction in self._process_stream( - human_msg, router_stream, self._process_router_stream + agent_stream = self.get_agent_stream(message, history) + + async for msg, __ in self._process_stream( + human_msg, agent_stream, self._process_agent_stream ): - if prediction is not None: - routing_decision = prediction.routing_decision - extracted_context = prediction.extracted_context yield msg - if routing_decision == "delegate_to_agent": - agent_stream = self.get_agent_stream( - message, - extracted_context=extracted_context, - ) - - async for msg, __ in self._process_stream( - human_msg, agent_stream, self._process_agent_stream - ): - yield msg - - # Generate chat title if needed - if not self._chat.title: - chat_title = await self._generate_chat_title(human_msg.content) - self._chat.title = chat_title - await self._chat.asave(update_fields=["title", "updated_on"]) - yield ChatTitleMessage(content=chat_title) + # Generate chat title if needed + if not self._chat.title: + chat_title = await self._generate_chat_title(human_msg.content) + self._chat.title = chat_title + await self._chat.asave(update_fields=["title", "updated_on"]) + yield ChatTitleMessage(content=chat_title) diff --git a/enterprise/backend/src/baserow_enterprise/assistant/prompts.py b/enterprise/backend/src/baserow_enterprise/assistant/prompts.py index 7e41db1843..84d949a61c 100644 --- a/enterprise/backend/src/baserow_enterprise/assistant/prompts.py +++ b/enterprise/backend/src/baserow_enterprise/assistant/prompts.py @@ -110,103 +110,58 @@ AGENT_SYSTEM_PROMPT = ( ASSISTANT_SYSTEM_PROMPT_BASE + """ -**CRITICAL:** You MUST use your action tools to fulfill the request, loading additional tools if needed. - -### YOUR TOOLS: -- **Action tools**: Navigate, list databases, tables, fields, views, filters, workflows, rows, etc. -- **Tool loaders**: Load additional specialized tools (e.g., load_rows_tools, load_views_tools). Use them to access capabilities not currently available. - -**IMPORTANT - HOW TO UNDERSTAND YOUR TOOLS:** -- Read each tool's NAME, DESCRIPTION, and ARGUMENTS carefully -- Tool names and descriptions tell you what they do (e.g., "list_tables", "create_rows_in_table_X") -- Arguments show what inputs they need -- **NEVER use search_user_docs to learn about tools** - it contains end-user documentation, NOT information about which tools to use or how to call them -- Inspect available tools directly to decide what to use - -### HOW TO WORK: -1. **Use action tools** to accomplish the user's goal -2. **If a needed tool isn't available**, call a tool loader to load it (e.g., if you need to create a field but don't have the tool, load field creation tools) -3. 
**Keep using tools** until the goal is reached or you confirm NO tool can help and NO tool loader can provide the needed tool - -### EXAMPLE - CORRECT USE OF TOOL LOADERS: -**User request:** "Change all 'Done' tasks to 'Todo'" - -**CORRECT approach:** -✓ Step 1: Identify that Tasks is a table in the open database, and status is the field to update -✓ Step 2: Notice you need to update rows but don't have the tool -✓ Step 3: Call the row tool loader (e.g., `load_rows_tools` for table X, requesting update capabilities) -✓ Step 4: Use the newly loaded `update_rows` tool to update the rows -✓ Step 5: Complete the task - -**CRITICAL:** Before giving up, ALWAYS check if a tool loader can provide the necessary tools to complete the task. - -### IF YOU CANNOT COMPLETE THE REQUEST: -If you've exhausted all available tools and loaders and cannot complete the task, offer: "I wasn't able to complete this using my available tools. Would you like me to search the documentation for instructions on how to do this manually?" - -### YOUR PRIORITY: -1. **First**: Use action tools to complete the request -2. **If tool missing**: Try loading it with a tool loader (scan all available loaders) -3. **If truly unable**: Explain the issue and offer to search documentation (never provide instructions from memory) - -The router determined this requires action. You were chosen because the user wants you to DO something, not provide information. - -Be aware of your limitations. If users ask for something outside your capabilities, finish immediately, explain what you can and cannot do based on the limitations below, and offer to search the documentation for further help. +## YOUR TOOLS + +**CRITICAL - Understanding your tools:** +- Learn what each tool does ONLY from its **name** and **description** +- **NEVER use `search_user_docs` to learn about your tools** - it contains end-user documentation, NOT information about your available tools or how to call them +- `search_user_docs` is ONLY for answering user questions about Baserow features and providing manual instructions + +## REQUEST HANDLING + +### ACTION REQUESTS - CHECK FIRST + +**CRITICAL: Before treating a request as a question, determine if it's an action you can perform.** + +Recognize action requests by: +- Imperative verbs: "Show...", "Filter...", "Create...", "Add...", "Delete...", "Update...", "Sort...", "Hide..." +- Desired states: "I want only...", "I need a field that...", "Make it show..." +- Example: "Show only rows where the primary field is empty" → This is an ACTION (create a filter), not a question about filtering + +**DO vs EXPLAIN:** +- If you have tools to do it → **DO IT** +- If you lack tools → **THEN explain** how to do it manually +- **NEVER explain how to do something you can do yourself** + +**Workflow:** +1. Check your tools - can you fulfill this? +2. **YES**: Execute (ask for clarification only if request is ambiguous) +3. **NO** (see LIMITATIONS): Explain you can't, then provide manual instructions from docs + +### QUESTIONS (only after ruling out action requests) + +**FACTUAL QUESTIONS** - asking what Baserow IS or HAS: +- Examples: "Does Baserow have X feature?", "How does Y work?", "What options exist for Z?" 
+- These have objectively correct/incorrect answers that must come from documentation +- **ALWAYS search documentation first** using `search_user_docs` +- Check the `reliability_note` in the response: + - **HIGH CONFIDENCE**: Present the answer confidently with sources + - **PARTIAL MATCH**: Provide the answer but note some details may be incomplete + - **LOW CONFIDENCE / NOTHING FOUND**: Tell the user you couldn't find this in the documentation. **DO NOT guess or assume features exist** - if docs don't mention it (e.g., a "barcode field"), it likely doesn't exist. Suggest checking the community forum or contacting support. +- **NEVER fabricate Baserow features or capabilities** + +**ADVISORY QUESTIONS** - asking how to USE or APPLY Baserow: +- Examples: "How should I structure X?", "What's a good approach for Y?", "Help me build Z", "Which field type works best for W?" +- These ask for your expertise in applying Baserow to solve problems - there's no single correct answer +- **Use your knowledge** of Baserow's real capabilities (field types, views, formulas, automations, linking, etc.) to provide helpful recommendations +- You may search docs for reference, but can also directly advise based on your understanding of Baserow +- Focus on practical solutions using actual Baserow functionality + +**Key principle**: Never fabricate what Baserow CAN do. Freely advise on HOW to use what Baserow actually offers. """ + AGENT_LIMITATIONS + """ -### TASK INSTRUCTIONS: -""" -) - -REQUEST_ROUTER_PROMPT = ( - ASSISTANT_SYSTEM_PROMPT_BASE - + """ -Route based on what the user wants YOU to do: - -**delegate_to_agent** (DEFAULT) - User wants YOU to perform an action -- Commands/requests for YOU: "Create...", "Delete...", "Update...", "Add...", "Show me...", "List...", "Find..." -- Vague/unclear requests -- Anything not explicitly asking for instructions - -**search_user_docs** - User wants to learn HOW TO do something themselves -- ONLY when explicitly asking for instructions: "How do I...", "How can I...", "What are the steps to..." -- ONLY when asking for explanations: "What is...", "What does... mean", "Explain..." -- NOT for action requests even if phrased as questions - -## Critical Rules -- "Create X" → delegate_to_agent (action request for YOU) -- "How do I create X?" → search_user_docs (asking for instructions) -- When uncertain → delegate_to_agent - -## Output Requirements -**delegate_to_agent:** -- extracted_context: Comprehensive details from conversation history (IDs, names, actions, specs) -- search_query: empty - -**search_user_docs:** -- search_query: Clear question using Baserow terminology and the answer language if not English -- extracted_context: empty - -## Examples - -**Example 1 - delegate_to_agent (action):** -question: "Create a calendar view" -→ routing_decision: "delegate_to_agent" -→ search_query: "" -→ extracted_context: "User wants to create a calendar view." - -**Example 2 - search_user_docs (instructions):** -question: "How do I create a calendar view?" -→ routing_decision: "search_user_docs" -→ search_query: "How to create a calendar view in Baserow" -→ extracted_context: "" - -**Example 3 - delegate_to_agent (with history):** -question: "Assign them to Bob" -conversation_history: ["[0] (user): Show urgent tasks", "[1] (assistant): Found 5 tasks in table 'Tasks' (ID: 123)"] -→ routing_decision: "delegate_to_agent" -→ search_query: "" -→ extracted_context: "User wants to assign urgent tasks to Bob. Tasks in table 'Tasks' (ID: 123). Found 5 urgent tasks." 
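For context on the refactor this prompt change belongs to: the two-stage flow (RequestRouter deciding between `delegate_to_agent` and `search_user_docs`, then handing an `extracted_context` string to the agent) is removed, and the single ReAct agent now receives the formatted chat history directly through the new `conversation_history` input field. Below is a minimal sketch of how the simplified streaming path fits together, using only the call shapes visible elsewhere in this diff (`udspy.History`, `ChatSignature.format_conversation_history`, `astream`); message persistence, cancellation checks, and title generation from `astream_messages` are intentionally omitted, and the function name is hypothetical.

```python
# Illustrative sketch only - not the production entry point. Call shapes are
# taken from assistant.py / signatures.py in this diff; error handling and
# persistence are left out.
import udspy

from baserow_enterprise.assistant.signatures import ChatSignature


async def stream_agent_answer(assistant, message):
    # Load up to 30 complete (human, AI) message pairs, chronological order.
    history: udspy.History = await assistant.afetch_chat_history(limit=30)

    # No router step anymore: the ReAct agent gets the history directly via
    # the `conversation_history` input field of ChatSignature.
    agent_stream = assistant._assistant.astream(
        question=message.content,
        conversation_history=ChatSignature.format_conversation_history(history),
        ui_context=message.ui_context.format() if message.ui_context else None,
    )

    async for event in agent_stream:
        # In the real astream_messages, _process_agent_stream turns these
        # OutputStreamChunk / Prediction events into AiMessageChunk and
        # AiMessage objects and persists them; here we just pass them through.
        yield event
```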
+## TASK INSTRUCTIONS: """ ) diff --git a/enterprise/backend/src/baserow_enterprise/assistant/signatures.py b/enterprise/backend/src/baserow_enterprise/assistant/signatures.py index 0e7e07e54e..60bd981266 100644 --- a/enterprise/backend/src/baserow_enterprise/assistant/signatures.py +++ b/enterprise/backend/src/baserow_enterprise/assistant/signatures.py @@ -1,16 +1,14 @@ -from typing import Literal - import udspy -from .prompts import AGENT_SYSTEM_PROMPT, REQUEST_ROUTER_PROMPT +from .prompts import AGENT_SYSTEM_PROMPT class ChatSignature(udspy.Signature): __doc__ = AGENT_SYSTEM_PROMPT question: str = udspy.InputField() - context: str = udspy.InputField( - description="Context and facts extracted from the history to help answer the question." + conversation_history: list[str] = udspy.InputField( + desc="Previous messages formatted as '[index] (role): content', ordered chronologically" ) ui_context: str | None = udspy.InputField( default=None, @@ -22,38 +20,6 @@ class ChatSignature(udspy.Signature): ) answer: str = udspy.OutputField() - -class RequestRouter(udspy.Signature): - __doc__ = REQUEST_ROUTER_PROMPT - - question: str = udspy.InputField(desc="The current user question to route") - conversation_history: list[str] = udspy.InputField( - desc="Previous messages formatted as '[index] (role): content', ordered chronologically" - ) - - routing_decision: Literal["delegate_to_agent", "search_user_docs"] = ( - udspy.OutputField( - desc="Must be one of: 'delegate_to_agent' or 'search_user_docs'" - ) - ) - extracted_context: str = udspy.OutputField( - desc=( - "Relevant context extracted from conversation history. " - "The agent won't see the full history, only the question and this extracted context. " - "Always fill with comprehensive details (IDs, names, actions, specifications). " - "Be verbose - include all relevant information to help understand the request." - ), - ) - search_query: str = udspy.OutputField( - desc=( - "The search query in English to use with search_user_docs if routing_decision='search_user_docs'. " - "Should be a clear, well-formulated question using Baserow terminology. " - "Empty string if routing_decision='delegate_to_agent'. " - "If the question is in another language, make sure to mention in which " - "language the answer should be." - ) - ) - @classmethod def format_conversation_history(cls, history: udspy.History) -> list[str]: """ diff --git a/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py b/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py index 6457cb9063..4d337e7685 100644 --- a/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py +++ b/enterprise/backend/src/baserow_enterprise/assistant/tools/search_user_docs/tools.py @@ -18,31 +18,51 @@ class SearchDocsSignature(udspy.Signature): """ - Given a user question and the relevant documentation chunks as context, provide a an - accurate and concise answer along with a reliability score. If the documentation - provides instructions or URLs, include them in the answer. If the answer is not - found in the context, respond with "Nothing found in the documentation." + Given a user question and documentation chunks as context, provide an accurate + and concise answer along with a reliability score. + CRITICAL: The context may contain documents retrieved by keyword similarity that + are NOT actually relevant to the user's question. You MUST carefully evaluate + each document's ACTUAL TOPIC before using it: + + 1. 
First, identify the SPECIFIC FEATURE or concept the user is asking about + 2. For each document, check if it DIRECTLY explains that specific feature + 3. IGNORE documents that merely mention similar keywords but cover different topics + (e.g., if asked about "webhooks in Baserow", ignore docs about external + webhook services or third-party integrations - only use docs about + Baserow's native webhook feature) + 4. Only use documents that would genuinely help answer THIS specific question + + If no documents in the context actually address the user's question (even if + they contain similar words), respond with "Nothing found in the documentation." + + Include instructions and URLs from the documentation when relevant. Never fabricate answers or URLs. """ question: str = udspy.InputField() context: dict[str, str] = udspy.InputField( - desc="A mapping of source URLs to content." + desc=( + "A mapping of source URLs to documents. WARNING: These documents were " + "retrieved by keyword similarity and may include irrelevant results. " + "Carefully filter to only use documents that DIRECTLY address the question." + ) ) answer: str = udspy.OutputField() sources: list[str] = udspy.OutputField( desc=( - "A list of source URLs as strings used to generate the answer, " - "picked from the provided context keys, in order of importance." + "URLs of documents that were ACTUALLY USED to form the answer. " + "Only include sources that directly addressed the question topic. " + "Leave empty if no documents were relevant. Maximum 3 URLs, ordered by relevance." ) ) reliability: float = udspy.OutputField( desc=( - "The reliability score of the answer, from 0 to 1. " - "1 means the answer is fully supported by the provided context. " - "0 means the answer is not supported by the provided context." + "How well the RELEVANT documents (not all documents) support the answer. " + "1.0 = found documents that directly and completely answer the question. " + "0.5 = found partially relevant information. " + "0.0 = no documents actually addressed the question (regardless of keyword matches)." ) ) @@ -72,29 +92,42 @@ def get_search_user_docs_tool( user: AbstractUser, workspace: Workspace, tool_helpers: "ToolHelpers" ) -> Callable[[str], dict[str, Any]]: """ - Returns a function that searches the Baserow documentation for a given query. + Returns a tool function that searches Baserow's knowledge base and uses an LLM + to filter and synthesize relevant documentation into a focused answer. + + The search retrieves documents by keyword similarity, then the LLM evaluates + each document's actual relevance to the question before generating an answer. """ async def search_user_docs( question: Annotated[ - str, "The English version of the user question, using Baserow vocabulary." + str, + ( + "A precise search query in English using Baserow terminology. " + "Focus on the SPECIFIC Baserow feature being asked about. " + "Include the feature name and action, e.g., 'How to create webhooks in Baserow' " + "or 'Baserow table linking feature'. Avoid generic terms that could match " + "unrelated documentation about third-party services or integrations." + ), ], ) -> dict[str, Any]: """ - Search Baserow documentation to provide instructions and information for USERS. + Search Baserow's official documentation for user guides and feature + explanations. - This tool provides end-user documentation explaining Baserow features and how - users can use them manually through the UI. 
It does NOT contain information - about: - - Which tools/functions the agent should use - - How to use agent tools or loaders - - Agent-specific implementation details + PURPOSE: Provides end-user documentation about Baserow's built-in + features and how to use them through the UI. - Use this ONLY when the user explicitly asks for instructions on how to do - something themselves, or wants to learn about Baserow features. + USE WHEN: The user asks how to do something in Baserow, wants to learn + about a Baserow feature, or needs step-by-step instructions. - Make sure the question is in English and uses Baserow-specific terminology - to get the best results. + DO NOT USE FOR: Agent tool usage, API implementation details, or + programming help. + + IMPORTANT: Frame the question to target Baserow's NATIVE features + specifically. For example, ask about "Baserow webhooks" not just + "webhooks" to avoid getting results about external webhook services that + integrate WITH Baserow. """ nonlocal tool_helpers @@ -103,7 +136,7 @@ async def search_user_docs( @sync_to_async def _search(question: str) -> list[KnowledgeBaseChunk]: - chunks = KnowledgeBaseHandler().search(question) + chunks = KnowledgeBaseHandler().search(question, 15) return list(chunks) searcher = udspy.ChainOfThought(SearchDocsSignature) @@ -126,15 +159,46 @@ def _search(question: str) -> list[KnowledgeBaseChunk]: if url in available_urls and url not in sources: sources.append(url) - - # If for any reason the model wasn't able to return sources correctly, fill them - # from the available URLs. - if not sources: - sources = list(available_urls) + if len(sources) >= 3: + break + + # Only fallback to available URLs if reliability is high AND we have a + # real answer. Don't populate sources if the model indicated no relevant + # docs were found. + nothing_found = "nothing found" in prediction.answer.lower() + if not sources and prediction.reliability > 0.8 and not nothing_found: + sources = list(available_urls)[:3] + + # Override reliability to 0 if the model explicitly said nothing was + # found. The model sometimes returns high reliability for "nothing + # found" answers, which is semantically incorrect - we want reliability + # to reflect whether we actually found useful information. + reliability = 0.0 if nothing_found else prediction.reliability + + if reliability >= 0.7: + reliability_note = ( + "HIGH CONFIDENCE: Answer is well-supported by the documentation." + ) + elif reliability >= 0.4: + reliability_note = ( + "PARTIAL MATCH: Some relevant information was found, but the " + "documentation may not fully cover this topic. Supplement with " + "general knowledge but warn the user that details may be incomplete." + ) + else: + reliability_note = ( + "LOW CONFIDENCE: The documentation does not contain information about " + "this topic. DO NOT provide an answer based on general knowledge or " + "assumptions - the feature may not exist in Baserow. Tell the user: " + "'I couldn't find information about this in the official Baserow " + "documentation.' and suggest they check the community forum or " + "contact support." 
+ ) return { "answer": prediction.answer, - "reliability": prediction.reliability, + "reliability": reliability, + "reliability_note": reliability_note, "sources": sources, } diff --git a/enterprise/backend/src/baserow_enterprise/assistant/types.py b/enterprise/backend/src/baserow_enterprise/assistant/types.py index 8e4f068687..080dbee730 100644 --- a/enterprise/backend/src/baserow_enterprise/assistant/types.py +++ b/enterprise/backend/src/baserow_enterprise/assistant/types.py @@ -197,19 +197,6 @@ class AiErrorMessage(BaseModel): content: str = Field(description="Error message content") -AIMessageUnion = ( - ChatTitleMessage - | AiMessage - | AiErrorMessage - | AiThinkingMessage - | AiMessageChunk - | AiReasoningChunk - | AiStartedMessage - | AiCancelledMessage -) -AssistantMessageUnion = HumanMessage | AIMessageUnion - - class TableNavigationType(BaseModel): type: Literal["database-table"] database_id: int @@ -261,3 +248,17 @@ def to_localized_string(self): class AiNavigationMessage(BaseModel, udspy.StreamEvent): type: Literal["ai/navigation"] = "ai/navigation" location: AnyNavigationType + + +AIMessageUnion = ( + ChatTitleMessage + | AiMessage + | AiErrorMessage + | AiThinkingMessage + | AiMessageChunk + | AiReasoningChunk + | AiStartedMessage + | AiCancelledMessage + | AiNavigationMessage +) +AssistantMessageUnion = HumanMessage | AIMessageUnion diff --git a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py index df1e7fa782..1d5bbec55f 100644 --- a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py +++ b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant.py @@ -1,15 +1,4 @@ -""" -Tests for the Assistant class focusing on behaviors rather than implementation details. 
- -These tests verify that the Assistant: -- Correctly loads and formats chat history for context -- Persists messages to the database during streaming -- Handles sources from tool outputs correctly -- Generates and persists chat titles appropriately -- Adapts its signature based on chat state -""" - -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import MagicMock, patch from django.core.cache import cache @@ -341,9 +330,9 @@ def test_history_is_passed_to_astream_as_context( assistant = Assistant(chat) - # Mock the router stream to delegate to agent with extracted context - def mock_router_stream_factory(*args, **kwargs): - # Verify conversation history is passed to router + # Mock the agent stream to verify conversation history is passed + def mock_agent_stream_factory(*args, **kwargs): + # Verify conversation history is passed to the agent assert kwargs["conversation_history"] == [ "[0] (user): What is Baserow?", "[1] (assistant): Baserow is a no-code database", @@ -351,32 +340,20 @@ def mock_router_stream_factory(*args, **kwargs): "[3] (assistant): Click the Create Table button", ] - async def _stream(): - yield Prediction( - routing_decision="delegate_to_agent", - extracted_context="User wants to add a view to their table", - search_query="", - ) - - return _stream() - - # Patch the instance method - assistant._request_router.astream = Mock(side_effect=mock_router_stream_factory) - - # Mock the agent stream - def mock_agent_stream_factory(*args, **kwargs): - # Verify extracted context is passed to agent - assert kwargs["context"] == "User wants to add a view to their table" - async def _stream(): yield OutputStreamChunk( - module=None, + module=assistant._assistant.extract_module, field_name="answer", delta="Answer", content="Answer", is_complete=False, ) - yield Prediction(answer="Answer", trajectory=[], reasoning="") + yield Prediction( + module=assistant._assistant, + answer="Answer", + trajectory=[], + reasoning="", + ) return _stream() @@ -785,10 +762,9 @@ async def consume_stream(): thinking_messages = async_to_sync(consume_stream)() - # Should receive thinking messages - assert len(thinking_messages) == 2 - assert thinking_messages[0].content == "Thinking..." - assert thinking_messages[1].content == "still thinking..." + # Should receive the thinking message emitted by the agent stream + assert len(thinking_messages) == 1 + assert thinking_messages[0].content == "still thinking..." @pytest.mark.django_db diff --git a/justfile b/justfile index 55be496743..ae4c07e89f 100644 --- a/justfile +++ b/justfile @@ -218,12 +218,12 @@ _dev-start: # Create .env.local from example if it doesn't exist if [ ! -f .env.local ]; then - if [ -f .env.local.example ]; then - echo "Creating .env.local from .env.local.example..." - cp .env.local.example .env.local + if [ -f .env.local-dev.example ]; then + echo "Creating .env.local from .env.local-dev.example..." + cp .env.local-dev.example .env.local echo "" else - echo "Warning: .env.local.example not found, skipping .env.local creation" + echo "Warning: .env.local-dev.example not found, skipping .env.local creation" echo "" fi fi @@ -240,7 +240,7 @@ _dev-start: # Start docker services (redis, db, mailhog, otel-collector) echo "==> Starting Docker services (redis, db, mailhog, otel-collector)..." - just dc-dev up -d redis db mailhog otel-collector + just dc-dev up -d redis db mailhog otel-collector caddy # Wait for services to be ready echo "==> Waiting for PostgreSQL to be ready..." 
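As a reference point for the `search_user_docs` changes earlier in this diff, the tool now returns a `reliability_note` alongside the raw score, and the agent prompt's HIGH CONFIDENCE / PARTIAL MATCH / LOW CONFIDENCE guidance keys off that note. The sketch below mirrors the thresholds added in tools.py; the answer text, URL, and score are made up purely for illustration, and the helper function is hypothetical (the real wording of each note lives in tools.py).

```python
# Illustrative only: reproduces the banding added in tools.py above.
def reliability_band(reliability: float, answer: str) -> str:
    """Map the model's reliability score to the band the agent prompt expects."""
    if "nothing found" in answer.lower():
        reliability = 0.0  # "nothing found" answers are never treated as reliable
    if reliability >= 0.7:
        return "HIGH CONFIDENCE"
    if reliability >= 0.4:
        return "PARTIAL MATCH"
    return "LOW CONFIDENCE"


# Example of the dict shape returned by the tool (values invented for the example).
result = {
    "answer": "Webhooks can be configured per table from the webhooks menu...",
    "reliability": 0.85,
    "reliability_note": "HIGH CONFIDENCE: Answer is well-supported by the documentation.",
    "sources": ["https://baserow.io/user-docs/..."],  # at most 3 URLs, illustrative
}

assert reliability_band(result["reliability"], result["answer"]) == "HIGH CONFIDENCE"
```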
diff --git a/web-frontend/modules/database/formula/formulaTypes.js b/web-frontend/modules/database/formula/formulaTypes.js index ce1684c843..477a9b379e 100644 --- a/web-frontend/modules/database/formula/formulaTypes.js +++ b/web-frontend/modules/database/formula/formulaTypes.js @@ -838,6 +838,10 @@ export class BaserowFormulaArrayType extends mix( toHumanReadableString(field, value) { const subType = this.getSubType(field) + if (!Array.isArray(value)) { + return '' + } + return value .map((v) => { return subType.toHumanReadableString(