Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions backend/src/baserow/config/asgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from channels.routing import ProtocolTypeRouter, URLRouter

from baserow.config.helpers import ConcurrencyLimiterASGI
from baserow.core.mcp import baserow_mcp
from baserow.config.helpers import ConcurrencyLimiterASGI, check_lazy_loaded_libraries
from baserow.core.mcp import get_baserow_mcp_server
from baserow.core.telemetry.telemetry import setup_logging, setup_telemetry
from baserow.ws.routers import websocket_router

Expand All @@ -18,13 +18,16 @@
# logging setup. Otherwise Django will try to destroy any log handlers we added prior.
setup_logging()

# Check that libraries meant to be lazy-loaded haven't been imported at startup.
# This runs after Django is fully loaded, so it catches imports from all apps.
check_lazy_loaded_libraries()

application = ProtocolTypeRouter(
{
"http": ConcurrencyLimiterASGI(
URLRouter(
[
re_path(r"^mcp", baserow_mcp.sse_app()),
re_path(r"^mcp", get_baserow_mcp_server().sse_app()),
re_path(r"", django_asgi_app),
]
),
Expand Down
13 changes: 13 additions & 0 deletions backend/src/baserow/config/celery.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from django.conf import settings

from celery import Celery, signals

from baserow.config.helpers import check_lazy_loaded_libraries
from baserow.core.telemetry.tasks import BaserowTelemetryTask

app = Celery("baserow")
Expand All @@ -26,3 +29,13 @@ def clear_local(*args, **kwargs):

signals.task_prerun.connect(clear_local)
signals.task_postrun.connect(clear_local)


@signals.worker_process_init.connect
def on_worker_init(**kwargs):
    """
    Run the lazy-load check when a Celery worker process is initialized.

    Connected to Celery's ``worker_process_init`` signal. Before running the
    check, ``"mcp"`` is added to ``settings.BASEROW_LAZY_LOADED_LIBRARIES`` so that
    an early import of it in a worker process is reported as well.

    :param kwargs: Keyword arguments supplied by the signal dispatcher; unused.
    """

    # The `mcp` library is only needed in asgi.py, so in a Celery worker it is
    # treated as a library that must not be imported at startup. Guard the
    # append so the entry is never duplicated if this handler runs more than
    # once in the same process.
    lazy_libs = settings.BASEROW_LAZY_LOADED_LIBRARIES
    if "mcp" not in lazy_libs:
        lazy_libs.append("mcp")

    # Check that libraries meant to be lazy-loaded haven't been imported at
    # startup. This runs after Django is fully loaded, so it catches imports
    # from all apps.
    check_lazy_loaded_libraries()
46 changes: 46 additions & 0 deletions backend/src/baserow/config/helpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,54 @@
import asyncio
import sys

from django.conf import settings

from loguru import logger


def check_lazy_loaded_libraries():
    """
    Warn when libraries that should be lazy-loaded were imported at startup.

    Compares ``sys.modules`` against ``settings.BASEROW_LAZY_LOADED_LIBRARIES``
    and emits a single warning naming every listed library that has already been
    imported. This helps catch accidental top-level imports that defeat the
    purpose of lazy loading these heavy libraries to reduce memory footprint.

    The warning includes a copy-pasteable import hook that prints the stack of
    the first offending import. The hook implements ``find_spec`` because the
    import system no longer calls the legacy ``find_module`` method on Python
    3.12 and newer.

    Only runs when ``settings.DEBUG`` is True. A missing
    ``BASEROW_LAZY_LOADED_LIBRARIES`` setting is treated as an empty list.
    """

    if not settings.DEBUG:
        return

    lazy_libs = getattr(settings, "BASEROW_LAZY_LOADED_LIBRARIES", [])
    loaded_early = [lib for lib in lazy_libs if lib in sys.modules]
    if not loaded_early:
        return

    # Quoted, comma-separated names so they can be embedded as a list literal
    # in the debugging snippet below.
    libs_list = ", ".join(f'"{lib}"' for lib in loaded_early)
    logger.warning(
        f"The following libraries were loaded during startup but should be "
        f"lazy-loaded to reduce memory footprint: {', '.join(loaded_early)}. "
        f"Either import them inside functions/methods where they're used, or "
        f"remove them from BASEROW_LAZY_LOADED_LIBRARIES if they're legitimately "
        f"needed at startup. "
        f"To debug, add the following code at the very top of your settings file "
        f"(e.g., settings/dev.py, before any other imports):\n\n"
        f"import sys, traceback\n"
        f"class _T:\n"
        f"    def find_spec(self, n, p=None, t=None):\n"
        f"        for lib in [{libs_list}]:\n"
        f"            if n == lib or n.startswith(lib + '.'):\n"
        f"                print(f'IMPORT: {{n}}'); traceback.print_stack(); sys.exit(1)\n"
        f"        return None\n"
        f"sys.meta_path.insert(0, _T())\n"
    )


class dummy_context:
async def __aenter__(self):
pass
Expand Down
29 changes: 25 additions & 4 deletions backend/src/baserow/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@
from django.core.exceptions import ImproperlyConfigured

import dj_database_url
import sentry_sdk
from corsheaders.defaults import default_headers
from sentry_sdk.integrations.django import DjangoIntegration
from sentry_sdk.scrubber import DEFAULT_DENYLIST, EventScrubber

from baserow.config.settings.utils import (
Setting,
Expand Down Expand Up @@ -1303,18 +1300,42 @@ def __setitem__(self, key, value):
print(e)


# Libraries that should be lazy-loaded (imported inside functions/methods) to reduce
# memory footprint at startup. If any of these are found in sys.modules during startup,
# a warning will be shown suggesting to either lazy-load them or remove them from this
# list if they're legitimately needed at startup.
BASEROW_LAZY_LOADED_LIBRARIES = [
"openai",
"anthropic",
"mistralai",
"ollama",
"langchain_core",
"jira2markdown",
"saml2",
"openpyxl",
"numpy",
]


SENTRY_BACKEND_DSN = os.getenv("SENTRY_BACKEND_DSN")
SENTRY_DSN = SENTRY_BACKEND_DSN or os.getenv("SENTRY_DSN")
SENTRY_DENYLIST = DEFAULT_DENYLIST + ["username", "email", "name"]

if SENTRY_DSN:
import sentry_sdk
from sentry_sdk.integrations.django import DjangoIntegration
from sentry_sdk.scrubber import DEFAULT_DENYLIST, EventScrubber

SENTRY_DENYLIST = DEFAULT_DENYLIST + ["username", "email", "name"]

sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[DjangoIntegration(signals_spans=False, middleware_spans=False)],
send_default_pii=False,
event_scrubber=EventScrubber(recursive=True, denylist=SENTRY_DENYLIST),
environment=os.getenv("SENTRY_ENVIRONMENT", ""),
)
else:
BASEROW_LAZY_LOADED_LIBRARIES.append("sentry_sdk")

BASEROW_OPENAI_API_KEY = os.getenv("BASEROW_OPENAI_API_KEY", None)
BASEROW_OPENAI_ORGANIZATION = os.getenv("BASEROW_OPENAI_ORGANIZATION", "") or None
Expand Down
5 changes: 5 additions & 0 deletions backend/src/baserow/config/settings/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
INSTALLED_APPS.insert(0, "daphne") # noqa: F405
INSTALLED_APPS += ["django_extensions"] # noqa: F405

# daphne imports numpy via autobahn -> flatbuffers, so we exclude it from the
# lazy-load check in dev mode. In production, numpy should still be lazy-loaded.
if "numpy" in BASEROW_LAZY_LOADED_LIBRARIES: # noqa: F405
BASEROW_LAZY_LOADED_LIBRARIES.remove("numpy") # noqa: F405

BASEROW_ENABLE_SILK = str_to_bool(os.getenv("BASEROW_ENABLE_SILK", "on"))
if BASEROW_ENABLE_SILK:
INSTALLED_APPS += ["silk"] # noqa: F405
Expand Down
9 changes: 9 additions & 0 deletions backend/src/baserow/config/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/
"""

from django.conf import settings
from django.core.wsgi import get_wsgi_application

from baserow.config.helpers import check_lazy_loaded_libraries
from baserow.core.telemetry.telemetry import setup_logging, setup_telemetry

# The telemetry instrumentation library setup needs to run prior to django's setup.
Expand All @@ -19,3 +21,10 @@
# It is critical to set up our own logging after Django has been set up and done its
# own logging setup. Otherwise Django will try to destroy any log handlers we added
# prior.
setup_logging()

# The `mcp` library is only needed in asgi.py, so under WSGI it must not be imported
# at startup; add it to the lazy-load check.
settings.BASEROW_LAZY_LOADED_LIBRARIES.append("mcp")

# Check that libraries meant to be lazy-loaded haven't been imported at startup.
# This runs after Django is fully loaded, so it catches imports from all apps.
check_lazy_loaded_libraries()
18 changes: 16 additions & 2 deletions backend/src/baserow/contrib/database/mcp/rows/tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from asgiref.sync import sync_to_async
from mcp import Tool
from mcp.types import TextContent
from rest_framework.response import Response
from starlette.status import HTTP_204_NO_CONTENT

Expand All @@ -23,6 +21,8 @@ class ListRowsMcpTool(MCPTool):
name = "list_table_rows"

async def list(self, endpoint):
from mcp import Tool

return [
Tool(
name=self.name,
Expand Down Expand Up @@ -64,6 +64,8 @@ async def call(
name_parameters,
call_arguments,
):
from mcp.types import TextContent

table_id = call_arguments["table_id"]
if not await sync_to_async(table_in_workspace_of_endpoint)(endpoint, table_id):
return [TextContent(type="text", text="Table not in endpoint workspace.")]
Expand Down Expand Up @@ -92,6 +94,8 @@ class CreateRowMcpTool(MCPTool):
name = "create_row_table_{id}"

async def list(self, endpoint):
from mcp import Tool

tables = await sync_to_async(get_all_tables)(endpoint)
tables = await sync_to_async(remove_table_no_permission)(
endpoint, tables, CreateRowDatabaseTableOperationType
Expand Down Expand Up @@ -127,6 +131,8 @@ async def call(
name_parameters,
call_arguments,
):
from mcp.types import TextContent

table_id = name_parameters["id"]
if not await sync_to_async(table_in_workspace_of_endpoint)(endpoint, table_id):
return [TextContent(type="text", text="Table not in endpoint workspace.")]
Expand All @@ -148,6 +154,8 @@ class UpdateRowMcpTool(MCPTool):
name = "update_row_table_{id}"

async def list(self, endpoint):
from mcp import Tool

tables = await sync_to_async(get_all_tables)(endpoint)
tables = await sync_to_async(remove_table_no_permission)(
endpoint, tables, UpdateDatabaseRowOperationType
Expand Down Expand Up @@ -187,6 +195,8 @@ async def call(
name_parameters,
call_arguments,
):
from mcp.types import TextContent

table_id = name_parameters["id"]
if not await sync_to_async(table_in_workspace_of_endpoint)(endpoint, table_id):
return [TextContent(type="text", text="Table not in endpoint workspace.")]
Expand All @@ -211,6 +221,8 @@ class DeleteRowMcpTool(MCPTool):
name = "delete_table_row"

async def list(self, endpoint):
from mcp import Tool

return [
Tool(
name=self.name,
Expand Down Expand Up @@ -241,6 +253,8 @@ async def call(
name_parameters,
call_arguments,
):
from mcp.types import TextContent

table_id = call_arguments["table_id"]
if not await sync_to_async(table_in_workspace_of_endpoint)(endpoint, table_id):
return [TextContent(type="text", text="Table not in endpoint workspace.")]
Expand Down
6 changes: 4 additions & 2 deletions backend/src/baserow/contrib/database/mcp/table/tools.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json

from asgiref.sync import sync_to_async
from mcp import Tool
from mcp.types import TextContent

from baserow.contrib.database.api.tables.serializers import (
TableWithoutDataSyncSerializer,
Expand All @@ -16,6 +14,8 @@ class ListTablesMcpTool(MCPTool):
name = "list_tables"

async def list(self, endpoint):
from mcp import Tool

return [
Tool(
name=self.name,
Expand All @@ -34,6 +34,8 @@ async def call(
name_parameters,
call_arguments,
):
from mcp.types import TextContent

tables = await sync_to_async(get_all_tables)(endpoint)
serializer = TableWithoutDataSyncSerializer(tables, many=True)
table_json = json.dumps(serializer.data)
Expand Down
9 changes: 5 additions & 4 deletions backend/src/baserow/contrib/integrations/ai/service_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

from django.contrib.auth.models import AbstractUser

from langchain_core.exceptions import OutputParserException
from langchain_core.prompts import PromptTemplate
from rest_framework import serializers
from rest_framework.exceptions import ValidationError as DRFValidationError

Expand All @@ -18,7 +16,7 @@
)
from baserow.core.generative_ai.registries import generative_ai_model_type_registry
from baserow.core.integrations.handler import IntegrationHandler
from baserow.core.output_parsers import StrictEnumOutputParser
from baserow.core.output_parsers import get_strict_enum_output_parser
from baserow.core.services.dispatch_context import DispatchContext
from baserow.core.services.exceptions import (
ServiceImproperlyConfiguredDispatchException,
Expand Down Expand Up @@ -170,6 +168,9 @@ def dispatch_data(
resolved_values: Dict[str, Any],
dispatch_context: DispatchContext,
) -> Dict[str, Any]:
from langchain_core.exceptions import OutputParserException
from langchain_core.prompts import PromptTemplate

if not service.ai_generative_ai_type:
raise ServiceImproperlyConfiguredDispatchException(
"The AI provider type is missing."
Expand Down Expand Up @@ -228,7 +229,7 @@ def dispatch_data(
choices_enum = enum.Enum(
"Choices", {f"OPTION_{i}": choice for i, choice in enumerate(choices)}
)
output_parser = StrictEnumOutputParser(enum=choices_enum)
output_parser = get_strict_enum_output_parser(enum=choices_enum)
format_instructions = output_parser.get_format_instructions()
prompt_template = PromptTemplate(
template=prompt + "\n\nGiven this user query:\n\n{format_instructions}",
Expand Down
Loading
Loading