From 4883984e54b77d9afeb2d10fe318d6692d1b6089 Mon Sep 17 00:00:00 2001
From: Tsering Paljor <tsering@baserow.io>
Date: Tue, 9 Dec 2025 09:12:43 +0400
Subject: [PATCH 1/2] Gracefully handle missing specific instance of DataSource
 service (#4390)

* Optimistically batch fetch specific services, fall back to fetching individually.

* Add changelog

* Add comment

* Simplify

* Add docstring

* Simplify the fix by allow specific_iterator() to skip missing specific objects.
---
 backend/src/baserow/core/db.py                | 13 +++--
 backend/src/baserow/core/services/handler.py  |  1 +
 .../data_sources/test_data_source_handler.py  | 51 +++++++++++++++++++
 backend/tests/baserow/core/test_core_db.py    | 48 ++++++++++++++++-
 ...ing_data_source_services_when_the_spe.json |  9 ++++
 5 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json

diff --git a/backend/src/baserow/core/db.py b/backend/src/baserow/core/db.py
index 0114bdfc8a..573d694254 100644
--- a/backend/src/baserow/core/db.py
+++ b/backend/src/baserow/core/db.py
@@ -91,6 +91,7 @@ def specific_iterator(
     | None = None,
     base_model: T | None = None,
     select_related: List[str] = None,
+    skip_missing_specific_objects: bool = False,
 ) -> List[T]:
     """
     Iterates over the given queryset or list of model instances, and finds the specific
@@ -116,6 +117,8 @@ def specific_iterator(
         is provided in the `queryset_or_list` argument. This should be used if the
         instances provided in the list have select related objects.
     :return: A list of specific objects in the right order.
+    :param skip_missing_specific_objects: When True, missing specific instances
+        are logged without raising an error.
     """
 
     if isinstance(queryset_or_list, QuerySet):
@@ -183,9 +186,13 @@ def specific_iterator(
         try:
             specific_object = specific_objects[item.id]
         except KeyError:
-            raise base_model.DoesNotExist(
-                f"The specific object with id {item.id} does not exist."
-            )
+            error = f"The specific object with id {item.id} does not exist."
+
+            if skip_missing_specific_objects:
+                logger.error(error)
+                continue
+
+            raise base_model.DoesNotExist(error)
 
         # If there are annotation keys, we must extract them from the original item
         # because they should exist there and set them on the specific object so they
diff --git a/backend/src/baserow/core/services/handler.py b/backend/src/baserow/core/services/handler.py
index f74ecff79a..0abad082ac 100644
--- a/backend/src/baserow/core/services/handler.py
+++ b/backend/src/baserow/core/services/handler.py
@@ -112,6 +112,7 @@ def per_content_type_queryset_hook(model, queryset):
                 specific_iterator(
                     queryset,
                     per_content_type_queryset_hook=per_content_type_queryset_hook,
+                    skip_missing_specific_objects=True,
                 )
             )
 
diff --git a/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py b/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py
index 41cc9e7dad..0fd1c31585 100644
--- a/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py
+++ b/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py
@@ -1,6 +1,8 @@
 from decimal import Decimal
+from unittest import mock
 from unittest.mock import patch
 
+from django.db import connection
 from django.http import HttpRequest
 from django.shortcuts import reverse
 
@@ -654,3 +656,52 @@ def test_query_data_sources_excludes_trashed_service(data_fixture):
         DataSource.objects.filter(page=page), specific=True
     )
     assert len(data_sources) == 1
+
+
+@pytest.mark.django_db
+def test_query_data_sources_with_missing_specific_service(data_fixture):
+    """
+    Test that a missing specific instance of a local baserow service
+    doesn't cause an exception.
+    """
+
+    user = data_fixture.create_user()
+    page = data_fixture.create_builder_page()
+    integration = data_fixture.create_local_baserow_integration(
+        user=user, application=page.builder
+    )
+
+    service_1 = data_fixture.create_local_baserow_get_row_service(
+        integration=integration
+    )
+    data_source = data_fixture.create_builder_local_baserow_get_row_data_source(
+        page=page, service=service_1
+    )
+    service_2 = data_fixture.create_local_baserow_get_row_service(
+        integration=integration
+    )
+    data_fixture.create_builder_local_baserow_get_row_data_source(
+        page=page, service=service_2
+    )
+
+    missing_service_id = service_2.id
+
+    # Simulate a data integrity issue
+    specific_table_name = LocalBaserowGetRow._meta.db_table
+    with connection.cursor() as cursor:
+        # Delete from the specific table (LocalBaserowGetRow) instead of using ORM
+        # to better simulate the data integrity issue.
+        cursor.execute(
+            f"DELETE FROM {specific_table_name} WHERE service_ptr_id = %s",
+            [missing_service_id],
+        )
+
+    with mock.patch("baserow.core.db.logger") as mock_logger:
+        data_sources = DataSourceHandler()._query_data_sources(
+            DataSource.objects.filter(page=page), specific=True
+        )
+
+    mock_logger.error.assert_called_once_with(
+        f"The specific object with id {missing_service_id} does not exist."
+    )
+    assert data_sources == [data_source]
diff --git a/backend/tests/baserow/core/test_core_db.py b/backend/tests/baserow/core/test_core_db.py
index 33c9992575..cb6a422321 100644
--- a/backend/tests/baserow/core/test_core_db.py
+++ b/backend/tests/baserow/core/test_core_db.py
@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 from django.contrib.contenttypes.models import ContentType
 from django.db import connection
@@ -655,3 +655,49 @@ def test_multiple_field_prefetch__many_to_many_missing_source(data_fixture):
     )
     row = rows[0]
     assert len(row.field.all()) == 1
+
+
+@pytest.mark.django_db
+def test_specific_iterator_skip_missing_specific_objects(data_fixture):
+    """
+    Tests the optional skip_missing_specific_objects argument.
+
+    When True, a missing specific instance shouldn't raise an exception.
+    """
+
+    user = data_fixture.create_user()
+    table = data_fixture.create_database_table(user=user)
+    field = data_fixture.create_text_field(table=table)
+
+    # Create a field without a specific type (simulating data integrity issue)
+    field_without_specific = Field.objects.create(
+        table=table,
+        order=1,
+        name="Test",
+        primary=False,
+        content_type=field.content_type,
+    )
+
+    base_queryset = Field.objects.filter(
+        id__in=[field.id, field_without_specific.id]
+    ).order_by("id")
+
+    # Without skip_missing_specific_objects, the 2nd field should
+    # cause an exception
+    with pytest.raises(Field.DoesNotExist):
+        list(specific_iterator(base_queryset))
+
+    # With skip_missing_specific_objects, the 2nd field should be
+    # skipped gracefully
+    with patch("baserow.core.db.logger") as mock_logger:
+        specific_objects = list(
+            specific_iterator(base_queryset, skip_missing_specific_objects=True)
+        )
+
+    # Should only return the 1 specific field, skipping the one without
+    # a specific instance
+    assert len(specific_objects) == 1
+    assert specific_objects[0].id == field.id
+    mock_logger.error.assert_called_once_with(
+        f"The specific object with id {field_without_specific.id} does not exist."
+    )
diff --git a/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json b/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json
new file mode 100644
index 0000000000..78cca44c0d
--- /dev/null
+++ b/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json
@@ -0,0 +1,9 @@
+{
+    "type": "bug",
+    "message": "Gracefully handle fetching data source services when the specific instance is missing.",
+    "issue_origin": "github",
+    "issue_number": 4389,
+    "domain": "builder",
+    "bullet_points": [],
+    "created_at": "2025-12-04"
+}
\ No newline at end of file

From 9845d2f43f5a7cfd998827159c57b505b50a8f7b Mon Sep 17 00:00:00 2001
From: Tsering Paljor <tsering@baserow.io>
Date: Tue, 9 Dec 2025 09:13:45 +0400
Subject: [PATCH 2/2] Safely decode FormulaField value to JSON (#4403)

* Add error handling when checking if db value is serializable.

* Add changelog

* Simplify test

* Rename _value_is_serialized_object to _deserialize_baserow_object

---------

Co-authored-by: peter_baserow <peter@baserow.io>
---
 backend/src/baserow/core/formula/field.py     | 30 +++++++++++++++----
 .../core/formula/test_formula_field.py        | 19 ++++++++++++
 ..._when_db_value_is_not_json_serializab.json |  9 ++++++
 3 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 backend/tests/baserow/core/formula/test_formula_field.py
 create mode 100644 changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json

diff --git a/backend/src/baserow/core/formula/field.py b/backend/src/baserow/core/formula/field.py
index 6ee50ac174..96302ff909 100644
--- a/backend/src/baserow/core/formula/field.py
+++ b/backend/src/baserow/core/formula/field.py
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 from django.db import connection, models
 
@@ -41,8 +41,29 @@ def __init__(self, *args, **kwargs):
         self.null = True
         self.blank = True
 
-    def _value_is_serialized_object(self, value: FormulaFieldDatabaseValue) -> bool:
-        return isinstance(value, str) and value[:1] == "{" and value[-1:] == "}"
+    def _deserialize_baserow_object(
+        self, value: FormulaFieldDatabaseValue
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Given a value from the database, attempts to deserialize it into a dictionary
+        representing a Baserow formula object. If the value is not a valid JSON string
+        representing a Baserow formula object, returns None.
+
+        :param value: The value from the database to deserialize.
+        :return: A dictionary representing the Baserow formula object, or None if
+            deserialization fails.
+        """
+
+        if not isinstance(value, str):
+            return None
+
+        if not (value.startswith("{") and value.endswith("}")):
+            return None
+
+        try:
+            return json.loads(value)
+        except (TypeError, json.JSONDecodeError):
+            return None
 
     def _transform_db_value_to_dict(
         self, value: FormulaFieldDatabaseValue
@@ -65,9 +86,8 @@ def _transform_db_value_to_dict(
                 # receive an integer, we convert it to a string.
                 value = str(value)
             # We could encounter a serialized object...
-            if self._value_is_serialized_object(value):
+            if context := self._deserialize_baserow_object(value):
                 # If we have, then we can parse it and return the `BaserowFormulaObject`
-                context = json.loads(value)
                 return BaserowFormulaObject(
                     mode=context["m"], version=context["v"], formula=context["f"]
                 )
diff --git a/backend/tests/baserow/core/formula/test_formula_field.py b/backend/tests/baserow/core/formula/test_formula_field.py
new file mode 100644
index 0000000000..399f150661
--- /dev/null
+++ b/backend/tests/baserow/core/formula/test_formula_field.py
@@ -0,0 +1,19 @@
+from baserow.core.formula.field import FormulaField
+
+
+def test_deserialize_baserow_object_valid():
+    field = FormulaField()
+
+    valid_json = '{"m": "simple", "v": "0.1", "f": "test formula"}'
+    result = field._deserialize_baserow_object(valid_json)
+
+    assert result == {"m": "simple", "v": "0.1", "f": "test formula"}
+
+
+def test_deserialize_baserow_object_invalid():
+    field = FormulaField()
+
+    invalid_json = "{foo}"
+    result = field._deserialize_baserow_object(invalid_json)
+
+    assert result is None
diff --git a/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json b/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json
new file mode 100644
index 0000000000..e0e45b4a26
--- /dev/null
+++ b/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json
@@ -0,0 +1,9 @@
+{
+    "type": "bug",
+    "message": "Improved error handling when FormulaField value is not JSON serializable.",
+    "issue_origin": "github",
+    "issue_number": 4402,
+    "domain": "core",
+    "bullet_points": [],
+    "created_at": "2025-12-05"
+}