From 4883984e54b77d9afeb2d10fe318d6692d1b6089 Mon Sep 17 00:00:00 2001 From: Tsering Paljor Date: Tue, 9 Dec 2025 09:12:43 +0400 Subject: [PATCH 1/2] Gracefully handle missing specific instance of DataSource service (#4390) * Optimistically batch fetch specific services, fall back to fetching individually. * Add changelog * Add comment * Simplify * Add docstring * Simplify the fix by allow specific_iterator() to skip missing specific objects. --- backend/src/baserow/core/db.py | 13 +++-- backend/src/baserow/core/services/handler.py | 1 + .../data_sources/test_data_source_handler.py | 51 +++++++++++++++++++ backend/tests/baserow/core/test_core_db.py | 48 ++++++++++++++++- ...ing_data_source_services_when_the_spe.json | 9 ++++ 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json diff --git a/backend/src/baserow/core/db.py b/backend/src/baserow/core/db.py index 0114bdfc8a..573d694254 100644 --- a/backend/src/baserow/core/db.py +++ b/backend/src/baserow/core/db.py @@ -91,6 +91,7 @@ def specific_iterator( | None = None, base_model: T | None = None, select_related: List[str] = None, + skip_missing_specific_objects: bool = False, ) -> List[T]: """ Iterates over the given queryset or list of model instances, and finds the specific @@ -116,6 +117,8 @@ def specific_iterator( is provided in the `queryset_or_list` argument. This should be used if the instances provided in the list have select related objects. :return: A list of specific objects in the right order. + :param skip_missing_specific_objects: When True, missing specific instances + are logged without raising an error. """ if isinstance(queryset_or_list, QuerySet): @@ -183,9 +186,13 @@ def specific_iterator( try: specific_object = specific_objects[item.id] except KeyError: - raise base_model.DoesNotExist( - f"The specific object with id {item.id} does not exist." - ) + error = f"The specific object with id {item.id} does not exist." + + if skip_missing_specific_objects: + logger.error(error) + continue + + raise base_model.DoesNotExist(error) # If there are annotation keys, we must extract them from the original item # because they should exist there and set them on the specific object so they diff --git a/backend/src/baserow/core/services/handler.py b/backend/src/baserow/core/services/handler.py index f74ecff79a..0abad082ac 100644 --- a/backend/src/baserow/core/services/handler.py +++ b/backend/src/baserow/core/services/handler.py @@ -112,6 +112,7 @@ def per_content_type_queryset_hook(model, queryset): specific_iterator( queryset, per_content_type_queryset_hook=per_content_type_queryset_hook, + skip_missing_specific_objects=True, ) ) diff --git a/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py b/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py index 41cc9e7dad..0fd1c31585 100644 --- a/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py +++ b/backend/tests/baserow/contrib/builder/data_sources/test_data_source_handler.py @@ -1,6 +1,8 @@ from decimal import Decimal +from unittest import mock from unittest.mock import patch +from django.db import connection from django.http import HttpRequest from django.shortcuts import reverse @@ -654,3 +656,52 @@ def test_query_data_sources_excludes_trashed_service(data_fixture): DataSource.objects.filter(page=page), specific=True ) assert len(data_sources) == 1 + + +@pytest.mark.django_db +def test_query_data_sources_with_missing_specific_service(data_fixture): + """ + Test that a missing specific instance of a local baserow service + doesn't cause an exception. + """ + + user = data_fixture.create_user() + page = data_fixture.create_builder_page() + integration = data_fixture.create_local_baserow_integration( + user=user, application=page.builder + ) + + service_1 = data_fixture.create_local_baserow_get_row_service( + integration=integration + ) + data_source = data_fixture.create_builder_local_baserow_get_row_data_source( + page=page, service=service_1 + ) + service_2 = data_fixture.create_local_baserow_get_row_service( + integration=integration + ) + data_fixture.create_builder_local_baserow_get_row_data_source( + page=page, service=service_2 + ) + + missing_service_id = service_2.id + + # Simulate a data integrity issue + specific_table_name = LocalBaserowGetRow._meta.db_table + with connection.cursor() as cursor: + # Delete from the specific table (LocalBaserowGetRow) instead of using ORM + # to better simulate the data integrity issue. + cursor.execute( + f"DELETE FROM {specific_table_name} WHERE service_ptr_id = %s", + [missing_service_id], + ) + + with mock.patch("baserow.core.db.logger") as mock_logger: + data_sources = DataSourceHandler()._query_data_sources( + DataSource.objects.filter(page=page), specific=True + ) + + mock_logger.error.assert_called_once_with( + f"The specific object with id {missing_service_id} does not exist." + ) + assert data_sources == [data_source] diff --git a/backend/tests/baserow/core/test_core_db.py b/backend/tests/baserow/core/test_core_db.py index 33c9992575..cb6a422321 100644 --- a/backend/tests/baserow/core/test_core_db.py +++ b/backend/tests/baserow/core/test_core_db.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from django.contrib.contenttypes.models import ContentType from django.db import connection @@ -655,3 +655,49 @@ def test_multiple_field_prefetch__many_to_many_missing_source(data_fixture): ) row = rows[0] assert len(row.field.all()) == 1 + + +@pytest.mark.django_db +def test_specific_iterator_skip_missing_specific_objects(data_fixture): + """ + Tests the optional skip_missing_specific_objects argument. + + When True, a missing specific instance shouldn't raise an exception. + """ + + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + field = data_fixture.create_text_field(table=table) + + # Create a field without a specific type (simulating data integrity issue) + field_without_specific = Field.objects.create( + table=table, + order=1, + name="Test", + primary=False, + content_type=field.content_type, + ) + + base_queryset = Field.objects.filter( + id__in=[field.id, field_without_specific.id] + ).order_by("id") + + # Without skip_missing_specific_objects, the 2nd field should + # cause an exception + with pytest.raises(Field.DoesNotExist): + list(specific_iterator(base_queryset)) + + # With skip_missing_specific_objects, the 2nd field should be + # skipped gracefully + with patch("baserow.core.db.logger") as mock_logger: + specific_objects = list( + specific_iterator(base_queryset, skip_missing_specific_objects=True) + ) + + # Should only return the 1 specific field, skipping the one without + # a specific instance + assert len(specific_objects) == 1 + assert specific_objects[0].id == field.id + mock_logger.error.assert_called_once_with( + f"The specific object with id {field_without_specific.id} does not exist." + ) diff --git a/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json b/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json new file mode 100644 index 0000000000..78cca44c0d --- /dev/null +++ b/changelog/entries/unreleased/bug/4389_gracefully_handle_fetching_data_source_services_when_the_spe.json @@ -0,0 +1,9 @@ +{ + "type": "bug", + "message": "Gracefully handle fetching data source services when the specific instance is missing.", + "issue_origin": "github", + "issue_number": 4389, + "domain": "builder", + "bullet_points": [], + "created_at": "2025-12-04" +} \ No newline at end of file From 9845d2f43f5a7cfd998827159c57b505b50a8f7b Mon Sep 17 00:00:00 2001 From: Tsering Paljor Date: Tue, 9 Dec 2025 09:13:45 +0400 Subject: [PATCH 2/2] Safely decode FormulaField value to JSON (#4403) * Add error handling when checking if db value is serializable. * Add changelog * Simplify test * Rename _value_is_serialized_object to _deserialize_baserow_object --------- Co-authored-by: peter_baserow --- backend/src/baserow/core/formula/field.py | 30 +++++++++++++++---- .../core/formula/test_formula_field.py | 19 ++++++++++++ ..._when_db_value_is_not_json_serializab.json | 9 ++++++ 3 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 backend/tests/baserow/core/formula/test_formula_field.py create mode 100644 changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json diff --git a/backend/src/baserow/core/formula/field.py b/backend/src/baserow/core/formula/field.py index 6ee50ac174..96302ff909 100644 --- a/backend/src/baserow/core/formula/field.py +++ b/backend/src/baserow/core/formula/field.py @@ -1,6 +1,6 @@ import json import logging -from typing import Dict, List, Union +from typing import Any, Dict, List, Optional, Union from django.db import connection, models @@ -41,8 +41,29 @@ def __init__(self, *args, **kwargs): self.null = True self.blank = True - def _value_is_serialized_object(self, value: FormulaFieldDatabaseValue) -> bool: - return isinstance(value, str) and value[:1] == "{" and value[-1:] == "}" + def _deserialize_baserow_object( + self, value: FormulaFieldDatabaseValue + ) -> Optional[Dict[str, Any]]: + """ + Given a value from the database, attempts to deserialize it into a dictionary + representing a Baserow formula object. If the value is not a valid JSON string + representing a Baserow formula object, returns None. + + :param value: The value from the database to deserialize. + :return: A dictionary representing the Baserow formula object, or None if + deserialization fails. + """ + + if not isinstance(value, str): + return None + + if not (value.startswith("{") and value.endswith("}")): + return None + + try: + return json.loads(value) + except (TypeError, json.JSONDecodeError): + return None def _transform_db_value_to_dict( self, value: FormulaFieldDatabaseValue @@ -65,9 +86,8 @@ def _transform_db_value_to_dict( # receive an integer, we convert it to a string. value = str(value) # We could encounter a serialized object... - if self._value_is_serialized_object(value): + if context := self._deserialize_baserow_object(value): # If we have, then we can parse it and return the `BaserowFormulaObject` - context = json.loads(value) return BaserowFormulaObject( mode=context["m"], version=context["v"], formula=context["f"] ) diff --git a/backend/tests/baserow/core/formula/test_formula_field.py b/backend/tests/baserow/core/formula/test_formula_field.py new file mode 100644 index 0000000000..399f150661 --- /dev/null +++ b/backend/tests/baserow/core/formula/test_formula_field.py @@ -0,0 +1,19 @@ +from baserow.core.formula.field import FormulaField + + +def test_deserialize_baserow_object_valid(): + field = FormulaField() + + valid_json = '{"m": "simple", "v": "0.1", "f": "test formula"}' + result = field._deserialize_baserow_object(valid_json) + + assert result == {"m": "simple", "v": "0.1", "f": "test formula"} + + +def test_deserialize_baserow_object_invalid(): + field = FormulaField() + + invalid_json = "{foo}" + result = field._deserialize_baserow_object(invalid_json) + + assert result is None diff --git a/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json b/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json new file mode 100644 index 0000000000..e0e45b4a26 --- /dev/null +++ b/changelog/entries/unreleased/bug/4402_improved_error_handling_when_db_value_is_not_json_serializab.json @@ -0,0 +1,9 @@ +{ + "type": "bug", + "message": "Improved error handling when FormulaField value is not JSON serializable.", + "issue_origin": "github", + "issue_number": 4402, + "domain": "core", + "bullet_points": [], + "created_at": "2025-12-05" +}