diff --git a/backend/src/baserow/config/settings/base.py b/backend/src/baserow/config/settings/base.py index b29d2d61bd..1caf089e29 100644 --- a/backend/src/baserow/config/settings/base.py +++ b/backend/src/baserow/config/settings/base.py @@ -850,6 +850,10 @@ def __setitem__(self, key, value): os.getenv("BATCH_ROWS_SIZE_LIMIT", 200) ) # How many rows can be modified at once. +SEARCH_UPDATE_BATCH_SIZE = int( + os.getenv("BASEROW_SEARCH_UPDATE_BATCH_SIZE", 2000) +) # How many rows to process per batch in search index updates. + # Maximum count of records considered as a 'small table' during field rule operations. FIELD_RULE_ROWS_LIMIT = int(os.getenv("FIELD_RULE_ROWS_LIMIT", BATCH_ROWS_SIZE_LIMIT)) diff --git a/backend/src/baserow/contrib/database/data_sync/handler.py b/backend/src/baserow/contrib/database/data_sync/handler.py index ceda08b982..237febc87f 100644 --- a/backend/src/baserow/contrib/database/data_sync/handler.py +++ b/backend/src/baserow/contrib/database/data_sync/handler.py @@ -15,7 +15,7 @@ from baserow.contrib.database.models import Database from baserow.contrib.database.operations import CreateTableDatabaseTableOperationType from baserow.contrib.database.rows.handler import RowHandler -from baserow.contrib.database.rows.types import CreatedRowsData +from baserow.contrib.database.rows.types import CreatedRowsData, UpdatedRowsData from baserow.contrib.database.search.handler import SearchHandler from baserow.contrib.database.table.models import Table from baserow.contrib.database.table.operations import UpdateDatabaseTableOperationType @@ -43,8 +43,56 @@ from .operations import SyncTableOperationType from .registries import data_sync_type_registry, two_way_sync_strategy_type_registry +LARGE_DATA_SYNC_SEARCH_UPDATE_MIN_CHANGED_ROWS = 10_000 +LARGE_DATA_SYNC_SEARCH_UPDATE_THRESHOLD = 0.5 + class DataSyncHandler: + def _schedule_search_updates_after_sync( + self, + data_sync: DataSync, + enabled_properties: QuerySet[DataSyncSyncedProperty], + existing_row_count: int, + created_rows: CreatedRowsData, + updated_rows: UpdatedRowsData, + row_ids_to_delete: list[int], + ) -> None: + changed_rows_count = ( + len(created_rows.created_rows) + + len(updated_rows.updated_rows) + + len(row_ids_to_delete) + ) + if changed_rows_count == 0: + return + + search_fields = [p.field for p in enabled_properties] + full_field_search_update = ( + len(row_ids_to_delete) > 0 + or changed_rows_count > LARGE_DATA_SYNC_SEARCH_UPDATE_MIN_CHANGED_ROWS + or changed_rows_count + >= existing_row_count * LARGE_DATA_SYNC_SEARCH_UPDATE_THRESHOLD + ) + + if full_field_search_update: + SearchHandler.schedule_update_search_data( + data_sync.table, fields=search_fields + ) + return + + row_ids_to_refresh = {row.id for row in created_rows.created_rows} | { + row.id for row in updated_rows.updated_rows + } + if created_rows.cascade_update: + row_ids_to_refresh.update(created_rows.cascade_update.row_ids) + if updated_rows.cascade_update: + row_ids_to_refresh.update(updated_rows.cascade_update.row_ids) + + SearchHandler.schedule_update_search_data( + data_sync.table, + fields=search_fields, + row_ids=sorted(row_ids_to_refresh), + ) + def get_data_sync( self, data_sync_id: int, base_queryset: Optional[QuerySet] = None ) -> DataSync: @@ -473,8 +521,9 @@ def _do_sync_table(self, user, data_sync, progress_builder): ) progress.increment(by=10) # makes the total `80` + updated_rows = UpdatedRowsData([], [], {}, {}, None, [], None) if len(rows_to_update) > 0: - RowHandler().update_rows( + updated_rows = RowHandler().update_rows( user=user, table=data_sync.table, rows_values=rows_to_update, @@ -500,20 +549,14 @@ def _do_sync_table(self, user, data_sync, progress_builder): ) progress.increment(by=10) # makes the total `100` - if ( - len(rows_to_create) > 0 - or len(rows_to_update) > 0 - or len(row_ids_to_delete) > 0 - ): - # No need to include this in the progress as it triggers a celery task - row_ids = [r["id"] for r in rows_to_update] + [ - r.id for r in created_rows.created_rows - ] - SearchHandler.schedule_update_search_data( - data_sync.table, - fields=[p.field for p in enabled_properties], - row_ids=row_ids, - ) + self._schedule_search_updates_after_sync( + data_sync=data_sync, + enabled_properties=enabled_properties, + existing_row_count=len(existing_rows_queryset), + created_rows=created_rows, + updated_rows=updated_rows, + row_ids_to_delete=row_ids_to_delete, + ) def set_data_sync_synced_properties( self, diff --git a/backend/src/baserow/contrib/database/migrations/0209_optimize_pending_search_value_update.py b/backend/src/baserow/contrib/database/migrations/0209_optimize_pending_search_value_update.py new file mode 100644 index 0000000000..80fa6ba4d4 --- /dev/null +++ b/backend/src/baserow/contrib/database/migrations/0209_optimize_pending_search_value_update.py @@ -0,0 +1,71 @@ +# Generated by Django 5.2.13 on 2026-04-21 20:08 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('database', '0208_gridview_frozen_column_count'), + ] + + operations = [ + migrations.RemoveIndex( + model_name='pendingsearchvalueupdate', + name='pendingsearchvaluedeletion_frd', + ), + # Column and FK kept one release for rollback safety; only the FK index is dropped. + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.RemoveField( + model_name='pendingsearchvalueupdate', + name='table', + ), + ], + database_operations=[ + migrations.RunSQL( + sql=( + "DROP INDEX IF EXISTS " + "database_pendingsearchvalueupdate_table_id_813adfd1;" + ), + reverse_sql=( + "CREATE INDEX IF NOT EXISTS " + "database_pendingsearchvalueupdate_table_id_813adfd1 " + "ON database_pendingsearchvalueupdate (table_id);" + ), + ), + ], + ), + migrations.RunSQL( + sql=""" + ALTER TABLE database_pendingsearchvalueupdate SET ( + autovacuum_analyze_threshold = 2000, + autovacuum_analyze_scale_factor = 0.002, + autovacuum_vacuum_threshold = 5000, + autovacuum_vacuum_scale_factor = 0.01, + autovacuum_vacuum_insert_threshold = 5000, + autovacuum_vacuum_insert_scale_factor = 0.01 + ); + """, + reverse_sql=""" + ALTER TABLE database_pendingsearchvalueupdate RESET ( + autovacuum_analyze_threshold, + autovacuum_vacuum_scale_factor, + autovacuum_analyze_scale_factor, + autovacuum_vacuum_threshold, + autovacuum_vacuum_insert_threshold, + autovacuum_vacuum_insert_scale_factor + ); + """, + ), + migrations.RunSQL( + sql=""" + ALTER TABLE database_pendingsearchvalueupdate + ALTER COLUMN field_id SET STATISTICS 2000; + """, + reverse_sql=""" + ALTER TABLE database_pendingsearchvalueupdate + ALTER COLUMN field_id SET STATISTICS -1; + """, + ), + ] diff --git a/backend/src/baserow/contrib/database/rows/handler.py b/backend/src/baserow/contrib/database/rows/handler.py index d391a53d33..1f8ad636ea 100644 --- a/backend/src/baserow/contrib/database/rows/handler.py +++ b/backend/src/baserow/contrib/database/rows/handler.py @@ -124,6 +124,8 @@ tracer = trace.get_tracer(__name__) BATCH_SIZE = 1024 +LARGE_IMPORT_SEARCH_UPDATE_MIN_CHANGED_ROWS = 10_000 +LARGE_IMPORT_SEARCH_UPDATE_THRESHOLD = 0.5 meter = metrics.get_meter(__name__) rows_created_counter = meter.create_counter( @@ -236,6 +238,19 @@ def get_deleted_link_row_rels_for_update_collector( class RowHandler(metaclass=baserow_trace_methods(tracer)): + def _should_use_full_field_search_update_for_import( + self, + changed_rows: int, + model: GeneratedTableModel, + ) -> bool: + if changed_rows > LARGE_IMPORT_SEARCH_UPDATE_MIN_CHANGED_ROWS: + return True + + existing_row_count = model.objects.count() + return changed_rows >= ( + existing_row_count * LARGE_IMPORT_SEARCH_UPDATE_THRESHOLD + ) + def prepare_values(self, fields, values): """ Prepares a set of values so that they can be created or updated in the database. @@ -1767,6 +1782,7 @@ def force_create_rows_by_batch( progress: Optional[Progress] = None, model: Optional[Type[GeneratedTableModel]] = None, signal_params: Optional[Dict] = None, + skip_search_update: bool = True, ) -> Tuple[List[GeneratedTableModel], Dict[str, Dict[str, Any]]]: """ Creates rows by batch and generates an error report instead of failing on first @@ -1777,6 +1793,9 @@ def force_create_rows_by_batch( :param rows_values: List of rows values for rows that need to be created. :param progress: Give a progress instance to track the progress of the import. :param model: Optional model to prevent recomputing table model. + :param signal_params: Additional parameters that are added to the signal. + :param skip_search_update: When True, skip search updates. The caller is + responsible for managing search updates. :return: The created rows and the error report. """ @@ -1806,9 +1825,7 @@ def force_create_rows_by_batch( generate_error_report=True, send_realtime_update=False, send_webhook_events=False, - # Don't trigger loads of search updates for every batch of rows we - # create but instead a single one for this entire table at the end. - skip_search_update=True, + skip_search_update=skip_search_update, signal_params=signal_params, ) @@ -1822,10 +1839,6 @@ def force_create_rows_by_batch( all_created_rows += created_rows - SearchHandler.schedule_update_search_data( - table, row_ids=[r.id for r in all_created_rows] - ) - return all_created_rows, report def force_update_rows_by_batch( @@ -1836,6 +1849,7 @@ def force_update_rows_by_batch( progress: Progress, model: Optional[Type[GeneratedTableModel]] = None, signal_params: Optional[Dict] = None, + skip_search_update: bool = True, ) -> Tuple[List[Dict[str, Any] | None], Dict[str, Dict[str, Any]]]: """ Updates rows by batch and generates an error report instead of failing on first @@ -1847,6 +1861,8 @@ def force_update_rows_by_batch( :param progress: Give a progress instance to track the progress of the import. :param model: Optional model to prevent recomputing table model. :param signal_params: Additional parameters that are added to the signal. + :param skip_search_update: When True, skip search updates. The caller is + responsible for managing search updates. :return: The updated rows and the error report. """ @@ -1876,6 +1892,7 @@ def force_update_rows_by_batch( send_realtime_update=False, send_webhook_events=False, generate_error_report=True, + skip_search_update=skip_search_update, signal_params=signal_params, ) report.update(result.errors) @@ -2059,12 +2076,18 @@ def import_rows( else: rows_values_to_create = valid_rows + changed_rows = len(rows_values_to_create) + len(rows_values_to_update) + full_field_search_update = self._should_use_full_field_search_update_for_import( + changed_rows, model + ) + created_rows, creation_report = self.force_create_rows_by_batch( user, table, rows_values_to_create, progress=creation_sub_progress, model=model, + skip_search_update=full_field_search_update, ) if rows_values_to_update: @@ -2074,6 +2097,7 @@ def import_rows( rows_values_to_update, progress=creation_sub_progress, model=model, + skip_search_update=full_field_search_update, ) # Add errors to global report @@ -2095,6 +2119,9 @@ def import_rows( # of rows_created because we might import a lot of rows. table_updated.send(self, table=table, user=user, force_table_refresh=True) + if full_field_search_update and changed_rows > 0: + SearchHandler.schedule_update_search_data(table) + return created_rows, error_report.to_dict() def get_fields_metadata_for_row_history( diff --git a/backend/src/baserow/contrib/database/search/handler.py b/backend/src/baserow/contrib/database/search/handler.py index 26abb403c0..7625f8fa34 100644 --- a/backend/src/baserow/contrib/database/search/handler.py +++ b/backend/src/baserow/contrib/database/search/handler.py @@ -12,6 +12,7 @@ """ +import time from collections import defaultdict from datetime import datetime, timezone from enum import Enum @@ -507,15 +508,21 @@ def schedule_update_search_data( row_ids: list[int] | None = None, ): """ - Called when field values for a table have been changed or created. Not called - when a row is deleted as we don't care and don't want to do anything for the - search indexes. + Queue search updates for a table after values have been created or changed. + Not called when a row is deleted as we don't care and don't want to do + anything for the search indexes. + + If ``fields`` is provided, only those fields are queued. If ``row_ids`` is + provided, only those rows are queued. If neither is provided, all searchable + fields in the table are queued as full-field updates, which rebuilds search + data for every row in those fields without creating one pending entry per row. :param table: The table a field value has been created or updated in. :param fields: Optional list of fields that have been changed or created. If - None, all fields in the table will be considered. + omitted together with ``row_ids``, all searchable fields are queued as + full-field updates. :param row_ids: Optional list of row IDs that have been changed or created. If - None, all rows in the table will be considered. + omitted, the queued update applies to all rows for the selected fields. """ workspace_id = table.database.workspace_id @@ -525,6 +532,8 @@ def schedule_update_search_data( field_ids = None if fields: field_ids = [f.id for f in fields] + elif not row_ids: + field_ids = [f.id for f in table.get_model().get_searchable_fields()] transaction.on_commit( lambda: schedule_update_search_data.delay(table.id, field_ids, row_ids) @@ -558,7 +567,7 @@ def mark_search_data_for_deletion( table_field_ids = [ f.id for f in table.get_model().get_fields(include_trash=True) ] - if field_ids is None: + if not field_ids: field_ids = table_field_ids else: field_ids = [fid for fid in set(field_ids) if fid in table_field_ids] @@ -571,12 +580,19 @@ def mark_for_deletion(): `process_search_data_marked_for_deletion` method is called. """ + now = datetime.now(tz=timezone.utc) + if not row_ids: # also remove any pending per-row update + PendingSearchValueUpdate.objects.filter( + field_id__in=field_ids, + row_id__isnull=False, + ).update(deletion_workspace_id=workspace_id, updated_on=now) PendingSearchValueUpdate.objects.bulk_create( [ PendingSearchValueUpdate( field_id=field_id, row_id=row_id, - deletion_workspace_id=table.database.workspace_id, + deletion_workspace_id=workspace_id, + updated_on=now, ) for field_id in sorted(field_ids) for row_id in sorted(row_ids or [None]) @@ -584,7 +600,7 @@ def mark_for_deletion(): update_conflicts=True, unique_fields=["field_id", "row_id"], update_fields=["updated_on", "deletion_workspace_id"], - batch_size=1000, + batch_size=settings.SEARCH_UPDATE_BATCH_SIZE, ) transaction.on_commit(mark_for_deletion) @@ -859,7 +875,9 @@ def delete_pending_updates(cls, q: Q, manager: str = "objects"): ) @classmethod - def process_search_data_updates(cls, table: "Table"): + def process_search_data_updates( + cls, table: "Table", time_budget_seconds: int | None = None + ) -> bool: """ Process pending search updates for a given table in two phases: @@ -869,8 +887,21 @@ def process_search_data_updates(cls, table: "Table"): refreshes only affected cells. :param table: The Table whose pending search updates will be handled. + :param time_budget_seconds: If set, processing will stop once this many + seconds have elapsed. Returns ``False`` to indicate that pending work + remains and the task should be rescheduled. + :return: ``True`` if all pending updates were processed, ``False`` if the + time budget was exhausted before completion. """ + start = time.monotonic() + + def budget_exhausted() -> bool: + return ( + time_budget_seconds is not None + and (time.monotonic() - start) > time_budget_seconds + ) + table_field_ids = list( Field.objects_and_trash.filter(table=table) .order_by() @@ -880,7 +911,7 @@ def process_search_data_updates(cls, table: "Table"): PendingSearchValueUpdate.objects.filter( field_id__in=table_field_ids, row_id=None ) - .order_by("-updated_on") + .order_by() .values_list("field_id", flat=True) ) @@ -891,6 +922,12 @@ def process_search_data_updates(cls, table: "Table"): # row-specific updates on the same field. last = False while not last: + if budget_exhausted(): + logger.info( + "Time budget exhausted for table {} during full-field updates.", + table.id, + ) + return False with transaction.atomic(): field_ids = list(full_field_updates[:fields_batch_size]) # Only delete updates older than this timestamp to avoid @@ -907,13 +944,19 @@ def process_search_data_updates(cls, table: "Table"): def _fetch_next_batch() -> QuerySet[PendingSearchValueUpdate]: return PendingSearchValueUpdate.objects.filter( field_id__in=table_field_ids, row_id__isnull=False - ).order_by("-updated_on") + ).order_by() # Now handle single-cells updates, grouping them for efficiency last = False while not last: + if budget_exhausted(): + logger.info( + "Time budget exhausted for table {} during row updates.", + table.id, + ) + return False with transaction.atomic(): - count = settings.BATCH_ROWS_SIZE_LIMIT + count = settings.SEARCH_UPDATE_BATCH_SIZE pending_cells_updates = _fetch_next_batch()[:count] check_timestamp = datetime.now(tz=timezone.utc) if len(pending_cells_updates) < count: @@ -932,3 +975,5 @@ def _fetch_next_batch() -> QuerySet[PendingSearchValueUpdate]: cls.delete_pending_updates( Q(id__in=update_ids, updated_on__lte=check_timestamp) ) + + return True diff --git a/backend/src/baserow/contrib/database/search/models.py b/backend/src/baserow/contrib/database/search/models.py index 4dd9c7aa49..9db7d9ce41 100644 --- a/backend/src/baserow/contrib/database/search/models.py +++ b/backend/src/baserow/contrib/database/search/models.py @@ -21,10 +21,6 @@ class PendingSearchValueUpdate(models.Model): serialize=False, verbose_name="ID", ) - # DEPRECATED: Remove this FK in future versions. Use `field_id` instead. - table = models.ForeignKey( - "database.Table", on_delete=models.CASCADE, related_name="+", null=True - ) field_id = models.IntegerField( help_text="The ID of the field to update.", ) @@ -59,16 +55,10 @@ class Meta: name="pendingsearchvaluedeletion_idx", condition=models.Q(deletion_workspace_id__isnull=False), ), - # This speeds up `field_id__in=[... many field IDS...]`. models.Index( name="pendingsearchvaluedeletion_ord", fields=["-updated_on"], ), - # This speeds up `field_id__in=[... few field IDS...]`. - models.Index( - name="pendingsearchvaluedeletion_frd", - fields=["field_id", "-updated_on"], - ), ] diff --git a/backend/src/baserow/contrib/database/search/tasks.py b/backend/src/baserow/contrib/database/search/tasks.py index ac6a5d0404..f17af5268b 100644 --- a/backend/src/baserow/contrib/database/search/tasks.py +++ b/backend/src/baserow/contrib/database/search/tasks.py @@ -33,6 +33,12 @@ def schedule_update_search_data( lost updates. Then the singleton task is enqueued; if it’s already scheduled, a pending flag is set so new changes will be processed once the current run finishes. + When ``field_ids`` is provided, pending work is queued for those fields. When + ``row_ids`` is provided, pending work is limited to those rows. A bare table-level + call should therefore expand to full-field updates before reaching this task, rather + than passing ``field_ids=None`` and ``row_ids=None`` and expecting pending work to + be created here. + :param table_id: The ID of the table to update the search data for. :param field_ids: Optional list of field IDs to update. If provided, only these fields will be updated in the search data. @@ -121,12 +127,27 @@ def update_search_data(table_id: int): flag = _get_singleton_autoreschedule_flag(table_id) flag.clear() - SearchHandler.process_search_data_updates(table) + # Leave a safety margin so the task can finish cleanly before the hard limit. + hard_limit = settings.CELERY_SEARCH_UPDATE_HARD_TIME_LIMIT + if hard_limit <= 30: + logger.warning( + "CELERY_SEARCH_UPDATE_HARD_TIME_LIMIT={}s is at or below the 30s " + "safety margin; the task may be killed before it can finish.", + hard_limit, + ) + time_budget = max(1, hard_limit - 30) + completed = SearchHandler.process_search_data_updates( + table, time_budget_seconds=time_budget + ) - # If new updates were queued during processing, schedule another update - if flag.is_set(): + # Reschedule if there are still pending updates (time budget exhausted) + # or if new updates were queued during processing. + if not completed or flag.is_set(): logger.debug( - f"New updates detected, rescheduling the task for table {table_id}." + "Rescheduling search update task for table {} (completed={}, flag={}).", + table_id, + completed, + flag.is_set(), ) schedule_update_search_data.delay(table_id) diff --git a/backend/tests/baserow/contrib/database/data_sync/test_data_sync_handler.py b/backend/tests/baserow/contrib/database/data_sync/test_data_sync_handler.py index cd9fce6ae2..c27382ba17 100644 --- a/backend/tests/baserow/contrib/database/data_sync/test_data_sync_handler.py +++ b/backend/tests/baserow/contrib/database/data_sync/test_data_sync_handler.py @@ -70,6 +70,22 @@ END:VEVENT END:VCALENDAR""" + +def _mock_ical_rows(count, updated_index=None): + rows = [] + for index in range(count): + summary = f"Test event {index}" + if updated_index is not None and index == updated_index: + summary = f"{summary} updated" + rows.append( + { + "uid": f"event-{index}", + "summary": summary, + } + ) + return rows + + ICAL_FEED_WITH_ONE_ITEMS_WITHOUT_DTEND = """BEGIN:VCALENDAR VERSION:2.0 PRODID:-//ical.marudot.com//iCal Event Maker @@ -655,6 +671,103 @@ def test_sync_data_sync_table_create_update_delete_row(data_fixture): assert getattr(sync_3_rows[0], f"field_{fields['summary'].id}") == "Test event 0" +@pytest.mark.django_db +@responses.activate +@patch( + "baserow.contrib.database.data_sync.handler.SearchHandler.schedule_update_search_data" +) +def test_sync_data_sync_table_large_change_uses_full_field_search_update( + mock_schedule_update_search_data, data_fixture +): + responses.add( + responses.GET, + "https://baserow.io/ical.ics", + status=200, + body=ICAL_FEED_WITH_ONE_ITEMS, + ) + + user = data_fixture.create_user() + database = data_fixture.create_database_application(user=user) + handler = DataSyncHandler() + data_sync = handler.create_data_sync_table( + user=user, + database=database, + table_name="Test", + type_name="ical_calendar", + synced_properties=["uid", "summary"], + ical_url="https://baserow.io/ical.ics", + ) + + with patch.object( + ICalCalendarDataSyncType, + "get_all_rows", + return_value=_mock_ical_rows(20), + ): + handler.sync_data_sync_table(user=user, data_sync=data_sync) + + args, kwargs = mock_schedule_update_search_data.call_args + assert args == (data_sync.table,) + assert "fields" in kwargs + assert "row_ids" not in kwargs + + +@pytest.mark.django_db +@responses.activate +@patch( + "baserow.contrib.database.data_sync.handler.SearchHandler.schedule_update_search_data" +) +def test_sync_data_sync_table_small_change_uses_row_specific_search_update( + mock_schedule_update_search_data, data_fixture +): + responses.add( + responses.GET, + "https://baserow.io/ical.ics", + status=200, + body=ICAL_FEED_WITH_ONE_ITEMS, + ) + + user = data_fixture.create_user() + database = data_fixture.create_database_application(user=user) + handler = DataSyncHandler() + data_sync = handler.create_data_sync_table( + user=user, + database=database, + table_name="Test", + type_name="ical_calendar", + synced_properties=["uid", "summary"], + ical_url="https://baserow.io/ical.ics", + ) + + with patch.object( + ICalCalendarDataSyncType, + "get_all_rows", + return_value=_mock_ical_rows(20), + ): + handler.sync_data_sync_table(user=user, data_sync=data_sync) + + uid_field = DataSyncSyncedProperty.objects.get( + data_sync=data_sync, key="uid" + ).field_id + model = data_sync.table.get_model() + changed_row_id = model.objects.values_list("id", flat=True).get( + **{f"field_{uid_field}": "event-5"} + ) + + mock_schedule_update_search_data.reset_mock() + + with patch.object( + ICalCalendarDataSyncType, + "get_all_rows", + return_value=_mock_ical_rows(20, updated_index=5), + ): + handler.sync_data_sync_table(user=user, data_sync=data_sync) + + args, kwargs = mock_schedule_update_search_data.call_args + assert args == (data_sync.table,) + assert "fields" in kwargs + assert kwargs["row_ids"] == [changed_row_id] + + @pytest.mark.django_db @responses.activate def test_sync_data_sync_table_property_removed_from_data_sync_type(data_fixture): diff --git a/backend/tests/baserow/contrib/database/rows/test_rows_handler.py b/backend/tests/baserow/contrib/database/rows/test_rows_handler.py index 1a6558580d..d262aab0d2 100644 --- a/backend/tests/baserow/contrib/database/rows/test_rows_handler.py +++ b/backend/tests/baserow/contrib/database/rows/test_rows_handler.py @@ -1056,6 +1056,68 @@ def test_import_rows( assert sorted(report.keys()) == sorted([1, 2]) +@pytest.mark.django_db +@patch( + "baserow.contrib.database.search.handler.SearchHandler.schedule_update_search_data" +) +def test_import_rows_large_change_uses_full_field_search_update( + mock_schedule_update_search_data, data_fixture +): + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + data_fixture.create_text_field(table=table, name="Name", order=1) + + rows, report = RowHandler().import_rows( + user=user, + table=table, + data=[["Tesla"]], + send_realtime_update=False, + ) + + assert len(rows) == 1 + assert report == {} + args, kwargs = mock_schedule_update_search_data.call_args + assert args == (table,) + assert kwargs == {} + + +@pytest.mark.django_db +@patch( + "baserow.contrib.database.search.handler.SearchHandler.schedule_update_search_data" +) +def test_import_rows_small_change_keeps_row_specific_search_update( + mock_schedule_update_search_data, data_fixture +): + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + name_field = data_fixture.create_text_field(table=table, name="Name", order=1) + handler = RowHandler() + + handler.create_rows( + user=user, + table=table, + rows_values=[{f"field_{name_field.id}": f"Car {i}"} for i in range(20)], + send_realtime_update=False, + send_webhook_events=False, + ) + + mock_schedule_update_search_data.reset_mock() + + rows, report = handler.import_rows( + user=user, + table=table, + data=[["New car"]], + send_realtime_update=False, + ) + + assert len(rows) == 1 + assert report == {} + args, kwargs = mock_schedule_update_search_data.call_args + assert args == (table,) + assert "fields" in kwargs + assert kwargs["row_ids"] == [rows[0].id] + + @pytest.mark.django_db def test_import_rows_with_read_only_field( data_fixture, diff --git a/backend/tests/baserow/contrib/database/search/test_search_handler.py b/backend/tests/baserow/contrib/database/search/test_search_handler.py index 7b0a7afead..debe5cad23 100644 --- a/backend/tests/baserow/contrib/database/search/test_search_handler.py +++ b/backend/tests/baserow/contrib/database/search/test_search_handler.py @@ -5,7 +5,6 @@ import pytest from freezegun import freeze_time -from pytest_unordered import unordered from baserow.contrib.database.fields.handler import FieldHandler from baserow.contrib.database.rows.handler import RowHandler @@ -328,43 +327,31 @@ def test_update_rows_process_update_entries(mock, data_fixture): PendingSearchValueUpdate.objects.bulk_create( [ - PendingSearchValueUpdate( - table_id=table.id, field_id=text_field.id, row_id=1 - ), - PendingSearchValueUpdate( - table_id=table.id, field_id=text_field.id, row_id=2 - ), - PendingSearchValueUpdate( - table_id=table.id, field_id=formula_field.id, row_id=1 - ), - PendingSearchValueUpdate( - table_id=table.id, field_id=formula_field.id, row_id=2 - ), + PendingSearchValueUpdate(field_id=text_field.id, row_id=1), + PendingSearchValueUpdate(field_id=text_field.id, row_id=2), + PendingSearchValueUpdate(field_id=formula_field.id, row_id=1), + PendingSearchValueUpdate(field_id=formula_field.id, row_id=2), ] ) mock.reset_mock() SearchHandler.process_search_data_updates(table) + # Both fields fit in a single chunk, so one batched call with field_ids and + # row_ids. assert mock.call_count == 1 assert mock.call_args[0][0] == table - assert mock.call_args[1] == { - "field_ids": unordered([text_field.id, formula_field.id]), - "row_ids": unordered([1, 2]), - } + assert set(mock.call_args[1]["field_ids"]) == {text_field.id, formula_field.id} + assert set(mock.call_args[1]["row_ids"]) == {1, 2} - PendingSearchValueUpdate.objects.count() == 0 + assert PendingSearchValueUpdate.objects.count() == 0 # If there's an update for all the rows (row_id=None), all other individual # updates are ignored. PendingSearchValueUpdate.objects.bulk_create( [ - PendingSearchValueUpdate(table_id=table.id, field_id=text_field.id), - PendingSearchValueUpdate( - table_id=table.id, field_id=text_field.id, row_id=2 - ), - PendingSearchValueUpdate( - table_id=table.id, field_id=text_field.id, row_id=3 - ), + PendingSearchValueUpdate(field_id=text_field.id), + PendingSearchValueUpdate(field_id=text_field.id, row_id=2), + PendingSearchValueUpdate(field_id=text_field.id, row_id=3), ] ) @@ -373,7 +360,7 @@ def test_update_rows_process_update_entries(mock, data_fixture): assert mock.call_count == 1 assert mock.call_args[0][0] == table assert mock.call_args[1] == {"field_ids": [text_field.id]} - PendingSearchValueUpdate.objects.count() == 0 + assert PendingSearchValueUpdate.objects.count() == 0 @pytest.mark.django_db(transaction=True) diff --git a/changelog/entries/unreleased/feature/3719_clear_singleselect_record_selector.json b/changelog/entries/unreleased/feature/3719_clear_singleselect_record_selector.json new file mode 100644 index 0000000000..06bf7d2dc1 --- /dev/null +++ b/changelog/entries/unreleased/feature/3719_clear_singleselect_record_selector.json @@ -0,0 +1,9 @@ +{ + "type": "feature", + "message": "clear single-select record selector", + "issue_origin": "github", + "issue_number": 3719, + "domain": "builder", + "bullet_points": [], + "created_at": "2026-05-05" +} \ No newline at end of file diff --git a/changelog/entries/unreleased/refactor/optimize_pending_search_update_scheduling.json b/changelog/entries/unreleased/refactor/optimize_pending_search_update_scheduling.json new file mode 100644 index 0000000000..c6762409de --- /dev/null +++ b/changelog/entries/unreleased/refactor/optimize_pending_search_update_scheduling.json @@ -0,0 +1,8 @@ +{ + "type": "refactor", + "message": "Optimize pending search update scheduling for large imports and syncs.", + "issue_origin": "github", + "issue_number": null, + "domain": "database", + "created_at": "2026-04-22" +} diff --git a/web-frontend/modules/builder/components/elements/baseComponents/ABDropdownItem.vue b/web-frontend/modules/builder/components/elements/baseComponents/ABDropdownItem.vue index de442371a0..42a7e70af8 100644 --- a/web-frontend/modules/builder/components/elements/baseComponents/ABDropdownItem.vue +++ b/web-frontend/modules/builder/components/elements/baseComponents/ABDropdownItem.vue @@ -2,6 +2,7 @@
  • -
    +
    diff --git a/web-frontend/modules/builder/components/elements/components/ChoiceElement.vue b/web-frontend/modules/builder/components/elements/components/ChoiceElement.vue index 2f8a091e3a..e0c420f24d 100644 --- a/web-frontend/modules/builder/components/elements/components/ChoiceElement.vue +++ b/web-frontend/modules/builder/components/elements/components/ChoiceElement.vue @@ -14,6 +14,7 @@ " :show-search="false" :multiple="element.multiple" + :clearable="!element.multiple && !element.required" @hide="onFormElementTouch" > as manual dropdown 1`] = `
  • as manual dropdown 1`] = ` />
  • { expect(wrapper.element).toMatchSnapshot() }) + test('clearable single select clears when choosing the active value', async () => { + let wrapper = null + + const onInput = vi.fn(async (newVal) => { + wrapper.setProps({ value: newVal }) + await wrapper.vm.$nextTick() + }) + + wrapper = await mountComponent({ + props: { value: 'a', clearable: true }, + slots: { + default: ` + `, + }, + listeners: { input: onInput }, + }) + + await wrapper.find('.dropdown__selected').trigger('click') + await wrapper + .find('.select__items :nth-child(1)') + .find('.select__item-link') + .trigger('click') + await wrapper.vm.$nextTick() + + expect(onInput).toHaveBeenLastCalledWith(null) + }) + test('focus', async () => { let wrapper = null