Skip to content

Commit 3da4797

Browse files
chore: ai auto update debounce (baserow#4378)
* run ai model values in parallel baserow#4227 * review fixes * reworked ai threaded execution * review fixes * lint fix after rebase/conflict, removed unused serializer fields * Debounce ai field value generation from auto-update. baserow#4317 * failing test sanity check * review fixes * review fixes * review fix * review fixes * Ensure tasks are re-scheduled on changes during generation * Debug: Add container logs on E2E service startup failure * Move serializer to the correct file * lint fixes * Fix filter empty values * address feedback * Fix flaky test * Fix tests * Add comment for sync=True, as suggested --------- Co-authored-by: Davide Silvestri <silvestri.eng@gmail.com>
1 parent e703b34 commit 3da4797

22 files changed

Lines changed: 664 additions & 196 deletions

File tree

.github/workflows/ci.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,21 @@ jobs:
733733
cd e2e-tests
734734
./wait-for-services.sh
735735
736+
- name: Show container logs on failure
737+
if: failure()
738+
run: |
739+
echo "=== Backend container logs ==="
740+
docker logs backend 2>&1 | tail -500 || true
741+
echo ""
742+
echo "=== Celery container logs ==="
743+
docker logs celery 2>&1 | tail -200 || true
744+
echo ""
745+
echo "=== Web-frontend container logs ==="
746+
docker logs web-frontend 2>&1 | tail -200 || true
747+
echo ""
748+
echo "=== Container status ==="
749+
docker ps -a
750+
736751
- name: Run E2E tests (shard ${{ matrix.shard }})
737752
env:
738753
PUBLIC_BACKEND_URL: http://localhost:8000

backend/src/baserow/celery_singleton_backend.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from django.conf import settings
2+
from django.core.cache import cache
3+
14
from celery_singleton.backends import RedisBackend
25
from django_redis import get_redis_connection
36

@@ -9,3 +12,45 @@ def __init__(self, *args, **kwargs):
912
"""
1013

1114
self.redis = get_redis_connection("default")
15+
16+
17+
class SingletonAutoRescheduleFlag:
18+
"""
19+
Flag is used to indicate that a task of this type is pending reschedule.
20+
21+
When the task ends, if this flag is set, it will re-schedule itself to
22+
ensure that task is eventually run.
23+
"""
24+
25+
def __init__(self, key: str):
26+
self.key = key
27+
28+
def is_set(self) -> bool:
29+
"""
30+
Checks if the flag is set.
31+
32+
:return: True if the flag is set, False otherwise.
33+
"""
34+
35+
return cache.get(key=self.key) or False
36+
37+
def set(self) -> bool:
38+
"""
39+
Sets the flag for the task, indicating it needs to be rescheduled.
40+
41+
:return: True if the flag was set, False if it was already set.
42+
"""
43+
44+
return cache.set(
45+
key=self.key,
46+
value=True,
47+
timeout=settings.AUTO_INDEX_LOCK_EXPIRY * 2,
48+
)
49+
50+
def clear(self) -> bool:
51+
"""
52+
Clears the flag for the task.
53+
:return: True if the flag was cleared, False otherwise.
54+
"""
55+
56+
return cache.delete(key=self.key)

backend/src/baserow/contrib/database/search/tasks.py

Lines changed: 7 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
from typing import List, Optional
33

44
from django.conf import settings
5-
from django.core.cache import cache
65
from django.db.models import Q
76

87
from celery_singleton import DuplicateTaskError, Singleton
98
from django_cte import With
109
from loguru import logger
1110

11+
from baserow.celery_singleton_backend import SingletonAutoRescheduleFlag
1212
from baserow.config.celery import app
1313
from baserow.contrib.database.search.models import PendingSearchValueUpdate
1414
from baserow.contrib.database.table.exceptions import TableDoesNotExist
@@ -17,53 +17,8 @@
1717
PERIODIC_CHECK_TIME_LIMIT = 60 * PERIODIC_CHECK_MINUTES # 15 minutes.
1818

1919

20-
class PendingSearchUpdateFlag:
21-
"""
22-
Flag is used to indicate that a search data update task is pending for a
23-
specific table and it has not been possible to schedule it yet due to a concurrent
24-
task already running for the same table.
25-
26-
When the task ends, if this flag is set, it will re-schedule itself to ensure that
27-
the search data is eventually updated.
28-
"""
29-
30-
def __init__(self, table_id: int):
31-
self.table_id = table_id
32-
33-
@property
34-
def key(self):
35-
"""
36-
Returns the cache key to use for the table lock.
37-
"""
38-
39-
return f"database_search_data_lock_{self.table_id}"
40-
41-
def get(self):
42-
"""
43-
Gets the lock for the search data update task.
44-
45-
:return: True if the lock is set, False otherwise.
46-
"""
47-
48-
return cache.get(key=self.key)
49-
50-
def set(self):
51-
"""
52-
Sets the lock for the search data update task.
53-
"""
54-
55-
return cache.set(
56-
key=self.key,
57-
value=True,
58-
timeout=settings.AUTO_INDEX_LOCK_EXPIRY * 2,
59-
)
60-
61-
def clear(self):
62-
"""
63-
Clears the lock for the search data update task.
64-
"""
65-
66-
return cache.delete(key=self.key)
20+
def _get_singleton_autoreschedule_flag(table_id: int) -> SingletonAutoRescheduleFlag:
21+
return SingletonAutoRescheduleFlag(f"database_search_data_lock_{table_id}")
6722

6823

6924
@app.task(queue="export")
@@ -114,7 +69,8 @@ def schedule_update_search_data(
11469
# There are new updates pending to be processed, make sure the flag is set
11570
# so the task will be re-scheduled at the end of the current run.
11671
if new_pending_updates:
117-
PendingSearchUpdateFlag(table_id).set()
72+
flag = _get_singleton_autoreschedule_flag(table_id)
73+
flag.set()
11874

11975

12076
@app.task(
@@ -162,13 +118,13 @@ def update_search_data(table_id: int):
162118
SearchHandler.initialize_missing_search_data(table)
163119

164120
# Make sure newer updates will re-schedule this task at the end if needed.
165-
flag = PendingSearchUpdateFlag(table_id)
121+
flag = _get_singleton_autoreschedule_flag(table_id)
166122
flag.clear()
167123

168124
SearchHandler.process_search_data_updates(table)
169125

170126
# If new updates were queued during processing, schedule another update
171-
if flag.get():
127+
if flag.is_set():
172128
logger.debug(
173129
f"New updates detected, rescheduling the task for table {table_id}."
174130
)

backend/src/baserow/core/jobs/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def run_async_job(self, job_id: int):
4646
job.set_state_cancelled()
4747
job.save()
4848
except BaseException as e:
49-
# We also want to catch SystemExit exception here and all other possible
49+
# BaseException allows catching SystemExit exceptions and all other possible
5050
# exceptions to set the job state in a failed state.
5151
error = f"Something went wrong during the {job_type.type} job execution."
5252

backend/tests/baserow/core/test_basic_permissions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def test_workspace_member_permission_manager(data_fixture, django_assert_num_que
243243
perm_manager.check_permissions(
244244
user, ListApplicationsWorkspaceOperationType.type, workspace_2, workspace_2
245245
)
246-
except Exception: # noqa:W0718
246+
except Exception: # noqa
247247
...
248248

249249
with django_assert_num_queries(0):
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"type": "refactor",
3+
"message": "Debounce AI field value generation that has been triggered by auto-update.",
4+
"issue_origin": "github",
5+
"issue_number": 4317,
6+
"domain": "database",
7+
"bullet_points": [],
8+
"created_at": "2025-12-03"
9+
}

docs/installation/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ The installation methods referred to in the variable descriptions are:
150150
| BASEROW\_OLLAMA\_HOST | Provide an OLLAMA host to allow using OLLAMA for generative AI features like the AI field. | |
151151
| BASEROW\_OLLAMA\_MODELS | Provide a comma separated list of Ollama models (https://ollama.com/library) that you would like to enable in the instance (e.g. `llama2`). Note that this only works if an Ollama host is set. If this variable is not provided, the user won't be able to choose a model. | |
152152
| BASEROW\_AI\_FIELD\_MAX\_CONCURRENT\_GENERATIONS | If AI field values are recalculated in a large number (i.e. recalculating whole table, empty rows, or a selection of rows), this controls the number of concurrent requests issued to AI model to generate values. | 5 |
153+
| BASEROW\_AI\_FIELD\_AUTO\_UPDATE\_DEBOUNCE\_TIME | Debounce time in seconds for AI field updates scheduled by the auto-update feature. If an AI field has the auto-update feature enabled and many changes occur on fields that are referenced by that AI field, this will delay AI field generation by this number of seconds in order to accumulate many short updates into a single larger one. | 3 |
153154

154155
### Backend Misc Configuration
155156
| Name | Description | Defaults |

premium/backend/src/baserow_premium/api/fields/serializers.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,15 @@ class GenerateFormulaWithAIRequestSerializer(serializers.Serializer):
3737

3838
class GenerateFormulaWithAIResponseSerializer(serializers.Serializer):
3939
formula = serializers.CharField(help_text="The formula generated by the AI.")
40+
41+
42+
class GenerateAIValuesJobFiltersSerializer(serializers.Serializer):
43+
"""
44+
Adds the ability to filter GenerateAIValuesJob by AI field ID.
45+
"""
46+
47+
generate_ai_values_field_id = serializers.IntegerField(
48+
min_value=1,
49+
required=False,
50+
help_text="Filter by the AI field ID.",
51+
)

premium/backend/src/baserow_premium/apps.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,5 @@ def ready(self):
263263
)
264264
widget_type_registry.register(ChartWidgetType())
265265
widget_type_registry.register(PieChartWidgetType())
266+
267+
from baserow_premium.fields import tasks # noqa: F401

premium/backend/src/baserow_premium/config/settings/settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,9 @@ def setup(settings):
4040
settings.BASEROW_AI_FIELD_MAX_CONCURRENT_GENERATIONS = try_int(
4141
os.getenv("BASEROW_AI_FIELD_MAX_CONCURRENT_GENERATIONS"), 5
4242
)
43+
44+
# Debounce time for AI field generation, if changes are triggered from
45+
# auto-update feature. In seconds.
46+
settings.BASEROW_AI_FIELD_AUTO_UPDATE_DEBOUNCE_TIME = try_int(
47+
os.getenv("BASEROW_AI_FIELD_AUTO_UPDATE_DEBOUNCE_TIME"), 3
48+
)

0 commit comments

Comments
 (0)