From 1b69168c797ab776520206d6226210a954ce04c7 Mon Sep 17 00:00:00 2001 From: Tsering Paljor Date: Tue, 5 May 2026 08:48:37 +0700 Subject: [PATCH] feat: Introduce min retention days to keep more history entries (#5274) * Introduce min retention days for history to keep more entries for recent workflow runs. * Fix test --- .env.example | 1 + backend/src/baserow/config/settings/base.py | 3 + .../contrib/automation/workflows/handler.py | 17 +++- .../workflows/test_workflow_tasks.py | 82 ++++++++++++++++++- ..._days_for_history_to_keep_more_entrie.json | 9 ++ docker-compose.no-caddy.yml | 2 + docker-compose.yml | 2 + 7 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 changelog/entries/unreleased/refactor/5249_introduce_min_retention_days_for_history_to_keep_more_entrie.json diff --git a/.env.example b/.env.example index 9f16ba315d..3fc42ca322 100644 --- a/.env.example +++ b/.env.example @@ -72,6 +72,7 @@ DATABASE_NAME=baserow # BASEROW_AUTOMATION_WORKFLOW_TIMEOUT_HOURS= # BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS= # BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES= +# BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS= # BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES= # BASEROW_EXTRA_ALLOWED_HOSTS= # ADDITIONAL_APPS= diff --git a/backend/src/baserow/config/settings/base.py b/backend/src/baserow/config/settings/base.py index 8d63049d44..b29d2d61bd 100644 --- a/backend/src/baserow/config/settings/base.py +++ b/backend/src/baserow/config/settings/base.py @@ -947,6 +947,9 @@ def __setitem__(self, key, value): AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES = int( os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES", 200) ) +AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS = int( + os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS", 2) +) AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES = int( os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES", 60) ) diff --git a/backend/src/baserow/contrib/automation/workflows/handler.py b/backend/src/baserow/contrib/automation/workflows/handler.py index 7fc1803ad4..7008b5717a 100644 --- a/backend/src/baserow/contrib/automation/workflows/handler.py +++ b/backend/src/baserow/contrib/automation/workflows/handler.py @@ -859,7 +859,10 @@ def clear_old_history(self) -> None: Clears any old history entries across all workflows. It will delete any history entries that are older than MAX_HISTORY_DAYS - and only keep the most recent MAX_HISTORY_ENTRIES entries. + and only keep the most recent MAX_HISTORY_ENTRIES entries, but only for + entries older than MIN_RETENTION_DAYS. This ensures that recent history + is always preserved for investigation, even when a workflow is misbehaving + (e.g. running in a tight loop) and the MAX_ENTRIES limit is exceeded. """ # Delete all history entries older than max days @@ -870,8 +873,14 @@ def clear_old_history(self) -> None: status=HistoryStatusChoices.STARTED ).filter(started_on__lt=oldest_history_date).delete() - # Delete all history entries older than max entries + # Delete history entries beyond max entries, but only if they are also + # older than min retention days. Entries within the retention window are + # always kept regardless of count, so that recent history is available + # for investigation even when a workflow runs excessively. max_entries = settings.AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES + recent_threshold = timezone.now() - timedelta( + days=settings.AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS + ) cutoff_date = Subquery( AutomationWorkflowHistory.objects.filter( original_workflow_id=OuterRef("original_workflow_id") @@ -884,7 +893,9 @@ def clear_old_history(self) -> None: ) AutomationWorkflowHistory.objects.exclude( status=HistoryStatusChoices.STARTED - ).filter(started_on__lt=cutoff_date).delete() + ).filter( + Q(started_on__lt=cutoff_date) & Q(started_on__lt=recent_threshold), + ).delete() # Clean up published automations that no longer have any history entries empty_published = ( diff --git a/backend/tests/baserow/contrib/automation/workflows/test_workflow_tasks.py b/backend/tests/baserow/contrib/automation/workflows/test_workflow_tasks.py index b6f804dc00..ebed5fc5f7 100644 --- a/backend/tests/baserow/contrib/automation/workflows/test_workflow_tasks.py +++ b/backend/tests/baserow/contrib/automation/workflows/test_workflow_tasks.py @@ -12,7 +12,10 @@ from baserow.core.cache import global_cache -@override_settings(AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES=2) +@override_settings( + AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES=2, + AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS=0, +) @pytest.mark.django_db def test_automation_periodic_cleanup_keeps_max_entries_per_workflow(data_fixture): workflow_a = data_fixture.create_automation_workflow() @@ -152,6 +155,7 @@ def test_automation_periodic_cleanup_deletes_entries_older_than_max_days(data_fi @override_settings( AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS=2, AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES=2, + AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS=0, ) @pytest.mark.django_db def test_automation_periodic_cleanup_keeps_entries_within_both_limits(data_fixture): @@ -273,3 +277,79 @@ def test_automation_periodic_cleanup_max_entries_with_different_clones(data_fixt automation_periodic_cleanup() assert original_workflow.workflow_histories.count() == 2 + + +@override_settings( + AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES=2, + AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS=2, +) +@pytest.mark.django_db +def test_automation_periodic_cleanup_recent_entries_protected_from_count_cleanup( + data_fixture, +): + """ + Entries within MIN_RETENTION_DAYS are never deleted by MAX_ENTRIES, even when + the workflow has more entries than the limit. This ensures that history from a + misbehaving workflow (e.g. a tight loop) is preserved for investigation. + """ + + workflow = data_fixture.create_automation_workflow() + + with freeze_time("2026-04-29 10:00:00"): + history_1 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + with freeze_time("2026-04-29 11:00:00"): + history_2 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + with freeze_time("2026-04-29 12:00:00"): + history_3 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + + # Run cleanup on the same day — all 3 entries are within MIN_RETENTION_DAYS=2, + # so none should be deleted despite exceeding MAX_ENTRIES=2. + with freeze_time("2026-04-29 13:00:00"): + automation_periodic_cleanup() + + assert workflow.workflow_histories.filter(id=history_1.id).exists() + assert workflow.workflow_histories.filter(id=history_2.id).exists() + assert workflow.workflow_histories.filter(id=history_3.id).exists() + + +@override_settings( + AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES=2, + AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS=2, + # Disable timeout so STARTED entries from old dates aren't marked as errors. + AUTOMATION_WORKFLOW_TIMEOUT_HOURS=9999, +) +@pytest.mark.django_db +def test_automation_periodic_cleanup_old_entries_beyond_count_are_deleted(data_fixture): + """ + Entries older than MIN_RETENTION_DAYS that exceed MAX_ENTRIES are deleted normally. + """ + + workflow = data_fixture.create_automation_workflow() + + with freeze_time("2026-04-25 10:00:00"): + history_1 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + with freeze_time("2026-04-25 11:00:00"): + history_2 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + with freeze_time("2026-04-25 12:00:00"): + history_3 = data_fixture.create_automation_workflow_history( + workflow=workflow, status=HistoryStatusChoices.SUCCESS + ) + + # Run cleanup 4 days later — all entries are older than MIN_RETENTION_DAYS=2, + # so the oldest entry should be deleted to enforce MAX_ENTRIES=2. + with freeze_time("2026-04-29 12:00:00"): + automation_periodic_cleanup() + + assert not workflow.workflow_histories.filter(id=history_1.id).exists() + assert workflow.workflow_histories.filter(id=history_2.id).exists() + assert workflow.workflow_histories.filter(id=history_3.id).exists() diff --git a/changelog/entries/unreleased/refactor/5249_introduce_min_retention_days_for_history_to_keep_more_entrie.json b/changelog/entries/unreleased/refactor/5249_introduce_min_retention_days_for_history_to_keep_more_entrie.json new file mode 100644 index 0000000000..f7612da79c --- /dev/null +++ b/changelog/entries/unreleased/refactor/5249_introduce_min_retention_days_for_history_to_keep_more_entrie.json @@ -0,0 +1,9 @@ +{ + "type": "refactor", + "message": "Introduce min retention days for history to keep more entries for recent workflow runs.", + "issue_origin": "github", + "issue_number": 5249, + "domain": "automation", + "bullet_points": [], + "created_at": "2026-04-29" +} \ No newline at end of file diff --git a/docker-compose.no-caddy.yml b/docker-compose.no-caddy.yml index ccdfa56767..f235566e0a 100644 --- a/docker-compose.no-caddy.yml +++ b/docker-compose.no-caddy.yml @@ -96,6 +96,7 @@ x-backend-variables: BASEROW_AUTOMATION_WORKFLOW_TIMEOUT_HOURS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES: + BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES: BASEROW_EXTRA_ALLOWED_HOSTS: @@ -265,6 +266,7 @@ services: BASEROW_AUTOMATION_WORKFLOW_TIMEOUT_HOURS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES: + BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES: depends_on: - backend diff --git a/docker-compose.yml b/docker-compose.yml index 2236937a4e..1ee0859859 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -110,6 +110,7 @@ x-backend-variables: BASEROW_AUTOMATION_WORKFLOW_TIMEOUT_HOURS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES: + BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES: BASEROW_EXTRA_ALLOWED_HOSTS: @@ -349,6 +350,7 @@ services: BASEROW_AUTOMATION_WORKFLOW_TIMEOUT_HOURS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES: + BASEROW_AUTOMATION_WORKFLOW_HISTORY_MIN_RETENTION_DAYS: BASEROW_AUTOMATION_WORKFLOW_HISTORY_CLEANUP_INTERVAL_MINUTES: BASEROW_INTEGRATIONS_PERIODIC_MINUTE_MIN: BASEROW_ENTERPRISE_ASSISTANT_LLM_MODEL: