Skip to content

Commit 8383f02

Browse files
authored
chore: improve workflow rate limiting (baserow#5178)
1 parent e4fa67f commit 8383f02

7 files changed

Lines changed: 216 additions & 132 deletions

File tree

backend/src/baserow/config/settings/base.py

Lines changed: 40 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -829,16 +829,50 @@ def __setitem__(self, key, value):
829829
AUTOMATION_HISTORY_PAGE_SIZE_LIMIT = int(
830830
os.getenv("BASEROW_AUTOMATION_HISTORY_PAGE_SIZE_LIMIT", 100)
831831
)
832-
AUTOMATION_WORKFLOW_RATE_LIMIT_MAX_RUNS = int(
833-
os.getenv("BASEROW_AUTOMATION_WORKFLOW_RATE_LIMIT_MAX_RUNS", 10)
832+
_legacy_workflow_rate_limit_max_runs = os.getenv(
833+
"BASEROW_AUTOMATION_WORKFLOW_RATE_LIMIT_MAX_RUNS"
834834
)
835-
AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS = int(
836-
os.getenv("BASEROW_AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS", 5)
835+
_legacy_workflow_rate_limit_window_seconds = os.getenv(
836+
"BASEROW_AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS"
837+
)
838+
_automation_workflow_rate_limits_env = os.getenv(
839+
"BASEROW_AUTOMATION_WORKFLOW_RATE_LIMITS"
840+
)
841+
842+
if _automation_workflow_rate_limits_env is not None:
843+
_automation_workflow_rate_limit_values = [
844+
int(value.strip())
845+
for value in _automation_workflow_rate_limits_env.split(",")
846+
if value.strip()
847+
]
848+
elif (
849+
_legacy_workflow_rate_limit_max_runs is not None
850+
or _legacy_workflow_rate_limit_window_seconds is not None
851+
):
852+
_automation_workflow_rate_limit_values = [
853+
int(_legacy_workflow_rate_limit_max_runs or 10),
854+
int(_legacy_workflow_rate_limit_window_seconds or 5),
855+
]
856+
else:
857+
_automation_workflow_rate_limit_values = [10, 5, 30, 60 * 5, 100, 60 * 60]
858+
859+
if len(_automation_workflow_rate_limit_values) % 2 != 0:
860+
raise ImproperlyConfigured(
861+
"BASEROW_AUTOMATION_WORKFLOW_RATE_LIMITS must contain an even number of "
862+
"comma-separated integers formatted as max_runs,window_seconds pairs."
863+
)
864+
865+
AUTOMATION_WORKFLOW_RATE_LIMITS = tuple(
866+
(
867+
_automation_workflow_rate_limit_values[index],
868+
_automation_workflow_rate_limit_values[index + 1],
869+
)
870+
for index in range(0, len(_automation_workflow_rate_limit_values), 2)
837871
)
838872
AUTOMATION_WORKFLOW_HISTORY_RATE_LIMIT_CACHE_EXPIRY_SECONDS = int(
839873
os.getenv(
840874
"BASEROW_AUTOMATION_WORKFLOW_HISTORY_RATE_LIMIT_CACHE_EXPIRY_SECONDS",
841-
AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS,
875+
_legacy_workflow_rate_limit_window_seconds or 5,
842876
)
843877
)
844878
AUTOMATION_WORKFLOW_MAX_CONSECUTIVE_ERRORS = int(
@@ -851,7 +885,7 @@ def __setitem__(self, key, value):
851885
os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_DAYS", 30)
852886
)
853887
AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES = int(
854-
os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES", 50)
888+
os.getenv("BASEROW_AUTOMATION_WORKFLOW_HISTORY_MAX_ENTRIES", 200)
855889
)
856890

857891
TRASH_PAGE_SIZE_LIMIT = 200 # How many trash entries can be requested at once.

backend/src/baserow/contrib/automation/workflows/handler.py

Lines changed: 42 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
from collections import defaultdict
2-
from datetime import datetime, timedelta
2+
from datetime import timedelta
33
from typing import Any, Dict, List, Optional
44
from zipfile import ZipFile
55

66
from django.conf import settings
77
from django.contrib.auth.models import AbstractUser
88
from django.core.files.storage import Storage
99
from django.db import transaction
10-
from django.db.models import QuerySet
10+
from django.db.models import Q, QuerySet
1111
from django.utils import timezone
1212

1313
from celery.canvas import Signature, chain
@@ -62,7 +62,6 @@
6262
find_unused_name,
6363
)
6464

65-
WORKFLOW_RATE_LIMIT_CACHE_PREFIX = "automation_workflow_{}"
6665
WORKFLOW_HISTORY_RATE_LIMIT_CACHE_PREFIX = "automation_workflow_history_{}"
6766
AUTOMATION_WORKFLOW_CACHE_LOCK_SECONDS = 5
6867

@@ -134,7 +133,6 @@ def _invalidate_workflow_caches(self, workflow: AutomationWorkflow) -> None:
134133
original_workflow = workflow.get_original()
135134

136135
global_cache.invalidate(f"wa_published_workflow_{original_workflow.id}")
137-
global_cache.invalidate(self._get_rate_limit_cache_key(original_workflow))
138136
global_cache.invalidate(
139137
self._get_workflow_history_rate_limit_cache_key(original_workflow)
140138
)
@@ -804,75 +802,65 @@ def _mark_failure_for_timed_out_history(
804802
message="This workflow took too long and was timed out.",
805803
)
806804

807-
def _get_rate_limit_cache_key(self, original_workflow: AutomationWorkflow) -> str:
808-
return WORKFLOW_RATE_LIMIT_CACHE_PREFIX.format(original_workflow.id)
809-
810805
def _get_workflow_history_rate_limit_cache_key(
811806
self, original_workflow: AutomationWorkflow
812807
) -> str:
813808
return WORKFLOW_HISTORY_RATE_LIMIT_CACHE_PREFIX.format(original_workflow.id)
814809

815810
def _get_histories_for_current_workflow_version(self, workflow: AutomationWorkflow):
816-
histories = AutomationHistoryHandler().get_workflow_histories(
817-
workflow.get_original()
818-
)
811+
original_workflow = workflow.get_original()
812+
histories = AutomationHistoryHandler().get_workflow_histories(original_workflow)
819813

820-
if workflow != workflow.get_original():
814+
if workflow != original_workflow:
821815
histories = histories.filter(started_on__gte=workflow.created_on)
822816

823817
return histories
824818

825819
def _check_is_rate_limited(self, workflow: AutomationWorkflow) -> bool:
826-
"""Uses a global cache key to track recent runs for the given workflow."""
827-
828-
original_workflow = workflow.get_original()
829-
830-
cache_key = self._get_rate_limit_cache_key(original_workflow)
831-
rate_cache_timeout = (
832-
settings.AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS
833-
)
834-
835-
now = timezone.now()
836-
837-
def update_last_run_cache(previous_last_runs):
838-
"""
839-
Given a list of recent workflow run timestamps, determines whether
840-
the workflow run should be rate limited. If so, raises the
841-
AutomationWorkflowRateLimited error.
842-
"""
843-
start_window = now - timedelta(
844-
seconds=settings.AUTOMATION_WORKFLOW_RATE_LIMIT_CACHE_EXPIRY_SECONDS
845-
)
820+
"""
821+
Checks workflow histories against the configured rate limit windows.
846822
847-
# Keep only past runs that are in the window
848-
runs_in_window = [
849-
timestamp
850-
for timestamp in previous_last_runs
851-
if isinstance(timestamp, datetime) and timestamp > start_window
852-
]
823+
The histories are fetched once for the largest configured window and each
824+
smaller window is evaluated in Python to avoid issuing one COUNT query per
825+
configured rate limit.
853826
854-
runs_in_window.append(now)
827+
Raises AutomationWorkflowRateLimited when the workflow exceeds one of the
828+
configured rate limits.
829+
"""
855830

856-
return runs_in_window
831+
rate_limits = settings.AUTOMATION_WORKFLOW_RATE_LIMITS
832+
if not rate_limits:
833+
return False
857834

858-
runs_in_window = global_cache.update(
859-
cache_key,
860-
update_last_run_cache,
861-
default_value=lambda: [],
862-
timeout=rate_cache_timeout,
835+
now = timezone.now()
836+
largest_window_seconds = max(
837+
window_seconds for _, window_seconds in rate_limits
863838
)
864-
865-
if len(runs_in_window) > settings.AUTOMATION_WORKFLOW_RATE_LIMIT_MAX_RUNS:
866-
return True
867-
868-
started_workflows = (
839+
oldest_start_window = now - timedelta(seconds=largest_window_seconds)
840+
history_windows = list(
869841
self._get_histories_for_current_workflow_version(workflow)
870-
.filter(status=HistoryStatusChoices.STARTED)
871-
.count()
842+
.filter(
843+
Q(started_on__gte=oldest_start_window)
844+
| Q(status=HistoryStatusChoices.STARTED)
845+
)
846+
.order_by()
847+
.values_list("started_on", "status")
872848
)
873849

874-
if started_workflows > settings.AUTOMATION_WORKFLOW_RATE_LIMIT_MAX_RUNS:
875-
return True
850+
for max_runs, window_seconds in rate_limits:
851+
start_window = now - timedelta(seconds=window_seconds)
852+
if (
853+
sum(
854+
started_on >= start_window or status == HistoryStatusChoices.STARTED
855+
for started_on, status in history_windows
856+
)
857+
>= max_runs
858+
):
859+
raise AutomationWorkflowRateLimited(
860+
"The workflow was rate limited due to too many recent or "
861+
f"unfinished runs. Limit exceeded: {max_runs} runs in "
862+
f"{window_seconds} seconds."
863+
)
876864

877865
return False
878866

@@ -926,12 +914,7 @@ def before_run(self, workflow: AutomationWorkflow) -> None:
926914
"The workflow was disabled due to too many consecutive errors."
927915
)
928916

929-
if self._check_is_rate_limited(workflow):
930-
# Early return if we had too many execution during a short amount of time
931-
raise AutomationWorkflowRateLimited(
932-
"The workflow was rate limited due to too many recent or unfinished "
933-
"runs."
934-
)
917+
self._check_is_rate_limited(workflow)
935918

936919
def async_start_workflow(
937920
self,

0 commit comments

Comments
 (0)