Skip to content

Commit d9acf4f

Browse files
authored
refactor: use approximate count for admin pagination (baserow#5119)
1 parent ee1371f commit d9acf4f

11 files changed

Lines changed: 114 additions & 16 deletions

File tree

backend/src/baserow/api/admin/users/views.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
)
2424
from baserow.api.admin.views import AdminListingView
2525
from baserow.api.decorators import map_exceptions, validate_body
26+
from baserow.api.pagination import PageNumberPaginationWithApproximateCount
2627
from baserow.api.schemas import get_error_schema
2728
from baserow.api.user.registries import member_data_registry
2829
from baserow.api.user.schemas import authenticate_user_schema
@@ -43,6 +44,7 @@
4344

4445
class UsersAdminView(AdminListingView):
4546
serializer_class = UserAdminResponseSerializer
47+
pagination_class = PageNumberPaginationWithApproximateCount
4648
search_fields = ["id", "username", "first_name"]
4749
sort_field_mapping = {
4850
"id": "id",

backend/src/baserow/api/admin/views.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class APIListingView(
3030
APIView, SearchableViewMixin, SortableViewMixin, FilterableViewMixin
3131
):
3232
serializer_class = None
33+
pagination_class = PageNumberPagination
3334
search_fields: List[str] = ["id"]
3435
filters_field_mapping: Dict[str, str] = {}
3536
sort_field_mapping: Dict[str, str] = {}
@@ -56,7 +57,7 @@ def get(self, request):
5657
queryset = self.apply_sorts_or_default_sort(sorts, queryset)
5758
queryset = self.apply_ids_filter(ids_param, queryset)
5859

59-
paginator = PageNumberPagination(limit_page_size=100)
60+
paginator = self.pagination_class(limit_page_size=100)
6061
page = paginator.paginate_queryset(queryset, request, self)
6162
serializer = self.get_serializer(request, page, many=True)
6263

backend/src/baserow/api/admin/workspaces/views.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from baserow.api.admin.views import AdminListingView, APIListingView
1111
from baserow.api.decorators import map_exceptions
1212
from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST
13+
from baserow.api.pagination import PageNumberPaginationWithApproximateCount
1314
from baserow.api.schemas import get_error_schema
1415
from baserow.core.admin.workspaces.exceptions import CannotDeleteATemplateGroupError
1516
from baserow.core.admin.workspaces.handler import WorkspacesAdminHandler
@@ -27,6 +28,7 @@
2728

2829
class WorkspacesAdminView(AdminListingView):
2930
serializer_class = WorkspacesAdminResponseSerializer
31+
pagination_class = PageNumberPaginationWithApproximateCount
3032
search_fields = ["id", "name"]
3133
sort_field_mapping = {
3234
"id": "id",

backend/src/baserow/api/pagination.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from functools import cached_property
12
from typing import Protocol
23

34
from django.core.paginator import Paginator as DjangoPaginator
@@ -12,6 +13,8 @@
1213
from rest_framework.response import Response
1314
from rest_framework.status import HTTP_400_BAD_REQUEST
1415

16+
from baserow.core.db import get_approximate_row_count
17+
1518

1619
class Pageable(Protocol):
1720
def paginate_queryset(self, queryset, request, view=None):
@@ -150,3 +153,24 @@ def get_paginated_response_schema(self, schema):
150153
"results": schema,
151154
},
152155
}
156+
157+
158+
class ApproximateCountPaginator(Paginator):
    """
    Paginator variant whose total row count is obtained through
    `get_approximate_row_count` (a Postgres EXPLAIN based estimate) rather
    than by running an expensive COUNT(*) query.
    """

    @cached_property
    def count(self):
        """
        The row count reported for the paginated object list. It is computed
        on first access and cached on the instance afterwards.
        """

        rows = self.object_list
        return get_approximate_row_count(rows)
167+
168+
169+
class PageNumberPaginationWithApproximateCount(PageNumberPagination):
    """
    Page number pagination whose total comes from a Postgres EXPLAIN estimate
    rather than COUNT(*). Intended for large tables, such as audit logs, where
    the total does not have to be exact.
    """

    # Swap in the paginator that reports the approximate count.
    django_paginator_class = ApproximateCountPaginator

backend/src/baserow/core/db.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import contextlib
2+
import json
23
import random
34
import time
45
from collections import defaultdict
@@ -20,7 +21,12 @@
2021

2122
from django.conf import settings
2223
from django.contrib.contenttypes.models import ContentType
23-
from django.db import DEFAULT_DB_ALIAS, OperationalError, connection, transaction
24+
from django.db import (
25+
DEFAULT_DB_ALIAS,
26+
OperationalError,
27+
connection,
28+
transaction,
29+
)
2430
from django.db.models import ForeignKey, ManyToManyField, Max, Model, Prefetch, QuerySet
2531
from django.db.models.functions import Collate
2632
from django.db.models.query import ModelIterable
@@ -37,6 +43,30 @@
3743
ModelInstance = TypeVar("ModelInstance", bound=object)
3844

3945

46+
# Planner estimates below this number of rows are replaced by an exact COUNT(*).
APPROXIMATE_COUNT_THRESHOLD = 50_000


def get_approximate_row_count(queryset: QuerySet) -> int:
    """
    Uses Postgres EXPLAIN to estimate the row count for the given queryset.
    If the estimate is below APPROXIMATE_COUNT_THRESHOLD, falls back to an
    exact COUNT(*) since the cost is negligible for small result sets and
    the planner estimate is unreliable at that scale.

    The exact COUNT(*) is also used when the EXPLAIN output is empty or does
    not have the expected shape, so that a missing estimate never turns into
    an error for the caller.

    :param queryset: The queryset to estimate the row count for.
    :return: The planner's row estimate when it is at least
        APPROXIMATE_COUNT_THRESHOLD, otherwise the exact row count.
    """

    # The ordering does not change how many rows match, so it is removed
    # before running both the EXPLAIN and the exact count.
    queryset = queryset.order_by()

    try:
        plan = json.loads(queryset.explain(format="json"))
        estimate = int(plan[0]["Plan"]["Plan Rows"])
    except (IndexError, KeyError, TypeError, ValueError):
        # No usable plan was returned (empty output, invalid JSON or an
        # unexpected structure). Database errors are deliberately not caught
        # here. NOTE(review): querysets that Django already knows to be empty
        # may produce no plan at all, confirm against the Django version in
        # use.
        return queryset.count()

    if estimate < APPROXIMATE_COUNT_THRESHOLD:
        return queryset.count()

    return estimate
68+
69+
4070
def get_database_dsn() -> str:
4171
"""
4272
Constructs the database DSN from the default database settings.

backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class as the list users endpoint which already has extensive tests. We only need
5252
)
5353
assert response.status_code == HTTP_403_FORBIDDEN
5454

55-
with django_assert_num_queries(5):
55+
with django_assert_num_queries(6):
5656
response = api_client.get(
5757
reverse("api:admin:workspaces:list"),
5858
format="json",

backend/tests/baserow/api/admin/users/test_users_admin_views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,7 @@ def test_admin_getting_view_users_only_runs_two_queries_instead_of_n(
771771
first_name="Test1",
772772
is_staff=True,
773773
)
774-
fixed_num_of_queries_unrelated_to_number_of_rows = 6
774+
fixed_num_of_queries_unrelated_to_number_of_rows = 7
775775

776776
for i in range(10):
777777
data_fixture.create_user_workspace()

backend/tests/baserow/contrib/database/api/rows/test_row_serializers.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -521,12 +521,10 @@ def test_get_row_serializer_with_user_field_names(
521521
"value": "E",
522522
},
523523
],
524-
"formula_multiple_collaborators": unordered(
525-
[
526-
{"id": u2.id, "name": u2.first_name},
527-
{"id": u3.id, "name": u3.first_name},
528-
]
529-
),
524+
"formula_multiple_collaborators": [
525+
{"id": u2.id, "name": u2.first_name},
526+
{"id": u3.id, "name": u3.first_name},
527+
],
530528
"formula_text": "test FORMULA",
531529
"count": "3",
532530
"rollup": "-122.222",
@@ -540,12 +538,10 @@ def test_get_row_serializer_with_user_field_names(
540538
"multiple_collaborators_lookup": [
541539
{
542540
"id": 1,
543-
"value": unordered(
544-
[
545-
{"id": u2.id, "name": u2.first_name},
546-
{"id": u3.id, "name": u3.first_name},
547-
]
548-
),
541+
"value": [
542+
{"id": u2.id, "name": u2.first_name},
543+
{"id": u3.id, "name": u3.first_name},
544+
],
549545
},
550546
{
551547
"id": 2,
@@ -573,6 +569,12 @@ def test_get_row_serializer_with_user_field_names(
573569
)
574570
)
575571
test_result = json.loads(json.dumps(serializer_instance.data[0]))
572+
expected_result["formula_multiple_collaborators"] = unordered(
573+
expected_result["formula_multiple_collaborators"]
574+
)
575+
expected_result["multiple_collaborators_lookup"][0]["value"] = unordered(
576+
expected_result["multiple_collaborators_lookup"][0]["value"]
577+
)
576578
assert test_result == expected_result
577579

578580

backend/tests/baserow/core/test_core_db.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
LockedAtomicTransaction,
2929
MultiFieldPrefetchQuerysetMixin,
3030
QuerySet,
31+
get_approximate_row_count,
3132
specific_iterator,
3233
specific_queryset,
3334
)
@@ -701,3 +702,28 @@ def test_specific_iterator_skip_missing_specific_objects(data_fixture):
701702
mock_logger.error.assert_called_once_with(
702703
f"The specific object with id {field_without_specific.id} does not exist."
703704
)
705+
706+
707+
@pytest.mark.django_db
def test_get_approximate_row_count_falls_back_to_exact_for_small_tables():
    """
    The value returned for an unfiltered Workspace queryset must equal the
    exact count of that same queryset.
    """

    workspaces = Workspace.objects.all()
    approximate = get_approximate_row_count(workspaces)
    exact = workspaces.count()
    assert approximate == exact
712+
713+
714+
@pytest.mark.django_db
def test_get_approximate_row_count_returns_estimate_above_threshold():
    """
    With the threshold patched to 0, the call must still return a
    non-negative integer.
    """

    all_workspaces = Workspace.objects.all()
    with patch("baserow.core.db.APPROXIMATE_COUNT_THRESHOLD", 0):
        result = get_approximate_row_count(all_workspaces)

    assert isinstance(result, int)
    assert result >= 0
721+
722+
723+
@pytest.mark.django_db
def test_get_approximate_row_count_works_with_filtered_queryset(data_fixture):
    """
    Two workspaces sharing a name prefix are created; counting the queryset
    filtered on that prefix must give exactly 2.
    """

    for workspace_name in ("test_ws_1", "test_ws_2"):
        data_fixture.create_workspace(name=workspace_name)

    matching = Workspace.objects.filter(name__startswith="test_ws_")
    assert get_approximate_row_count(matching) == 2
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"type": "refactor",
3+
"message": "Use Postgres EXPLAIN-based approximate count for audit log pagination to avoid expensive COUNT(*) on large tables.",
4+
"issue_origin": "github",
5+
"issue_number": null,
6+
"domain": "core",
7+
"bullet_points": [],
8+
"created_at": "2026-04-02"
9+
}

0 commit comments

Comments
 (0)