From 4d14594ba53942bec5dac59712c01f55446419c6 Mon Sep 17 00:00:00 2001 From: dimmur-brw Date: Fri, 3 Apr 2026 07:04:21 +0200 Subject: [PATCH 1/5] fix: AutoNumber sequence (#5117) --- .../contrib/database/fields/field_types.py | 13 +++++--- .../field/test_autonumber_field_type.py | 32 +++++++++++++++++++ .../bug/5115_fix_autonumber_sequuence.json | 9 ++++++ 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 changelog/entries/unreleased/bug/5115_fix_autonumber_sequuence.json diff --git a/backend/src/baserow/contrib/database/fields/field_types.py b/backend/src/baserow/contrib/database/fields/field_types.py index fcc210dda0..40876c1dc7 100755 --- a/backend/src/baserow/contrib/database/fields/field_types.py +++ b/backend/src/baserow/contrib/database/fields/field_types.py @@ -7414,12 +7414,17 @@ def create_field_sequence( cursor.execute( f"ALTER SEQUENCE {db_column}_seq OWNED BY {db_table}.{db_column};" ) - # Set the sequence to the count of rows in the table, only if there - # is at least one row. + # Use COALESCE(MAX, COUNT) to set the sequence correctly in all + # cases: MAX handles gaps from deleted rows or imported data, + # COUNT is the fallback when the column is all NULLs (e.g. when + # creating a new autonumber field on an existing table). 
cursor.execute( f""" - WITH count AS (SELECT COUNT(*) FROM {db_table}) - SELECT setval('{db_column}_seq', count) FROM count WHERE count > 0; + WITH seq_val AS ( + SELECT COALESCE(MAX({db_column}), COUNT(*)) AS val + FROM {db_table} + ) + SELECT setval('{db_column}_seq', val) FROM seq_val WHERE val > 0; """ # noqa: S608 ) diff --git a/backend/tests/baserow/contrib/database/field/test_autonumber_field_type.py b/backend/tests/baserow/contrib/database/field/test_autonumber_field_type.py index cc51fb6f82..f8933d3d82 100644 --- a/backend/tests/baserow/contrib/database/field/test_autonumber_field_type.py +++ b/backend/tests/baserow/contrib/database/field/test_autonumber_field_type.py @@ -8,6 +8,7 @@ DeleteFieldActionType, UpdateFieldActionType, ) +from baserow.contrib.database.fields.field_types import AutonumberFieldType from baserow.contrib.database.fields.handler import FieldHandler from baserow.contrib.database.rows.handler import RowHandler from baserow.contrib.database.table.handler import TableHandler @@ -678,3 +679,34 @@ def test_autonumber_field_can_be_looked_up(data_fixture): ) assert getattr(row, f"field_{formula_field.id}") == 3 + + +@pytest.mark.field_autonumber +@pytest.mark.django_db +def test_autonumber_sequence_uses_max_value_not_row_count(data_fixture): + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + autonumber_field = data_fixture.create_autonumber_field( + table=table, name="autonumber" + ) + + model = table.get_model() + + rows = [model.objects.create() for _ in range(5)] + + for row in rows[:3]: + row.delete() + + db_column = f"field_{autonumber_field.id}" + with connection.cursor() as cursor: + cursor.execute( + f"UPDATE {model._meta.db_table} SET {db_column} = 100 WHERE id = %s", + [rows[4].id], + ) + + field_type = AutonumberFieldType() + field_type.create_field_sequence(autonumber_field, model, connection) + + new_row = model.objects.create() + new_row.refresh_from_db() + assert getattr(new_row, 
db_column) == 101 diff --git a/changelog/entries/unreleased/bug/5115_fix_autonumber_sequuence.json b/changelog/entries/unreleased/bug/5115_fix_autonumber_sequuence.json new file mode 100644 index 0000000000..62a3eb11cb --- /dev/null +++ b/changelog/entries/unreleased/bug/5115_fix_autonumber_sequuence.json @@ -0,0 +1,9 @@ +{ + "type": "bug", + "message": "Fix autonumber sequence", + "issue_origin": "github", + "issue_number": 5115, + "domain": "database", + "bullet_points": [], + "created_at": "2026-04-02" +} \ No newline at end of file From 3670bf42c1ce88141f94a9ef9d0488af2f53fafe Mon Sep 17 00:00:00 2001 From: Davide Silvestri <75379892+silvestrid@users.noreply.github.com> Date: Fri, 3 Apr 2026 08:43:12 +0200 Subject: [PATCH 2/5] feat (database): add `array_unique` db formula function (#2326) (#5056) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add ARRAYUNIQUE() database formula function (#2326) Add array_unique() formula that removes duplicate values from lookup arrays, preserving first-occurrence order. Mirrors Airtable's ARRAYUNIQUE() — operates on true arrays from lookup fields, not plain text. Supports text, number, boolean, date, duration, URL, email, phone, rating, single select, multiple select, link row, created_on, and autonumber sub-types. Rejects file fields (different JSONB structure). * feat: enable count() and join() composability with array_unique() Add internal non-aggregate functions (array_length, array_join_values) that operate on JSONB arrays directly, and wire them into count() and join() so that count(array_unique(...)) and join(array_unique(...), sep) work. The existing many-expression paths remain unchanged. * docs: add array_unique to formula docs, mark internals as exceptions Add array_unique to formula_docs.md so the formula prompt completeness tests pass. Add array_length and array_join_values (internal delegation targets) to the formula_exceptions lists in both test files. 
* chore: ensure tests also work for formulas referencing different field types --- .../database/formula/ast/function_defs.py | 80 ++- .../django_expressions.py | 44 ++ .../test_array_unique_composability.py | 337 +++++++++++ .../formula/test_baserow_formula_results.py | 533 ++++++++++++++++++ .../feature/2326_array_unique_formula.json | 8 + .../test_assistant_database_table_tools.py | 2 + .../baserow_premium/prompts/formula_docs.md | 3 +- .../fields/test_generate_formula_prompt.py | 2 + web-frontend/locales/en.json | 3 +- .../modules/database/formula/functions.js | 23 + web-frontend/modules/database/plugin.js | 2 + .../database/formula/formulaFunctions.spec.js | 1 + 12 files changed, 1035 insertions(+), 3 deletions(-) create mode 100644 backend/tests/baserow/contrib/database/formula/test_array_unique_composability.py create mode 100644 changelog/entries/unreleased/feature/2326_array_unique_formula.json diff --git a/backend/src/baserow/contrib/database/formula/ast/function_defs.py b/backend/src/baserow/contrib/database/formula/ast/function_defs.py index 714790016f..f061cddb95 100644 --- a/backend/src/baserow/contrib/database/formula/ast/function_defs.py +++ b/backend/src/baserow/contrib/database/formula/ast/function_defs.py @@ -84,6 +84,8 @@ GreaterThanExpr, GreaterThanOrEqualExpr, IsNullExpr, + JSONBArrayJoinValues, + JSONBArrayUniqueByValue, LessThanEqualOrExpr, LessThanExpr, NotEqualsExpr, @@ -257,6 +259,10 @@ def register_formula_functions(registry): registry.register(BaserowArrayAggNoNesting()) registry.register(BaserowGetFileCount()) registry.register(BaserowToURL()) + # Array utility functions + registry.register(BaserowArrayUnique()) + registry.register(BaserowArrayLength()) + registry.register(BaserowArrayJoinValues()) # ManyToMany functions registry.register(BaserowStringAggManyToManyValues()) registry.register(BaserowManyToManyCount()) @@ -2401,6 +2407,7 @@ class BaserowCount(OneArgumentBaserowFunction): MustBeManyExprChecker(BaserowFormulaValidType), 
BaserowFormulaMultipleSelectType, BaserowFormulaMultipleCollaboratorsType, + BaserowFormulaArrayType, ] aggregate = True try_coerce_nullable_args_to_not_null = False @@ -2413,6 +2420,9 @@ def type_function( if BaserowGetFileCount().can_accept_arg(arg): return BaserowGetFileCount()(arg) + if isinstance(arg.expression_type, BaserowFormulaArrayType): + return BaserowArrayLength()(arg) + return arg.expression_type.count(func_call, arg).with_valid_type( BaserowFormulaNumberType(number_decimal_places=0) ) @@ -2458,6 +2468,72 @@ def to_django_expression(self, arg: Expression) -> Expression: ) +class BaserowArrayUnique(OneArgumentBaserowFunction): + type = "array_unique" + arg_type = [BaserowFormulaValidType] + + def type_function( + self, + func_call: BaserowFunctionCall[UnTyped], + arg: BaserowExpression[BaserowFormulaValidType], + ) -> BaserowExpression[BaserowFormulaType]: + # When referencing a lookup field, unwrap_at_field_level converts it + # back to a "many" expression. Collapse it to an array first. + if arg.many: + arg = arg.expression_type.collapse_many(arg) + + if not isinstance(arg.expression_type, BaserowFormulaArrayType): + return func_call.with_invalid_type( + "array_unique requires an array field as input." + ) + + sub_type = arg.expression_type.sub_type + if not sub_type.item_is_in_nested_value_object_when_in_array: + return func_call.with_invalid_type( + "array_unique does not support file fields." 
+ ) + return func_call.with_args([arg]).with_valid_type(arg.expression_type) + + def to_django_expression(self, arg: Expression) -> Expression: + return JSONBArrayUniqueByValue(arg) + + +class BaserowArrayLength(OneArgumentBaserowFunction): + type = "array_length" + arg_type = [BaserowFormulaArrayType] + + def type_function( + self, + func_call: BaserowFunctionCall[UnTyped], + arg: BaserowExpression[BaserowFormulaValidType], + ) -> BaserowExpression[BaserowFormulaType]: + return func_call.with_valid_type( + BaserowFormulaNumberType(number_decimal_places=0) + ) + + def to_django_expression(self, arg: Expression) -> Expression: + return Func( + arg, function="jsonb_array_length", output_field=fields.IntegerField() + ) + + +class BaserowArrayJoinValues(TwoArgumentBaserowFunction): + type = "array_join_values" + arg1_type = [BaserowFormulaArrayType] + arg2_type = [BaserowFormulaTextType] + + def type_function( + self, + func_call: BaserowFunctionCall[UnTyped], + arg1: BaserowExpression[BaserowFormulaValidType], + arg2: BaserowExpression[BaserowFormulaValidType], + ) -> BaserowExpression[BaserowFormulaType]: + return func_call.with_valid_type(BaserowFormulaTextType()) + + def to_django_expression(self, arg1: Expression, arg2: Expression) -> Expression: + return JSONBArrayJoinValues(arg1, arg2) + + class BaserowFilter(TwoArgumentBaserowFunction): type = "filter" arg1_type = [BaserowFormulaValidType] @@ -2663,7 +2739,7 @@ def to_django_expression(self, arg: Expression) -> Expression: class BaserowAggJoin(TwoArgumentBaserowFunction): type = "join" - arg1_type = [MustBeManyExprChecker(BaserowFormulaTextType)] + arg1_type = [MustBeManyExprChecker(BaserowFormulaTextType), BaserowFormulaArrayType] arg2_type = [BaserowFormulaTextType] aggregate = True @@ -2673,6 +2749,8 @@ def type_function( arg1: BaserowExpression[BaserowFormulaValidType], arg2: BaserowExpression[BaserowFormulaValidType], ) -> BaserowExpression[BaserowFormulaType]: + if isinstance(arg1.expression_type, 
BaserowFormulaArrayType): + return BaserowArrayJoinValues()(arg1, arg2) return func_call.with_valid_type(BaserowFormulaTextType()) def to_django_expression(self, arg1: Expression, arg2: Expression) -> Expression: diff --git a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py index 994ab22a7a..ceca4f699f 100644 --- a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py +++ b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py @@ -119,6 +119,50 @@ def as_postgresql(self, compiler, connection, **extra_context): ) +class JSONBArrayUniqueByValue(Func): + """ + Dedup a JSONB array by the 'value' key of each element, preserving + first-occurrence order. For arrays with elements like + {"id": row_id, "value": actual_value}. + """ + + template = ( + "(SELECT COALESCE(jsonb_agg(sub.elem ORDER BY sub.rn), '[]'::jsonb) " + "FROM (SELECT DISTINCT ON (t.elem -> 'value') t.elem, t.rn " + "FROM jsonb_array_elements(%(expressions)s) WITH ORDINALITY AS t(elem, rn) " + "ORDER BY t.elem -> 'value', t.rn) sub)" + ) + output_field = JSONField() + + +class JSONBArrayJoinValues(Func): + """ + Extract the 'value' text from each element of a JSONB array and join them + with a separator, preserving the original array order. 
+ """ + + function = "jsonb_array_join_values" + template = ( + "(SELECT COALESCE(string_agg(t.elem->>'value', %(separator)s ORDER BY t.rn), '') " + "FROM jsonb_array_elements(%(expressions)s) WITH ORDINALITY AS t(elem, rn))" + ) + output_field: typing.ClassVar[Field] = None # set in __init__ + + def __init__(self, expression, separator, **extra): + from django.db.models import fields as model_fields + + super().__init__(expression, output_field=model_fields.TextField(), **extra) + self.separator = separator + + def as_sql(self, compiler, connection, **extra_context): + separator_sql, separator_params = compiler.compile(self.separator) + extra_context["separator"] = separator_sql + sql, params = super().as_sql(compiler, connection, **extra_context) + # separator appears before %(expressions)s in the template, + # so its params must come first + return sql, (*separator_params, *params) + + class BaserowFilterExpression(Expression): """ Baserow expression that works with field_name and value diff --git a/backend/tests/baserow/contrib/database/formula/test_array_unique_composability.py b/backend/tests/baserow/contrib/database/formula/test_array_unique_composability.py new file mode 100644 index 0000000000..ebee303b88 --- /dev/null +++ b/backend/tests/baserow/contrib/database/formula/test_array_unique_composability.py @@ -0,0 +1,337 @@ +""" +Tests for array_unique composability with other formula functions +(count, join, has_option). 
+""" + +import pytest + +from baserow.contrib.database.fields.handler import FieldHandler +from baserow.contrib.database.rows.handler import RowHandler + + +def _setup_text_lookup(data_fixture): + """Create a text lookup with duplicates: apple, banana, apple → 2 unique.""" + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "apple"}, + {target.db_column: "banana"}, + {target.db_column: "apple"}, + ], + ) + .created_rows + ) + + row_a1, row_a2 = ( + RowHandler() + .create_rows( + user, + table_a, + [ + {link_field.db_column: [r.id for r in rows_b]}, + {link_field.db_column: []}, + ], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + return user, table_a, table_b, link_field, target, lookup_field, row_a1, row_a2 + + +def _setup_number_lookup(data_fixture): + """Create a number lookup with duplicates: 10, 20, 10 → 2 unique.""" + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_number_field( + table=table_b, name="target", number_decimal_places=0 + ) + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: 10}, + {target.db_column: 20}, + {target.db_column: 10}, + ], + ) + .created_rows + ) + + row_a1, row_a2 = ( + RowHandler() + .create_rows( + user, + table_a, + [ + {link_field.db_column: [r.id for r in rows_b]}, + {link_field.db_column: []}, + ], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + return user, table_a, table_b, link_field, 
target, lookup_field, row_a1, row_a2 + + +@pytest.mark.django_db +def test_count_array_unique_text(data_fixture): + """count(array_unique(field('lookup'))) returns number of unique text values.""" + user, table_a, *_, lookup_field, row_a1, row_a2 = _setup_text_lookup(data_fixture) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_count", + formula="count(array_unique(field('lookup')))", + ) + + model = table_a.get_model() + rows = {r.id: getattr(r, count_field.db_column) for r in model.objects.all()} + + assert rows[row_a1.id] == 2 # apple, banana + assert rows[row_a2.id] == 0 # empty + + +@pytest.mark.django_db +def test_count_array_unique_number(data_fixture): + """count(array_unique(field('lookup'))) returns number of unique number values.""" + user, table_a, *_, lookup_field, row_a1, row_a2 = _setup_number_lookup(data_fixture) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_count", + formula="count(array_unique(field('lookup')))", + ) + + model = table_a.get_model() + rows = {r.id: getattr(r, count_field.db_column) for r in model.objects.all()} + + assert rows[row_a1.id] == 2 # 10, 20 + assert rows[row_a2.id] == 0 # empty + + +@pytest.mark.django_db +def test_join_array_unique_text(data_fixture): + """join(array_unique(field('lookup')), ', ') returns comma-separated unique values.""" + user, table_a, *_, lookup_field, row_a1, row_a2 = _setup_text_lookup(data_fixture) + + join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_joined", + formula="join(array_unique(field('lookup')), ', ')", + ) + + model = table_a.get_model() + rows = {r.id: getattr(r, join_field.db_column) for r in model.objects.all()} + + assert rows[row_a1.id] == "apple, banana" + assert rows[row_a2.id] == "" + + +@pytest.mark.django_db +def test_count_array_unique_boolean(data_fixture): + """count(array_unique(field('lookup'))) works with boolean lookups.""" + user = 
data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_boolean_field(table=table_b, name="target") + + RowHandler().create_rows( + user, + table_b, + [ + {target.db_column: True}, + {target.db_column: False}, + {target.db_column: True}, # duplicate + ], + ) + + rows_b = list(table_b.get_model().objects.all().order_by("id")) + + (row_a,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_count", + formula="count(array_unique(field('lookup')))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, count_field.db_column) == 2 # True, False + + +@pytest.mark.django_db +def test_count_regular_lookup_still_works(data_fixture): + """count(field('lookup')) still works (regression check for many-expression path).""" + user, table_a, *_, lookup_field, row_a1, row_a2 = _setup_text_lookup(data_fixture) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="count", + formula="count(field('lookup'))", + ) + + model = table_a.get_model() + rows = {r.id: getattr(r, count_field.db_column) for r in model.objects.all()} + + assert rows[row_a1.id] == 3 # all 3 including duplicates + assert rows[row_a2.id] == 0 + + +@pytest.mark.django_db +def test_join_regular_lookup_still_works(data_fixture): + """join(field('lookup'), ', ') still works (regression check).""" + user, table_a, *_, lookup_field, row_a1, row_a2 = _setup_text_lookup(data_fixture) + + join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="joined", + formula="join(field('lookup'), ', ')", + ) + + model = 
table_a.get_model() + result = model.objects.get(id=row_a1.id) + val = getattr(result, join_field.db_column) + # Should contain all 3 values including duplicate + assert val.count(",") == 2 # 3 items, 2 commas + + +@pytest.mark.django_db +def test_count_array_unique_inline_lookup(data_fixture): + """count(array_unique(lookup('link', 'target'))) works with inline lookup.""" + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "X"}, + {target.db_column: "Y"}, + {target.db_column: "X"}, + ], + ) + .created_rows + ) + + (row_a,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_count", + formula=f"count(array_unique(lookup('{link_field.name}', '{target.name}')))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, count_field.db_column) == 2 + + +@pytest.mark.django_db +def test_join_array_unique_inline_lookup(data_fixture): + """join(array_unique(lookup('link', 'target')), sep) works with inline lookup.""" + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "X"}, + {target.db_column: "Y"}, + {target.db_column: "X"}, + ], + ) + .created_rows + ) + + (row_a,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_joined", + 
formula=f"join(array_unique(lookup('{link_field.name}', '{target.name}')), ', ')", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, join_field.db_column) == "X, Y" diff --git a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py index 9684b0042b..3ae974b962 100644 --- a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py +++ b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py @@ -1,6 +1,7 @@ import datetime import sys import traceback +from datetime import timedelta from decimal import Decimal from re import search from typing import Any, List, Optional @@ -2168,3 +2169,535 @@ def test_regexp_replace(data_fixture): ["a123", "[a-", "#ERROR!"], ["a123", "\\", "#ERROR!"], ] + + +def _setup_single_select(df, table): + field = df.create_single_select_field(table=table, name="target") + opt_a = df.create_select_option(field=field, value="Active", color="blue", order=0) + opt_b = df.create_select_option(field=field, value="Inactive", color="red", order=1) + return field, opt_a.id, opt_b.id + + +def _setup_multiple_select(df, table): + field = df.create_multiple_select_field(table=table, name="target") + opt_x = df.create_select_option(field=field, value="X", color="blue", order=0) + opt_y = df.create_select_option(field=field, value="Y", color="red", order=1) + # val_a=[X,Y], val_b=[X] — dedup is by per-row option combination + return field, [opt_x.id, opt_y.id], [opt_x.id] + + +def _setup_multiple_collaborators(df, table): + workspace = table.database.workspace + user_a = df.create_user(workspace=workspace) + user_b = df.create_user(workspace=workspace) + field = df.create_multiple_collaborators_field(table=table, name="target") + return field, [{"id": user_a.id}], [{"id": user_b.id}] + + +def _setup_file_field(df, table): + field = 
df.create_file_field(table=table, name="target") + return ( + field, + [{"name": "a.txt", "visible_name": "a.txt"}], + [{"name": "b.txt", "visible_name": "b.txt"}], + ) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "setup_fn", + [ + lambda df, table: ( + df.create_text_field(table=table, name="target"), + "apple", + "banana", + ), + lambda df, table: ( + df.create_number_field(table=table, name="target", number_decimal_places=2), + Decimal("10.50"), + Decimal("20.00"), + ), + lambda df, table: ( + df.create_boolean_field(table=table, name="target"), + True, + False, + ), + lambda df, table: ( + df.create_date_field(table=table, name="target"), + "2024-01-15", + "2024-06-01", + ), + lambda df, table: ( + df.create_duration_field( + table=table, name="target", duration_format="h:mm" + ), + timedelta(hours=1, minutes=30), + timedelta(hours=2), + ), + lambda df, table: ( + df.create_url_field(table=table, name="target"), + "https://example.com", + "https://baserow.io", + ), + lambda df, table: ( + df.create_email_field(table=table, name="target"), + "alice@example.com", + "bob@example.com", + ), + lambda df, table: ( + df.create_phone_number_field(table=table, name="target"), + "+1234567890", + "+0987654321", + ), + lambda df, table: ( + df.create_rating_field(table=table, name="target"), + 3, + 5, + ), + _setup_single_select, + _setup_multiple_select, + _setup_multiple_collaborators, + ], + ids=[ + "text", + "number", + "boolean", + "date", + "duration", + "url", + "email", + "phone", + "rating", + "single_select", + "multiple_select", + "multiple_collaborators", + ], +) +def test_array_unique_lookup(data_fixture, api_client, setup_fn): + """ + array_unique deduplicates a lookup array, preserving first-occurrence order. + Parameterized across field types. + + Also verifies that row updates, additions, and deletions in the linked + table correctly trigger formula recalculation, and that the formula table + can still be fetched via the API afterwards. 
+ """ + + user, token = data_fixture.create_user_and_token() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target_field, val_a, val_b = setup_fn(data_fixture, table_b) + + # 3 rows: val_a, val_b, val_a (duplicate) + row_b1, row_b2, row_b3 = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target_field.db_column: val_a}, + {target_field.db_column: val_b}, + {target_field.db_column: val_a}, + ], + ) + .created_rows + ) + + # Row A1: links to all 3 (has duplicate val_a) + # Row A2: links to 2 (all unique) + # Row A3: empty + row_a1, row_a2, row_a3 = ( + RowHandler() + .create_rows( + user, + table_a, + [ + {link_field.db_column: [row_b1.id, row_b2.id, row_b3.id]}, + {link_field.db_column: [row_b1.id, row_b2.id]}, + {link_field.db_column: []}, + ], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target_field.name}')", + ) + unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_lookup", + formula="array_unique(field('lookup'))", + ) + + # Same via a formula field that references the target field indirectly. + # Formula-backed fields are stored differently (no physical column on + # table_b), so this path can surface serialisation mismatches. 
+ ref_target_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target_field.name}')", + ) + ref_lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target_field.name}')", + ) + ref_unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_unique_lookup", + formula="array_unique(field('ref_lookup'))", + ) + + def _read_unique_rows(): + """Return (direct_rows, ref_rows) for both unique fields.""" + model = table_a.get_model() + qs = model.objects.all().order_by("id") + direct = list(qs.values_list(unique_field.db_column, flat=True)) + ref = list(qs.values_list(ref_unique_field.db_column, flat=True)) + return direct, ref + + rows, ref_rows = _read_unique_rows() + + # Row A1: val_a, val_b, val_a → val_a, val_b (deduped, first-occurrence order) + assert len(rows[0]) == 2 + assert rows[0][0]["id"] == row_b1.id + assert rows[0][1]["id"] == row_b2.id + + # Row A2: val_a, val_b → unchanged (already unique) + assert len(rows[1]) == 2 + assert rows[1][0]["id"] == row_b1.id + assert rows[1][1]["id"] == row_b2.id + + # Row A3: empty + assert rows[2] == [] + + # Formula-referenced path must produce identical results + assert ref_rows == rows, ( + f"Formula-ref path diverged from direct path:\n" + f" direct: {rows}\n" + f" ref: {ref_rows}" + ) + + # ── Step 2: update a linked row's value → triggers recalculation ── + + RowHandler().update_rows( + user, + table_b, + [{"id": row_b1.id, target_field.db_column: val_b}], + ) + + # Now row_b1 and row_b2 both have val_b, row_b3 has val_a. + # Row A1 links to all 3 → unique is [val_b, val_a] (first-occurrence). 
+ rows, ref_rows = _read_unique_rows() + assert len(rows[0]) == 2 + assert ref_rows == rows + + # ── Step 3: add a new linked row with empty/default value ── + + (row_b4,) = RowHandler().create_rows(user, table_b, [{}]).created_rows + RowHandler().update_rows( + user, + table_a, + [ + { + "id": row_a1.id, + link_field.db_column: [ + row_b1.id, + row_b2.id, + row_b3.id, + row_b4.id, + ], + } + ], + ) + + rows, ref_rows = _read_unique_rows() + assert isinstance(rows[0], list) + assert ref_rows == rows + + # ── Step 4: delete a linked row → triggers recalculation ── + + RowHandler().delete_rows(user, table_b, [row_b3.id]) + + rows, ref_rows = _read_unique_rows() + assert isinstance(rows[0], list) + assert ref_rows == rows + + # ── Step 5: API fetch must not crash ── + + from baserow.contrib.database.views.handler import ViewHandler + + grid = ViewHandler().create_view(user, table_a, "grid", name="test") + response = api_client.get( + f"/api/database/views/grid/{grid.id}/", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == 200, ( + f"API crash after update/delete: {response.content.decode()[:300]}" + ) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "create_field_fn", + [ + lambda df, table: df.create_created_on_field(table=table, name="target"), + lambda df, table: df.create_autonumber_field(table=table, name="target"), + ], + ids=["created_on", "autonumber"], +) +def test_array_unique_auto_field_lookup(data_fixture, create_field_fn): + """ + array_unique works on auto-populated fields (created_on, autonumber). + Values can't be controlled, so we verify dedup count ≤ original count + and first-occurrence ordering is preserved. 
+ """ + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target_field = create_field_fn(data_fixture, table_b) + + row_b1, row_b2, row_b3 = ( + RowHandler().create_rows(user, table_b, [{}, {}, {}]).created_rows + ) + + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [row_b1.id, row_b2.id, row_b3.id]}], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target_field.name}')", + ) + unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_lookup", + formula="array_unique(field('lookup'))", + ) + + # Same via a formula field referencing the target indirectly. + ref_target_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target_field.name}')", + ) + ref_lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target_field.name}')", + ) + ref_unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_unique_lookup", + formula="array_unique(field('ref_lookup'))", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + lookup_val = getattr(result, lookup_field.db_column) + unique_val = getattr(result, unique_field.db_column) + ref_unique_val = getattr(result, ref_unique_field.db_column) + + assert len(unique_val) <= len(lookup_val) + assert unique_val[0]["id"] == lookup_val[0]["id"] + + # The formula-ref path must also deduplicate without errors. 
+ # We don't assert equality with the direct path because date fields with + # date_include_time=False truncate values before dedup (so all same-day + # rows collapse), while field() exposes the underlying full datetime + # (so rows with distinct timestamps stay separate). + assert len(ref_unique_val) <= len(lookup_val) + assert ref_unique_val[0]["id"] == lookup_val[0]["id"] + + +@pytest.mark.django_db +def test_array_unique_link_row_lookup(data_fixture): + """ + Test array_unique on a lookup through a link to another link's primary + field (A→B→C), where C primary values have duplicates. + """ + + user = data_fixture.create_user() + database = data_fixture.create_database_application(user=user) + + table_a = data_fixture.create_database_table(database=database, name="A") + table_b = data_fixture.create_database_table(database=database, name="B") + table_c = data_fixture.create_database_table(database=database, name="C") + + data_fixture.create_text_field(table=table_a, name="primary_a", primary=True) + data_fixture.create_text_field(table=table_b, name="primary_b", primary=True) + primary_c = data_fixture.create_text_field( + table=table_c, name="primary_c", primary=True + ) + + link_a_b = FieldHandler().create_field( + user, table_a, "link_row", name="link_ab", link_row_table=table_b + ) + link_b_c = FieldHandler().create_field( + user, table_b, "link_row", name="link_bc", link_row_table=table_c + ) + + row_c1, row_c2 = ( + RowHandler() + .create_rows( + user, + table_c, + [{primary_c.db_column: "X"}, {primary_c.db_column: "Y"}], + ) + .created_rows + ) + + row_b1, row_b2, row_b3 = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {link_b_c.db_column: [row_c1.id]}, + {link_b_c.db_column: [row_c2.id]}, + {link_b_c.db_column: [row_c1.id]}, # duplicate link to X + ], + ) + .created_rows + ) + + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_a_b.db_column: [row_b1.id, row_b2.id, row_b3.id]}], + ) + .created_rows + ) + + lookup_field 
= FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup_bc", + formula=f"lookup('{link_a_b.name}', '{link_b_c.name}')", + ) + unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_bc", + formula="array_unique(field('lookup_bc'))", + ) + + # Same via a formula field referencing the link field indirectly. + ref_link_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_link_bc", + formula=f"field('{link_b_c.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup_bc", + formula=f"lookup('{link_a_b.name}', '{ref_link_field.name}')", + ) + ref_unique_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_unique_bc", + formula="array_unique(field('ref_lookup_bc'))", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + unique_val = getattr(result, unique_field.db_column) + ref_unique_val = getattr(result, ref_unique_field.db_column) + + unique_values = [elem["value"] for elem in unique_val] + assert unique_values == ["X", "Y"] + + # The formula-ref path goes through an extra indirection (field() wrapping + # the link field), which produces a different id structure (multi-table + # 'ids' dict vs single 'id'). We compare only the deduplicated values. 
+ ref_unique_values = [elem["value"] for elem in ref_unique_val] + assert ref_unique_values == ["X", "Y"] + + +@pytest.mark.django_db +def test_array_unique_rejects_non_array_input(data_fixture): + """array_unique on a plain text field should produce a formula error.""" + + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + data_fixture.create_text_field(table=table, name="name", primary=True) + + with pytest.raises(InvalidFormulaType, match="array"): + FieldHandler().create_field( + user, + table, + "formula", + name="bad", + formula="array_unique(field('name'))", + ) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "setup_fn,error_match", + [ + (_setup_file_field, "file"), + ], + ids=["file"], +) +def test_array_unique_rejects_unsupported_lookup(data_fixture, setup_fn, error_match): + """array_unique rejects lookups of unsupported field types.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target_field, _, _ = setup_fn(data_fixture, table_b) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target_field.name}')", + ) + + with pytest.raises(InvalidFormulaType, match=error_match): + FieldHandler().create_field( + user, + table_a, + "formula", + name="unique_lookup", + formula="array_unique(field('lookup'))", + ) diff --git a/changelog/entries/unreleased/feature/2326_array_unique_formula.json b/changelog/entries/unreleased/feature/2326_array_unique_formula.json new file mode 100644 index 0000000000..f8db698e94 --- /dev/null +++ b/changelog/entries/unreleased/feature/2326_array_unique_formula.json @@ -0,0 +1,8 @@ +{ + "type": "feature", + "message": "Add the `array_unique` formula function to deduplicate lookup arrays.", + "domain": "database", + "issue_number": 2326, + "bullet_points": [], + "created_at": "2026-03-26" +} diff --git 
a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_database_table_tools.py b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_database_table_tools.py index 97c9560f35..fec18187a5 100644 --- a/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_database_table_tools.py +++ b/enterprise/backend/tests/baserow_enterprise_tests/assistant/test_assistant_database_table_tools.py @@ -585,6 +585,8 @@ def mock_run_sync(prompt, **kwargs): "string_agg_many_to_many_values", "many_to_many_agg", "many_to_many_count", + "array_length", + "array_join_values", ] missing_functions = [] diff --git a/premium/backend/src/baserow_premium/prompts/formula_docs.md b/premium/backend/src/baserow_premium/prompts/formula_docs.md index 606fdf3d9d..02dad1c8aa 100644 --- a/premium/backend/src/baserow_premium/prompts/formula_docs.md +++ b/premium/backend/src/baserow_premium/prompts/formula_docs.md @@ -189,7 +189,8 @@ These functions work with arrays and lookup values to perform calculations acros | lookup | Looks up the values from a field in another table for rows in a link row field. The first argument should be the name of a link row field in the current table and the second should be the name of a field in the linked table. | lookup('a link row field name', 'field name in other the table') | lookup('link row field', 'first name') = lookup('link row field', 'last name') | | row_id | Returns the rows unique identifying number. | row_id() | concat("Row ", row_id()) | | when_empty | If the first input is calculated to be empty the second input will be returned instead, otherwise if the first input is not empty the first will be returned. | when_empty(any, same type as the first) | when_empty(field("a"), "default") | -| has_option | Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options. 
| has_option(multiple select, text); has_option(lookup(link row, single select), text) | has_option(field('multiple select'), 'option_a'); has_option(lookup(field('link row'), field('single select')), 'option_a') | +| has_option | Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options. | has_option(multiple select, text); has_option(lookup(link row, single select), text) | has_option(field('multiple select'), 'option_a'); has_option(lookup(field('link row'), field('single select')), 'option_a') | +| array_unique | Removes duplicate values from a lookup array, preserving first-occurrence order. Only works on arrays from lookup fields; does not support file fields. | array_unique(a lookup array) | array_unique(field('my lookup field')); count(array_unique(field('lookup'))); join(array_unique(field('lookup')), ', ') | ### URL Functions diff --git a/premium/backend/tests/baserow_premium_tests/api/fields/test_generate_formula_prompt.py b/premium/backend/tests/baserow_premium_tests/api/fields/test_generate_formula_prompt.py index 872d67e3ff..f9b8fc8c1b 100644 --- a/premium/backend/tests/baserow_premium_tests/api/fields/test_generate_formula_prompt.py +++ b/premium/backend/tests/baserow_premium_tests/api/fields/test_generate_formula_prompt.py @@ -23,6 +23,8 @@ def test_if_prompt_contains_all_formula_functions(): "string_agg_many_to_many_values", "many_to_many_agg", "many_to_many_count", + "array_length", + "array_join_values", ] for function in formula_function_registry.registry.keys(): diff --git a/web-frontend/locales/en.json b/web-frontend/locales/en.json index 1410f87ab7..03e3d3cf17 100644 --- a/web-frontend/locales/en.json +++ b/web-frontend/locales/en.json @@ -620,7 +620,8 @@ "indexDescription": "Returns the file from a file field at the position provided by the second argument.", "secondsToDurationDescription": "Converts the number of seconds provided into a duration.", 
"durationToSecondsDescription": "Converts the duration provided into the corresponding number of seconds.", - "hasOptionDescription": "Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options." + "hasOptionDescription": "Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options.", + "arrayUniqueDescription": "Returns only the unique items from an array, removing duplicates while preserving the order of first occurrence." }, "functionnalGridViewFieldLinkRow": { "unnamed": "unnamed row {value}" diff --git a/web-frontend/modules/database/formula/functions.js b/web-frontend/modules/database/formula/functions.js index fe2fb41080..b3aab7ad11 100644 --- a/web-frontend/modules/database/formula/functions.js +++ b/web-frontend/modules/database/formula/functions.js @@ -2604,3 +2604,26 @@ export class BaserowToUrl extends BaserowFunctionDefinition { return 'url' } } + +export class BaserowArrayUnique extends BaserowFunctionDefinition { + static getType() { + return 'array_unique' + } + + getDescription() { + const { $i18n: i18n } = this.app + return i18n.t('formulaFunctions.arrayUniqueDescription') + } + + getSyntaxUsage() { + return ['array_unique(array)'] + } + + getExamples() { + return ["array_unique(field('my lookup field'))"] + } + + getFormulaType() { + return 'array' + } +} diff --git a/web-frontend/modules/database/plugin.js b/web-frontend/modules/database/plugin.js index df7b7cebad..fe292fc834 100644 --- a/web-frontend/modules/database/plugin.js +++ b/web-frontend/modules/database/plugin.js @@ -273,6 +273,7 @@ import { BaserowIndex, BaserowGetFileCount, BaserowToUrl, + BaserowArrayUnique, } from '@baserow/modules/database/formula/functions' import { BaserowFormulaArrayType, @@ -856,6 +857,7 @@ export default defineNuxtPlugin({ $registry.register('formula_function', new 
BaserowGetFileCount(context)) $registry.register('formula_function', new BaserowIndex(context)) $registry.register('formula_function', new BaserowToUrl(context)) + $registry.register('formula_function', new BaserowArrayUnique(context)) // Formula Types $registry.register('formula_type', new BaserowFormulaTextType(context)) diff --git a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js index 2992a78dda..8f4f4ba632 100644 --- a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js +++ b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js @@ -106,6 +106,7 @@ describe('Formula Functions Test', () => { 'index', 'is_image', 'tourl', + 'array_unique', ] const frontendFunctionTypes = Object.keys( testApp.store.$registry.getAll('formula_function') From ee1371ff555882f07752c8870ef2633e945ef173 Mon Sep 17 00:00:00 2001 From: Davide Silvestri <75379892+silvestrid@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:10:16 +0200 Subject: [PATCH 3/5] feat (database): `array_slice` formula (#5062) * feat: add array_slice() database formula function * fix(formula): handle NaN arguments in array_slice to prevent DB crash Wrap start/count args with handle_arg_being_nan before truncating to int, so expressions like tonumber('x') no longer cause "cannot convert NaN to integer" at the PostgreSQL level. NaN is treated as 0 for both arguments. 
--- .../database/formula/ast/function_defs.py | 109 +++ .../django_expressions.py | 59 ++ .../formula/test_baserow_formula_results.py | 656 ++++++++++++++++++ .../feature/array_slice_formula.json | 8 + .../baserow_premium/prompts/formula_docs.md | 1 + web-frontend/locales/en.json | 3 +- .../modules/database/formula/functions.js | 27 + web-frontend/modules/database/plugin.js | 2 + .../database/formula/formulaFunctions.spec.js | 1 + 9 files changed, 865 insertions(+), 1 deletion(-) create mode 100644 changelog/entries/unreleased/feature/array_slice_formula.json diff --git a/backend/src/baserow/contrib/database/formula/ast/function_defs.py b/backend/src/baserow/contrib/database/formula/ast/function_defs.py index f061cddb95..779c537c47 100644 --- a/backend/src/baserow/contrib/database/formula/ast/function_defs.py +++ b/backend/src/baserow/contrib/database/formula/ast/function_defs.py @@ -85,6 +85,7 @@ GreaterThanOrEqualExpr, IsNullExpr, JSONBArrayJoinValues, + JSONBArraySlice, JSONBArrayUniqueByValue, LessThanEqualOrExpr, LessThanExpr, @@ -263,6 +264,7 @@ def register_formula_functions(registry): registry.register(BaserowArrayUnique()) registry.register(BaserowArrayLength()) registry.register(BaserowArrayJoinValues()) + registry.register(BaserowArraySlice()) # ManyToMany functions registry.register(BaserowStringAggManyToManyValues()) registry.register(BaserowManyToManyCount()) @@ -2498,6 +2500,113 @@ def to_django_expression(self, arg: Expression) -> Expression: return JSONBArrayUniqueByValue(arg) +class BaserowArraySlice(ThreeArgumentBaserowFunction): + type = "array_slice" + arg1_type = [BaserowFormulaValidType] + arg2_type = [BaserowFormulaNumberType] + arg3_type = [BaserowFormulaNumberType] + + def type_function( + self, + func_call: BaserowFunctionCall[UnTyped], + arg1: BaserowExpression[BaserowFormulaValidType], + arg2: BaserowExpression[BaserowFormulaNumberType], + arg3: BaserowExpression[BaserowFormulaNumberType], + ) -> BaserowExpression[BaserowFormulaType]: + 
if arg1.many: + arg1 = arg1.expression_type.collapse_many(arg1) + + if not isinstance(arg1.expression_type, BaserowFormulaArrayType): + return func_call.with_invalid_type("array_slice requires an array input.") + + return func_call.with_args([arg1, arg2, arg3]).with_valid_type( + arg1.expression_type + ) + + def to_django_expression( + self, arg1: Expression, arg2: Expression, arg3: Expression + ) -> Expression: + either_nan = EqualsExpr( + arg2, Value(Decimal("NaN")), output_field=fields.BooleanField() + ) | EqualsExpr(arg3, Value(Decimal("NaN")), output_field=fields.BooleanField()) + + start_int = trunc_numeric_to_int(arg2) + count_int = trunc_numeric_to_int(arg3) + abs_count = Func(count_int, function="ABS", output_field=fields.IntegerField()) + + is_reverse = LessThanExpr( + count_int, Value(0), output_field=fields.BooleanField() + ) + + array_len = Func( + arg1, function="jsonb_array_length", output_field=fields.IntegerField() + ) + + # Resolve negative start to a 0-based position + resolved_start = Case( + When( + condition=GreaterThanOrEqualExpr( + start_int, Value(0), output_field=fields.BooleanField() + ), + then=start_int, + ), + default=Greatest( + ExpressionWrapper( + array_len + start_int, output_field=fields.IntegerField() + ), + Value(0), + ), + output_field=fields.IntegerField(), + ) + + # Forward: offset = resolved_start + # Backward: offset = max(0, resolved_start - abs_count + 1) + offset_expr = Case( + When( + condition=is_reverse, + then=Greatest( + ExpressionWrapper( + resolved_start - abs_count + Value(1), + output_field=fields.IntegerField(), + ), + Value(0), + ), + ), + default=resolved_start, + output_field=fields.IntegerField(), + ) + + # Forward: 0 → NULL (all remaining), else count + # Backward: abs(count) — but clamped to (resolved_start + 1) + # so we don't go past the beginning + limit_expr = Case( + When( + condition=is_reverse, + then=Least( + abs_count, + ExpressionWrapper( + resolved_start + Value(1), + 
output_field=fields.IntegerField(), + ), + ), + ), + When( + condition=EqualsExpr( + count_int, Value(0), output_field=fields.BooleanField() + ), + then=Value(None), + ), + default=count_int, + output_field=fields.IntegerField(), + ) + + return Case( + When(condition=either_nan, then=Value([], output_field=JSONField())), + default=JSONBArraySlice(arg1, offset_expr, limit_expr, is_reverse), + output_field=JSONField(), + ) + + class BaserowArrayLength(OneArgumentBaserowFunction): type = "array_length" arg_type = [BaserowFormulaArrayType] diff --git a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py index ceca4f699f..ba37d0ad26 100644 --- a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py +++ b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py @@ -163,6 +163,65 @@ def as_sql(self, compiler, connection, **extra_context): return sql, (*separator_params, *params) +class JSONBArraySlice(Expression): + """ + Slice a JSONB array with offset, limit, and optional reverse. 
+ + All parameters should be pre-computed Django expressions: + - offset_expr / limit_expr: the forward window (limit may be NULL for "all") + - reverse_expr: boolean — when true, output order is reversed + """ + + def __init__(self, array_expr, offset_expr, limit_expr, reverse_expr): + super().__init__(output_field=JSONField()) + self.array_expr = array_expr + self.offset_expr = offset_expr + self.limit_expr = limit_expr + self.reverse_expr = reverse_expr + + def resolve_expression( + self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False + ): + c = self.copy() + c.is_summary = summarize + c.array_expr = self.array_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + c.offset_expr = self.offset_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + c.limit_expr = self.limit_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + c.reverse_expr = self.reverse_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + return c + + def as_sql(self, compiler, connection): + array_sql, array_params = compiler.compile(self.array_expr) + offset_sql, offset_params = compiler.compile(self.offset_expr) + limit_sql, limit_params = compiler.compile(self.limit_expr) + reverse_sql, reverse_params = compiler.compile(self.reverse_expr) + + sql = ( + "(SELECT COALESCE(jsonb_agg(sub.elem ORDER BY " # noqa: S608 + f"CASE WHEN {reverse_sql} THEN -sub.rn ELSE sub.rn END" + "), '[]'::jsonb) " + "FROM (SELECT t.elem, t.rn " + f"FROM jsonb_array_elements({array_sql}) WITH ORDINALITY AS t(elem, rn) " + "ORDER BY t.rn " + f"OFFSET {offset_sql} " + f"LIMIT {limit_sql}) sub)" + ) + return sql, ( + list(reverse_params) + + list(array_params) + + list(offset_params) + + list(limit_params) + ) + + class BaserowFilterExpression(Expression): """ Baserow expression that works with field_name and value diff --git 
a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py index 3ae974b962..b833246332 100644 --- a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py +++ b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py @@ -2701,3 +2701,659 @@ def test_array_unique_rejects_unsupported_lookup(data_fixture, setup_fn, error_m name="unique_lookup", formula="array_unique(field('lookup'))", ) + + +def _setup_text_5_rows(df, table_a, table_b, link_field, user): + """Create 5 text rows [A, B, C, D, E] linked from a single row in table_a.""" + + text_field = df.create_text_field(table=table_b, name="target") + row_b1, row_b2, row_b3, row_b4, row_b5 = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {text_field.db_column: "A"}, + {text_field.db_column: "B"}, + {text_field.db_column: "C"}, + {text_field.db_column: "D"}, + {text_field.db_column: "E"}, + ], + ) + .created_rows + ) + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [ + { + link_field.db_column: [ + row_b1.id, + row_b2.id, + row_b3.id, + row_b4.id, + row_b5.id, + ] + } + ], + ) + .created_rows + ) + return text_field, [row_b1, row_b2, row_b3, row_b4, row_b5], row_a1 + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "start,count,expected_values", + [ + # Forward slicing (0-based start, positive count) + (0, 2, ["A", "B"]), + (1, 3, ["B", "C", "D"]), + (3, 1, ["D"]), + (0, 5, ["A", "B", "C", "D", "E"]), + # count exceeds remaining → returns up to end + (3, 100, ["D", "E"]), + (0, 999, ["A", "B", "C", "D", "E"]), + # count = 0 → all remaining forward + (0, 0, ["A", "B", "C", "D", "E"]), + (2, 0, ["C", "D", "E"]), + (-1, 0, ["E"]), + (-3, 0, ["C", "D", "E"]), + # Negative start (from end), forward + (-1, 1, ["E"]), + (-2, 2, ["D", "E"]), + (-3, 2, ["C", "D"]), + (-5, 3, ["A", "B", "C"]), + # Negative start exceeding length → clamp to 0 + 
(-100, 2, ["A", "B"]), + # start beyond end → empty + (10, 2, []), + # Reverse slicing (negative count = backward from start) + (2, -1, ["C"]), + (2, -2, ["C", "B"]), + (2, -3, ["C", "B", "A"]), + (4, -3, ["E", "D", "C"]), + (-1, -3, ["E", "D", "C"]), + (-1, -5, ["E", "D", "C", "B", "A"]), + # Reverse count exceeds available → clamp + (1, -10, ["B", "A"]), + (0, -1, ["A"]), + (0, -5, ["A"]), + ], + ids=[ + "fwd_first_2", + "fwd_mid_3", + "fwd_single_mid", + "fwd_all_exact", + "fwd_count_exceeds", + "fwd_count_way_exceeds", + "all_from_start", + "all_from_2", + "all_from_last", + "all_from_neg3", + "fwd_last_1", + "fwd_last_2", + "fwd_last_3_take_2", + "fwd_neg_start_exact_len", + "neg_start_clamped", + "start_beyond", + "rev_1_from_2", + "rev_2_from_2", + "rev_3_from_2", + "rev_3_from_end", + "rev_3_from_neg1", + "rev_all_from_end", + "rev_exceeds", + "rev_from_0", + "rev_from_0_exceeds", + ], +) +def test_array_slice_text_lookup(data_fixture, start, count, expected_values): + """ + array_slice returns a sub-array from a lookup. 0-based start (negative + counts from end). count > 0 = forward, count = 0 = all remaining, + count < 0 = backward (reversed). + """ + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field, b_rows, row_a1 = _setup_text_5_rows( + data_fixture, table_a, table_b, link_field, user + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="sliced", + formula=f"array_slice(field('lookup'), {start}, {count})", + ) + + # Same via a formula field referencing the text field indirectly. 
+ ref_target_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{text_field.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target_field.name}')", + ) + ref_slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_sliced", + formula=f"array_slice(field('ref_lookup'), {start}, {count})", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + sliced = getattr(result, slice_field.db_column) + ref_sliced = getattr(result, ref_slice_field.db_column) + + actual_values = [elem["value"] for elem in sliced] + assert actual_values == expected_values + + # Formula-referenced path must produce identical results + assert ref_sliced == sliced + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "setup_fn", + [ + lambda df, table: ( + df.create_text_field(table=table, name="target"), + "apple", + "banana", + ), + lambda df, table: ( + df.create_number_field(table=table, name="target", number_decimal_places=2), + Decimal("10.50"), + Decimal("20.00"), + ), + lambda df, table: ( + df.create_boolean_field(table=table, name="target"), + True, + False, + ), + lambda df, table: ( + df.create_date_field(table=table, name="target"), + "2024-01-15", + "2024-06-01", + ), + _setup_single_select, + _setup_multiple_select, + _setup_multiple_collaborators, + ], + ids=[ + "text", + "number", + "boolean", + "date", + "single_select", + "multiple_select", + "multiple_collaborators", + ], +) +def test_array_slice_field_types(data_fixture, setup_fn): + """array_slice works across field types — takes the first, then the last, of 3 elements.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target_field, val_a, val_b = setup_fn(data_fixture, table_b) + + row_b1, row_b2, row_b3 = ( + RowHandler() + .create_rows( + user,
+ table_b, + [ + {target_field.db_column: val_a}, + {target_field.db_column: val_b}, + {target_field.db_column: val_a}, + ], + ) + .created_rows + ) + + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [row_b1.id, row_b2.id, row_b3.id]}], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target_field.name}')", + ) + slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="sliced", + formula="array_slice(field('lookup'), 0, 1)", + ) + + # Same via a formula field referencing the target field indirectly. + ref_target_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target_field.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target_field.name}')", + ) + ref_slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_sliced", + formula="array_slice(field('ref_lookup'), 0, 1)", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + sliced = getattr(result, slice_field.db_column) + ref_sliced = getattr(result, ref_slice_field.db_column) + + assert len(sliced) == 1 + assert sliced[0]["id"] == row_b1.id + + # Formula-referenced path must produce identical results + assert ref_sliced == sliced + + # Now pick only the last element via a negative start + FieldHandler().update_field( + user, slice_field, formula="array_slice(field('lookup'), -1, 1)" + ) + FieldHandler().update_field( + user, ref_slice_field, formula="array_slice(field('ref_lookup'), -1, 1)" + ) + + result.refresh_from_db() + sliced = getattr(result, slice_field.db_column) + assert len(sliced) == 1 + assert sliced[0]["id"] == row_b3.id + ref_sliced = getattr(result, ref_slice_field.db_column) + assert ref_sliced
== sliced + + +@pytest.mark.django_db +def test_array_slice_empty_array(data_fixture): + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + table_b_primary = table_b.field_set.get(primary=True) + + (row_a1,) = ( + RowHandler() + .create_rows(user, table_a, [{link_field.db_column: []}]) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{table_b_primary.name}')", + ) + slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="sliced", + formula="array_slice(field('lookup'), 0, 5)", + ) + + # Same via a formula field referencing the primary field indirectly. + ref_target_field = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{table_b_primary.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target_field.name}')", + ) + ref_slice_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_sliced", + formula="array_slice(field('ref_lookup'), 0, 5)", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + assert getattr(result, slice_field.db_column) == [] + assert getattr(result, ref_slice_field.db_column) == [] + + +@pytest.mark.django_db +def test_array_slice_rejects_non_array_input(data_fixture): + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + data_fixture.create_text_field(table=table, name="name", primary=True) + + with pytest.raises(InvalidFormulaType, match="array"): + FieldHandler().create_field( + user, + table, + "formula", + name="bad", + formula="array_slice(field('name'), 0, 1)", + ) + + +@pytest.mark.django_db +def test_count_array_slice(data_fixture): + """count(array_slice(...)) returns the length of the 
sliced sub-array.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field, b_rows, row_a1 = _setup_text_5_rows( + data_fixture, table_a, table_b, link_field, user + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="count_sliced", + formula="count(array_slice(field('lookup'), 1, 3))", + ) + + # Also via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{text_field.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_count_sliced", + formula="count(array_slice(field('ref_lookup'), 1, 3))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a1.id) + assert getattr(result, count_field.db_column) == 3 # B, C, D + assert getattr(result, ref_count_field.db_column) == 3 + + +@pytest.mark.django_db +def test_join_array_slice(data_fixture): + """join(array_slice(...), sep) returns comma-separated sliced values.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field, b_rows, row_a1 = _setup_text_5_rows( + data_fixture, table_a, table_b, link_field, user + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="join_sliced", + formula="join(array_slice(field('lookup'), 0, 2), ', ')", + ) + + # Also via formula-ref path + ref_target = 
FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{text_field.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_join_sliced", + formula="join(array_slice(field('ref_lookup'), 0, 2), ', ')", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a1.id) + assert getattr(result, join_field.db_column) == "A, B" + assert getattr(result, ref_join_field.db_column) == "A, B" + + +@pytest.mark.django_db +def test_array_slice_array_unique_chained(data_fixture): + """array_slice(array_unique(...)) chains correctly — deduplicate then slice.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "X"}, + {target.db_column: "Y"}, + {target.db_column: "X"}, # duplicate + {target.db_column: "Z"}, + ], + ) + .created_rows + ) + + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + # unique gives [X, Y, Z], then slice first 2 → [X, Y] + chained_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="slice_unique", + formula="array_slice(array_unique(field('lookup')), 0, 2)", + ) + + # Also via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + 
formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_chained_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_slice_unique", + formula="array_slice(array_unique(field('ref_lookup')), 0, 2)", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a1.id) + sliced = getattr(result, chained_field.db_column) + ref_sliced = getattr(result, ref_chained_field.db_column) + + assert [e["value"] for e in sliced] == ["X", "Y"] + assert ref_sliced == sliced + + +@pytest.mark.django_db +def test_count_join_array_slice_inline_lookup(data_fixture): + """count() and join() work with array_slice wrapping an inline lookup.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "A"}, + {target.db_column: "B"}, + {target.db_column: "C"}, + ], + ) + .created_rows + ) + + (row_a1,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="count_sliced", + formula=f"count(array_slice(lookup('{link_field.name}', '{target.name}'), 0, 2))", + ) + join_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="join_sliced", + formula=f"join(array_slice(lookup('{link_field.name}', '{target.name}'), 0, 2), ', ')", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a1.id) + assert getattr(result, count_field.db_column) == 2 # A, B + assert getattr(result, join_field.db_column) == "A, B" + + +@pytest.mark.django_db +def test_array_slice_nan_arguments(data_fixture): + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field, b_rows, row_a1 = 
_setup_text_5_rows( + data_fixture, table_a, table_b, link_field, user + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + + nan_start_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="nan_start", + formula="array_slice(field('lookup'), tonumber('x'), 2)", + ) + nan_count_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="nan_count", + formula="array_slice(field('lookup'), 1, tonumber('x'))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a1.id) + + assert getattr(result, nan_start_field.db_column) == [] + assert getattr(result, nan_count_field.db_column) == [] diff --git a/changelog/entries/unreleased/feature/array_slice_formula.json b/changelog/entries/unreleased/feature/array_slice_formula.json new file mode 100644 index 0000000000..7fe3de8147 --- /dev/null +++ b/changelog/entries/unreleased/feature/array_slice_formula.json @@ -0,0 +1,8 @@ +{ + "type": "feature", + "message": "Add the `array_slice` formula function to extract sub-arrays from lookup arrays.", + "domain": "database", + "issue_number": 5053, + "bullet_points": [], + "created_at": "2026-03-27" +} diff --git a/premium/backend/src/baserow_premium/prompts/formula_docs.md b/premium/backend/src/baserow_premium/prompts/formula_docs.md index 02dad1c8aa..d17656c9f4 100644 --- a/premium/backend/src/baserow_premium/prompts/formula_docs.md +++ b/premium/backend/src/baserow_premium/prompts/formula_docs.md @@ -191,6 +191,7 @@ These functions work with arrays and lookup values to perform calculations acros | when_empty | If the first input is calculated to be empty the second input will be returned instead, otherwise if the first input is not empty the first will be returned. 
| when_empty(any, same type as the first) | when_empty(field("a"), "default") | | has_option | Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options. | has_option(multiple select, text); has_option(lookup(link row, single select), text) | has_option(field('multiple select'), 'option_a'); has_option(lookup(field('link row'), field('single select')), 'option_a') | | array_unique | Removes duplicate values from a lookup array, preserving first-occurrence order. Only works on arrays from lookup fields; does not support file fields. | array_unique(a lookup array) | array_unique(field('my lookup field')); count(array_unique(field('lookup'))); join(array_unique(field('lookup')), ', ') | +| array_slice | Returns a sub-array starting at the given 0-based index. Negative start counts from the end. Positive count takes elements forward, 0 means all remaining, negative count takes elements backward in reverse order. | array_slice(array, start, count) | array_slice(field('lookup'), 0, 3); array_slice(field('lookup'), -2, 2) | ### URL Functions diff --git a/web-frontend/locales/en.json b/web-frontend/locales/en.json index 03e3d3cf17..d3ffd52c9b 100644 --- a/web-frontend/locales/en.json +++ b/web-frontend/locales/en.json @@ -621,7 +621,8 @@ "secondsToDurationDescription": "Converts the number of seconds provided into a duration.", "durationToSecondsDescription": "Converts the duration provided into the corresponding number of seconds.", "hasOptionDescription": "Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options.", - "arrayUniqueDescription": "Returns only the unique items from an array, removing duplicates while preserving the order of first occurrence." 
+ "arrayUniqueDescription": "Returns only the unique items from an array, removing duplicates while preserving the order of first occurrence.", + "arraySliceDescription": "Returns a sub-array starting at the given index (0-based, negative start counts from the end). Positive count takes elements forward, 0 means all remaining, negative count takes elements backward in reverse order." }, "functionnalGridViewFieldLinkRow": { "unnamed": "unnamed row {value}" diff --git a/web-frontend/modules/database/formula/functions.js b/web-frontend/modules/database/formula/functions.js index b3aab7ad11..d253796f87 100644 --- a/web-frontend/modules/database/formula/functions.js +++ b/web-frontend/modules/database/formula/functions.js @@ -2627,3 +2627,30 @@ export class BaserowArrayUnique extends BaserowFunctionDefinition { return 'array' } } + +export class BaserowArraySlice extends BaserowFunctionDefinition { + static getType() { + return 'array_slice' + } + + getDescription() { + const { $i18n: i18n } = this.app + return i18n.t('formulaFunctions.arraySliceDescription') + } + + getSyntaxUsage() { + return ['array_slice(array, start, count)'] + } + + getExamples() { + return [ + "array_slice(field('my lookup'), 0, 3)", + "array_slice(field('my lookup'), -2, 2)", + "array_slice(field('my lookup'), 3, -1)", + ] + } + + getFormulaType() { + return 'array' + } +} diff --git a/web-frontend/modules/database/plugin.js b/web-frontend/modules/database/plugin.js index fe292fc834..43ab23ab9a 100644 --- a/web-frontend/modules/database/plugin.js +++ b/web-frontend/modules/database/plugin.js @@ -274,6 +274,7 @@ import { BaserowGetFileCount, BaserowToUrl, BaserowArrayUnique, + BaserowArraySlice, } from '@baserow/modules/database/formula/functions' import { BaserowFormulaArrayType, @@ -858,6 +859,7 @@ export default defineNuxtPlugin({ $registry.register('formula_function', new BaserowIndex(context)) $registry.register('formula_function', new BaserowToUrl(context)) 
$registry.register('formula_function', new BaserowArrayUnique(context)) + $registry.register('formula_function', new BaserowArraySlice(context)) // Formula Types $registry.register('formula_type', new BaserowFormulaTextType(context)) diff --git a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js index 8f4f4ba632..9b5b2c8464 100644 --- a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js +++ b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js @@ -107,6 +107,7 @@ describe('Formula Functions Test', () => { 'is_image', 'tourl', 'array_unique', + 'array_slice', ] const frontendFunctionTypes = Object.keys( testApp.store.$registry.getAll('formula_function') From d9acf4f9abfbecc5042e4a64e0dbbdca459d0852 Mon Sep 17 00:00:00 2001 From: Davide Silvestri <75379892+silvestrid@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:39:29 +0200 Subject: [PATCH 4/5] refactor: use approximate count for admin pagination (#5119) --- backend/src/baserow/api/admin/users/views.py | 2 ++ backend/src/baserow/api/admin/views.py | 3 +- .../src/baserow/api/admin/workspaces/views.py | 2 ++ backend/src/baserow/api/pagination.py | 24 ++++++++++++++ backend/src/baserow/core/db.py | 32 ++++++++++++++++++- .../groups/test_workspaces_admin_views.py | 2 +- .../api/admin/users/test_users_admin_views.py | 2 +- .../database/api/rows/test_row_serializers.py | 26 ++++++++------- backend/tests/baserow/core/test_core_db.py | 26 +++++++++++++++ ...log_pagination_with_approximate_count.json | 9 ++++++ .../baserow_enterprise/api/audit_log/views.py | 2 ++ 11 files changed, 114 insertions(+), 16 deletions(-) create mode 100644 changelog/entries/unreleased/refactor/speedup_audit_log_pagination_with_approximate_count.json diff --git a/backend/src/baserow/api/admin/users/views.py b/backend/src/baserow/api/admin/users/views.py index 2118bd8e75..b3bacb002a 100644 --- 
a/backend/src/baserow/api/admin/users/views.py +++ b/backend/src/baserow/api/admin/users/views.py @@ -23,6 +23,7 @@ ) from baserow.api.admin.views import AdminListingView from baserow.api.decorators import map_exceptions, validate_body +from baserow.api.pagination import PageNumberPaginationWithApproximateCount from baserow.api.schemas import get_error_schema from baserow.api.user.registries import member_data_registry from baserow.api.user.schemas import authenticate_user_schema @@ -43,6 +44,7 @@ class UsersAdminView(AdminListingView): serializer_class = UserAdminResponseSerializer + pagination_class = PageNumberPaginationWithApproximateCount search_fields = ["id", "username", "first_name"] sort_field_mapping = { "id": "id", diff --git a/backend/src/baserow/api/admin/views.py b/backend/src/baserow/api/admin/views.py index 002dc9a9b8..3ac36e22b6 100755 --- a/backend/src/baserow/api/admin/views.py +++ b/backend/src/baserow/api/admin/views.py @@ -30,6 +30,7 @@ class APIListingView( APIView, SearchableViewMixin, SortableViewMixin, FilterableViewMixin ): serializer_class = None + pagination_class = PageNumberPagination search_fields: List[str] = ["id"] filters_field_mapping: Dict[str, str] = {} sort_field_mapping: Dict[str, str] = {} @@ -56,7 +57,7 @@ def get(self, request): queryset = self.apply_sorts_or_default_sort(sorts, queryset) queryset = self.apply_ids_filter(ids_param, queryset) - paginator = PageNumberPagination(limit_page_size=100) + paginator = self.pagination_class(limit_page_size=100) page = paginator.paginate_queryset(queryset, request, self) serializer = self.get_serializer(request, page, many=True) diff --git a/backend/src/baserow/api/admin/workspaces/views.py b/backend/src/baserow/api/admin/workspaces/views.py index 118bdc1728..8c29cbafb2 100644 --- a/backend/src/baserow/api/admin/workspaces/views.py +++ b/backend/src/baserow/api/admin/workspaces/views.py @@ -10,6 +10,7 @@ from baserow.api.admin.views import AdminListingView, APIListingView from 
baserow.api.decorators import map_exceptions from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST +from baserow.api.pagination import PageNumberPaginationWithApproximateCount from baserow.api.schemas import get_error_schema from baserow.core.admin.workspaces.exceptions import CannotDeleteATemplateGroupError from baserow.core.admin.workspaces.handler import WorkspacesAdminHandler @@ -27,6 +28,7 @@ class WorkspacesAdminView(AdminListingView): serializer_class = WorkspacesAdminResponseSerializer + pagination_class = PageNumberPaginationWithApproximateCount search_fields = ["id", "name"] sort_field_mapping = { "id": "id", diff --git a/backend/src/baserow/api/pagination.py b/backend/src/baserow/api/pagination.py index da672b2e3f..63fe91c631 100644 --- a/backend/src/baserow/api/pagination.py +++ b/backend/src/baserow/api/pagination.py @@ -1,3 +1,4 @@ +from functools import cached_property from typing import Protocol from django.core.paginator import Paginator as DjangoPaginator @@ -12,6 +13,8 @@ from rest_framework.response import Response from rest_framework.status import HTTP_400_BAD_REQUEST +from baserow.core.db import get_approximate_row_count + class Pageable(Protocol): def paginate_queryset(self, queryset, request, view=None): @@ -150,3 +153,24 @@ def get_paginated_response_schema(self, schema): "results": schema, }, } + + +class ApproximateCountPaginator(Paginator): + """ + A paginator that uses Postgres EXPLAIN to estimate the total row count + instead of running an expensive COUNT(*) query. + """ + + @cached_property + def count(self): + return get_approximate_row_count(self.object_list) + + +class PageNumberPaginationWithApproximateCount(PageNumberPagination): + """ + Page number pagination that uses an approximate count from Postgres EXPLAIN + instead of COUNT(*). Suitable for large tables like audit logs where an + exact count is not required. 
+ """ + + django_paginator_class = ApproximateCountPaginator diff --git a/backend/src/baserow/core/db.py b/backend/src/baserow/core/db.py index cf1646ca38..76e9333671 100644 --- a/backend/src/baserow/core/db.py +++ b/backend/src/baserow/core/db.py @@ -1,4 +1,5 @@ import contextlib +import json import random import time from collections import defaultdict @@ -20,7 +21,12 @@ from django.conf import settings from django.contrib.contenttypes.models import ContentType -from django.db import DEFAULT_DB_ALIAS, OperationalError, connection, transaction +from django.db import ( + DEFAULT_DB_ALIAS, + OperationalError, + connection, + transaction, +) from django.db.models import ForeignKey, ManyToManyField, Max, Model, Prefetch, QuerySet from django.db.models.functions import Collate from django.db.models.query import ModelIterable @@ -37,6 +43,30 @@ ModelInstance = TypeVar("ModelInstance", bound=object) +APPROXIMATE_COUNT_THRESHOLD = 50_000 + + +def get_approximate_row_count(queryset: QuerySet) -> int: + """ + Uses Postgres EXPLAIN to estimate the row count for the given queryset. + If the estimate is below APPROXIMATE_COUNT_THRESHOLD, falls back to an + exact COUNT(*) since the cost is negligible for small result sets and + the planner estimate is unreliable at that scale. + + :param queryset: The queryset to estimate the row count for. + :return: An estimate of the row count for the queryset. + """ + + queryset = queryset.order_by() + plan = json.loads(queryset.explain(format="json")) + estimate = int(plan[0]["Plan"]["Plan Rows"]) + + if estimate < APPROXIMATE_COUNT_THRESHOLD: + return queryset.count() + + return estimate + + def get_database_dsn() -> str: """ Constructs the database DSN from the default database settings. 
diff --git a/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py b/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py index 5fd5646a58..65dd72fd73 100644 --- a/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py +++ b/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py @@ -52,7 +52,7 @@ class as the list users endpoint which already has extensive tests. We only need ) assert response.status_code == HTTP_403_FORBIDDEN - with django_assert_num_queries(5): + with django_assert_num_queries(6): response = api_client.get( reverse("api:admin:workspaces:list"), format="json", diff --git a/backend/tests/baserow/api/admin/users/test_users_admin_views.py b/backend/tests/baserow/api/admin/users/test_users_admin_views.py index 954b778c9d..fadfdc9f5a 100644 --- a/backend/tests/baserow/api/admin/users/test_users_admin_views.py +++ b/backend/tests/baserow/api/admin/users/test_users_admin_views.py @@ -771,7 +771,7 @@ def test_admin_getting_view_users_only_runs_two_queries_instead_of_n( first_name="Test1", is_staff=True, ) - fixed_num_of_queries_unrelated_to_number_of_rows = 6 + fixed_num_of_queries_unrelated_to_number_of_rows = 7 for i in range(10): data_fixture.create_user_workspace() diff --git a/backend/tests/baserow/contrib/database/api/rows/test_row_serializers.py b/backend/tests/baserow/contrib/database/api/rows/test_row_serializers.py index 76a0aa85fd..f1be74a1ba 100644 --- a/backend/tests/baserow/contrib/database/api/rows/test_row_serializers.py +++ b/backend/tests/baserow/contrib/database/api/rows/test_row_serializers.py @@ -521,12 +521,10 @@ def test_get_row_serializer_with_user_field_names( "value": "E", }, ], - "formula_multiple_collaborators": unordered( - [ - {"id": u2.id, "name": u2.first_name}, - {"id": u3.id, "name": u3.first_name}, - ] - ), + "formula_multiple_collaborators": [ + {"id": u2.id, "name": u2.first_name}, + {"id": u3.id, "name": u3.first_name}, + ], "formula_text": "test FORMULA", 
"count": "3", "rollup": "-122.222", @@ -540,12 +538,10 @@ def test_get_row_serializer_with_user_field_names( "multiple_collaborators_lookup": [ { "id": 1, - "value": unordered( - [ - {"id": u2.id, "name": u2.first_name}, - {"id": u3.id, "name": u3.first_name}, - ] - ), + "value": [ + {"id": u2.id, "name": u2.first_name}, + {"id": u3.id, "name": u3.first_name}, + ], }, { "id": 2, @@ -573,6 +569,12 @@ def test_get_row_serializer_with_user_field_names( ) ) test_result = json.loads(json.dumps(serializer_instance.data[0])) + expected_result["formula_multiple_collaborators"] = unordered( + expected_result["formula_multiple_collaborators"] + ) + expected_result["multiple_collaborators_lookup"][0]["value"] = unordered( + expected_result["multiple_collaborators_lookup"][0]["value"] + ) assert test_result == expected_result diff --git a/backend/tests/baserow/core/test_core_db.py b/backend/tests/baserow/core/test_core_db.py index cb6a422321..7011edc9df 100644 --- a/backend/tests/baserow/core/test_core_db.py +++ b/backend/tests/baserow/core/test_core_db.py @@ -28,6 +28,7 @@ LockedAtomicTransaction, MultiFieldPrefetchQuerysetMixin, QuerySet, + get_approximate_row_count, specific_iterator, specific_queryset, ) @@ -701,3 +702,28 @@ def test_specific_iterator_skip_missing_specific_objects(data_fixture): mock_logger.error.assert_called_once_with( f"The specific object with id {field_without_specific.id} does not exist." 
) + + +@pytest.mark.django_db +def test_get_approximate_row_count_falls_back_to_exact_for_small_tables(): + queryset = Workspace.objects.all() + count = get_approximate_row_count(queryset) + assert count == queryset.count() + + +@pytest.mark.django_db +def test_get_approximate_row_count_returns_estimate_above_threshold(): + queryset = Workspace.objects.all() + with patch("baserow.core.db.APPROXIMATE_COUNT_THRESHOLD", 0): + count = get_approximate_row_count(queryset) + assert isinstance(count, int) + assert count >= 0 + + +@pytest.mark.django_db +def test_get_approximate_row_count_works_with_filtered_queryset(data_fixture): + data_fixture.create_workspace(name="test_ws_1") + data_fixture.create_workspace(name="test_ws_2") + queryset = Workspace.objects.filter(name__startswith="test_ws_") + count = get_approximate_row_count(queryset) + assert count == 2 diff --git a/changelog/entries/unreleased/refactor/speedup_audit_log_pagination_with_approximate_count.json b/changelog/entries/unreleased/refactor/speedup_audit_log_pagination_with_approximate_count.json new file mode 100644 index 0000000000..7af71e667d --- /dev/null +++ b/changelog/entries/unreleased/refactor/speedup_audit_log_pagination_with_approximate_count.json @@ -0,0 +1,9 @@ +{ + "type": "refactor", + "message": "Use Postgres EXPLAIN-based approximate count for audit log pagination to avoid expensive COUNT(*) on large tables.", + "issue_origin": "github", + "issue_number": null, + "domain": "core", + "bullet_points": [], + "created_at": "2026-04-02" +} diff --git a/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py b/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py index 1410e1702f..982be98edf 100755 --- a/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py +++ b/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py @@ -17,6 +17,7 @@ from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST from baserow.api.jobs.errors import ERROR_MAX_JOB_COUNT_EXCEEDED 
from baserow.api.jobs.serializers import JobSerializer +from baserow.api.pagination import PageNumberPaginationWithApproximateCount from baserow.api.schemas import CLIENT_SESSION_ID_SCHEMA_PARAMETER, get_error_schema from baserow.core.actions import DeleteWorkspaceActionType, OrderWorkspacesActionType from baserow.core.exceptions import WorkspaceDoesNotExist @@ -44,6 +45,7 @@ class AuditLogView(APIListingView): permission_classes = (IsAuthenticated,) + pagination_class = PageNumberPaginationWithApproximateCount serializer_class = AuditLogSerializer filters_field_mapping = { "user_id": "user_id", From c3958f264b028557e65f003c7ebc6b7fd39b14de Mon Sep 17 00:00:00 2001 From: Davide Silvestri <75379892+silvestrid@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:47:21 +0200 Subject: [PATCH 5/5] feat (database): `first` and `last` formulas (#5066) * feat: add first(), last() and generalized index() formula functions * fix: address PR review feedback and handle NaN index argument - Rename _BaserowIndexShortcut to BaserowIndexShortcut (drop leading _) - Remove unnecessary comments and section separators - Rename variable c to clone in JSONBArrayGetElement - Rename test to test_first_and_last_return_scalar_values, drop docstring - Remove step comments in test_index_generalized.py - Handle NaN/div-by-zero as index() argument (returns null instead of crash) * feat: update index() formula examples to include array fields --- .../database/formula/ast/function_defs.py | 152 ++- .../django_expressions.py | 41 + .../database/formula/types/formula_type.py | 13 + .../database/formula/types/formula_types.py | 16 + .../contrib/automation/history/utils.py | 6 +- .../formula/test_baserow_formula_results.py | 37 + .../formula/test_index_generalized.py | 866 ++++++++++++++++++ .../feature/generalize_index_first_last.json | 8 + .../baserow_premium/prompts/formula_docs.md | 3 + web-frontend/locales/en.json | 6 +- .../modules/database/formula/functions.js | 53 +- 
web-frontend/modules/database/plugin.js | 4 + .../database/formula/formulaFunctions.spec.js | 2 + 13 files changed, 1182 insertions(+), 25 deletions(-) create mode 100644 backend/tests/baserow/contrib/database/formula/test_index_generalized.py create mode 100644 changelog/entries/unreleased/feature/generalize_index_first_last.json diff --git a/backend/src/baserow/contrib/database/formula/ast/function_defs.py b/backend/src/baserow/contrib/database/formula/ast/function_defs.py index 779c537c47..4023f6d5c2 100644 --- a/backend/src/baserow/contrib/database/formula/ast/function_defs.py +++ b/backend/src/baserow/contrib/database/formula/ast/function_defs.py @@ -76,6 +76,7 @@ BaserowExpressionContext, BaserowFunctionCall, BaserowIntegerLiteral, + BaserowStringLiteral, ) from baserow.contrib.database.formula.expression_generator.django_expressions import ( AndExpr, @@ -84,6 +85,7 @@ GreaterThanExpr, GreaterThanOrEqualExpr, IsNullExpr, + JSONBArrayGetElement, JSONBArrayJoinValues, JSONBArraySlice, JSONBArrayUniqueByValue, @@ -265,6 +267,8 @@ def register_formula_functions(registry): registry.register(BaserowArrayLength()) registry.register(BaserowArrayJoinValues()) registry.register(BaserowArraySlice()) + registry.register(BaserowFirst()) + registry.register(BaserowLast()) # ManyToMany functions registry.register(BaserowStringAggManyToManyValues()) registry.register(BaserowManyToManyCount()) @@ -2607,6 +2611,45 @@ def to_django_expression( ) +class BaserowIndexShortcut(OneArgumentBaserowFunction): + arg_type = [BaserowFormulaValidType] + _index: int + + def type_function( + self, + func_call: BaserowFunctionCall[UnTyped], + arg: BaserowExpression[BaserowFormulaValidType], + ) -> BaserowExpression[BaserowFormulaType]: + if arg.many: + arg = arg.expression_type.collapse_many(arg) + + if not isinstance(arg.expression_type, BaserowFormulaArrayType): + return func_call.with_invalid_type(f"{self.type} requires an array input.") + + from baserow.contrib.database.formula.registries 
import ( + formula_function_registry, + ) + + num_type = BaserowFormulaNumberType(0) + index_func = formula_function_registry.get("index") + return index_func.call_and_type_with_args( + [arg, BaserowIntegerLiteral(self._index, num_type)] + ) + + def to_django_expression(self, arg: Expression) -> Expression: + raise NotImplementedError("type_function delegates to index") + + +class BaserowFirst(BaserowIndexShortcut): + type = "first" + _index = 0 + + +class BaserowLast(BaserowIndexShortcut): + type = "last" + _index = -1 + + class BaserowArrayLength(OneArgumentBaserowFunction): type = "array_length" arg_type = [BaserowFormulaArrayType] @@ -2979,36 +3022,105 @@ def to_django_expression(self, arg: Expression) -> Expression: ) -class BaserowIndex(TwoArgumentBaserowFunction): - arg1_type = [BaserowFormulaArrayType] - arg2_type = [BaserowFormulaNumberType] +def _index_output_field(mode): + """Return a fresh Django output_field for the given extraction mode.""" + + from baserow.contrib.database.formula.types.formula_types import ( + _lookup_formula_type_from_string, + ) + + try: + return _lookup_formula_type_from_string(mode).output_field_class() + except Exception: + return fields.TextField() + + +def _unwrap_literal_value(django_expr): + """ + Extract the Python value from a Django expression that wraps a + ``Value(...)`` — e.g. ``Cast(Value('x'), TextField())``. 
+ """ + + while not hasattr(django_expr, "value"): + if ( + hasattr(django_expr, "source_expressions") + and django_expr.source_expressions + ): + django_expr = django_expr.source_expressions[0] + else: + return None + return django_expr.value + +class BaserowIndex(BaserowFunctionDefinition): type = "index" + num_args = NumOfArgsBetween(2, 4) - def type_function( + @property + def arg_types(self) -> BaserowArgumentTypeChecker: + def type_checker(arg_index, arg_types): + if arg_index == 0: + return [BaserowFormulaValidType] + elif arg_index == 1: + return [BaserowFormulaNumberType] + else: + return [BaserowFormulaTextType] # mode + sql literals + + return type_checker + + def type_function_given_valid_args( self, + args: List[BaserowExpression[BaserowFormulaValidType]], func_call: BaserowFunctionCall[UnTyped], - arg1: BaserowExpression[BaserowFormulaValidType], - arg2: BaserowExpression[BaserowFormulaValidType], ) -> BaserowExpression[BaserowFormulaType]: - if not isinstance(arg1.expression_type.sub_type, BaserowFormulaSingleFileType): + if len(args) not in (2, 4): return func_call.with_invalid_type( - "index only currently supports indexing file fields." - ) - else: - if arg1.many: - arg1 = arg1.expression_type.collapse_many(arg1) - return func_call.with_args([arg1, arg2]).with_valid_type( - arg1.expression_type.sub_type + "index requires exactly 2 arguments: an array and an index." 
) - def to_django_expression(self, arg1: Expression, arg2: Expression) -> Expression: - return Func( - arg1, - Cast(arg2, fields.TextField()), - function="jsonb_extract_path", - output_field=JSONField(), + arg1, arg2 = args[0], args[1] + + if arg1.many: + arg1 = arg1.expression_type.collapse_many(arg1) + + if not isinstance(arg1.expression_type, BaserowFormulaArrayType): + return func_call.with_invalid_type("index requires an array input.") + + sub_type = arg1.expression_type.sub_type + + if len(args) == 4: + return func_call.with_args(list(args)).with_valid_type(sub_type) + + mode_literal = BaserowStringLiteral( + sub_type.array_index_mode, BaserowFormulaTextType() ) + sql_literal = BaserowStringLiteral( + sub_type.array_index_sql, BaserowFormulaTextType() + ) + + return func_call.with_args( + [arg1, arg2, mode_literal, sql_literal] + ).with_valid_type(sub_type) + + def to_django_expression_given_args( + self, + args: List["WrappedExpressionWithMetadata"], + context: BaserowExpressionContext, + ) -> "WrappedExpressionWithMetadata": + mode = _unwrap_literal_value(args[2].expression) or "text" + value_sql = _unwrap_literal_value(args[3].expression) or "{elem} ->> 'value'" + safe_index = handle_arg_being_nan( + args[1].expression, + Value(None, output_field=fields.IntegerField()), + args[1].expression, + ) + expr = JSONBArrayGetElement( + args[0].expression, + safe_index, + value_sql, + _index_output_field(mode), + ) + return WrappedExpressionWithMetadata.from_args(expr, args) class BaserowJsonbExtractPathText(BaserowFunctionDefinition): diff --git a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py index ba37d0ad26..cdbca72cb0 100644 --- a/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py +++ b/backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py @@ -163,6 +163,47 @@ 
def as_sql(self, compiler, connection, **extra_context): return sql, (*separator_params, *params) +class JSONBArrayGetElement(Expression): + """ + Extract a single element from a JSONB array by 0-based index (negative + counts from end) and optionally unwrap / cast the ``value`` key. + + *value_sql* is a SQL template with an ``{elem}`` placeholder that controls + how the element is extracted (e.g. ``({elem} ->> 'value')::numeric``). + Each formula type provides its own template via ``array_index_sql``. + + PostgreSQL's ``->`` operator natively handles negative indices and returns + NULL for out-of-bounds, so no CASE expression is needed. + """ + + def __init__(self, array_expr, index_expr, value_sql, output_field): + super().__init__(output_field=output_field) + self.array_expr = array_expr + self.index_expr = index_expr + self.value_sql = value_sql + + def resolve_expression( + self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False + ): + clone = self.copy() + clone.is_summary = summarize + clone.array_expr = self.array_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + clone.index_expr = self.index_expr.resolve_expression( + query, allow_joins, reuse, summarize, for_save + ) + return clone + + def as_sql(self, compiler, connection): + arr_sql, arr_params = compiler.compile(self.array_expr) + idx_sql, idx_params = compiler.compile(self.index_expr) + + elem_sql = f"({arr_sql}) -> ({idx_sql})::int" + sql = f"({self.value_sql.format(elem=elem_sql)})" + return sql, list(arr_params) + list(idx_params) + + class JSONBArraySlice(Expression): """ Slice a JSONB array with offset, limit, and optional reverse. 
diff --git a/backend/src/baserow/contrib/database/formula/types/formula_type.py b/backend/src/baserow/contrib/database/formula/types/formula_type.py index 68fe4a1410..afb85a6e6e 100644 --- a/backend/src/baserow/contrib/database/formula/types/formula_type.py +++ b/backend/src/baserow/contrib/database/formula/types/formula_type.py @@ -1,6 +1,7 @@ import abc from typing import TYPE_CHECKING, List, Type, TypeVar +from django.db import models from django.db.models import Expression, F, Model, Value from django.utils.functional import classproperty @@ -285,6 +286,18 @@ def can_represent_collaborators(self) -> bool: def item_is_in_nested_value_object_when_in_array(self) -> bool: return True + @property + def array_index_mode(self) -> str: + return self.type + + @property + def array_index_sql(self) -> str: + if not self.item_is_in_nested_value_object_when_in_array: + return "{elem}" + return "{elem} ->> 'value'" + + output_field_class = models.TextField + @property def can_have_db_index(self) -> bool: return False diff --git a/backend/src/baserow/contrib/database/formula/types/formula_types.py b/backend/src/baserow/contrib/database/formula/types/formula_types.py index 2be688299e..7f8bd65a29 100644 --- a/backend/src/baserow/contrib/database/formula/types/formula_types.py +++ b/backend/src/baserow/contrib/database/formula/types/formula_types.py @@ -80,6 +80,9 @@ class BaserowJSONBObjectBaseType(BaserowFormulaValidType, ABC): + array_index_sql = "{elem} -> 'value'" + output_field_class = JSONField + def parse_filter_value(self, field, model_field, value): """ Since the subclasses don't have a baserow_field_type or data might be stored @@ -361,6 +364,8 @@ class BaserowFormulaNumberType( ): type = "number" baserow_field_type = "number" + array_index_sql = "({elem} ->> 'value')::numeric" + output_field_class = models.DecimalField user_overridable_formatting_option_fields = [ "number_decimal_places", "number_prefix", @@ -516,6 +521,8 @@ class BaserowFormulaBooleanType( ): type = 
"boolean" baserow_field_type = "boolean" + array_index_sql = "({elem} ->> 'value')::boolean" + output_field_class = models.BooleanField can_order_by_in_array = True can_group_by = True can_have_db_index = True @@ -715,6 +722,8 @@ class BaserowFormulaDurationType( ): type = "duration" baserow_field_type = "duration" + array_index_sql = "({elem} ->> 'value')::interval" + output_field_class = models.DurationField user_overridable_formatting_option_fields = ["duration_format"] can_group_by = True can_order_by_in_array = True @@ -856,6 +865,7 @@ class BaserowFormulaDateType( can_order_by_in_array = True can_group_by = True can_have_db_index = True + output_field_class = models.DateTimeField def __init__( self, @@ -873,6 +883,11 @@ def __init__( self.date_show_tzinfo = date_show_tzinfo self.date_force_timezone = date_force_timezone + @property + def array_index_sql(self) -> str: + cast = "::timestamptz" if self.date_include_time else "::date" + return f"({{elem}} ->> 'value'){cast}" + @property def comparable_types(self) -> List[Type["BaserowFormulaValidType"]]: return [ @@ -1003,6 +1018,7 @@ class BaserowFormulaSingleFileType( can_order_by_in_array = False baserow_field_type = None item_is_in_nested_value_object_when_in_array = False + array_index_sql = "{elem}" can_represent_files = True def is_searchable(self, field): diff --git a/backend/tests/baserow/contrib/automation/history/utils.py b/backend/tests/baserow/contrib/automation/history/utils.py index 76ebaac38e..804da2780f 100644 --- a/backend/tests/baserow/contrib/automation/history/utils.py +++ b/backend/tests/baserow/contrib/automation/history/utils.py @@ -6,7 +6,11 @@ def assert_history( ): """Helper to test AutomationWorkflowHistory objects.""" - histories = list(AutomationWorkflowHistory.objects.filter(workflow=workflow)) + histories = list( + AutomationWorkflowHistory.objects.filter(workflow=workflow).order_by( + "started_on", "id" + ) + ) assert len(histories) == expected_count if expected_count > 0: history 
= histories[history_index] diff --git a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py index b833246332..bd70368a20 100644 --- a/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py +++ b/backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py @@ -3063,6 +3063,43 @@ def test_array_slice_empty_array(data_fixture): assert getattr(result, ref_slice_field.db_column) == [] +@pytest.mark.django_db +def test_first_and_last_return_scalar_values(data_fixture): + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field, b_rows, row_a1 = _setup_text_5_rows( + data_fixture, table_a, table_b, link_field, user + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + first_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="first_val", + formula="first(field('lookup'))", + ) + last_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="last_val", + formula="last(field('lookup'))", + ) + + table_a_model = table_a.get_model() + result = table_a_model.objects.get(id=row_a1.id) + + assert getattr(result, first_field.db_column) == "A" + assert getattr(result, last_field.db_column) == "E" + + @pytest.mark.django_db def test_array_slice_rejects_non_array_input(data_fixture): user = data_fixture.create_user() diff --git a/backend/tests/baserow/contrib/database/formula/test_index_generalized.py b/backend/tests/baserow/contrib/database/formula/test_index_generalized.py new file mode 100644 index 0000000000..ad278e63e1 --- /dev/null +++ b/backend/tests/baserow/contrib/database/formula/test_index_generalized.py @@ -0,0 +1,866 @@ +""" +Tests for generalized index(), first(), last() on all array types. 
+index() returns a scalar (the sub_type), supports 0-based and negative indices. +first(arr) = index(arr, 0), last(arr) = index(arr, -1). +""" + +from datetime import date, timedelta +from decimal import Decimal + +import pytest + +from baserow.contrib.database.fields.handler import FieldHandler +from baserow.contrib.database.formula.types.exceptions import InvalidFormulaType +from baserow.contrib.database.rows.handler import RowHandler + + +def _setup_single_select(df, table): + field = df.create_single_select_field(table=table, name="target") + opt_a = df.create_select_option(field=field, value="Alpha", order=0) + opt_b = df.create_select_option(field=field, value="Beta", order=1) + opt_c = df.create_select_option(field=field, value="Gamma", order=2) + return field, opt_a.id, opt_b.id, opt_c.id + + +def _to_date(val: str) -> date: + return date.fromisoformat(val) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "setup_fn,values,to_expected", + [ + ( + lambda df, table: df.create_text_field(table=table, name="target"), + ["apple", "banana", "cherry", "date"], + None, + ), + ( + lambda df, table: df.create_number_field( + table=table, name="target", number_decimal_places=2 + ), + [Decimal("10.50"), Decimal("20.00"), Decimal("30.75"), Decimal("40.00")], + None, + ), + ( + lambda df, table: df.create_boolean_field(table=table, name="target"), + [True, False, True, False], + None, + ), + # Date: write as ISO string, read back as date objects + ( + lambda df, table: df.create_date_field(table=table, name="target"), + ["2024-01-15", "2024-06-01", "2024-12-25", "2025-03-01"], + _to_date, + ), + ( + lambda df, table: df.create_duration_field( + table=table, name="target", duration_format="h:mm" + ), + [ + timedelta(hours=1, minutes=30), + timedelta(hours=2), + timedelta(hours=3, minutes=45), + timedelta(hours=5), + ], + None, + ), + ( + lambda df, table: df.create_url_field(table=table, name="target"), + [ + "https://example.com", + "https://baserow.io", + 
"https://python.org", + "https://django.com", + ], + None, + ), + ( + lambda df, table: df.create_email_field(table=table, name="target"), + [ + "alice@example.com", + "bob@example.com", + "carol@example.com", + "dave@example.com", + ], + None, + ), + ( + lambda df, table: df.create_phone_number_field(table=table, name="target"), + ["+1234567890", "+0987654321", "+1111111111", "+2222222222"], + None, + ), + ( + lambda df, table: df.create_rating_field(table=table, name="target"), + [3, 5, 1, 4], + None, + ), + ], + ids=[ + "text", + "number", + "boolean", + "date", + "duration", + "url", + "email", + "phone", + "rating", + ], +) +def test_index_first_last_scalar_types( + data_fixture, + api_client, + setup_fn, + values, + to_expected, +): + """ + index(lookup, n) returns the scalar value at position n. + first() = index(arr, 0), last() = index(arr, -1). + Parametrized across scalar field types. + + Also verifies that row updates, row additions with empty values, and + row deletions in the linked table correctly recalculate the formula + and that the formula table can still be fetched via the API afterwards. 
+ """ + + user, token = data_fixture.create_user_and_token() + database = data_fixture.create_database_application(user=user) + table_a = data_fixture.create_database_table(database=database, name="A") + table_b = data_fixture.create_database_table(database=database, name="B") + data_fixture.create_text_field(table=table_a, name="pa", primary=True) + data_fixture.create_text_field(table=table_b, name="pb", primary=True) + + link_field = FieldHandler().create_field( + user, table_a, "link_row", name="link", link_row_table=table_b + ) + + target_field = setup_fn(data_fixture, table_b) + + expected_vals = [to_expected(v) for v in values] if to_expected else values + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [{target_field.db_column: v} for v in values], + ) + .created_rows + ) + + # Row A1: links to first 3; Row A2: empty; Row A3: single link + row_a1, row_a2, row_a3 = ( + RowHandler() + .create_rows( + user, + table_a, + [ + {link_field.db_column: [r.id for r in rows_b[:3]]}, + {link_field.db_column: []}, + {link_field.db_column: [rows_b[3].id]}, + ], + ) + .created_rows + ) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target_field.name}')", + ) + + first_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="first_val", + formula="first(field('lookup'))", + ) + last_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="last_val", + formula="last(field('lookup'))", + ) + index0 = FieldHandler().create_field( + user, + table_a, + "formula", + name="idx0", + formula="index(field('lookup'), 0)", + ) + index1 = FieldHandler().create_field( + user, + table_a, + "formula", + name="idx1", + formula="index(field('lookup'), 1)", + ) + index_neg1 = FieldHandler().create_field( + user, + table_a, + "formula", + name="idx_neg1", + formula="index(field('lookup'), -1)", + ) + + # Same via a formula field referencing the 
target field indirectly. + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target_field.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_first = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_first_val", + formula="first(field('ref_lookup'))", + ) + ref_last = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_last_val", + formula="last(field('ref_lookup'))", + ) + ref_index0 = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_idx0", + formula="index(field('ref_lookup'), 0)", + ) + ref_index1 = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_idx1", + formula="index(field('ref_lookup'), 1)", + ) + + model = table_a.get_model() + r1 = model.objects.get(id=row_a1.id) + r2 = model.objects.get(id=row_a2.id) + + assert getattr(r1, index0.db_column) == expected_vals[0] + assert getattr(r1, index1.db_column) == expected_vals[1] + assert getattr(r1, index_neg1.db_column) == expected_vals[2] + assert getattr(r1, first_field.db_column) == expected_vals[0] + assert getattr(r1, last_field.db_column) == expected_vals[2] + + assert getattr(r2, index0.db_column) is None + assert getattr(r2, first_field.db_column) is None + + # Row A3: single element — first and last are the same + r3 = model.objects.get(id=row_a3.id) + assert getattr(r3, first_field.db_column) == expected_vals[3] + assert getattr(r3, last_field.db_column) == expected_vals[3] + assert getattr(r3, index0.db_column) == expected_vals[3] + assert getattr(r3, index_neg1.db_column) == expected_vals[3] + + # Formula-ref path must match + assert getattr(r1, ref_first.db_column) == expected_vals[0] + assert getattr(r1, ref_last.db_column) == expected_vals[2] + assert getattr(r1, ref_index0.db_column) == expected_vals[0] + assert getattr(r1, 
ref_index1.db_column) == expected_vals[1] + assert getattr(r2, ref_first.db_column) is None + assert getattr(r3, ref_first.db_column) == expected_vals[3] + assert getattr(r3, ref_last.db_column) == expected_vals[3] + + RowHandler().update_rows( + user, + table_a, + [ + { + "id": row_a1.id, + link_field.db_column: [r.id for r in rows_b], + } + ], + ) + + model = table_a.get_model() + r1 = model.objects.get(id=row_a1.id) + assert getattr(r1, last_field.db_column) == expected_vals[3] + assert getattr(r1, ref_last.db_column) == expected_vals[3] + + RowHandler().update_rows( + user, + table_b, + [{"id": rows_b[0].id, target_field.db_column: values[3]}], + ) + + model = table_a.get_model() + r1 = model.objects.get(id=row_a1.id) + assert getattr(r1, first_field.db_column) == expected_vals[3] + assert getattr(r1, ref_first.db_column) == expected_vals[3] + + RowHandler().delete_rows(user, table_b, [rows_b[0].id]) + + from baserow.contrib.database.views.handler import ViewHandler + + grid = ViewHandler().create_view(user, table_a, "grid", name="test") + response = api_client.get( + f"/api/database/views/grid/{grid.id}/", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == 200, ( + f"API crash after update/delete: {response.content.decode()[:300]}" + ) + + +@pytest.mark.django_db +def test_index_single_select(data_fixture): + """index() on a single_select lookup returns the select option object.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target, opt_a_id, opt_b_id, opt_c_id = _setup_single_select(data_fixture, table_b) + + row_b1, row_b2, row_b3 = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: opt_a_id}, + {target.db_column: opt_b_id}, + {target.db_column: opt_c_id}, + ], + ) + .created_rows + ) + + (row_a,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [row_b1.id, row_b2.id, row_b3.id]}], + ) + .created_rows + 
) + + lookup_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + first_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="first_val", + formula="first(field('lookup'))", + ) + last_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="last_val", + formula="last(field('lookup'))", + ) + + # Same via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_first = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_first_val", + formula="first(field('ref_lookup'))", + ) + ref_last = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_last_val", + formula="last(field('ref_lookup'))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + + first_val = getattr(result, first_field.db_column) + assert first_val["value"] == "Alpha" + + last_val = getattr(result, last_field.db_column) + assert last_val["value"] == "Gamma" + + # Formula-ref path must match + ref_first_val = getattr(result, ref_first.db_column) + assert ref_first_val["value"] == "Alpha" + ref_last_val = getattr(result, ref_last.db_column) + assert ref_last_val["value"] == "Gamma" + + +@pytest.mark.django_db +def test_index_multiple_select(data_fixture): + """index() on a multiple_select lookup returns the list of selected options.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_multiple_select_field(table=table_b, name="target") + opt_a = data_fixture.create_select_option(field=target, value="Red", order=0) + opt_b = 
data_fixture.create_select_option(field=target, value="Blue", order=1) + opt_c = data_fixture.create_select_option(field=target, value="Green", order=2) + + row_b1, row_b2 = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: [opt_a.id, opt_b.id]}, # Red, Blue + {target.db_column: [opt_c.id]}, # Green + ], + ) + .created_rows + ) + + (row_a,) = ( + RowHandler() + .create_rows( + user, + table_a, + [{link_field.db_column: [row_b1.id, row_b2.id]}], + ) + .created_rows + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + first_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="first_val", + formula="first(field('lookup'))", + ) + last_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="last_val", + formula="last(field('lookup'))", + ) + + # Same via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_first = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_first_val", + formula="first(field('ref_lookup'))", + ) + ref_last = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_last_val", + formula="last(field('ref_lookup'))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + + first_val = getattr(result, first_field.db_column) + assert isinstance(first_val, list) + assert {o["value"] for o in first_val} == {"Red", "Blue"} + + last_val = getattr(result, last_field.db_column) + assert isinstance(last_val, list) + assert {o["value"] for o in last_val} == {"Green"} + + # Formula-ref path must match + ref_first_val = getattr(result, ref_first.db_column) + assert 
isinstance(ref_first_val, list) + assert {o["value"] for o in ref_first_val} == {"Red", "Blue"} + ref_last_val = getattr(result, ref_last.db_column) + assert isinstance(ref_last_val, list) + assert {o["value"] for o in ref_last_val} == {"Green"} + + +@pytest.mark.django_db +def test_index_out_of_bounds(data_fixture): + """index() returns None for out-of-bounds indices.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + (row_b,) = ( + RowHandler() + .create_rows(user, table_b, [{target.db_column: "only"}]) + .created_rows + ) + + (row_a,) = ( + RowHandler() + .create_rows(user, table_a, [{link_field.db_column: [row_b.id]}]) + .created_rows + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + oob_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="oob", + formula="index(field('lookup'), 99)", + ) + neg_oob = FieldHandler().create_field( + user, + table_a, + "formula", + name="neg_oob", + formula="index(field('lookup'), -99)", + ) + + # Same via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_oob = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_oob", + formula="index(field('ref_lookup'), 99)", + ) + ref_neg_oob = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_neg_oob", + formula="index(field('ref_lookup'), -99)", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, oob_field.db_column) is None + assert getattr(result, neg_oob.db_column) is 
None + assert getattr(result, ref_oob.db_column) is None + assert getattr(result, ref_neg_oob.db_column) is None + + +@pytest.mark.django_db +def test_index_on_empty_array(data_fixture): + """index() on an empty lookup returns None.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + data_fixture.create_text_field(table=table_b, name="target") + + (row_a,) = ( + RowHandler() + .create_rows(user, table_a, [{link_field.db_column: []}]) + .created_rows + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', 'target')", + ) + + idx_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="idx", + formula="index(field('lookup'), 0)", + ) + + # Same via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula="field('target')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_idx = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_idx", + formula="index(field('ref_lookup'), 0)", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, idx_field.db_column) is None + assert getattr(result, ref_idx.db_column) is None + + +@pytest.mark.django_db +def test_first_array_unique_composability(data_fixture): + """first(array_unique(lookup)) returns the first unique value.""" + + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + target = data_fixture.create_text_field(table=table_b, name="target") + + rows_b = ( + RowHandler() + .create_rows( + user, + table_b, + [ + {target.db_column: "dup"}, + {target.db_column: "unique"}, + {target.db_column: "dup"}, + ], + ) + .created_rows + ) + + (row_a,) = ( + RowHandler() 
+ .create_rows( + user, + table_a, + [{link_field.db_column: [r.id for r in rows_b]}], + ) + .created_rows + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{target.name}')", + ) + + first_unique = FieldHandler().create_field( + user, + table_a, + "formula", + name="first_unique", + formula="first(array_unique(field('lookup')))", + ) + last_unique = FieldHandler().create_field( + user, + table_a, + "formula", + name="last_unique", + formula="last(array_unique(field('lookup')))", + ) + + # Same via formula-ref path + ref_target = FieldHandler().create_field( + user, + table_b, + "formula", + name="ref_target", + formula=f"field('{target.name}')", + ) + FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_lookup", + formula=f"lookup('{link_field.name}', '{ref_target.name}')", + ) + ref_first_unique = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_first_unique", + formula="first(array_unique(field('ref_lookup')))", + ) + ref_last_unique = FieldHandler().create_field( + user, + table_a, + "formula", + name="ref_last_unique", + formula="last(array_unique(field('ref_lookup')))", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, first_unique.db_column) == "dup" + assert getattr(result, last_unique.db_column) == "unique" + assert getattr(result, ref_first_unique.db_column) == "dup" + assert getattr(result, ref_last_unique.db_column) == "unique" + + +@pytest.mark.django_db +def test_index_rejects_non_array(data_fixture): + """index() on a non-array field produces a formula error.""" + + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + data_fixture.create_text_field(table=table, name="name", primary=True) + + with pytest.raises(InvalidFormulaType, match="array"): + FieldHandler().create_field( + user, + table, + "formula", + name="bad", + 
formula="index(field('name'), 0)", + ) + + +@pytest.mark.django_db +def test_index_file_field_still_works(data_fixture): + """index() on a file array still works (backward compatibility).""" + + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + file_field = data_fixture.create_file_field(table=table, name="files") + + user_file = data_fixture.create_user_file() + RowHandler().create_rows( + user, + table, + [ + { + file_field.db_column: [ + {"name": user_file.name, "visible_name": "test.txt"} + ] + } + ], + ) + + idx_field = FieldHandler().create_field( + user, + table, + "formula", + name="first_file", + formula="index(field('files'), 0)", + ) + + model = table.get_model() + result = model.objects.first() + val = getattr(result, idx_field.db_column) + # File index should return the file object (JSONB) + assert val is not None + assert "visible_name" in val + + +@pytest.mark.django_db +def test_index_nan_argument_returns_null(data_fixture): + user = data_fixture.create_user() + table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user) + text_field = data_fixture.create_text_field(table=table_b, name="target") + + b_row = ( + RowHandler() + .create_rows(user, table_b, [{text_field.db_column: "A"}]) + .created_rows[0] + ) + + row_a = ( + RowHandler() + .create_rows(user, table_a, [{link_field.db_column: [b_row.id]}]) + .created_rows[0] + ) + + FieldHandler().create_field( + user, + table_a, + "formula", + name="lookup", + formula=f"lookup('{link_field.name}', '{text_field.name}')", + ) + + nan_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="nan_index", + formula="index(field('lookup'), tonumber('x'))", + ) + div_zero_field = FieldHandler().create_field( + user, + table_a, + "formula", + name="div_zero_index", + formula="index(field('lookup'), 1/0)", + ) + + model = table_a.get_model() + result = model.objects.get(id=row_a.id) + assert getattr(result, nan_field.db_column) is 
None + assert getattr(result, div_zero_field.db_column) is None diff --git a/changelog/entries/unreleased/feature/generalize_index_first_last.json b/changelog/entries/unreleased/feature/generalize_index_first_last.json new file mode 100644 index 0000000000..71aa642796 --- /dev/null +++ b/changelog/entries/unreleased/feature/generalize_index_first_last.json @@ -0,0 +1,8 @@ +{ + "type": "feature", + "message": "Generalize the `index` formula to work with any array type (not just file fields) and add `first` and `last` convenience functions.", + "domain": "database", + "issue_number": 5065, + "bullet_points": [], + "created_at": "2026-03-27" +} diff --git a/premium/backend/src/baserow_premium/prompts/formula_docs.md b/premium/backend/src/baserow_premium/prompts/formula_docs.md index d17656c9f4..50a73de63d 100644 --- a/premium/backend/src/baserow_premium/prompts/formula_docs.md +++ b/premium/backend/src/baserow_premium/prompts/formula_docs.md @@ -192,6 +192,9 @@ These functions work with arrays and lookup values to perform calculations acros | has_option | Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options. | has_option(multiple select, text); has_option(lookup(link row, single select), text) | has_option(field('multiple select'), 'option_a'); has_option(lookup(field('link row'), field('single select')), 'option_a') | | array_unique | Removes duplicate values from a lookup array, preserving first-occurrence order. Only works on arrays from lookup fields; does not support file fields. | array_unique(a lookup array) | array_unique(field('my lookup field')); count(array_unique(field('lookup'))); join(array_unique(field('lookup')), ', ') | | array_slice | Returns a sub-array starting at the given 0-based index. Negative start counts from the end. Positive count takes elements forward, 0 means all remaining, negative count takes elements backward in reverse order. 
| array_slice(array, start, count) | array_slice(field('lookup'), 0, 3); array_slice(field('lookup'), -2, 2) | +| index | Returns the element at the given 0-based position from an array. Negative indices count from the end (-1 is last). Returns null for out-of-bounds. | index(array, number) | index(field('lookup'), 0); index(field('lookup'), -1) | +| first | Returns the first element from an array (shortcut for index(array, 0)). | first(array) | first(field('lookup')) | +| last | Returns the last element from an array (shortcut for index(array, -1)). | last(array) | last(field('lookup')) | ### URL Functions diff --git a/web-frontend/locales/en.json b/web-frontend/locales/en.json index d3ffd52c9b..63cd341a5e 100644 --- a/web-frontend/locales/en.json +++ b/web-frontend/locales/en.json @@ -617,12 +617,14 @@ "getImageWidthDescription": "Returns the image width from a single file returned from the index function.", "getImageHeightDescription": "Returns the image height from a single file returned from the index function.", "getIsImageDescription": "Returns if the single file returned from the index function is an image or not.", - "indexDescription": "Returns the file from a file field at the position provided by the second argument.", + "indexDescription": "Returns the element at the given 0-based position from an array value (for example from a lookup, file, or other array-typed field). 
Negative indices count from the end (-1 is last).", "secondsToDurationDescription": "Converts the number of seconds provided into a duration.", "durationToSecondsDescription": "Converts the duration provided into the corresponding number of seconds.", "hasOptionDescription": "Returns true if the first argument is a multiple select field or a lookup to a single select field and the second argument is one of the options.", "arrayUniqueDescription": "Returns only the unique items from an array, removing duplicates while preserving the order of first occurrence.", - "arraySliceDescription": "Returns a sub-array starting at the given index (0-based, negative start counts from the end). Positive count takes elements forward, 0 means all remaining, negative count takes elements backward in reverse order." + "arraySliceDescription": "Returns a sub-array starting at the given index (0-based, negative start counts from the end). Positive count takes elements forward, 0 means all remaining, negative count takes elements backward in reverse order.", + "firstDescription": "Returns the first element from an array value (for example from a lookup, file, or other array-typed field).", + "lastDescription": "Returns the last element from an array value (for example from a lookup, file, or other array-typed field)."
}, "functionnalGridViewFieldLinkRow": { "unnamed": "unnamed row {value}" diff --git a/web-frontend/modules/database/formula/functions.js b/web-frontend/modules/database/formula/functions.js index d253796f87..0abab4293a 100644 --- a/web-frontend/modules/database/formula/functions.js +++ b/web-frontend/modules/database/formula/functions.js @@ -2570,11 +2570,14 @@ export class BaserowIndex extends BaserowFunctionDefinition { } getSyntaxUsage() { - return ['index(a file field, a number)'] + return ['index(a file field, a number)', 'index(an array field, a number)'] } getExamples() { - return ['index(field("File field"), 0)'] + return [ + 'index(field("File field"), 0)', + 'index(field("Link row field"), 0)', + ] } getFormulaType() { @@ -2654,3 +2657,49 @@ export class BaserowArraySlice extends BaserowFunctionDefinition { return 'array' } } + +export class BaserowFirst extends BaserowFunctionDefinition { + static getType() { + return 'first' + } + + getDescription() { + const { $i18n: i18n } = this.app + return i18n.t('formulaFunctions.firstDescription') + } + + getSyntaxUsage() { + return ['first(array)'] + } + + getExamples() { + return ["first(field('my lookup'))"] + } + + getFormulaType() { + return 'special' + } +} + +export class BaserowLast extends BaserowFunctionDefinition { + static getType() { + return 'last' + } + + getDescription() { + const { $i18n: i18n } = this.app + return i18n.t('formulaFunctions.lastDescription') + } + + getSyntaxUsage() { + return ['last(array)'] + } + + getExamples() { + return ["last(field('my lookup'))"] + } + + getFormulaType() { + return 'special' + } +} diff --git a/web-frontend/modules/database/plugin.js b/web-frontend/modules/database/plugin.js index 43ab23ab9a..ee031533b4 100644 --- a/web-frontend/modules/database/plugin.js +++ b/web-frontend/modules/database/plugin.js @@ -275,6 +275,8 @@ import { BaserowToUrl, BaserowArrayUnique, BaserowArraySlice, + BaserowFirst, + BaserowLast, } from 
'@baserow/modules/database/formula/functions' import { BaserowFormulaArrayType, @@ -860,6 +862,8 @@ export default defineNuxtPlugin({ $registry.register('formula_function', new BaserowToUrl(context)) $registry.register('formula_function', new BaserowArrayUnique(context)) $registry.register('formula_function', new BaserowArraySlice(context)) + $registry.register('formula_function', new BaserowFirst(context)) + $registry.register('formula_function', new BaserowLast(context)) // Formula Types $registry.register('formula_type', new BaserowFormulaTextType(context)) diff --git a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js index 9b5b2c8464..95660174f5 100644 --- a/web-frontend/test/unit/database/formula/formulaFunctions.spec.js +++ b/web-frontend/test/unit/database/formula/formulaFunctions.spec.js @@ -108,6 +108,8 @@ describe('Formula Functions Test', () => { 'tourl', 'array_unique', 'array_slice', + 'first', + 'last', ] const frontendFunctionTypes = Object.keys( testApp.store.$registry.getAll('formula_function')