diff --git a/backend/src/baserow/contrib/database/apps.py b/backend/src/baserow/contrib/database/apps.py
index 90c20feded..57f26d1b64 100755
--- a/backend/src/baserow/contrib/database/apps.py
+++ b/backend/src/baserow/contrib/database/apps.py
@@ -5,6 +5,7 @@
 from django.db import ProgrammingError
 from django.db.models.signals import post_migrate, pre_migrate
 
+from baserow.contrib.database.fields.utils.pg_datetime import pg_init
 from baserow.contrib.database.table.cache import clear_generated_model_cache
 from baserow.contrib.database.table.operations import RestoreDatabaseTableOperationType
 from baserow.core.registries import (
@@ -1140,6 +1141,9 @@ def ready(self):
         get_user_model()._meta._expire_cache = lambda *a, **kw: None
         SelectOption._meta._expire_cache = lambda *a, **kw: None
 
+        # date/datetime min/max year handling - replace overflowed date with None
+        pg_init()
+
         # noinspection PyPep8Naming
         def clear_generated_model_cache_receiver(sender, **kwargs):
diff --git a/backend/src/baserow/contrib/database/fields/field_types.py b/backend/src/baserow/contrib/database/fields/field_types.py
index f14325f2f5..feb1b5bc61 100755
--- a/backend/src/baserow/contrib/database/fields/field_types.py
+++ b/backend/src/baserow/contrib/database/fields/field_types.py
@@ -1386,14 +1386,27 @@ def get_alter_column_prepare_new_value(self, connection, from_field, to_field):
             ELSEIF p_in IS NULL THEN
                 p_in = null;
             ELSE
-                p_in = GREATEST(
-                    {sql_function}(p_in::text, 'FM{sql_format}'),
-                    '0001-01-01'::{sql_type}
-                );
+                p_in = case when
+                    {sql_function}(p_in::text, 'FM{sql_format}')
+                    between '0001-01-01'::{sql_type}
+                    and '9999-12-31'::{sql_type}
+                then
+                    {sql_function}(p_in::text, 'FM{sql_format}')
+                else NULL
+                end;
+
             END IF;
         exception when others then
             begin
-                p_in = GREATEST(p_in::{sql_type}, '0001-01-01'::{sql_type});
+                p_in = case when
+                    p_in::{sql_type}
+                    between '0001-01-01'::{sql_type}
+                    and '9999-12-31'::{sql_type}
+                then
+                    p_in::{sql_type}
+                else NULL
+                end;
+
                 exception when others then
                     p_in = p_default;
             end;
diff --git a/backend/src/baserow/contrib/database/fields/utils/pg_datetime.py b/backend/src/baserow/contrib/database/fields/utils/pg_datetime.py
new file mode 100644
index 0000000000..959f9c020d
--- /dev/null
+++ b/backend/src/baserow/contrib/database/fields/utils/pg_datetime.py
@@ -0,0 +1,140 @@
+import typing
+
+from baserow.core.psycopg import is_psycopg3, psycopg
+
+if is_psycopg3:
+    from django.db.backends.signals import connection_created
+
+    from baserow.core.psycopg import (
+        DataError,
+        DateBinaryLoader,
+        DateLoader,
+        TimestampBinaryLoader,
+        TimestampLoader,
+        TimestamptzBinaryLoader,
+        TimestamptzLoader,
+    )
+
+    class _DateOverflowLoaderMixin:
+        def load(self, data):
+            try:
+                return super().load(data)
+            except DataError:
+                return None
+
+    class _TimestamptzOverflowLoaderMixin:
+        timezone = None
+
+        def load(self, data):
+            try:
+                res = super().load(data)
+                return res.replace(tzinfo=self.timezone)
+            except DataError:
+                return None
+
+    class BaserowDateLoader(_DateOverflowLoaderMixin, DateLoader):
+        pass
+
+    class BaserowDateBinaryLoader(_DateOverflowLoaderMixin, DateBinaryLoader):
+        pass
+
+    class BaserowTimestampLoader(_DateOverflowLoaderMixin, TimestampLoader):
+        pass
+
+    class BaserowTimestampBinaryLoader(_DateOverflowLoaderMixin, TimestampBinaryLoader):
+        pass
+
+    def pg_init():
+        """
+        Registers loaders for psycopg3 to handle date overflow.
+ """ + + psycopg.adapters.register_loader("date", BaserowDateLoader) + psycopg.adapters.register_loader("date", BaserowDateBinaryLoader) + + psycopg.adapters.register_loader("timestamp", BaserowTimestampLoader) + psycopg.adapters.register_loader("timestamp", BaserowTimestampBinaryLoader) + + # psycopg3 and timezones allow per-connection / per-cursor adapting. This is + # done in django/db/backends/postgresql/psycopg_any.py in a hook that + # registries tz aware adapter for each connection/cursor. + # We can re-register our loaders here, but note that this will work on + # per-connection tz setting. Cursors still will use django-provided adapters + def register_context(signal, sender, connection, **kwargs): + register_on_connection(connection) + + connection_created.connect(register_context) + + def register_on_connection(connection): + """ + Registers timestamptz pg type loaders for a connection. + """ + + ctx = connection.connection.adapters + + class SpecificTzLoader(_TimestamptzOverflowLoaderMixin, TimestamptzLoader): + timezone = connection.timezone + + class SpecificTzBinaryLoader( + _TimestamptzOverflowLoaderMixin, TimestamptzBinaryLoader + ): + timezone = connection.timezone + + ctx.register_loader("timestamptz", SpecificTzLoader) + ctx.register_loader("timestamptz", SpecificTzBinaryLoader) + +else: + from django.db.utils import DataError as DjangoDataError + + from psycopg2._psycopg import ( + DATE, + DATEARRAY, + DATETIME, + DATETIMEARRAY, + DATETIMETZ, + DATETIMETZARRAY, + DataError, + ) + + def _make_adapter( + type_adapter, + ) -> typing.Callable[[typing.Any, typing.Any], typing.Any]: + def adapter(value, cur): + try: + return type_adapter(value, cur) + except (DataError, DjangoDataError, ValueError): + return + + return adapter + + def pg_init(): + """ + Registers loaders for psycopg2 to handle date overflow. 
+ """ + + for type_adapter, typea_adapter in ( + ( + DATE, + DATEARRAY, + ), + ( + DATETIME, + DATETIMEARRAY, + ), + ( + DATETIMETZ, + DATETIMETZARRAY, + ), + ): + oid = type_adapter.values + array_oid = typea_adapter.values + typename = type_adapter.name + handler = _make_adapter(type_adapter) + array_handler = _make_adapter(typea_adapter) + + ptype = psycopg.extensions.new_type(oid, typename, handler) + array_ptype = psycopg.extensions.new_type( + array_oid, typename, array_handler + ) + psycopg.extensions.register_type(ptype) + psycopg.extensions.register_type(array_ptype) diff --git a/backend/src/baserow/core/psycopg.py b/backend/src/baserow/core/psycopg.py index eaed5443a7..09f598ac9e 100644 --- a/backend/src/baserow/core/psycopg.py +++ b/backend/src/baserow/core/psycopg.py @@ -5,8 +5,20 @@ import psycopg # noqa: F401 from psycopg import errors, sql # noqa: F401 + # used for date type mapping + from psycopg.types.datetime import ( # noqa: F401 + DataError, + DateBinaryLoader, + DateLoader, + TimestampBinaryLoader, + TimestampLoader, + TimestamptzBinaryLoader, + TimestamptzLoader, + ) + else: import psycopg2 as psycopg # noqa: F401 + from psycopg2 import DataError # noqa: F401 from psycopg2 import errors, sql # noqa: F401 diff --git a/backend/tests/baserow/contrib/database/field/test_date_field_type.py b/backend/tests/baserow/contrib/database/field/test_date_field_type.py index e912681f69..6fd0322284 100644 --- a/backend/tests/baserow/contrib/database/field/test_date_field_type.py +++ b/backend/tests/baserow/contrib/database/field/test_date_field_type.py @@ -8,11 +8,12 @@ from baserow.contrib.database.fields.field_types import DateFieldType from baserow.contrib.database.fields.handler import FieldHandler -from baserow.contrib.database.fields.models import DateField +from baserow.contrib.database.fields.models import DateField, TextField from baserow.contrib.database.fields.registries import field_type_registry from baserow.contrib.database.fields.utils import DeferredForeignKeyUpdater from baserow.contrib.database.rows.handler import RowHandler from baserow.contrib.database.views.handler import ViewHandler +from baserow.core.psycopg import is_psycopg3 from baserow.core.registries import ImportExportConfig @@ -519,10 +520,8 @@ def test_negative_date_field_value(data_fixture): assert getattr(results[3], f"field_{datetime_field.id}") is None assert getattr(results[4], f"field_{date_field.id}") is None assert getattr(results[4], f"field_{datetime_field.id}") is None - assert getattr(results[5], f"field_{date_field.id}") == date(1, 1, 1) - assert getattr(results[5], f"field_{datetime_field.id}") == ( - datetime(1, 1, 1, tzinfo=timezone.utc) - ) + assert getattr(results[5], f"field_{date_field.id}") is None + assert getattr(results[5], f"field_{datetime_field.id}") is None assert getattr(results[6], f"field_{date_field.id}") is None assert getattr(results[6], f"field_{datetime_field.id}") is None assert getattr(results[7], f"field_{date_field.id}") == date(2010, 2, 3) @@ -738,3 +737,106 @@ def test_get_group_by_metadata_in_rows_with_date_field(data_fixture): ] ) } + + +@pytest.mark.django_db +def test_date_field_overflow(settings, data_fixture): + user = data_fixture.create_user() + table = data_fixture.create_database_table(user=user) + + field_handler = FieldHandler() + row_handler = RowHandler() + + date_field = field_handler.create_field( + user=user, + table=table, + type_name="text", + name="Date", + ) + invalid_date_value = "19999-01-01" + row = row_handler.create_row( + user=user, 
+        user=user, table=table, values={date_field.db_column: invalid_date_value}
+    )
+    assert getattr(row, date_field.db_column, None) == invalid_date_value
+
+    date_field = field_handler.update_field(
+        user=user, field=date_field, new_type_name="date", date_format="ISO"
+    )
+
+    assert isinstance(
+        table.get_model().get_field_object(date_field.db_column)["field"], DateField
+    )
+    out = row_handler.get_rows(table.get_model(), [row.id])
+    assert len(out) == 1
+    assert getattr(out[0], date_field.db_column, None) is None
+
+    date_field = field_handler.update_field(
+        user=user, field=date_field, new_type_name="text", date_format="ISO"
+    )
+
+    table.refresh_from_db()
+    assert isinstance(
+        table.get_model().get_field_object(date_field.db_column)["field"], TextField
+    )
+    out = row_handler.get_rows(table.get_model(), [row.id])
+    assert len(out) == 1
+    assert getattr(out[0], date_field.db_column, None) is None
+
+
+@pytest.mark.django_db
+def test_datetime_field_overflow(on_db_connection, data_fixture):
+    if is_psycopg3:
+        from baserow.contrib.database.fields.utils.pg_datetime import (
+            register_on_connection,
+        )
+
+        # manually register adapters, as signal-based registration will be called
+        # too late
+        on_db_connection(register_on_connection)
+
+    user = data_fixture.create_user()
+    table = data_fixture.create_database_table(user=user)
+
+    field_handler = FieldHandler()
+    row_handler = RowHandler()
+
+    date_field = field_handler.create_field(
+        user=user,
+        table=table,
+        type_name="text",
+        name="Date",
+    )
+    invalid_date_value = "19999-01-01 01:01"
+    row = row_handler.create_row(
+        user=user, table=table, values={date_field.db_column: invalid_date_value}
+    )
+    assert getattr(row, date_field.db_column, None) == invalid_date_value
+
+    date_field = field_handler.update_field(
+        user=user,
+        field=date_field,
+        new_type_name="date",
+        date_format="ISO",
+        date_include_time=True,
+        date_time_format="24",
+    )
+    assert isinstance(
+        table.get_model().get_field_object(date_field.db_column)["field"], DateField
+    )
+    out = row_handler.get_rows(table.get_model(), [row.id])
+    assert len(out) == 1
+
+    assert getattr(out[0], date_field.db_column, None) is None
+
+    date_field = field_handler.update_field(
+        user=user, field=date_field, new_type_name="text", date_format="ISO"
+    )
+
+    table.refresh_from_db()
+    assert isinstance(
+        table.get_model().get_field_object(date_field.db_column)["field"], TextField
+    )
+    out = row_handler.get_rows(table.get_model(), [row.id])
+    assert len(out) == 1
+
+    assert getattr(out[0], date_field.db_column, None) is None
diff --git a/backend/tests/baserow/contrib/database/file_import/test_file_import_tasks.py b/backend/tests/baserow/contrib/database/file_import/test_file_import_tasks.py
index 4ffcca2d72..f9fe9ad95a 100644
--- a/backend/tests/baserow/contrib/database/file_import/test_file_import_tasks.py
+++ b/backend/tests/baserow/contrib/database/file_import/test_file_import_tasks.py
@@ -1,5 +1,5 @@
 import json
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
 from typing import NamedTuple
 
 from django.conf import settings
@@ -1306,6 +1306,52 @@ def test_run_file_import_task_with_upsert_for_multiple_field_types(
     assert len(model.objects.filter(**{description.db_column: "updated bbb"})) == 1
 
 
+@pytest.mark.django_db(transaction=True)
+def test_run_file_import_task_with_date_validation(
+    data_fixture, patch_filefield_storage
+):
+    user = data_fixture.create_user()
+    database = data_fixture.create_database_application(user=user)
+    table = data_fixture.create_database_table(user=user, database=database)
+
+    f1 = data_fixture.create_date_field(table=table, order=1, name="date 1")
+    f2 = data_fixture.create_text_field(table=table, order=2, name="text 1")
+
+    model = table.get_model()
+
+    with patch_filefield_storage():
+        job = data_fixture.create_file_import_job(
+            data={
+                "data": [
+                    ["0000-01-01", "zero"],
+                    ["2020-01-01", "one"],
+                    ["19999-09-09", "two"],
+                ],
+            },
+            table=table,
+            user=user,
+            first_row_header=False,
+        )
+        run_async_job(job.id)
+
+    job.refresh_from_db()
+    assert job.finished
+    rows = model.objects.order_by("order").all()
+    # the first and last rows are discarded because their dates fall outside
+    # the supported range
+    assert [
+        (
+            getattr(r, f1.db_column),
+            getattr(r, f2.db_column),
+        )
+        for r in rows
+    ] == [
+        (
+            date(2020, 1, 1),
+            "one",
+        )
+    ]
+
+
 @pytest.mark.django_db(transaction=True)
 @pytest.mark.field_constraints
 def test_run_file_import_task_with_field_constraints(
@@ -1314,7 +1360,6 @@
     user = data_fixture.create_user()
     database = data_fixture.create_database_application(user=user)
     table = data_fixture.create_database_table(user=user, database=database)
-
     handler = FieldHandler()
     text_field = handler.create_field(
         user=user,
@@ -1344,7 +1389,6 @@ def test_run_file_import_task_with_field_constraints(
             first_row_header=False,
         )
         run_async_job(job.id)
-
     job.refresh_from_db()
     assert job.finished
     assert not job.failed
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
index 2344f45e44..c1cb3655e2 100755
--- a/backend/tests/conftest.py
+++ b/backend/tests/conftest.py
@@ -84,3 +84,13 @@ def get_path(tpath, /, mode="rb") -> IOBase:
     yield get_path
     if fhandle and not fhandle.closed:
         fhandle.close()
+
+
+@pytest.fixture()
+def on_db_connection(db):
+    def register(callback):
+        from django.db import connection
+
+        callback(connection)
+
+    yield register
diff --git a/changelog/entries/unreleased/bug/3324_psycopg_date_overflow_handling.json b/changelog/entries/unreleased/bug/3324_psycopg_date_overflow_handling.json
new file mode 100644
index 0000000000..004e458f6a
--- /dev/null
+++ b/changelog/entries/unreleased/bug/3324_psycopg_date_overflow_handling.json
@@ -0,0 +1,8 @@
+{
+    "type": "bug",
+    "message": "Handle date overflow in PostgreSQL -> Python conversion",
+    "domain": "database",
+    "issue_number": 3324,
+    "bullet_points": [],
+    "created_at": "2025-01-14"
+}
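
Note (not part of the patch): the core technique above is subclassing psycopg's type loaders so that values which are valid in PostgreSQL but unrepresentable in Python come back as None instead of raising. PostgreSQL dates reach year 5874897, while Python's datetime.date stops at year 9999, so a stored '19999-01-01' normally raises DataError at fetch time. Below is a minimal standalone sketch of that idea, assuming psycopg 3.x; the loader class name and the connection DSN are illustrative, not from the patch.

    import psycopg
    from psycopg import DataError
    from psycopg.types.datetime import DateLoader

    class NoneOnOverflowDateLoader(DateLoader):
        # Text-format "date" loader: swallow the overflow error and yield None.
        def load(self, data):
            try:
                return super().load(data)
            except DataError:
                # Valid PostgreSQL date, but outside Python's year 1-9999 range.
                return None

    # Register on the global adapters map; connections created afterwards
    # inherit the loader.
    psycopg.adapters.register_loader("date", NoneOnOverflowDateLoader)

    with psycopg.connect("dbname=example") as conn:  # hypothetical DSN
        value = conn.execute("SELECT '19999-01-01'::date").fetchone()[0]
        print(value)  # None instead of a DataError at fetch time

The patch applies the same pattern to the binary-format loaders and to timestamp/timestamptz, re-applying the connection timezone in the timestamptz case the same way Django's own psycopg_any loaders do.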