Skip to content

Commit 78af857

Browse files
committed
Add tests for Partition Spec Evolution
1 parent 197423f commit 78af857

File tree

1 file changed

+66
-1
lines changed

1 file changed

+66
-1
lines changed

tests/integration/test_catalog.py

Lines changed: 66 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
from pyiceberg.catalog.rest import RestCatalog
2828
from pyiceberg.catalog.sql import SqlCatalog
2929
from pyiceberg.exceptions import (
30+
CommitFailedException,
3031
CommitFailedException,
3132
NamespaceAlreadyExistsError,
3233
NamespaceNotEmptyError,
@@ -36,7 +37,9 @@
3637
)
3738
from pyiceberg.io import WAREHOUSE
3839
from pyiceberg.partitioning import PartitionField, PartitionSpec
40+
from pyiceberg.partitioning import PartitionField, PartitionSpec
3941
from pyiceberg.schema import INITIAL_SCHEMA_ID, Schema
42+
from pyiceberg.transforms import BucketTransform
4043
from pyiceberg.table.metadata import INITIAL_SPEC_ID
4144
from pyiceberg.table.sorting import INITIAL_SORT_ORDER_ID, SortField, SortOrder
4245
from pyiceberg.transforms import DayTransform, IdentityTransform
@@ -90,7 +93,6 @@ def rest_test_catalog() -> Generator[Catalog, None, None]:
9093
else:
9194
pytest.skip("PYICEBERG_TEST_CATALOG environment variables not set")
9295

93-
9496
@pytest.fixture(scope="function")
9597
def hive_catalog() -> Generator[Catalog, None, None]:
9698
test_catalog = HiveCatalog(
@@ -503,6 +505,69 @@ def test_update_namespace_properties(test_catalog: Catalog, database_name: str)
503505
assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]
504506

505507

508+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """Add a bucket partition field via ``update_spec`` and verify it on a freshly loaded table.

    The table is created without an explicit partition spec, a ``BucketTransform(16)``
    field named ``shard`` is added on the ``VendorID`` column, and the table is then
    loaded again through the catalog so the assertion runs against the reloaded state.
    """
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    table = test_catalog.create_table(identifier, test_schema)

    # The update is expected to be committed when the ``with`` block exits.
    with table.update_spec() as update:
        update.add_field(source_column_name="VendorID", transform=BucketTransform(16), partition_field_name="shard")

    loaded = test_catalog.load_table(identifier)
    # NOTE(review): assumes ``VendorID`` has field id 1 in ``test_schema`` -- the fixture is not visible here.
    expected_spec = PartitionSpec(
        PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="shard"), spec_id=1
    )
    # The spec ID may not match, so compare only the partition fields; comparing
    # the PartitionSpec objects directly would also compare ``spec_id``.
    assert loaded.spec().fields == expected_spec.fields
524+
525+
526+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec_conflict(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """Committing a spec update that was started before a competing update must fail.

    A spec update is opened on the original table handle but not committed. A second
    handle to the same table then removes the ``id_bucket`` field. Committing the first
    update afterwards is expected to raise ``CommitFailedException``, and the reloaded
    table is expected to carry the empty spec with ``spec_id=1``.
    """
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    bucket_field = PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="id_bucket")
    table = test_catalog.create_table(identifier, test_schema, partition_spec=PartitionSpec(bucket_field))

    # Stage a change on the first handle, deliberately leaving it uncommitted.
    stale_update = table.update_spec()
    stale_update.add_field(
        source_column_name="tpep_pickup_datetime",
        transform=BucketTransform(16),
        partition_field_name="shard",
    )

    # A second handle changes the spec first, which makes the staged update stale.
    concurrent_table = test_catalog.load_table(identifier)
    with concurrent_table.update_spec() as concurrent_update:
        concurrent_update.remove_field("id_bucket")

    # Two alternative messages are accepted by the regex.
    expected_error = "Requirement failed: default spec id has changed|default partition spec changed"
    with pytest.raises(CommitFailedException, match=expected_error):
        stale_update.commit()

    reloaded = test_catalog.load_table(identifier)
    assert reloaded.spec() == PartitionSpec(spec_id=1)
549+
550+
551+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec_then_revert(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """Adding a partition field and then removing it again must restore the initial spec.

    The table is created as format version 2 with a single ``id_bucket`` bucket field.
    An identity field on ``tpep_pickup_datetime`` is added in one update and removed in
    a second one; afterwards ``table.spec()`` must equal the spec used at creation time.
    """
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)

    bucket_field = PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="id_bucket")
    initial_spec = PartitionSpec(bucket_field)

    table = test_catalog.create_table(
        identifier,
        test_schema,
        partition_spec=initial_spec,
        properties={"format-version": "2"},
    )
    assert table.format_version == 2

    # First update: add the identity partition field.
    with table.update_spec() as add_update:
        add_update.add_identity(source_column_name="tpep_pickup_datetime")

    # Second update: drop that same field again.
    with table.update_spec() as remove_update:
        remove_update.remove_field("tpep_pickup_datetime")

    assert table.spec() == initial_spec
569+
570+
506571
@pytest.mark.integration
507572
@pytest.mark.parametrize("test_catalog", CATALOGS)
508573
def test_register_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:

0 commit comments

Comments
 (0)