Skip to content

Commit 8b9f467

Browse files
committed
PR comments
1 parent f73c8b7 commit 8b9f467

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

pyiceberg/table/snapshots.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,9 @@ class Snapshot(IcebergBaseModel):
247247
first_row_id: Optional[int] = Field(
248248
alias="first-row-id", default=None, description="assigned to the first row in the first data file in the first manifest"
249249
)
250-
added_rows: Optional[int] = Field(alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs")
250+
added_rows: Optional[int] = Field(
251+
alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs"
252+
)
251253

252254
def __str__(self) -> str:
253255
"""Return the string representation of the Snapshot class."""
@@ -257,6 +259,22 @@ def __str__(self) -> str:
257259
result_str = f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"
258260
return result_str
259261

262+
def __repr__(self) -> str:
    """Return a constructor-style representation of the Snapshot.

    ``snapshot_id``, ``parent_snapshot_id``, ``sequence_number``,
    ``timestamp_ms`` and ``manifest_list`` are always rendered. ``summary``
    is rendered only when it is truthy, and ``schema_id``, ``first_row_id``
    and ``added_rows`` only when they are not ``None``.

    Returns:
        A string of the form ``Snapshot(name=value, ...)``.
    """
    # Optional entries evaluate to None here and are dropped when joining.
    parts = [
        f"snapshot_id={self.snapshot_id}",
        f"parent_snapshot_id={self.parent_snapshot_id}",
        f"sequence_number={self.sequence_number}",
        f"timestamp_ms={self.timestamp_ms}",
        # Use !r rather than hand-written quotes so that quotes or
        # backslashes inside the value are escaped correctly.
        f"manifest_list={self.manifest_list!r}",
        f"summary={self.summary!r}" if self.summary else None,
        f"schema_id={self.schema_id}" if self.schema_id is not None else None,
        f"first_row_id={self.first_row_id}" if self.first_row_id is not None else None,
        f"added_rows={self.added_rows}" if self.added_rows is not None else None,
    ]
    rendered = ", ".join(part for part in parts if part is not None)
    return f"Snapshot({rendered})"
277+
260278
def manifests(self, io: FileIO) -> List[ManifestFile]:
261279
"""Return the manifests for the given snapshot."""
262280
return list(_manifests(io, self.manifest_list))

pyiceberg/table/update/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -648,6 +648,7 @@ def _(update: RemoveSchemasUpdate, base_metadata: TableMetadata, context: _Table
648648

649649
return base_metadata.model_copy(update={"schemas": schemas})
650650

651+
651652
@_apply_table_update.register(SetPartitionStatisticsUpdate)
652653
def _(update: SetPartitionStatisticsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
653654
partition_statistics = filter_statistics_by_snapshot_id(

tests/integration/test_writes/test_writes.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2493,13 +2493,12 @@ def test_stage_only_overwrite_files(
24932493

24942494

24952495
@pytest.mark.integration
2496-
def test_v3_write_and_read(spark: SparkSession, session_catalog: Catalog) -> None:
2496+
def test_v3_write_and_read_row_lineage(spark: SparkSession, session_catalog: Catalog) -> None:
24972497
"""Test writing to a v3 table and reading with Spark."""
24982498
identifier = "default.test_v3_write_and_read"
24992499
tbl = _create_table(session_catalog, identifier, {"format-version": "3"})
25002500
assert tbl.format_version == 3, f"Expected v3, got: v{tbl.format_version}"
2501-
assert tbl.metadata.next_row_id is not None, "Expected next_row_id to be initialized"
2502-
initial_next_row_id = tbl.metadata.next_row_id
2501+
initial_next_row_id = tbl.metadata.next_row_id or 0
25032502

25042503
test_data = pa.Table.from_pydict(
25052504
{
@@ -2525,9 +2524,6 @@ def test_v3_write_and_read(spark: SparkSession, session_catalog: Catalog) -> Non
25252524

25262525
tbl.append(test_data)
25272526

2528-
assert (
2529-
tbl.metadata.next_row_id == initial_next_row_id + len(test_data)
2530-
), "Expected next_row_id to be incremented by the number of added rows"
2531-
2532-
df = spark.table(identifier)
2533-
assert df.count() == 3, "Expected 3 rows"
2527+
assert tbl.metadata.next_row_id == initial_next_row_id + len(test_data), (
2528+
"Expected next_row_id to be incremented by the number of added rows"
2529+
)

tests/table/test_init.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1522,6 +1522,7 @@ def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_wi
15221522
(RemovePartitionStatisticsUpdate(snapshot_id=123456789),),
15231523
)
15241524

1525+
15251526
def test_add_snapshot_update_fails_without_first_row_id(table_v3: Table) -> None:
15261527
new_snapshot = Snapshot(
15271528
snapshot_id=25,

0 commit comments

Comments
 (0)