PR comments

rambleraptor · rambleraptor · commit 8b9f467d2e6f · 2025-10-22T13:30:23.000-07:00
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
@@ -247,7 +247,9 @@ class Snapshot(IcebergBaseModel):
     first_row_id: Optional[int] = Field(
         alias="first-row-id", default=None, description="assigned to the first row in the first data file in the first manifest"
     )
-    added_rows: Optional[int] = Field(alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs")
+    added_rows: Optional[int] = Field(
+        alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs"
+    )
 
     def __str__(self) -> str:
         """Return the string representation of the Snapshot class."""
@@ -257,6 +259,22 @@ def __str__(self) -> str:
         result_str = f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"
         return result_str
 
+    def __repr__(self) -> str:
+        """Return the repr of the Snapshot; summary, schema_id, first_row_id and added_rows are omitted when unset."""
+        fields = [
+            f"snapshot_id={self.snapshot_id}",
+            f"parent_snapshot_id={self.parent_snapshot_id}",
+            f"sequence_number={self.sequence_number}",
+            f"timestamp_ms={self.timestamp_ms}",
+            f"manifest_list='{self.manifest_list}'",
+            f"summary={repr(self.summary)}" if self.summary else None,
+            f"schema_id={self.schema_id}" if self.schema_id is not None else None,
+            f"first_row_id={self.first_row_id}" if self.first_row_id is not None else None,
+            f"added_rows={self.added_rows}" if self.added_rows is not None else None,
+        ]
+        filtered_fields = [field for field in fields if field is not None]
+        return f"Snapshot({', '.join(filtered_fields)})"
+
     def manifests(self, io: FileIO) -> List[ManifestFile]:
         """Return the manifests for the given snapshot."""
         return list(_manifests(io, self.manifest_list))
diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py
@@ -648,6 +648,7 @@ def _(update: RemoveSchemasUpdate, base_metadata: TableMetadata, context: _Table
 
     return base_metadata.model_copy(update={"schemas": schemas})
 
+
 @_apply_table_update.register(SetPartitionStatisticsUpdate)
 def _(update: SetPartitionStatisticsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
     partition_statistics = filter_statistics_by_snapshot_id(
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
@@ -2493,13 +2493,12 @@ def test_stage_only_overwrite_files(
 
 
 @pytest.mark.integration
-def test_v3_write_and_read(spark: SparkSession, session_catalog: Catalog) -> None:
+def test_v3_write_and_read_row_lineage(spark: SparkSession, session_catalog: Catalog) -> None:
     """Test writing to a v3 table and reading with Spark."""
     identifier = "default.test_v3_write_and_read"
     tbl = _create_table(session_catalog, identifier, {"format-version": "3"})
     assert tbl.format_version == 3, f"Expected v3, got: v{tbl.format_version}"
-    assert tbl.metadata.next_row_id is not None, "Expected next_row_id to be initialized"
-    initial_next_row_id = tbl.metadata.next_row_id
+    initial_next_row_id = tbl.metadata.next_row_id or 0
 
     test_data = pa.Table.from_pydict(
         {
@@ -2525,9 +2524,6 @@ def test_v3_write_and_read(spark: SparkSession, session_catalog: Catalog) -> Non
 
     tbl.append(test_data)
 
-    assert (
-        tbl.metadata.next_row_id == initial_next_row_id + len(test_data)
-    ), "Expected next_row_id to be incremented by the number of added rows"
-
-    df = spark.table(identifier)
-    assert df.count() == 3, "Expected 3 rows"
+    assert tbl.metadata.next_row_id == initial_next_row_id + len(test_data), (
+        "Expected next_row_id to be incremented by the number of added rows"
+    )
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
@@ -1522,6 +1522,7 @@ def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_wi
             (RemovePartitionStatisticsUpdate(snapshot_id=123456789),),
         )
 
+
 def test_add_snapshot_update_fails_without_first_row_id(table_v3: Table) -> None:
     new_snapshot = Snapshot(
         snapshot_id=25,

Original file line number	Diff line number	Diff line change
`@@ -1522,6 +1522,7 @@ def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_wi`
`1522`	`1522`	`(RemovePartitionStatisticsUpdate(snapshot_id=123456789),),`
`1523`	`1523`	`)`
`1524`	`1524`
	`1525`	`+`
`1525`	`1526`	`def test_add_snapshot_update_fails_without_first_row_id(table_v3: Table) -> None:`
`1526`	`1527`	`new_snapshot = Snapshot(`
`1527`	`1528`	`snapshot_id=25,`