Skip to content

Commit e3aaa5e

Browse files
committed
small refactor, and comments
1 parent 6e3ab87 commit e3aaa5e

File tree

2 files changed

+16
-62
lines changed

2 files changed

+16
-62
lines changed

pyiceberg/table/__init__.py

Lines changed: 13 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -26,25 +26,12 @@
2626
from functools import cached_property
2727
from itertools import chain
2828
from types import TracebackType
29-
from typing import (
30-
TYPE_CHECKING,
31-
Any,
32-
TypeVar,
33-
)
29+
from typing import TYPE_CHECKING, Any, TypeVar
3430

3531
from pydantic import Field
3632

3733
import pyiceberg.expressions.parser as parser
38-
from pyiceberg.expressions import (
39-
AlwaysFalse,
40-
AlwaysTrue,
41-
And,
42-
BooleanExpression,
43-
EqualTo,
44-
IsNull,
45-
Or,
46-
Reference,
47-
)
34+
from pyiceberg.expressions import AlwaysFalse, AlwaysTrue, And, BooleanExpression, EqualTo, IsNull, Or, Reference
4835
from pyiceberg.expressions.visitors import (
4936
ResidualEvaluator,
5037
_InclusiveMetricsEvaluator,
@@ -54,36 +41,17 @@
5441
manifest_evaluator,
5542
)
5643
from pyiceberg.io import FileIO, load_file_io
57-
from pyiceberg.manifest import (
58-
DataFile,
59-
DataFileContent,
60-
ManifestContent,
61-
ManifestEntry,
62-
ManifestFile,
63-
)
64-
from pyiceberg.partitioning import (
65-
PARTITION_FIELD_ID_START,
66-
UNPARTITIONED_PARTITION_SPEC,
67-
PartitionKey,
68-
PartitionSpec,
69-
)
44+
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestEntry, ManifestFile
45+
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, UNPARTITIONED_PARTITION_SPEC, PartitionKey, PartitionSpec
7046
from pyiceberg.schema import Schema
7147
from pyiceberg.table.delete_file_index import DeleteFileIndex
7248
from pyiceberg.table.inspect import InspectTable
7349
from pyiceberg.table.locations import LocationProvider, load_location_provider
7450
from pyiceberg.table.maintenance import MaintenanceTable
75-
from pyiceberg.table.metadata import (
76-
INITIAL_SEQUENCE_NUMBER,
77-
TableMetadata,
78-
)
79-
from pyiceberg.table.name_mapping import (
80-
NameMapping,
81-
)
51+
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadata
52+
from pyiceberg.table.name_mapping import NameMapping
8253
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef
83-
from pyiceberg.table.snapshots import (
84-
Snapshot,
85-
SnapshotLogEntry,
86-
)
54+
from pyiceberg.table.snapshots import Snapshot, SnapshotLogEntry
8755
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
8856
from pyiceberg.table.update import (
8957
AddPartitionSpecUpdate,
@@ -107,11 +75,7 @@
10775
update_table_metadata,
10876
)
10977
from pyiceberg.table.update.schema import UpdateSchema
110-
from pyiceberg.table.update.snapshot import (
111-
ManageSnapshots,
112-
UpdateSnapshot,
113-
_FastAppendFiles,
114-
)
78+
from pyiceberg.table.update.snapshot import ManageSnapshots, UpdateSnapshot, _FastAppendFiles
11579
from pyiceberg.table.update.sorting import UpdateSortOrder
11680
from pyiceberg.table.update.spec import UpdateSpec
11781
from pyiceberg.table.update.statistics import UpdateStatistics
@@ -126,9 +90,7 @@
12690
Record,
12791
TableVersion,
12892
)
129-
from pyiceberg.types import (
130-
strtobool,
131-
)
93+
from pyiceberg.types import strtobool
13294
from pyiceberg.utils.concurrent import ExecutorFactory
13395
from pyiceberg.utils.config import Config
13496
from pyiceberg.utils.properties import property_as_bool
@@ -144,11 +106,7 @@
144106
from pyiceberg_core.datafusion import IcebergDataFusionTable
145107

146108
from pyiceberg.catalog import Catalog
147-
from pyiceberg.catalog.rest.scan_planning import (
148-
RESTContentFile,
149-
RESTDeleteFile,
150-
RESTFileScanTask,
151-
)
109+
from pyiceberg.catalog.rest.scan_planning import RESTContentFile, RESTDeleteFile, RESTFileScanTask
152110

153111
ALWAYS_TRUE = AlwaysTrue()
154112
DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE = "downcast-ns-timestamp-to-us-on-write"
@@ -403,6 +361,8 @@ def _build_partition_predicate(
403361
404362
Args:
405363
partition_records: A set of partition records to match
364+
spec: An optional partition spec, if none then defaults to current
365+
schema: An optional schema, if none then defaults to current
406366
Returns:
407367
A predicate matching any of the input partition records.
408368
"""
@@ -675,11 +635,7 @@ def delete(
675635
case_sensitive: A bool determine if the provided `delete_filter` is case-sensitive
676636
branch: Branch Reference to run the delete operation
677637
"""
678-
from pyiceberg.io.pyarrow import (
679-
ArrowScan,
680-
_dataframe_to_data_files,
681-
_expression_to_complementary_pyarrow,
682-
)
638+
from pyiceberg.io.pyarrow import ArrowScan, _dataframe_to_data_files, _expression_to_complementary_pyarrow
683639

684640
if (
685641
self.table_metadata.properties.get(TableProperties.DELETE_MODE, TableProperties.DELETE_MODE_DEFAULT)

pyiceberg/table/update/snapshot.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def _write_delete_manifest() -> list[ManifestFile]:
211211
for spec_id, entries in partition_groups.items():
212212
with write_manifest(
213213
format_version=self._transaction.table_metadata.format_version,
214-
spec=self._transaction.table_metadata.specs()[spec_id],
214+
spec=self.spec(spec_id),
215215
schema=self.schema(),
216216
output_file=self.new_manifest_output(),
217217
snapshot_id=self._snapshot_id,
@@ -369,17 +369,15 @@ def fetch_manifest_entry(self, manifest: ManifestFile, discard_deleted: bool = T
369369
return manifest.fetch_manifest_entry(io=self._io, discard_deleted=discard_deleted)
370370

371371
def _build_partition_projection(self, spec_id: int) -> BooleanExpression:
372-
spec = self._transaction.table_metadata.specs()[spec_id]
373-
project = inclusive_projection(self.schema(), spec, self._case_sensitive)
372+
project = inclusive_projection(self.schema(), self.spec(spec_id), self._case_sensitive)
374373
return project(self._predicate)
375374

376375
@cached_property
377376
def partition_filters(self) -> KeyDefaultDict[int, BooleanExpression]:
378377
return KeyDefaultDict(self._build_partition_projection)
379378

380379
def _build_manifest_evaluator(self, spec_id: int) -> Callable[[ManifestFile], bool]:
381-
spec = self._transaction.table_metadata.specs()[spec_id]
382-
return manifest_evaluator(spec, self.schema(), self.partition_filters[spec_id], self._case_sensitive)
380+
return manifest_evaluator(self.spec(spec_id), self.schema(), self.partition_filters[spec_id], self._case_sensitive)
383381

384382
def delete_by_predicate(self, predicate: BooleanExpression, case_sensitive: bool = True) -> None:
385383
self._predicate = Or(self._predicate, predicate)

0 commit comments

Comments
 (0)