Skip to content

Commit 56a19dd

Browse files
authored
Merge branch 'main' into drop-python-39
2 parents 074a466 + cc14158 commit 56a19dd

File tree

13 files changed

+807
-768
lines changed

13 files changed

+807
-768
lines changed

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
if: startsWith(matrix.os, 'ubuntu')
6262

6363
- name: Build wheels
64-
uses: pypa/cibuildwheel@v3.2.0
64+
uses: pypa/cibuildwheel@v3.2.1
6565
with:
6666
output-dir: wheelhouse
6767
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ jobs:
5656
if: startsWith(matrix.os, 'ubuntu')
5757

5858
- name: Build wheels
59-
uses: pypa/cibuildwheel@v3.2.0
59+
uses: pypa/cibuildwheel@v3.2.1
6060
with:
6161
output-dir: wheelhouse
6262
config-file: "pyproject.toml"

mkdocs/docs/configuration.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,18 @@ catalog:
539539
py-io-impl: pyiceberg.io.fsspec.FsspecFileIO
540540
```
541541

542+
##### Apache Gravitino
543+
544+
```yaml
545+
catalog:
546+
gravitino_catalog:
547+
type: rest
548+
uri: <gravitino-catalog-uri>
549+
header.X-Iceberg-Access-Delegation: vended-credentials
550+
auth:
551+
type: noop
552+
```
553+
542554
### SQL Catalog
543555

544556
The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL (through psycopg2) and SQLite. The database connection has to be configured using the `uri` property. The `init_catalog_tables` property is optional and defaults to `True`. If it is set to `False`, the catalog tables will not be created when the `SQLCatalog` is initialized. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls):

poetry.lock

Lines changed: 491 additions & 592 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/expressions/__init__.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -743,12 +743,18 @@ def as_bound(self) -> Type[BoundNotIn[L]]:
743743
return BoundNotIn[L]
744744

745745

746-
class LiteralPredicate(UnboundPredicate[L], ABC):
747-
literal: Literal[L]
746+
class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC):
747+
type: TypingLiteral["lt", "lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", "not-starts-with"] = Field(alias="type")
748+
term: UnboundTerm[Any]
749+
value: Literal[L] = Field()
750+
model_config = ConfigDict(populate_by_name=True, frozen=True, arbitrary_types_allowed=True)
748751

749-
def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]): # pylint: disable=W0621
750-
super().__init__(term)
751-
self.literal = _to_literal(literal) # pylint: disable=W0621
752+
def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]):
753+
super().__init__(term=_to_unbound_term(term), value=_to_literal(literal)) # type: ignore[call-arg]
754+
755+
@property
756+
def literal(self) -> Literal[L]:
757+
return self.value
752758

753759
def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundLiteralPredicate[L]:
754760
bound_term = self.term.bind(schema, case_sensitive)
@@ -773,6 +779,10 @@ def __eq__(self, other: Any) -> bool:
773779
return self.term == other.term and self.literal == other.literal
774780
return False
775781

782+
def __str__(self) -> str:
783+
"""Return the string representation of the LiteralPredicate class."""
784+
return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})"
785+
776786
def __repr__(self) -> str:
777787
"""Return the string representation of the LiteralPredicate class."""
778788
return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})"
@@ -886,6 +896,8 @@ def as_unbound(self) -> Type[NotStartsWith[L]]:
886896

887897

888898
class EqualTo(LiteralPredicate[L]):
899+
type: TypingLiteral["eq"] = Field(default="eq", alias="type")
900+
889901
def __invert__(self) -> NotEqualTo[L]:
890902
"""Transform the Expression into its negated version."""
891903
return NotEqualTo[L](self.term, self.literal)
@@ -896,6 +908,8 @@ def as_bound(self) -> Type[BoundEqualTo[L]]:
896908

897909

898910
class NotEqualTo(LiteralPredicate[L]):
911+
type: TypingLiteral["not-eq"] = Field(default="not-eq", alias="type")
912+
899913
def __invert__(self) -> EqualTo[L]:
900914
"""Transform the Expression into its negated version."""
901915
return EqualTo[L](self.term, self.literal)
@@ -906,6 +920,8 @@ def as_bound(self) -> Type[BoundNotEqualTo[L]]:
906920

907921

908922
class LessThan(LiteralPredicate[L]):
923+
type: TypingLiteral["lt"] = Field(default="lt", alias="type")
924+
909925
def __invert__(self) -> GreaterThanOrEqual[L]:
910926
"""Transform the Expression into its negated version."""
911927
return GreaterThanOrEqual[L](self.term, self.literal)
@@ -916,6 +932,8 @@ def as_bound(self) -> Type[BoundLessThan[L]]:
916932

917933

918934
class GreaterThanOrEqual(LiteralPredicate[L]):
935+
type: TypingLiteral["gt-eq"] = Field(default="gt-eq", alias="type")
936+
919937
def __invert__(self) -> LessThan[L]:
920938
"""Transform the Expression into its negated version."""
921939
return LessThan[L](self.term, self.literal)
@@ -926,6 +944,8 @@ def as_bound(self) -> Type[BoundGreaterThanOrEqual[L]]:
926944

927945

928946
class GreaterThan(LiteralPredicate[L]):
947+
type: TypingLiteral["gt"] = Field(default="gt", alias="type")
948+
929949
def __invert__(self) -> LessThanOrEqual[L]:
930950
"""Transform the Expression into its negated version."""
931951
return LessThanOrEqual[L](self.term, self.literal)
@@ -936,6 +956,8 @@ def as_bound(self) -> Type[BoundGreaterThan[L]]:
936956

937957

938958
class LessThanOrEqual(LiteralPredicate[L]):
959+
type: TypingLiteral["lt-eq"] = Field(default="lt-eq", alias="type")
960+
939961
def __invert__(self) -> GreaterThan[L]:
940962
"""Transform the Expression into its negated version."""
941963
return GreaterThan[L](self.term, self.literal)
@@ -946,6 +968,8 @@ def as_bound(self) -> Type[BoundLessThanOrEqual[L]]:
946968

947969

948970
class StartsWith(LiteralPredicate[L]):
971+
type: TypingLiteral["starts-with"] = Field(default="starts-with", alias="type")
972+
949973
def __invert__(self) -> NotStartsWith[L]:
950974
"""Transform the Expression into its negated version."""
951975
return NotStartsWith[L](self.term, self.literal)
@@ -956,6 +980,8 @@ def as_bound(self) -> Type[BoundStartsWith[L]]:
956980

957981

958982
class NotStartsWith(LiteralPredicate[L]):
983+
type: TypingLiteral["not-starts-with"] = Field(default="not-starts-with", alias="type")
984+
959985
def __invert__(self) -> StartsWith[L]:
960986
"""Transform the Expression into its negated version."""
961987
return StartsWith[L](self.term, self.literal)

pyiceberg/table/__init__.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
Callable,
3232
Dict,
3333
Iterable,
34+
Iterator,
3435
List,
3536
Optional,
3637
Set,
@@ -1942,11 +1943,11 @@ def _check_sequence_number(min_sequence_number: int, manifest: ManifestFile) ->
19421943
and (manifest.sequence_number or INITIAL_SEQUENCE_NUMBER) >= min_sequence_number
19431944
)
19441945

1945-
def plan_files(self) -> Iterable[FileScanTask]:
1946-
"""Plans the relevant files by filtering on the PartitionSpecs.
1946+
def scan_plan_helper(self) -> Iterator[List[ManifestEntry]]:
1947+
"""Filter and return manifest entries based on partition and metrics evaluators.
19471948
19481949
Returns:
1949-
List of FileScanTasks that contain both data and delete files.
1950+
Iterator of ManifestEntry objects that match the scan's partition filter.
19501951
"""
19511952
snapshot = self.snapshot()
19521953
if not snapshot:
@@ -1957,8 +1958,6 @@ def plan_files(self) -> Iterable[FileScanTask]:
19571958

19581959
manifest_evaluators: Dict[int, Callable[[ManifestFile], bool]] = KeyDefaultDict(self._build_manifest_evaluator)
19591960

1960-
residual_evaluators: Dict[int, Callable[[DataFile], ResidualEvaluator]] = KeyDefaultDict(self._build_residual_evaluator)
1961-
19621961
manifests = [
19631962
manifest_file
19641963
for manifest_file in snapshot.manifests(self.io)
@@ -1972,25 +1971,34 @@ def plan_files(self) -> Iterable[FileScanTask]:
19721971

19731972
min_sequence_number = _min_sequence_number(manifests)
19741973

1974+
executor = ExecutorFactory.get_or_create()
1975+
1976+
return executor.map(
1977+
lambda args: _open_manifest(*args),
1978+
[
1979+
(
1980+
self.io,
1981+
manifest,
1982+
partition_evaluators[manifest.partition_spec_id],
1983+
self._build_metrics_evaluator(),
1984+
)
1985+
for manifest in manifests
1986+
if self._check_sequence_number(min_sequence_number, manifest)
1987+
],
1988+
)
1989+
1990+
def plan_files(self) -> Iterable[FileScanTask]:
1991+
"""Plans the relevant files by filtering on the PartitionSpecs.
1992+
1993+
Returns:
1994+
List of FileScanTasks that contain both data and delete files.
1995+
"""
19751996
data_entries: List[ManifestEntry] = []
19761997
positional_delete_entries = SortedList(key=lambda entry: entry.sequence_number or INITIAL_SEQUENCE_NUMBER)
19771998

1978-
executor = ExecutorFactory.get_or_create()
1979-
for manifest_entry in chain(
1980-
*executor.map(
1981-
lambda args: _open_manifest(*args),
1982-
[
1983-
(
1984-
self.io,
1985-
manifest,
1986-
partition_evaluators[manifest.partition_spec_id],
1987-
self._build_metrics_evaluator(),
1988-
)
1989-
for manifest in manifests
1990-
if self._check_sequence_number(min_sequence_number, manifest)
1991-
],
1992-
)
1993-
):
1999+
residual_evaluators: Dict[int, Callable[[DataFile], ResidualEvaluator]] = KeyDefaultDict(self._build_residual_evaluator)
2000+
2001+
for manifest_entry in chain.from_iterable(self.scan_plan_helper()):
19942002
data_file = manifest_entry.data_file
19952003
if data_file.content == DataFileContent.DATA:
19962004
data_entries.append(manifest_entry)

0 commit comments

Comments
 (0)