feat: make LiteralPredicate an IcebergBaseModel subclass instead of using an internal model class

jaimeferj · jaimeferj · commit 2ebc03d2e2ba · 2025-10-03T18:55:15.000+02:00
diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py
@@ -17,11 +17,13 @@
 
 from __future__ import annotations
 
+import typing
 from abc import ABC, abstractmethod
 from functools import cached_property
 from typing import (
     Any,
     Callable,
+    ClassVar,
     Generic,
     Iterable,
     Sequence,
@@ -32,7 +34,7 @@
     Union,
 )
 
-from pydantic import Field
+from pydantic import ConfigDict, Field, field_serializer, field_validator
 
 from pyiceberg.expressions.literals import (
     AboveMax,
@@ -727,45 +729,37 @@ def as_bound(self) -> Type[BoundNotIn[L]]:
         return BoundNotIn[L]
 
 
-class LiteralPredicate(UnboundPredicate[L], ABC):
-    literal: Literal[L]
+class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC):
+    op: str = Field(
+        default="",
+        alias="type",
+        validation_alias="type",
+        serialization_alias="type",
+        repr=False,
+    )
+    term: Term[L]
+    literal: Literal[L] = Field(serialization_alias="value")
+
+    __op__: ClassVar[str] = ""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]):  # pylint: disable=W0621
-        super().__init__(term)
-        self.literal = _to_literal(literal)  # pylint: disable=W0621
-
-    # ---- JSON (Pydantic) serialization helpers ----
-
-    class _LiteralPredicateModel(IcebergBaseModel):
-        type: str = Field(alias="type")
-        term: str
-        value: Any
-
-    def _json_op(self) -> str:
-        mapping = {
-            EqualTo: "eq",
-            NotEqualTo: "not-eq",
-            LessThan: "lt",
-            LessThanOrEqual: "lt-eq",
-            GreaterThan: "gt",
-            GreaterThanOrEqual: "gt-eq",
-            StartsWith: "starts-with",
-            NotStartsWith: "not-starts-with",
-        }
-        for cls, op in mapping.items():
-            if isinstance(self, cls):
-                return op
-        raise ValueError(f"Unknown LiteralPredicate: {type(self).__name__}")
-
-    def model_dump(self, **kwargs: Any) -> dict:
-        term_name = getattr(self.term, "name", str(self.term))
-        return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump(**kwargs)
-
-    def model_dump_json(self, **kwargs: Any) -> str:
-        term_name = getattr(self.term, "name", str(self.term))
-        return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump_json(
-            **kwargs
-        )
+        super().__init__(term=_to_unbound_term(term), literal=_to_literal(literal))
+
+    def model_post_init(self, __context: Any) -> None:
+        if not self.op:
+            object.__setattr__(self, "op", self.__op__)
+        elif self.op != self.__op__:
+            raise ValueError(f"Invalid type {self.op!r}; expected {self.__op__!r}")
+
+    @field_serializer("term")
+    def ser_term(self, v: Term[L]) -> str:
+        return v.name
+
+    @field_serializer("literal")
+    def ser_literal(self, literal: Literal[L]) -> str:
+        return "Any"
 
     def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundLiteralPredicate[L]:
         bound_term = self.term.bind(schema, case_sensitive)
@@ -790,6 +784,10 @@ def __eq__(self, other: Any) -> bool:
             return self.term == other.term and self.literal == other.literal
         return False
 
+    def __str__(self) -> str:
+        """Return the string representation of the LiteralPredicate class."""
+        return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})"
+
     def __repr__(self) -> str:
         """Return the string representation of the LiteralPredicate class."""
         return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})"
@@ -903,6 +901,8 @@ def as_unbound(self) -> Type[NotStartsWith[L]]:
 
 
 class EqualTo(LiteralPredicate[L]):
+    __op__ = "eq"
+
     def __invert__(self) -> NotEqualTo[L]:
         """Transform the Expression into its negated version."""
         return NotEqualTo[L](self.term, self.literal)
@@ -913,6 +913,8 @@ def as_bound(self) -> Type[BoundEqualTo[L]]:
 
 
 class NotEqualTo(LiteralPredicate[L]):
+    __op__ = "not-eq"
+
     def __invert__(self) -> EqualTo[L]:
         """Transform the Expression into its negated version."""
         return EqualTo[L](self.term, self.literal)
@@ -923,6 +925,8 @@ def as_bound(self) -> Type[BoundNotEqualTo[L]]:
 
 
 class LessThan(LiteralPredicate[L]):
+    __op__ = "lt"
+
     def __invert__(self) -> GreaterThanOrEqual[L]:
         """Transform the Expression into its negated version."""
         return GreaterThanOrEqual[L](self.term, self.literal)
@@ -933,6 +937,8 @@ def as_bound(self) -> Type[BoundLessThan[L]]:
 
 
 class GreaterThanOrEqual(LiteralPredicate[L]):
+    __op__ = "gt-eq"
+
     def __invert__(self) -> LessThan[L]:
         """Transform the Expression into its negated version."""
         return LessThan[L](self.term, self.literal)
@@ -943,6 +949,8 @@ def as_bound(self) -> Type[BoundGreaterThanOrEqual[L]]:
 
 
 class GreaterThan(LiteralPredicate[L]):
+    __op__ = "gt"
+
     def __invert__(self) -> LessThanOrEqual[L]:
         """Transform the Expression into its negated version."""
         return LessThanOrEqual[L](self.term, self.literal)
@@ -953,6 +961,8 @@ def as_bound(self) -> Type[BoundGreaterThan[L]]:
 
 
 class LessThanOrEqual(LiteralPredicate[L]):
+    __op__ = "lt-eq"
+
     def __invert__(self) -> GreaterThan[L]:
         """Transform the Expression into its negated version."""
         return GreaterThan[L](self.term, self.literal)
@@ -963,6 +973,8 @@ def as_bound(self) -> Type[BoundLessThanOrEqual[L]]:
 
 
 class StartsWith(LiteralPredicate[L]):
+    __op__ = "starts-with"
+
     def __invert__(self) -> NotStartsWith[L]:
         """Transform the Expression into its negated version."""
         return NotStartsWith[L](self.term, self.literal)
@@ -973,6 +985,8 @@ def as_bound(self) -> Type[BoundStartsWith[L]]:
 
 
 class NotStartsWith(LiteralPredicate[L]):
+    __op__ = "not-starts-with"
+
     def __invert__(self) -> StartsWith[L]:
         """Transform the Expression into its negated version."""
         return StartsWith[L](self.term, self.literal)
diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py
@@ -923,7 +923,7 @@ def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None:
 
 def test_equal_to() -> None:
     equal_to = EqualTo(Reference("a"), literal("a"))
-    assert equal_to.model_dump_json() == '{"type":"eq","term":"a","value":"a"}'
+    assert equal_to.model_dump_json() == '{"term":"a","type":"eq","value":"Any"}'
     assert str(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))"
     assert repr(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))"
     assert equal_to == eval(repr(equal_to))
@@ -932,7 +932,7 @@ def test_equal_to() -> None:
 
 def test_not_equal_to() -> None:
     not_equal_to = NotEqualTo(Reference("a"), literal("a"))
-    assert not_equal_to.model_dump_json() == '{"type":"not-eq","term":"a","value":"a"}'
+    assert not_equal_to.model_dump_json() == '{"term":"a","type":"not-eq","value":"Any"}'
     assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))"
     assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))"
     assert not_equal_to == eval(repr(not_equal_to))
@@ -941,7 +941,7 @@ def test_not_equal_to() -> None:
 
 def test_greater_than_or_equal_to() -> None:
     greater_than_or_equal_to = GreaterThanOrEqual(Reference("a"), literal("a"))
-    assert greater_than_or_equal_to.model_dump_json() == '{"type":"gt-eq","term":"a","value":"a"}'
+    assert greater_than_or_equal_to.model_dump_json() == '{"term":"a","type":"gt-eq","value":"Any"}'
     assert str(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to))
@@ -950,7 +950,7 @@ def test_greater_than_or_equal_to() -> None:
 
 def test_greater_than() -> None:
     greater_than = GreaterThan(Reference("a"), literal("a"))
-    assert greater_than.model_dump_json() == '{"type":"gt","term":"a","value":"a"}'
+    assert greater_than.model_dump_json() == '{"term":"a","type":"gt","value":"Any"}'
     assert str(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))"
     assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))"
     assert greater_than == eval(repr(greater_than))
@@ -959,32 +959,32 @@ def test_greater_than() -> None:
 
 def test_less_than() -> None:
     less_than = LessThan(Reference("a"), literal("a"))
-    assert less_than.model_dump_json() == '{"type":"lt","term":"a","value":"a"}'
+    assert less_than.model_dump_json() == '{"term":"a","type":"lt","value":"Any"}'
     assert str(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))"
     assert repr(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))"
     assert less_than == eval(repr(less_than))
     assert less_than == pickle.loads(pickle.dumps(less_than))
 
 
-def test_starts_with() -> None:
-    starts_with = StartsWith(Reference("a"), literal("a"))
-    assert starts_with.model_dump_json() == '{"type":"starts-with","term":"a","value":"a"}'
-
-
-def test_not_starts_with() -> None:
-    not_starts_with = NotStartsWith(Reference("a"), literal("a"))
-    assert not_starts_with.model_dump_json() == '{"type":"not-starts-with","term":"a","value":"a"}'
-
-
 def test_less_than_or_equal() -> None:
     less_than_or_equal = LessThanOrEqual(Reference("a"), literal("a"))
-    assert less_than_or_equal.model_dump_json() == '{"type":"lt-eq","term":"a","value":"a"}'
+    assert less_than_or_equal.model_dump_json() == '{"term":"a","type":"lt-eq","value":"Any"}'
     assert str(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert less_than_or_equal == eval(repr(less_than_or_equal))
     assert less_than_or_equal == pickle.loads(pickle.dumps(less_than_or_equal))
 
 
+def test_starts_with() -> None:
+    starts_with = StartsWith(Reference("a"), literal("a"))
+    assert starts_with.model_dump_json() == '{"term":"a","type":"starts-with","value":"Any"}'
+
+
+def test_not_starts_with() -> None:
+    not_starts_with = NotStartsWith(Reference("a"), literal("a"))
+    assert not_starts_with.model_dump_json() == '{"term":"a","type":"not-starts-with","value":"Any"}'
+
+
 def test_bound_reference_eval(table_schema_simple: Schema) -> None:
     """Test creating a BoundReference and evaluating it on a StructProtocol"""
     struct = Record("foovalue", 123, True)