diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py index e4ab3befa3..a6a0dafb72 100644 --- a/pyiceberg/expressions/visitors.py +++ b/pyiceberg/expressions/visitors.py @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. import math +import threading from abc import ABC, abstractmethod -from collections.abc import Callable +from collections.abc import Callable, Hashable from functools import singledispatch from typing import ( Any, @@ -25,6 +26,9 @@ TypeVar, ) +from cachetools import LRUCache, cached +from cachetools.keys import hashkey + from pyiceberg.conversions import from_bytes from pyiceberg.expressions import ( AlwaysFalse, @@ -1970,6 +1974,20 @@ def residual_for(self, partition_data: Record) -> BooleanExpression: return self.expr +_DEFAULT_RESIDUAL_EVALUATOR_CACHE_SIZE = 128 + + +def _residual_evaluator_cache_key( + spec: PartitionSpec, expr: BooleanExpression, case_sensitive: bool, schema: Schema +) -> tuple[Hashable, ...]: + return hashkey(spec.spec_id, repr(expr), case_sensitive, schema.schema_id) + + +@cached( + cache=LRUCache(maxsize=_DEFAULT_RESIDUAL_EVALUATOR_CACHE_SIZE), + key=_residual_evaluator_cache_key, + lock=threading.RLock(), +) def residual_evaluator_of( spec: PartitionSpec, expr: BooleanExpression, case_sensitive: bool, schema: Schema ) -> ResidualEvaluator: