diff --git a/src/iceberg/expression/evaluator.cc b/src/iceberg/expression/evaluator.cc index c4272cffc..02438e3f5 100644 --- a/src/iceberg/expression/evaluator.cc +++ b/src/iceberg/expression/evaluator.cc @@ -44,72 +44,71 @@ class EvalVisitor : public BoundVisitor { return left_result || right_result; } - Result IsNull(const std::shared_ptr& term) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result IsNull(const std::shared_ptr& expr) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value.IsNull(); } - Result NotNull(const std::shared_ptr& term) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, IsNull(term)); + Result NotNull(const std::shared_ptr& expr) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, IsNull(expr)); return !value; } - Result IsNaN(const std::shared_ptr& term) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result IsNaN(const std::shared_ptr& expr) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value.IsNaN(); } - Result NotNaN(const std::shared_ptr& term) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, IsNaN(term)); + Result NotNaN(const std::shared_ptr& expr) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, IsNaN(expr)); return !value; } - Result Lt(const std::shared_ptr& term, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result Lt(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value < lit; } - Result LtEq(const std::shared_ptr& term, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result LtEq(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value <= lit; } - Result Gt(const std::shared_ptr& term, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result Gt(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value > lit; } - Result GtEq(const std::shared_ptr& term, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result GtEq(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value >= lit; } - Result Eq(const std::shared_ptr& term, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + Result Eq(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return value == lit; } - Result NotEq(const std::shared_ptr& term, - const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto eq_result, Eq(term, lit)); + Result NotEq(const std::shared_ptr& expr, const Literal& lit) override { + ICEBERG_ASSIGN_OR_RAISE(auto eq_result, Eq(expr, lit)); return !eq_result; } - Result In(const std::shared_ptr& term, + Result In(const std::shared_ptr& expr, const BoundSetPredicate::LiteralSet& literal_set) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); return literal_set.contains(value); } - Result NotIn(const std::shared_ptr& term, + Result NotIn(const std::shared_ptr& expr, const BoundSetPredicate::LiteralSet& literal_set) override { - ICEBERG_ASSIGN_OR_RAISE(auto in_result, In(term, literal_set)); + ICEBERG_ASSIGN_OR_RAISE(auto in_result, In(expr, literal_set)); return !in_result; } - Result StartsWith(const std::shared_ptr& term, + Result StartsWith(const std::shared_ptr& expr, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(row_)); + ICEBERG_ASSIGN_OR_RAISE(auto value, expr->Evaluate(row_)); // Both value and literal should be strings if (!std::holds_alternative(value.value()) || @@ -122,9 +121,9 @@ class EvalVisitor : public BoundVisitor { return str_value.starts_with(str_prefix); } - Result NotStartsWith(const std::shared_ptr& term, + Result NotStartsWith(const std::shared_ptr& expr, const Literal& lit) override { - ICEBERG_ASSIGN_OR_RAISE(auto starts_result, StartsWith(term, lit)); + ICEBERG_ASSIGN_OR_RAISE(auto starts_result, StartsWith(expr, lit)); return !starts_result; } @@ -144,7 +143,7 @@ Result> Evaluator::Make(const Schema& schema, return std::unique_ptr(new Evaluator(std::move(bound_expr))); } -Result Evaluator::Eval(const StructLike& row) const { +Result Evaluator::Evaluate(const StructLike& row) const { EvalVisitor visitor(row); return Visit(bound_expr_, visitor); } diff --git a/src/iceberg/expression/evaluator.h b/src/iceberg/expression/evaluator.h index 0d9b6c820..aca862e43 100644 --- a/src/iceberg/expression/evaluator.h +++ b/src/iceberg/expression/evaluator.h @@ -54,7 +54,7 @@ class ICEBERG_EXPORT Evaluator { /// /// \param row The data row to evaluate /// \return true if the row matches the expression, false otherwise, or error - Result Eval(const StructLike& row) const; + Result Evaluate(const StructLike& row) const; private: explicit Evaluator(std::shared_ptr bound_expr); diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index 58b3fcc1a..35ffbfdfe 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -27,6 +27,7 @@ #include "iceberg/iceberg_export.h" #include "iceberg/result.h" +#include "iceberg/type_fwd.h" #include "iceberg/util/formattable.h" #include "iceberg/util/macros.h" @@ -328,4 +329,43 @@ ICEBERG_EXPORT std::string_view ToString(Expression::Operation op); /// \brief Returns the negated operation. ICEBERG_EXPORT Result Negate(Expression::Operation op); +/// \brief Interface for unbound expressions that need schema binding. +/// +/// Unbound expressions contain string-based references that must be resolved +/// against a concrete schema to produce bound expressions that can be evaluated. +/// +/// \tparam B The bound type this term produces when binding is successful +template +class ICEBERG_EXPORT Unbound { + public: + /// \brief Bind this expression to a concrete schema. + /// + /// \param schema The schema to bind against + /// \param case_sensitive Whether field name matching should be case sensitive + /// \return A bound expression or an error if binding fails + virtual Result> Bind(const Schema& schema, + bool case_sensitive) const = 0; + + /// \brief Overloaded Bind method that uses case-sensitive matching by default. + Result> Bind(const Schema& schema) const; + + /// \brief Returns the underlying named reference for this unbound term. + virtual std::shared_ptr reference() = 0; +}; + +/// \brief Interface for bound expressions that can be evaluated. +/// +/// Bound expressions have been resolved against a concrete schema and contain +/// all necessary information to evaluate against data structures. +class ICEBERG_EXPORT Bound { + public: + virtual ~Bound(); + + /// \brief Evaluate this expression against a row-based data. + virtual Result Evaluate(const StructLike& data) const = 0; + + /// \brief Returns the underlying bound reference for this term. + virtual std::shared_ptr reference() = 0; +}; + } // namespace iceberg diff --git a/src/iceberg/expression/expression_visitor.h b/src/iceberg/expression/expression_visitor.h index ed1e75d8e..d66382453 100644 --- a/src/iceberg/expression/expression_visitor.h +++ b/src/iceberg/expression/expression_visitor.h @@ -107,86 +107,86 @@ class ICEBERG_EXPORT BoundVisitor : public ExpressionVisitor { public: ~BoundVisitor() override = default; - /// \brief Visit an IS_NULL unary predicate. - /// \param term The bound term being tested - virtual Result IsNull(const std::shared_ptr& term) = 0; + /// \brief Visit an IS_NULL bound expression. + /// \param expr The bound expression being tested + virtual Result IsNull(const std::shared_ptr& expr) = 0; - /// \brief Visit a NOT_NULL unary predicate. - /// \param term The bound term being tested - virtual Result NotNull(const std::shared_ptr& term) = 0; + /// \brief Visit a NOT_NULL bound expression. + /// \param expr The bound expression being tested + virtual Result NotNull(const std::shared_ptr& expr) = 0; - /// \brief Visit an IS_NAN unary predicate. - /// \param term The bound term being tested - virtual Result IsNaN(const std::shared_ptr& term) { + /// \brief Visit an IS_NAN bound expression. + /// \param expr The bound expression being tested + virtual Result IsNaN(const std::shared_ptr& expr) { return NotSupported("IsNaN operation is not supported by this visitor"); } - /// \brief Visit a NOT_NAN unary predicate. - /// \param term The bound term being tested - virtual Result NotNaN(const std::shared_ptr& term) { + /// \brief Visit a NOT_NAN bound expression. + /// \param expr The bound expression being tested + virtual Result NotNaN(const std::shared_ptr& expr) { return NotSupported("NotNaN operation is not supported by this visitor"); } - /// \brief Visit a less-than predicate. - /// \param term The bound term + /// \brief Visit a less-than bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result Lt(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result Lt(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit a less-than-or-equal predicate. - /// \param term The bound term + /// \brief Visit a less-than-or-equal bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result LtEq(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result LtEq(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit a greater-than predicate. - /// \param term The bound term + /// \brief Visit a greater-than bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result Gt(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result Gt(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit a greater-than-or-equal predicate. - /// \param term The bound term + /// \brief Visit a greater-than-or-equal bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result GtEq(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result GtEq(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit an equality predicate. - /// \param term The bound term + /// \brief Visit an equality bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result Eq(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result Eq(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit a not-equal predicate. - /// \param term The bound term + /// \brief Visit a not-equal bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to compare against - virtual Result NotEq(const std::shared_ptr& term, const Literal& lit) = 0; + virtual Result NotEq(const std::shared_ptr& expr, const Literal& lit) = 0; - /// \brief Visit a starts-with predicate. - /// \param term The bound term + /// \brief Visit a starts-with bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to check for prefix match - virtual Result StartsWith([[maybe_unused]] const std::shared_ptr& term, + virtual Result StartsWith([[maybe_unused]] const std::shared_ptr& expr, [[maybe_unused]] const Literal& lit) { return NotSupported("StartsWith operation is not supported by this visitor"); } - /// \brief Visit a not-starts-with predicate. - /// \param term The bound term + /// \brief Visit a not-starts-with bound expression. + /// \param expr The bound expression being tested /// \param lit The literal value to check for prefix match - virtual Result NotStartsWith([[maybe_unused]] const std::shared_ptr& term, + virtual Result NotStartsWith([[maybe_unused]] const std::shared_ptr& expr, [[maybe_unused]] const Literal& lit) { return NotSupported("NotStartsWith operation is not supported by this visitor"); } - /// \brief Visit an IN set predicate. - /// \param term The bound term + /// \brief Visit an IN set bound expression. + /// \param expr The bound expression being tested /// \param literal_set The set of literal values to test membership virtual Result In( - [[maybe_unused]] const std::shared_ptr& term, + [[maybe_unused]] const std::shared_ptr& expr, [[maybe_unused]] const BoundSetPredicate::LiteralSet& literal_set) { return NotSupported("In operation is not supported by this visitor"); } - /// \brief Visit a NOT_IN set predicate. - /// \param term The bound term + /// \brief Visit a NOT_IN set bound expression. + /// \param expr The bound expression being tested /// \param literal_set The set of literal values to test membership virtual Result NotIn( - [[maybe_unused]] const std::shared_ptr& term, + [[maybe_unused]] const std::shared_ptr& expr, [[maybe_unused]] const BoundSetPredicate::LiteralSet& literal_set) { return NotSupported("NotIn operation is not supported by this visitor"); } diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h index c19b81324..8b9606e56 100644 --- a/src/iceberg/expression/term.h +++ b/src/iceberg/expression/term.h @@ -27,6 +27,7 @@ #include #include +#include "iceberg/expression/expression.h" #include "iceberg/expression/literal.h" #include "iceberg/type_fwd.h" #include "iceberg/util/formattable.h" @@ -45,45 +46,6 @@ class ICEBERG_EXPORT Term : public util::Formattable { template concept TermType = std::derived_from; -/// \brief Interface for unbound expressions that need schema binding. -/// -/// Unbound expressions contain string-based references that must be resolved -/// against a concrete schema to produce bound expressions that can be evaluated. -/// -/// \tparam B The bound type this term produces when binding is successful -template -class ICEBERG_EXPORT Unbound { - public: - /// \brief Bind this expression to a concrete schema. - /// - /// \param schema The schema to bind against - /// \param case_sensitive Whether field name matching should be case sensitive - /// \return A bound expression or an error if binding fails - virtual Result> Bind(const Schema& schema, - bool case_sensitive) const = 0; - - /// \brief Overloaded Bind method that uses case-sensitive matching by default. - Result> Bind(const Schema& schema) const; - - /// \brief Returns the underlying named reference for this unbound term. - virtual std::shared_ptr reference() = 0; -}; - -/// \brief Interface for bound expressions that can be evaluated. -/// -/// Bound expressions have been resolved against a concrete schema and contain -/// all necessary information to evaluate against data structures. -class ICEBERG_EXPORT Bound { - public: - virtual ~Bound(); - - /// \brief Evaluate this expression against a row-based data. - virtual Result Evaluate(const StructLike& data) const = 0; - - /// \brief Returns the underlying bound reference for this term. - virtual std::shared_ptr reference() = 0; -}; - /// \brief Base class for unbound terms. /// /// \tparam B The bound type this term produces when binding is successful. diff --git a/src/iceberg/test/evaluator_test.cc b/src/iceberg/test/evaluator_test.cc index 3483551e4..798f41e1f 100644 --- a/src/iceberg/test/evaluator_test.cc +++ b/src/iceberg/test/evaluator_test.cc @@ -121,7 +121,7 @@ class EvaluatorTest : public ::testing::Test { ICEBERG_UNWRAP_OR_FAIL(auto struct_like, ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array, 0)); - ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator.Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator.Evaluate(*struct_like)); ASSERT_EQ(result, expected_result); } @@ -357,32 +357,32 @@ TEST_F(EvaluatorTest, StartsWith) { // abc startsWith abc => true ASSERT_THAT(struct_like->Reset(0), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // xabc startsWith abc => false ASSERT_THAT(struct_like->Reset(1), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // Abc startsWith abc => false ASSERT_THAT(struct_like->Reset(2), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // a startsWith abc => false ASSERT_THAT(struct_like->Reset(3), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // abcd startsWith abc => true ASSERT_THAT(struct_like->Reset(4), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // null startsWith abc => false ASSERT_THAT(struct_like->Reset(5), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); } @@ -416,32 +416,32 @@ TEST_F(EvaluatorTest, NotStartsWith) { // abc notStartsWith abc => false ASSERT_THAT(struct_like->Reset(0), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // xabc notStartsWith abc => true ASSERT_THAT(struct_like->Reset(1), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // Abc notStartsWith abc => true ASSERT_THAT(struct_like->Reset(2), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // a notStartsWith abc => true ASSERT_THAT(struct_like->Reset(3), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // abcde notStartsWith abc => false ASSERT_THAT(struct_like->Reset(4), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // Abcde notStartsWith abc => true ASSERT_THAT(struct_like->Reset(5), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); } @@ -533,17 +533,17 @@ TEST_F(EvaluatorTest, IsNaN) { // NaN is NaN => true ASSERT_THAT(struct_like->Reset(0), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // 2.0 is not NaN => false ASSERT_THAT(struct_like->Reset(1), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // Infinity is not NaN => false ASSERT_THAT(struct_like->Reset(2), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); } @@ -578,17 +578,17 @@ TEST_F(EvaluatorTest, NotNaN) { // NaN is NaN => false ASSERT_THAT(struct_like->Reset(0), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator->Evaluate(*struct_like)); EXPECT_FALSE(result); // 2.0 is not NaN => true ASSERT_THAT(struct_like->Reset(1), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); // Infinity is not NaN => true ASSERT_THAT(struct_like->Reset(2), IsOk()); - ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Eval(*struct_like)); + ICEBERG_UNWRAP_OR_FAIL(result, evaluator->Evaluate(*struct_like)); EXPECT_TRUE(result); }