Skip to content

Commit aad8b49

Browse files
author
xiao.dong
committed
feat: add manifest evaluator
1 parent 428a171 commit aad8b49

File tree

11 files changed

+947
-1
lines changed

11 files changed

+947
-1
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ set(ICEBERG_SOURCES
2727
expression/expressions.cc
2828
expression/inclusive_metrics_evaluator.cc
2929
expression/literal.cc
30+
expression/manifest_evaluator.cc
3031
expression/predicate.cc
3132
expression/rewrite_not.cc
3233
expression/strict_metrics_evaluator.cc

src/iceberg/expression/manifest_evaluator.cc

Lines changed: 391 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/expression/manifest_evaluator.h
23+
///
24+
/// Evaluates an Expression on a ManifestFile to test whether the file contains
25+
/// matching partitions.
26+
///
27+
/// For row expressions, evaluation is inclusive: it returns true if a file
28+
/// may match and false if it cannot match.
29+
///
30+
/// Manifests are passed to ManifestEvaluator::Evaluate(), which returns true if the
31+
/// manifest may contain data files that match the partition expression. Manifest files
32+
/// may be skipped if and only if the return value of Evaluate() is false.
33+
///
34+
35+
#include <memory>
36+
37+
#include "iceberg/iceberg_export.h"
38+
#include "iceberg/result.h"
39+
#include "iceberg/type_fwd.h"
40+
41+
namespace iceberg {
42+
43+
/// \brief Evaluates an Expression against a manifest.
44+
/// \note: The evaluator is thread-safe.
45+
class ICEBERG_EXPORT ManifestEvaluator {
46+
public:
47+
/// \brief Make a manifest evaluator from a row filter expression
48+
///
49+
/// \param expr The expression to evaluate
50+
/// \param spec The partition spec
51+
/// \param schema The schema of the table
52+
/// \param case_sensitive Whether field name matching is case-sensitive
53+
static Result<std::unique_ptr<ManifestEvaluator>> MakeRowFilter(
54+
std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
55+
const Schema& schema, bool case_sensitive = true);
56+
57+
/// \brief Make a manifest evaluator from a partition filter expression
58+
///
59+
/// \param expr The expression to evaluate
60+
/// \param spec The partition spec
61+
/// \param schema The schema of the table
62+
/// \param case_sensitive Whether field name matching is case-sensitive
63+
static Result<std::unique_ptr<ManifestEvaluator>> MakePartitionFilter(
64+
std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
65+
const Schema& schema, bool case_sensitive = true);
66+
67+
~ManifestEvaluator();
68+
69+
/// \brief Evaluate the expression against a manifest.
70+
///
71+
/// \param manifest The manifest to evaluate
72+
/// \return true if the manifest may contain matching data files, false if it cannot, or error
73+
Result<bool> Evaluate(const ManifestFile& manifest) const;
74+
75+
private:
76+
explicit ManifestEvaluator(std::shared_ptr<Expression> expr);
77+
78+
private:
79+
std::shared_ptr<Expression> expr_;
80+
};
81+
82+
} // namespace iceberg

src/iceberg/expression/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ install_headers(
2525
'expressions.h',
2626
'inclusive_metrics_evaluator.h',
2727
'literal.h',
28+
'manifest_evaluator.h',
2829
'predicate.h',
2930
'rewrite_not.h',
3031
'strict_metrics_evaluator.h',

src/iceberg/expression/term.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ class ICEBERG_EXPORT BoundReference
157157

158158
Kind kind() const override { return Kind::kReference; }
159159

160+
/// \brief Get the StructLikeAccessor held by this bound reference.
const StructLikeAccessor& accessor() const { return *accessor_; }
161+
160162
private:
161163
BoundReference(SchemaField field, std::unique_ptr<StructLikeAccessor> accessor);
162164

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ iceberg_sources = files(
4949
'expression/expressions.cc',
5050
'expression/inclusive_metrics_evaluator.cc',
5151
'expression/literal.cc',
52+
'expression/manifest_evaluator.cc',
5253
'expression/predicate.cc',
5354
'expression/rewrite_not.cc',
5455
'expression/strict_metrics_evaluator.cc',

src/iceberg/row/struct_like.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Result<Scalar> LiteralToScalar(const Literal& literal) {
7070

7171
StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
7272
std::span<const size_t> position_path)
73-
: type_(std::move(type)) {
73+
: type_(std::move(type)), position_path_(position_path.begin(), position_path.end()) {
7474
if (position_path.size() == 1) {
7575
accessor_ = [pos =
7676
position_path[0]](const StructLike& struct_like) -> Result<Scalar> {

src/iceberg/row/struct_like.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,13 @@ class ICEBERG_EXPORT StructLikeAccessor {
121121
/// \brief Get the type of the value that this accessor is bound to.
122122
const Type& type() const { return *type_; }
123123

124+
/// \brief Get the position path of the value that this accessor is bound to.
125+
const std::vector<size_t>& position_path() const { return position_path_; }
126+
124127
private:
125128
std::shared_ptr<Type> type_;
126129
std::function<Result<Scalar>(const StructLike&)> accessor_;
130+
std::vector<size_t> position_path_;
127131
};
128132

129133
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ add_iceberg_test(expression_test
8585
expression_test.cc
8686
expression_visitor_test.cc
8787
literal_test.cc
88+
manifest_evaluator_test.cc
8889
inclusive_metrics_evaluator_test.cc
8990
inclusive_metrics_evaluator_with_transform_test.cc
9091
predicate_test.cc

0 commit comments

Comments
 (0)