Skip to content

Commit f493eb4

Browse files
committed
fix: review comments
1 parent 8929e84 commit f493eb4

File tree

3 files changed

+125
-214
lines changed

3 files changed

+125
-214
lines changed

src/iceberg/expression/projections.cc

Lines changed: 32 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,23 @@
2020
#include "iceberg/expression/projections.h"
2121

2222
#include <memory>
23-
#include <vector>
2423

2524
#include "iceberg/expression/expression.h"
2625
#include "iceberg/expression/expression_visitor.h"
27-
#include "iceberg/expression/expressions.h"
2826
#include "iceberg/expression/predicate.h"
2927
#include "iceberg/expression/rewrite_not.h"
30-
#include "iceberg/expression/term.h"
31-
#include "iceberg/partition_field.h"
3228
#include "iceberg/partition_spec.h"
3329
#include "iceberg/result.h"
3430
#include "iceberg/transform.h"
3531
#include "iceberg/util/macros.h"
3632

3733
namespace iceberg {
3834

39-
// Implementation detail - not exported
4035
class ProjectionVisitor : public ExpressionVisitor<std::shared_ptr<Expression>> {
4136
public:
4237
~ProjectionVisitor() override = default;
4338

44-
ProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
45-
const std::shared_ptr<Schema>& schema, bool case_sensitive)
39+
ProjectionVisitor(const PartitionSpec& spec, const Schema& schema, bool case_sensitive)
4640
: spec_(spec), schema_(schema), case_sensitive_(case_sensitive) {}
4741

4842
Result<std::shared_ptr<Expression>> AlwaysTrue() override { return True::Instance(); }
@@ -57,24 +51,20 @@ class ProjectionVisitor : public ExpressionVisitor<std::shared_ptr<Expression>>
5751
Result<std::shared_ptr<Expression>> And(
5852
const std::shared_ptr<Expression>& left_result,
5953
const std::shared_ptr<Expression>& right_result) override {
60-
return Expressions::And(left_result, right_result);
54+
return And::MakeFolded(left_result, right_result);
6155
}
6256

6357
Result<std::shared_ptr<Expression>> Or(
6458
const std::shared_ptr<Expression>& left_result,
6559
const std::shared_ptr<Expression>& right_result) override {
66-
return Expressions::Or(left_result, right_result);
60+
return Or::MakeFolded(left_result, right_result);
6761
}
6862

6963
Result<std::shared_ptr<Expression>> Predicate(
7064
const std::shared_ptr<UnboundPredicate>& pred) override {
71-
ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(*schema_, case_sensitive_));
65+
ICEBERG_ASSIGN_OR_RAISE(auto bound_pred, pred->Bind(schema_, case_sensitive_));
7266
if (bound_pred->is_bound_predicate()) {
73-
auto bound_predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_pred);
74-
ICEBERG_DCHECK(
75-
bound_predicate != nullptr,
76-
"Expected bound_predicate to be non-null after is_bound_predicate() check");
77-
return Predicate(bound_predicate);
67+
return Predicate(std::dynamic_pointer_cast<BoundPredicate>(bound_pred));
7868
}
7969
return bound_pred;
8070
}
@@ -85,38 +75,9 @@ class ProjectionVisitor : public ExpressionVisitor<std::shared_ptr<Expression>>
8575
}
8676

8777
protected:
88-
const std::shared_ptr<PartitionSpec>& spec_;
89-
const std::shared_ptr<Schema>& schema_;
78+
const PartitionSpec& spec_;
79+
const Schema& schema_;
9080
bool case_sensitive_;
91-
92-
/// \brief Get partition fields that match the predicate's term.
93-
std::vector<const PartitionField*> GetFieldsByPredicate(
94-
const std::shared_ptr<BoundPredicate>& pred) const {
95-
int32_t source_id;
96-
switch (pred->term()->kind()) {
97-
case Term::Kind::kReference: {
98-
const auto& ref = pred->term()->reference();
99-
source_id = ref->field().field_id();
100-
break;
101-
}
102-
case Term::Kind::kTransform: {
103-
const auto& transform =
104-
internal::checked_pointer_cast<BoundTransform>(pred->term());
105-
source_id = transform->reference()->field().field_id();
106-
break;
107-
}
108-
default:
109-
std::unreachable();
110-
}
111-
112-
std::vector<const PartitionField*> result;
113-
for (const auto& field : spec_->fields()) {
114-
if (field.source_id() == source_id) {
115-
result.push_back(&field);
116-
}
117-
}
118-
return result;
119-
}
12081
};
12182

12283
ProjectionEvaluator::ProjectionEvaluator(std::unique_ptr<ProjectionVisitor> visitor)
@@ -131,16 +92,17 @@ class InclusiveProjectionVisitor : public ProjectionVisitor {
13192
public:
13293
~InclusiveProjectionVisitor() override = default;
13394

134-
InclusiveProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
135-
const std::shared_ptr<Schema>& schema, bool case_sensitive)
95+
InclusiveProjectionVisitor(const PartitionSpec& spec, const Schema& schema,
96+
bool case_sensitive)
13697
: ProjectionVisitor(spec, schema, case_sensitive) {}
13798

13899
Result<std::shared_ptr<Expression>> Predicate(
139100
const std::shared_ptr<BoundPredicate>& pred) override {
140101
ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
141102
// Find partition fields that match the predicate's term
142-
auto partition_fields = GetFieldsByPredicate(pred);
143-
if (partition_fields.empty()) {
103+
ICEBERG_ASSIGN_OR_RAISE(
104+
auto parts, spec_.GetFieldsBySourceId(pred->reference()->field().field_id()));
105+
if (parts.empty()) {
144106
// The predicate has no partition column
145107
return AlwaysTrue();
146108
}
@@ -155,19 +117,17 @@ class InclusiveProjectionVisitor : public ProjectionVisitor {
155117
// projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and
156118
// hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02.
157119
std::shared_ptr<Expression> result = True::Instance();
158-
for (const auto* part_field : partition_fields) {
120+
for (const auto& part : parts) {
159121
ICEBERG_ASSIGN_OR_RAISE(auto projected,
160-
part_field->transform()->Project(part_field->name(), pred));
122+
part.get().transform()->Project(part.get().name(), pred));
161123
if (projected != nullptr) {
162-
result =
163-
Expressions::And(result, std::shared_ptr<Expression>(projected.release()));
124+
ICEBERG_ASSIGN_OR_RAISE(result,
125+
And::MakeFolded(std::move(result), std::move(projected)));
164126
}
165127
}
166128

167129
return result;
168130
}
169-
170-
protected:
171131
};
172132

173133
/// \brief Strict projection evaluator.
@@ -177,16 +137,17 @@ class StrictProjectionVisitor : public ProjectionVisitor {
177137
public:
178138
~StrictProjectionVisitor() override = default;
179139

180-
StrictProjectionVisitor(const std::shared_ptr<PartitionSpec>& spec,
181-
const std::shared_ptr<Schema>& schema, bool case_sensitive)
140+
StrictProjectionVisitor(const PartitionSpec& spec, const Schema& schema,
141+
bool case_sensitive)
182142
: ProjectionVisitor(spec, schema, case_sensitive) {}
183143

184144
Result<std::shared_ptr<Expression>> Predicate(
185145
const std::shared_ptr<BoundPredicate>& pred) override {
186146
ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
187147
// Find partition fields that match the predicate's term
188-
auto partition_fields = GetFieldsByPredicate(pred);
189-
if (partition_fields.empty()) {
148+
ICEBERG_ASSIGN_OR_RAISE(
149+
auto parts, spec_.GetFieldsBySourceId(pred->reference()->field().field_id()));
150+
if (parts.empty()) {
190151
// The predicate has no matching partition columns
191152
return AlwaysFalse();
192153
}
@@ -199,12 +160,12 @@ class StrictProjectionVisitor : public ProjectionVisitor {
199160
// predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but
200161
// not the day, but does match the original predicate.
201162
std::shared_ptr<Expression> result = False::Instance();
202-
for (const auto* part_field : partition_fields) {
203-
ICEBERG_ASSIGN_OR_RAISE(auto projected, part_field->transform()->ProjectStrict(
204-
part_field->name(), pred));
163+
for (const auto& part : parts) {
164+
ICEBERG_ASSIGN_OR_RAISE(
165+
auto projected, part.get().transform()->ProjectStrict(part.get().name(), pred));
205166
if (projected != nullptr) {
206-
result =
207-
Expressions::Or(result, std::shared_ptr<Expression>(projected.release()));
167+
ICEBERG_ASSIGN_OR_RAISE(result,
168+
Or::MakeFolded(std::move(result), std::move(projected)));
208169
}
209170
}
210171

@@ -224,18 +185,18 @@ Result<std::shared_ptr<Expression>> ProjectionEvaluator::Project(
224185
return Visit<std::shared_ptr<Expression>, ProjectionVisitor>(rewritten, *visitor_);
225186
}
226187

227-
std::unique_ptr<ProjectionEvaluator> Projections::Inclusive(
228-
const std::shared_ptr<PartitionSpec>& spec, const std::shared_ptr<Schema>& schema,
229-
bool case_sensitive) {
188+
std::unique_ptr<ProjectionEvaluator> Projections::Inclusive(const PartitionSpec& spec,
189+
const Schema& schema,
190+
bool case_sensitive) {
230191
auto visitor =
231192
std::make_unique<InclusiveProjectionVisitor>(spec, schema, case_sensitive);
232193
return std::unique_ptr<ProjectionEvaluator>(
233194
new ProjectionEvaluator(std::move(visitor)));
234195
}
235196

236-
std::unique_ptr<ProjectionEvaluator> Projections::Strict(
237-
const std::shared_ptr<PartitionSpec>& spec, const std::shared_ptr<Schema>& schema,
238-
bool case_sensitive) {
197+
std::unique_ptr<ProjectionEvaluator> Projections::Strict(const PartitionSpec& spec,
198+
const Schema& schema,
199+
bool case_sensitive) {
239200
auto visitor = std::make_unique<StrictProjectionVisitor>(spec, schema, case_sensitive);
240201
return std::unique_ptr<ProjectionEvaluator>(
241202
new ProjectionEvaluator(std::move(visitor)));

src/iceberg/expression/projections.h

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class ICEBERG_EXPORT ProjectionEvaluator {
5050
/// \param visitor The projection visitor to use
5151
explicit ProjectionEvaluator(std::unique_ptr<class ProjectionVisitor> visitor);
5252

53-
std::unique_ptr<class ProjectionVisitor> visitor_;
53+
std::unique_ptr<ProjectionVisitor> visitor_;
5454
};
5555

5656
/// \brief Utils to project expressions on rows to expressions on partitions.
@@ -62,8 +62,7 @@ class ICEBERG_EXPORT ProjectionEvaluator {
6262
///
6363
/// A strict projection guarantees that if a partition matches a projected expression,
6464
/// then all rows in that partition will match the original expression.
65-
class ICEBERG_EXPORT Projections {
66-
public:
65+
struct ICEBERG_EXPORT Projections {
6766
/// \brief Creates an inclusive ProjectionEvaluator for the partition spec.
6867
///
6968
/// An evaluator is used to project expressions for a table's data rows into expressions
@@ -75,12 +74,13 @@ class ICEBERG_EXPORT Projections {
7574
/// Each predicate in the expression is projected using Transform::Project.
7675
///
7776
/// \param spec a partition spec
77+
/// \param schema a schema
7878
/// \param case_sensitive whether the Projection should consider case sensitivity on
7979
/// column names or not. Defaults to true (case sensitive).
8080
/// \return an inclusive projection evaluator for the partition spec
81-
static std::unique_ptr<ProjectionEvaluator> Inclusive(
82-
const std::shared_ptr<PartitionSpec>& spec, const std::shared_ptr<Schema>& schema,
83-
bool case_sensitive = true);
81+
static std::unique_ptr<ProjectionEvaluator> Inclusive(const PartitionSpec& spec,
82+
const Schema& schema,
83+
bool case_sensitive = true);
8484

8585
/// \brief Creates a strict ProjectionEvaluator for the partition spec.
8686
///
@@ -93,15 +93,13 @@ class ICEBERG_EXPORT Projections {
9393
/// Each predicate in the expression is projected using Transform::ProjectStrict.
9494
///
9595
/// \param spec a partition spec
96+
/// \param schema a schema
9697
/// \param case_sensitive whether the Projection should consider case sensitivity on
9798
/// column names or not. Defaults to true (case sensitive).
9899
/// \return a strict projection evaluator for the partition spec
99-
static std::unique_ptr<ProjectionEvaluator> Strict(
100-
const std::shared_ptr<PartitionSpec>& spec, const std::shared_ptr<Schema>& schema,
101-
bool case_sensitive = true);
102-
103-
private:
104-
Projections() = default;
100+
static std::unique_ptr<ProjectionEvaluator> Strict(const PartitionSpec& spec,
101+
const Schema& schema,
102+
bool case_sensitive = true);
105103
};
106104

107105
} // namespace iceberg

0 commit comments

Comments
 (0)