diff --git a/src/iceberg/expression/manifest_evaluator.cc b/src/iceberg/expression/manifest_evaluator.cc index 845ecb6ca..9c4e708f1 100644 --- a/src/iceberg/expression/manifest_evaluator.cc +++ b/src/iceberg/expression/manifest_evaluator.cc @@ -363,12 +363,10 @@ Result> ManifestEvaluator::MakePartitionFilte std::shared_ptr expr, const std::shared_ptr& spec, const Schema& schema, bool case_sensitive) { ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec->PartitionType(schema)); - auto field_span = partition_type->fields(); - std::vector fields(field_span.begin(), field_span.end()); - auto partition_schema = std::make_shared(fields); ICEBERG_ASSIGN_OR_RAISE(auto rewrite_expr, RewriteNot::Visit(std::move(expr))); - ICEBERG_ASSIGN_OR_RAISE(auto partition_expr, - Binder::Bind(*partition_schema, rewrite_expr, case_sensitive)); + ICEBERG_ASSIGN_OR_RAISE( + auto partition_expr, + Binder::Bind(*partition_type->ToSchema(), rewrite_expr, case_sensitive)); return std::unique_ptr( new ManifestEvaluator(std::move(partition_expr))); } diff --git a/src/iceberg/expression/residual_evaluator.cc b/src/iceberg/expression/residual_evaluator.cc index e818199ed..cf8f98120 100644 --- a/src/iceberg/expression/residual_evaluator.cc +++ b/src/iceberg/expression/residual_evaluator.cc @@ -27,6 +27,7 @@ #include "iceberg/schema.h" #include "iceberg/schema_internal.h" #include "iceberg/transform.h" +#include "iceberg/type.h" #include "iceberg/util/macros.h" namespace iceberg { @@ -42,8 +43,7 @@ class ResidualVisitor : public BoundVisitor> { const StructLike& partition_data, bool case_sensitive) { ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec.PartitionType(schema)); - auto partition_schema = FromStructType(std::move(*partition_type), std::nullopt); - return ResidualVisitor(spec, schema, std::move(partition_schema), partition_data, + return ResidualVisitor(spec, schema, std::move(partition_type), partition_data, case_sensitive); } @@ -202,17 +202,17 @@ class ResidualVisitor : public BoundVisitor> { private: ResidualVisitor(const PartitionSpec& spec, const Schema& schema, - std::unique_ptr partition_schema, + std::unique_ptr partition_type, const StructLike& partition_data, bool case_sensitive) : spec_(spec), schema_(schema), - partition_schema_(std::move(partition_schema)), + partition_type_(std::move(partition_type)), partition_data_(partition_data), case_sensitive_(case_sensitive) {} const PartitionSpec& spec_; const Schema& schema_; - std::unique_ptr partition_schema_; + std::unique_ptr partition_type_; const StructLike& partition_data_; bool case_sensitive_; }; @@ -235,6 +235,7 @@ Result> ResidualVisitor::Predicate( // Not associated with a partition field, can't be evaluated return pred; } + auto schema = partition_type_->ToSchema(); for (const auto& part : parts) { // Check the strict projection @@ -243,9 +244,8 @@ Result> ResidualVisitor::Predicate( std::shared_ptr strict_result = nullptr; if (strict_projection != nullptr) { - ICEBERG_ASSIGN_OR_RAISE( - auto bound_strict, - strict_projection->Bind(*partition_schema_, case_sensitive_)); + ICEBERG_ASSIGN_OR_RAISE(auto bound_strict, + strict_projection->Bind(*schema, case_sensitive_)); if (bound_strict->is_bound_predicate()) { ICEBERG_ASSIGN_OR_RAISE( strict_result, BoundVisitor::Predicate( @@ -268,9 +268,8 @@ Result> ResidualVisitor::Predicate( std::shared_ptr inclusive_result = nullptr; if (inclusive_projection != nullptr) { - ICEBERG_ASSIGN_OR_RAISE( - auto bound_inclusive, - inclusive_projection->Bind(*partition_schema_, case_sensitive_)); + ICEBERG_ASSIGN_OR_RAISE(auto bound_inclusive, + inclusive_projection->Bind(*schema, case_sensitive_)); if (bound_inclusive->is_bound_predicate()) { ICEBERG_ASSIGN_OR_RAISE( diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc index 24e61644f..5b814204d 100644 --- a/src/iceberg/row/struct_like.cc +++ b/src/iceberg/row/struct_like.cc @@ -87,20 +87,20 @@ StructLikeAccessor::StructLikeAccessor(std::shared_ptr type, return std::get>(first_level_field)->GetField(pos1); }; } else if (!position_path.empty()) { - accessor_ = [position_path](const StructLike& struct_like) -> Result { + accessor_ = [this](const StructLike& struct_like) -> Result { std::vector> backups; const StructLike* current_struct_like = &struct_like; - for (size_t i = 0; i < position_path.size() - 1; ++i) { + for (size_t i = 0; i < position_path_.size() - 1; ++i) { ICEBERG_ASSIGN_OR_RAISE(auto field, - current_struct_like->GetField(position_path[i])); + current_struct_like->GetField(position_path_[i])); if (!std::holds_alternative>(field)) { return InvalidSchema("Encountered non-struct in the position path [{}]", - position_path); + position_path_); } backups.push_back(std::get>(field)); current_struct_like = backups.back().get(); } - return current_struct_like->GetField(position_path.back()); + return current_struct_like->GetField(position_path_.back()); }; } else { accessor_ = [](const StructLike&) -> Result { diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index 94a8764dc..f6c459d8d 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -59,19 +59,22 @@ class ICEBERG_EXPORT Schema : public StructType { std::string ToString() const override; - /// \brief Find the SchemaField by field name. + /// \brief Recursively find the SchemaField by field name. /// /// Short names for maps and lists are included for any name that does not conflict with /// a canonical name. For example, a list, 'l', of structs with field 'x' will produce - /// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', if its + /// short name 'l.x' in addition to canonical name 'l.element.x'. A map 'm', if its /// value include a structs with field 'x' wil produce short name 'm.x' in addition to - /// canonical name 'm.value.x' + /// canonical name 'm.value.x'. /// FIXME: Currently only handles ASCII lowercase conversion; extend to support /// non-ASCII characters (e.g., using std::towlower or ICU) Result>> FindFieldByName( std::string_view name, bool case_sensitive = true) const; - /// \brief Find the SchemaField by field id. + /// \brief Recursively find the SchemaField by field id. + /// + /// \param field_id The id of the field to get the accessor for. + /// \return The field with the given id, or std::nullopt if not found. Result>> FindFieldById( int32_t field_id) const; diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc index 56c6ee2c7..1c4a73ff3 100644 --- a/src/iceberg/table_scan.cc +++ b/src/iceberg/table_scan.cc @@ -273,13 +273,13 @@ Result>> DataTableScan::PlanFiles() co // Get the table schema and partition type ICEBERG_ASSIGN_OR_RAISE(auto current_schema, context_.table_metadata->Schema()); - ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr partition_schema, + ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr partition_type, partition_spec->PartitionType(*current_schema)); for (const auto& manifest_file : manifest_files) { ICEBERG_ASSIGN_OR_RAISE( auto manifest_reader, - ManifestReader::Make(manifest_file, file_io_, partition_schema)); + ManifestReader::Make(manifest_file, file_io_, partition_type)); ICEBERG_ASSIGN_OR_RAISE(auto manifests, manifest_reader->Entries()); // TODO(gty404): filter manifests using partition spec and filter expression diff --git a/src/iceberg/test/manifest_writer_versions_test.cc b/src/iceberg/test/manifest_writer_versions_test.cc index 0bbdbcc52..36f9445fd 100644 --- a/src/iceberg/test/manifest_writer_versions_test.cc +++ b/src/iceberg/test/manifest_writer_versions_test.cc @@ -241,10 +241,9 @@ class ManifestWriterVersionsTest : public ::testing::Test { } std::vector ReadManifest(const ManifestFile& manifest_file) { - auto partition_schema_result = spec_->PartitionType(*schema_); - EXPECT_THAT(partition_schema_result, IsOk()); - std::shared_ptr partition_type = - std::move(partition_schema_result.value()); + auto partition_type_result = spec_->PartitionType(*schema_); + EXPECT_THAT(partition_type_result, IsOk()); + std::shared_ptr partition_type = std::move(partition_type_result.value()); auto reader_result = ManifestReader::Make(manifest_file, file_io_, partition_type); EXPECT_THAT(reader_result, IsOk()); diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index 104ddad75..1cd5fb3ec 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -25,6 +25,7 @@ #include #include "iceberg/exception.h" +#include "iceberg/schema.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" #include "iceberg/util/string_util.h" @@ -48,6 +49,7 @@ std::string StructType::ToString() const { repr += ">"; return repr; } + std::span StructType::fields() const { return fields_; } Result> StructType::GetFieldById( int32_t field_id) const { @@ -56,6 +58,7 @@ Result> StructType::GetFieldById( if (it == field_by_id.get().end()) return std::nullopt; return it->second; } + Result> StructType::GetFieldByIndex( int32_t index) const { if (index < 0 || static_cast(index) >= fields_.size()) { @@ -63,6 +66,7 @@ Result> StructType::GetFieldByInd } return fields_[index]; } + Result> StructType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { @@ -81,6 +85,11 @@ Result> StructType::GetFieldByNam } return std::nullopt; } + +std::unique_ptr StructType::ToSchema() const { + return std::make_unique(fields_); +} + bool StructType::Equals(const Type& other) const { if (other.type_id() != TypeId::kStruct) { return false; @@ -88,6 +97,7 @@ bool StructType::Equals(const Type& other) const { const auto& struct_ = static_cast(other); return fields_ == struct_.fields_; } + Result> StructType::InitFieldById(const StructType& self) { std::unordered_map field_by_id; @@ -100,6 +110,7 @@ StructType::InitFieldById(const StructType& self) { } return field_by_id; } + Result> StructType::InitFieldByName(const StructType& self) { std::unordered_map field_by_name; @@ -113,6 +124,7 @@ StructType::InitFieldByName(const StructType& self) { } return field_by_name; } + Result> StructType::InitFieldByLowerCaseName(const StructType& self) { std::unordered_map field_by_lowercase_name; @@ -146,6 +158,7 @@ std::string ListType::ToString() const { repr += ">"; return repr; } + std::span ListType::fields() const { return {&element_, 1}; } Result> ListType::GetFieldById( int32_t field_id) const { @@ -154,6 +167,7 @@ Result> ListType::GetFieldById( } return std::nullopt; } + Result> ListType::GetFieldByIndex( int index) const { if (index == 0) { @@ -161,6 +175,7 @@ Result> ListType::GetFieldByIndex } return InvalidArgument("Invalid index {} to get field from list", index); } + Result> ListType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { @@ -174,6 +189,7 @@ Result> ListType::GetFieldByName( } return std::nullopt; } + bool ListType::Equals(const Type& other) const { if (other.type_id() != TypeId::kList) { return false; @@ -195,6 +211,7 @@ MapType::MapType(SchemaField key, SchemaField value) const SchemaField& MapType::key() const { return fields_[0]; } const SchemaField& MapType::value() const { return fields_[1]; } TypeId MapType::type_id() const { return kTypeId; } + std::string MapType::ToString() const { // XXX: work around Clang/libc++: "<{}>" in a format string appears to get // parsed as {<>} or something; split up the format string to avoid that @@ -204,6 +221,7 @@ std::string MapType::ToString() const { repr += ">"; return repr; } + std::span MapType::fields() const { return fields_; } Result> MapType::GetFieldById( int32_t field_id) const { @@ -214,6 +232,7 @@ Result> MapType::GetFieldById( } return std::nullopt; } + Result> MapType::GetFieldByIndex( int32_t index) const { if (index == 0) { @@ -223,6 +242,7 @@ Result> MapType::GetFieldByIndex( } return InvalidArgument("Invalid index {} to get field from map", index); } + Result> MapType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { @@ -241,6 +261,7 @@ Result> MapType::GetFieldByName( } return std::nullopt; } + bool MapType::Equals(const Type& other) const { if (other.type_id() != TypeId::kMap) { return false; diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 49866c442..7e17b78d4 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -123,6 +123,8 @@ class ICEBERG_EXPORT StructType : public NestedType { std::string_view name, bool case_sensitive) const override; using NestedType::GetFieldByName; + std::unique_ptr ToSchema() const; + protected: bool Equals(const Type& other) const override;