From 7abe6e1a62cc9bf72843fe4e482fe088d217de2d Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Tue, 26 Aug 2025 14:46:46 +0800 Subject: [PATCH 1/6] feat: add case-insensitive field lookup for StructType, ListType, and MapType - Implemented case-insensitive GetFieldByName in NestedType subclasses. - Added lazy initialization for maps in StructType - Handled duplicate names/IDs with Status returns instead of throws. --- src/iceberg/manifest_reader_internal.cc | 14 +-- src/iceberg/table_scan.cc | 2 +- src/iceberg/type.cc | 134 +++++++++++++++++------- src/iceberg/type.h | 57 ++++++---- test/schema_test.cc | 8 -- test/type_test.cc | 102 ++++++++++++++++-- 6 files changed, 237 insertions(+), 80 deletions(-) diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc index 0ff743edb..126bd7557 100644 --- a/src/iceberg/manifest_reader_internal.cc +++ b/src/iceberg/manifest_reader_internal.cc @@ -222,8 +222,8 @@ Result> ParseManifestList(ArrowSchema* schema, if (!field.has_value()) { return InvalidSchema("Field index {} is not found in schema", idx); } - auto field_name = field.value().get().name(); - bool required = !field.value().get().optional(); + auto field_name = field.value()->get().name(); + bool required = !field.value()->get().optional(); auto view_of_column = array_view.children[idx]; switch (idx) { case 0: @@ -340,8 +340,8 @@ Status ParseDataFile(const std::shared_ptr& data_file_schema, data_file_schema->fields().size(), view_of_column->n_children); } for (int64_t col_idx = 0; col_idx < view_of_column->n_children; ++col_idx) { - auto field_name = data_file_schema->GetFieldByIndex(col_idx).value().get().name(); - auto required = !data_file_schema->GetFieldByIndex(col_idx).value().get().optional(); + auto field_name = data_file_schema->GetFieldByIndex(col_idx).value()->get().name(); + auto required = !data_file_schema->GetFieldByIndex(col_idx).value()->get().optional(); auto view_of_file_field = view_of_column->children[col_idx]; auto manifest_entry_count = view_of_file_field->length; @@ -487,8 +487,8 @@ Result> ParseManifestEntry(ArrowSchema* schema, if (!field.has_value()) { return InvalidManifest("Field not found in schema: {}", idx); } - auto field_name = field.value().get().name(); - bool required = !field.value().get().optional(); + auto field_name = field.value()->get().name(); + bool required = !field.value()->get().optional(); auto view_of_column = array_view.children[idx]; switch (idx) { @@ -510,7 +510,7 @@ Result> ParseManifestEntry(ArrowSchema* schema, break; case 4: { auto data_file_schema = - dynamic_pointer_cast(field.value().get().type()); + dynamic_pointer_cast(field.value()->get().type()); ICEBERG_RETURN_UNEXPECTED( ParseDataFile(data_file_schema, view_of_column, manifest_entries)); break; diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc index a7edd5d79..b52db2265 100644 --- a/src/iceberg/table_scan.cc +++ b/src/iceberg/table_scan.cc @@ -115,7 +115,7 @@ Result> TableScanBuilder::Build() { return InvalidArgument("Column {} not found in schema '{}'", column_name, *schema_id); } - projected_fields.emplace_back(field_opt.value().get()); + projected_fields.emplace_back(field_opt.value()->get()); } context_.projected_schema = std::make_shared(std::move(projected_fields), schema->schema_id()); diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index e66f96daf..aa0321925 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -25,23 +25,18 @@ #include "iceberg/exception.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep +#include "iceberg/util/macros.h" +#include "iceberg/util/string_utils.h" namespace iceberg { -StructType::StructType(std::vector fields) : fields_(std::move(fields)) { - size_t index = 0; - for (const auto& field : fields_) { - auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), index); - if (!inserted) { - throw IcebergError( - std::format("StructType: duplicate field ID {} (field indices {} and {})", - field.field_id(), it->second, index)); - } - - ++index; - } +Result>> +NestedType::GetFieldByName(std::string_view name) const { + return GetFieldByName(name, true); } +StructType::StructType(std::vector fields) : fields_(std::move(fields)) {} + TypeId StructType::type_id() const { return kTypeId; } std::string StructType::ToString() const { @@ -53,27 +48,34 @@ std::string StructType::ToString() const { return repr; } std::span StructType::fields() const { return fields_; } -std::optional> StructType::GetFieldById( +Result>> StructType::GetFieldById( int32_t field_id) const { - auto it = field_id_to_index_.find(field_id); - if (it == field_id_to_index_.end()) return std::nullopt; - return fields_[it->second]; + ICEBERG_RETURN_UNEXPECTED(InitFieldById()); + auto it = field_by_id_.find(field_id); + if (it == field_by_id_.end()) return std::nullopt; + return it->second; } -std::optional> StructType::GetFieldByIndex( - int32_t index) const { - if (index < 0 || index >= static_cast(fields_.size())) { +Result>> +StructType::GetFieldByIndex(int32_t index) const { + if (index < 0 || static_cast(index) >= fields_.size()) { return std::nullopt; } return fields_[index]; } -std::optional> StructType::GetFieldByName( - std::string_view name) const { - // N.B. duplicate names are not permitted (looking at the Java - // implementation) so there is nothing in particular we need to do here - for (const auto& field : fields_) { - if (field.name() == name) { - return field; +Result>> +StructType::GetFieldByName(std::string_view name, bool case_sensitive) const { + if (case_sensitive) { + ICEBERG_RETURN_UNEXPECTED(InitFieldByName()); + auto it = field_by_name_.find(name); + if (it != field_by_name_.end()) { + return it->second; } + return std::nullopt; + } + ICEBERG_RETURN_UNEXPECTED(InitFieldByLowerCaseName()); + auto it = field_by_lowercase_name_.find(StringUtils::ToLower(name)); + if (it != field_by_lowercase_name_.end()) { + return it->second; } return std::nullopt; } @@ -84,6 +86,48 @@ bool StructType::Equals(const Type& other) const { const auto& struct_ = static_cast(other); return fields_ == struct_.fields_; } +Status StructType::InitFieldById() const { + if (!field_by_id_.empty()) { + return {}; + } + for (const auto& field : fields_) { + auto it = field_by_id_.try_emplace(field.field_id(), field); + if (!it.second) { + return NotAllowed("Duplicate field id found: {} (prev name: {}, curr name: {})", + field.field_id(), it.first->second.get().name(), field.name()); + } + } + return {}; +} +Status StructType::InitFieldByName() const { + if (!field_by_name_.empty()) { + return {}; + } + for (const auto& field : fields_) { + auto it = field_by_name_.try_emplace(field.name(), field); + if (!it.second) { + return NotAllowed("Duplicate field name found: {} (prev id: {}, curr id: {})", + it.first->first, it.first->second.get().field_id(), + field.field_id()); + } + } + return {}; +} +Status StructType::InitFieldByLowerCaseName() const { + if (!field_by_lowercase_name_.empty()) { + return {}; + } + for (const auto& field : fields_) { + auto it = + field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field); + if (!it.second) { + return NotAllowed("Duplicate field name found: {} (prev id: {}, curr id: {})", + it.first->first, it.first->second.get().field_id(), + field.field_id()); + } + } + return {}; +} ListType::ListType(SchemaField element) : element_(std::move(element)) { if (element_.name() != kElementName) { @@ -105,23 +149,29 @@ std::string ListType::ToString() const { return repr; } std::span ListType::fields() const { return {&element_, 1}; } -std::optional> ListType::GetFieldById( +Result>> ListType::GetFieldById( int32_t field_id) const { if (field_id == element_.field_id()) { return std::cref(element_); } return std::nullopt; } -std::optional> ListType::GetFieldByIndex( - int index) const { +Result>> +ListType::GetFieldByIndex(int index) const { if (index == 0) { return std::cref(element_); } return std::nullopt; } -std::optional> ListType::GetFieldByName( - std::string_view name) const { - if (name == element_.name()) { +Result>> ListType::GetFieldByName( + std::string_view name, bool case_sensitive) const { + if (case_sensitive) { + if (name == element_.name()) { + return std::cref(element_); + } + return std::nullopt; + } + if (StringUtils::ToLower(name) == StringUtils::ToLower(element_.name())) { return std::cref(element_); } return std::nullopt; @@ -159,7 +209,7 @@ std::string MapType::ToString() const { return repr; } std::span MapType::fields() const { return fields_; } -std::optional> MapType::GetFieldById( +Result>> MapType::GetFieldById( int32_t field_id) const { if (field_id == key().field_id()) { return key(); @@ -168,7 +218,7 @@ std::optional> MapType::GetFieldById( } return std::nullopt; } -std::optional> MapType::GetFieldByIndex( +Result>> MapType::GetFieldByIndex( int32_t index) const { if (index == 0) { return key(); @@ -177,11 +227,19 @@ std::optional> MapType::GetFieldByInde } return std::nullopt; } -std::optional> MapType::GetFieldByName( - std::string_view name) const { - if (name == kKeyName) { +Result>> MapType::GetFieldByName( + std::string_view name, bool case_sensitive) const { + if (case_sensitive) { + if (name == kKeyName) { + return key(); + } else if (name == kValueName) { + return value(); + } + return std::nullopt; + } + if (StringUtils::ToLower(name) == StringUtils::ToLower(kKeyName)) { return key(); - } else if (name == kValueName) { + } else if (StringUtils::ToLower(name) == StringUtils::ToLower(kValueName)) { return value(); } return std::nullopt; diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 78c0141b1..5dba9713b 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -33,6 +33,7 @@ #include #include "iceberg/iceberg_export.h" +#include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/util/formattable.h" @@ -78,20 +79,23 @@ class ICEBERG_EXPORT NestedType : public Type { /// \brief Get a field by field ID. /// /// \note This is O(1) complexity. - [[nodiscard]] virtual std::optional> + [[nodiscard]] virtual Result>> GetFieldById(int32_t field_id) const = 0; /// \brief Get a field by index. /// /// \note This is O(1) complexity. - [[nodiscard]] virtual std::optional> + [[nodiscard]] virtual Result>> GetFieldByIndex(int32_t index) const = 0; - /// \brief Get a field by name (case-sensitive). Behavior is undefined if + /// \brief Get a field by name. Behavior is not allowed if /// the field name is not unique; prefer GetFieldById or GetFieldByIndex /// when possible. /// - /// \note This is currently O(n) complexity. - [[nodiscard]] virtual std::optional> - GetFieldByName(std::string_view name) const = 0; + /// \note This is currently O(1) complexity. + [[nodiscard]] virtual Result>> + GetFieldByName(std::string_view name, bool case_sensitive) const = 0; + /// \brief Get a field by name(case-sensitive). + [[nodiscard]] Result>> + GetFieldByName(std::string_view name) const; }; /// \defgroup type-nested Nested Types @@ -109,18 +113,29 @@ class ICEBERG_EXPORT StructType : public NestedType { std::string ToString() const override; std::span fields() const override; - std::optional> GetFieldById( + Result>> GetFieldById( int32_t field_id) const override; - std::optional> GetFieldByIndex( + Result>> GetFieldByIndex( int32_t index) const override; - std::optional> GetFieldByName( - std::string_view name) const override; + Result>> GetFieldByName( + std::string_view name, bool case_sensitive) const override; + + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; + Status InitFieldById() const; + Status InitFieldByName() const; + Status InitFieldByLowerCaseName() const; + protected: std::vector fields_; - std::unordered_map field_id_to_index_; + mutable std::unordered_map> + field_by_id_; + mutable std::unordered_map> + field_by_name_; + mutable std::unordered_map> + field_by_lowercase_name_; }; /// \brief A data type representing a list of values. @@ -140,12 +155,14 @@ class ICEBERG_EXPORT ListType : public NestedType { std::string ToString() const override; std::span fields() const override; - std::optional> GetFieldById( + Result>> GetFieldById( int32_t field_id) const override; - std::optional> GetFieldByIndex( + Result>> GetFieldByIndex( int32_t index) const override; - std::optional> GetFieldByName( - std::string_view name) const override; + Result>> GetFieldByName( + std::string_view name, bool case_sensitive) const override; + + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; @@ -172,12 +189,14 @@ class ICEBERG_EXPORT MapType : public NestedType { std::string ToString() const override; std::span fields() const override; - std::optional> GetFieldById( + Result>> GetFieldById( int32_t field_id) const override; - std::optional> GetFieldByIndex( + Result>> GetFieldByIndex( int32_t index) const override; - std::optional> GetFieldByName( - std::string_view name) const override; + Result>> GetFieldByName( + std::string_view name, bool case_sensitive) const override; + + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; diff --git a/test/schema_test.cc b/test/schema_test.cc index d282cea9d..79ddaa5fc 100644 --- a/test/schema_test.cc +++ b/test/schema_test.cc @@ -51,14 +51,6 @@ TEST(SchemaTest, Basics) { ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(-1)); ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); } - ASSERT_THAT( - []() { - iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); - iceberg::SchemaField field2(5, "bar", iceberg::string(), true); - iceberg::Schema schema({field1, field2}, 100); - }, - ::testing::ThrowsMessage( - ::testing::HasSubstr("duplicate field ID 5"))); } TEST(SchemaTest, Equality) { diff --git a/test/type_test.cc b/test/type_test.cc index fca886928..0408dc4e4 100644 --- a/test/type_test.cc +++ b/test/type_test.cc @@ -393,12 +393,100 @@ TEST(TypeTest, Struct) { ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(-1)); ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element")); } +} + +TEST(TypeTest, StructTypeGetFieldByName) { + iceberg::SchemaField field1(1, "Foo", iceberg::int32(), true); + iceberg::SchemaField field2(2, "Bar", iceberg::string(), false); + iceberg::StructType struct_({field1, field2}); + + // Case-sensitive: exact match + ASSERT_THAT(struct_.GetFieldByName("Foo"), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByName("foo"), ::testing::Eq(std::nullopt)); + + // Case-insensitive + ASSERT_THAT(struct_.GetFieldByName("foo", false), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByName("fOO", false), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByName("FOO", false), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByName("bar", false), ::testing::Optional(field2)); + ASSERT_THAT(struct_.GetFieldByName("BaR", false), ::testing::Optional(field2)); + ASSERT_THAT(struct_.GetFieldByName("BAR", false), ::testing::Optional(field2)); + ASSERT_THAT(struct_.GetFieldByName("baz", false), ::testing::Eq(std::nullopt)); +} + +TEST(TypeTest, ListTypeGetFieldByName) { + iceberg::SchemaField element(1, "element", iceberg::int32(), true); + iceberg::ListType list(element); + + // Case-sensitive: exact match + ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(element)); + ASSERT_THAT(list.GetFieldByName("Element"), ::testing::Eq(std::nullopt)); + + // Case-insensitive + ASSERT_THAT(list.GetFieldByName("element", false), ::testing::Optional(element)); + ASSERT_THAT(list.GetFieldByName("Element", false), ::testing::Optional(element)); + ASSERT_THAT(list.GetFieldByName("ELEMENT", false), ::testing::Optional(element)); + ASSERT_THAT(list.GetFieldByName("eLeMeNt", false), ::testing::Optional(element)); + ASSERT_THAT(list.GetFieldByName("foo", false), ::testing::Eq(std::nullopt)); +} + +TEST(TypeTest, MapTypeGetFieldByName) { + iceberg::SchemaField key(1, "key", iceberg::int32(), true); + iceberg::SchemaField value(2, "value", iceberg::string(), false); + iceberg::MapType map(key, value); + + // Case-sensitive: exact match + ASSERT_THAT(map.GetFieldByName("key"), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByName("Key"), ::testing::Eq(std::nullopt)); + ASSERT_THAT(map.GetFieldByName("value"), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("Value"), ::testing::Eq(std::nullopt)); + + // Case-insensitive + ASSERT_THAT(map.GetFieldByName("Key", false), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByName("KEY", false), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByName("kEy", false), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByName("value", false), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("Value", false), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("VALUE", false), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("vAlUe", false), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("foo", false), ::testing::Eq(std::nullopt)); +} + +TEST(TypeTest, StructDuplicateId) { + iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); + iceberg::SchemaField field2(5, "bar", iceberg::string(), true); + iceberg::StructType struct_({field1, field2}); + + auto result = struct_.GetFieldById(5); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr( + "Duplicate field id found: 5 (prev name: foo, curr name: bar)")); +} + +TEST(TypeTest, StructDuplicateName) { + iceberg::SchemaField field1(1, "foo", iceberg::int32(), true); + iceberg::SchemaField field2(2, "foo", iceberg::string(), true); + iceberg::StructType struct_({field1, field2}); + + auto result = struct_.GetFieldByName("foo", true); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); ASSERT_THAT( - []() { - iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); - iceberg::SchemaField field2(5, "bar", iceberg::string(), true); - iceberg::StructType struct_({field1, field2}); - }, - ::testing::ThrowsMessage( - ::testing::HasSubstr("duplicate field ID 5"))); + result.error().message, + ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); +} + +TEST(TypeTest, StructDuplicateLowerCaseName) { + iceberg::SchemaField field1(1, "Foo", iceberg::int32(), true); + iceberg::SchemaField field2(2, "foo", iceberg::string(), true); + iceberg::StructType struct_({field1, field2}); + + auto result = struct_.GetFieldByName("foo", false); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); + ASSERT_THAT( + result.error().message, + ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); } From 18aef130a48434bcb6da1061766afecd5364ea03 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Wed, 27 Aug 2025 18:15:38 +0800 Subject: [PATCH 2/6] fix comments --- src/iceberg/type.cc | 63 +++++++++++++++++++++------------------------ src/iceberg/type.h | 46 ++++++++++++++++----------------- test/schema_test.cc | 10 +++++-- test/type_test.cc | 41 ++++++++++++++++++++++------- 4 files changed, 91 insertions(+), 69 deletions(-) diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index aa0321925..5daddf2bd 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -26,13 +26,13 @@ #include "iceberg/exception.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" -#include "iceberg/util/string_utils.h" +#include "iceberg/util/string_util.h" namespace iceberg { -Result>> -NestedType::GetFieldByName(std::string_view name) const { - return GetFieldByName(name, true); +Result> NestedType::GetFieldByName( + std::string_view name) const { + return GetFieldByName(name, /*case_sensitive=*/true); } StructType::StructType(std::vector fields) : fields_(std::move(fields)) {} @@ -48,22 +48,22 @@ std::string StructType::ToString() const { return repr; } std::span StructType::fields() const { return fields_; } -Result>> StructType::GetFieldById( +Result> StructType::GetFieldById( int32_t field_id) const { ICEBERG_RETURN_UNEXPECTED(InitFieldById()); auto it = field_by_id_.find(field_id); if (it == field_by_id_.end()) return std::nullopt; return it->second; } -Result>> -StructType::GetFieldByIndex(int32_t index) const { +Result> StructType::GetFieldByIndex( + int32_t index) const { if (index < 0 || static_cast(index) >= fields_.size()) { - return std::nullopt; + return InvalidArgument("index {} is out of range[0, {})", index, fields_.size()); } return fields_[index]; } -Result>> -StructType::GetFieldByName(std::string_view name, bool case_sensitive) const { +Result> StructType::GetFieldByName( + std::string_view name, bool case_sensitive) const { if (case_sensitive) { ICEBERG_RETURN_UNEXPECTED(InitFieldByName()); auto it = field_by_name_.find(name); @@ -93,8 +93,8 @@ Status StructType::InitFieldById() const { for (const auto& field : fields_) { auto it = field_by_id_.try_emplace(field.field_id(), field); if (!it.second) { - return NotAllowed("Duplicate field id found: {} (prev name: {}, curr name: {})", - field.field_id(), it.first->second.get().name(), field.name()); + return InvalidSchema("Duplicate field id found: {} (prev name: {}, curr name: {})", + field.field_id(), it.first->second.get().name(), field.name()); } } return {}; @@ -106,9 +106,9 @@ Status StructType::InitFieldByName() const { for (const auto& field : fields_) { auto it = field_by_name_.try_emplace(field.name(), field); if (!it.second) { - return NotAllowed("Duplicate field name found: {} (prev id: {}, curr id: {})", - it.first->first, it.first->second.get().field_id(), - field.field_id()); + return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr id: {})", + it.first->first, it.first->second.get().field_id(), + field.field_id()); } } return {}; @@ -121,9 +121,9 @@ Status StructType::InitFieldByLowerCaseName() const { auto it = field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field); if (!it.second) { - return NotAllowed("Duplicate field name found: {} (prev id: {}, curr id: {})", - it.first->first, it.first->second.get().field_id(), - field.field_id()); + return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr id: {})", + it.first->first, it.first->second.get().field_id(), + field.field_id()); } } return {}; @@ -149,29 +149,28 @@ std::string ListType::ToString() const { return repr; } std::span ListType::fields() const { return {&element_, 1}; } -Result>> ListType::GetFieldById( +Result> ListType::GetFieldById( int32_t field_id) const { if (field_id == element_.field_id()) { return std::cref(element_); } return std::nullopt; } -Result>> -ListType::GetFieldByIndex(int index) const { +Result> ListType::GetFieldByIndex(int index) const { if (index == 0) { return std::cref(element_); } - return std::nullopt; + return InvalidArgument("index {} is out of range[0, {})", index, 1); } -Result>> ListType::GetFieldByName( +Result> ListType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { - if (name == element_.name()) { + if (name == kElementName) { return std::cref(element_); } return std::nullopt; } - if (StringUtils::ToLower(name) == StringUtils::ToLower(element_.name())) { + if (StringUtils::ToLower(name) == kElementName) { return std::cref(element_); } return std::nullopt; @@ -209,8 +208,7 @@ std::string MapType::ToString() const { return repr; } std::span MapType::fields() const { return fields_; } -Result>> MapType::GetFieldById( - int32_t field_id) const { +Result> MapType::GetFieldById(int32_t field_id) const { if (field_id == key().field_id()) { return key(); } else if (field_id == value().field_id()) { @@ -218,16 +216,15 @@ Result>> MapType::GetFie } return std::nullopt; } -Result>> MapType::GetFieldByIndex( - int32_t index) const { +Result> MapType::GetFieldByIndex(int32_t index) const { if (index == 0) { return key(); } else if (index == 1) { return value(); } - return std::nullopt; + return InvalidArgument("index {} is out of range[0, {})", index, 2); } -Result>> MapType::GetFieldByName( +Result> MapType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { if (name == kKeyName) { @@ -237,9 +234,9 @@ Result>> MapType::GetFie } return std::nullopt; } - if (StringUtils::ToLower(name) == StringUtils::ToLower(kKeyName)) { + if (StringUtils::ToLower(name) == kKeyName) { return key(); - } else if (StringUtils::ToLower(name) == StringUtils::ToLower(kValueName)) { + } else if (StringUtils::ToLower(name) == kValueName) { return value(); } return std::nullopt; diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 5dba9713b..d766b586b 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -68,6 +68,7 @@ class ICEBERG_EXPORT PrimitiveType : public Type { bool is_nested() const override { return false; } }; +using SchemaFieldConstRef = std::reference_wrapper; /// \brief A data type that has child fields. class ICEBERG_EXPORT NestedType : public Type { public: @@ -79,23 +80,23 @@ class ICEBERG_EXPORT NestedType : public Type { /// \brief Get a field by field ID. /// /// \note This is O(1) complexity. - [[nodiscard]] virtual Result>> - GetFieldById(int32_t field_id) const = 0; + [[nodiscard]] virtual Result> GetFieldById( + int32_t field_id) const = 0; /// \brief Get a field by index. /// /// \note This is O(1) complexity. - [[nodiscard]] virtual Result>> - GetFieldByIndex(int32_t index) const = 0; - /// \brief Get a field by name. Behavior is not allowed if + [[nodiscard]] virtual Result> GetFieldByIndex( + int32_t index) const = 0; + /// \brief Get a field by name. Return an error Status if /// the field name is not unique; prefer GetFieldById or GetFieldByIndex /// when possible. /// /// \note This is currently O(1) complexity. - [[nodiscard]] virtual Result>> - GetFieldByName(std::string_view name, bool case_sensitive) const = 0; + [[nodiscard]] virtual Result> GetFieldByName( + std::string_view name, bool case_sensitive) const = 0; /// \brief Get a field by name(case-sensitive). - [[nodiscard]] Result>> - GetFieldByName(std::string_view name) const; + [[nodiscard]] Result> GetFieldByName( + std::string_view name) const; }; /// \defgroup type-nested Nested Types @@ -113,11 +114,11 @@ class ICEBERG_EXPORT StructType : public NestedType { std::string ToString() const override; std::span fields() const override; - Result>> GetFieldById( + Result> GetFieldById( int32_t field_id) const override; - Result>> GetFieldByIndex( + Result> GetFieldByIndex( int32_t index) const override; - Result>> GetFieldByName( + Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; using NestedType::GetFieldByName; @@ -130,12 +131,9 @@ class ICEBERG_EXPORT StructType : public NestedType { protected: std::vector fields_; - mutable std::unordered_map> - field_by_id_; - mutable std::unordered_map> - field_by_name_; - mutable std::unordered_map> - field_by_lowercase_name_; + mutable std::unordered_map field_by_id_; + mutable std::unordered_map field_by_name_; + mutable std::unordered_map field_by_lowercase_name_; }; /// \brief A data type representing a list of values. @@ -155,11 +153,11 @@ class ICEBERG_EXPORT ListType : public NestedType { std::string ToString() const override; std::span fields() const override; - Result>> GetFieldById( + Result> GetFieldById( int32_t field_id) const override; - Result>> GetFieldByIndex( + Result> GetFieldByIndex( int32_t index) const override; - Result>> GetFieldByName( + Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; using NestedType::GetFieldByName; @@ -189,11 +187,11 @@ class ICEBERG_EXPORT MapType : public NestedType { std::string ToString() const override; std::span fields() const override; - Result>> GetFieldById( + Result> GetFieldById( int32_t field_id) const override; - Result>> GetFieldByIndex( + Result> GetFieldByIndex( int32_t index) const override; - Result>> GetFieldByName( + Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; using NestedType::GetFieldByName; diff --git a/test/schema_test.cc b/test/schema_test.cc index 79ddaa5fc..1076b826d 100644 --- a/test/schema_test.cc +++ b/test/schema_test.cc @@ -47,8 +47,14 @@ TEST(SchemaTest, Basics) { ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2)); ASSERT_EQ(std::nullopt, schema.GetFieldById(0)); - ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(2)); - ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(-1)); + auto result = schema.GetFieldByIndex(2); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index 2 is out of range[0, 2)")); + result = schema.GetFieldByIndex(-1); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index -1 is out of range[0, 2)")); ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); } } diff --git a/test/type_test.cc b/test/type_test.cc index 0408dc4e4..6189af31f 100644 --- a/test/type_test.cc +++ b/test/type_test.cc @@ -315,13 +315,22 @@ TEST(TypeTest, List) { std::span fields = list.fields(); ASSERT_EQ(1, fields.size()); ASSERT_EQ(field, fields[0]); - ASSERT_THAT(list.GetFieldById(5), ::testing::Optional(field)); + auto result = list.GetFieldByIndex(5); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index 5 is out of range[0, 1)")); ASSERT_THAT(list.GetFieldByIndex(0), ::testing::Optional(field)); ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(field)); ASSERT_EQ(std::nullopt, list.GetFieldById(0)); - ASSERT_EQ(std::nullopt, list.GetFieldByIndex(1)); - ASSERT_EQ(std::nullopt, list.GetFieldByIndex(-1)); + result = list.GetFieldByIndex(1); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index 1 is out of range[0, 1)")); + result = list.GetFieldByIndex(-1); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index -1 is out of range[0, 1)")); ASSERT_EQ(std::nullopt, list.GetFieldByName("foo")); } ASSERT_THAT( @@ -350,8 +359,14 @@ TEST(TypeTest, Map) { ASSERT_THAT(map.GetFieldByName("value"), ::testing::Optional(value)); ASSERT_EQ(std::nullopt, map.GetFieldById(0)); - ASSERT_EQ(std::nullopt, map.GetFieldByIndex(2)); - ASSERT_EQ(std::nullopt, map.GetFieldByIndex(-1)); + auto result = map.GetFieldByIndex(2); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index 2 is out of range[0, 2)")); + result = map.GetFieldByIndex(-1); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index -1 is out of range[0, 2)")); ASSERT_EQ(std::nullopt, map.GetFieldByName("element")); } ASSERT_THAT( @@ -389,8 +404,14 @@ TEST(TypeTest, Struct) { ASSERT_THAT(struct_.GetFieldByName("bar"), ::testing::Optional(field2)); ASSERT_EQ(std::nullopt, struct_.GetFieldById(0)); - ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(2)); - ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(-1)); + auto result = struct_.GetFieldByIndex(2); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index 2 is out of range[0, 2)")); + result = struct_.GetFieldByIndex(-1); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); + ASSERT_THAT(result.error().message, + ::testing::HasSubstr("index -1 is out of range[0, 2)")); ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element")); } } @@ -459,7 +480,7 @@ TEST(TypeTest, StructDuplicateId) { auto result = struct_.GetFieldById(5); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); ASSERT_THAT(result.error().message, ::testing::HasSubstr( "Duplicate field id found: 5 (prev name: foo, curr name: bar)")); @@ -472,7 +493,7 @@ TEST(TypeTest, StructDuplicateName) { auto result = struct_.GetFieldByName("foo", true); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); ASSERT_THAT( result.error().message, ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); @@ -485,7 +506,7 @@ TEST(TypeTest, StructDuplicateLowerCaseName) { auto result = struct_.GetFieldByName("foo", false); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kNotAllowed); + ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); ASSERT_THAT( result.error().message, ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); From 9597b873809a977fb604c64d2ff87864086f1b72 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Thu, 28 Aug 2025 13:39:40 +0800 Subject: [PATCH 3/6] fix comments --- src/iceberg/type.cc | 34 +++++++++++++++++++--------------- src/iceberg/type.h | 13 ++++++------- test/schema_test.cc | 4 ++-- test/type_test.cc | 14 +++++++------- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index 5daddf2bd..114a591ba 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -30,7 +30,7 @@ namespace iceberg { -Result> NestedType::GetFieldByName( +Result> NestedType::GetFieldByName( std::string_view name) const { return GetFieldByName(name, /*case_sensitive=*/true); } @@ -48,21 +48,21 @@ std::string StructType::ToString() const { return repr; } std::span StructType::fields() const { return fields_; } -Result> StructType::GetFieldById( +Result> StructType::GetFieldById( int32_t field_id) const { ICEBERG_RETURN_UNEXPECTED(InitFieldById()); auto it = field_by_id_.find(field_id); if (it == field_by_id_.end()) return std::nullopt; return it->second; } -Result> StructType::GetFieldByIndex( +Result> StructType::GetFieldByIndex( int32_t index) const { if (index < 0 || static_cast(index) >= fields_.size()) { - return InvalidArgument("index {} is out of range[0, {})", index, fields_.size()); + return InvalidArgument("Invalid index {} to get field from struct", index); } return fields_[index]; } -Result> StructType::GetFieldByName( +Result> StructType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { ICEBERG_RETURN_UNEXPECTED(InitFieldByName()); @@ -149,20 +149,21 @@ std::string ListType::ToString() const { return repr; } std::span ListType::fields() const { return {&element_, 1}; } -Result> ListType::GetFieldById( +Result> ListType::GetFieldById( int32_t field_id) const { if (field_id == element_.field_id()) { return std::cref(element_); } return std::nullopt; } -Result> ListType::GetFieldByIndex(int index) const { +Result> ListType::GetFieldByIndex( + int index) const { if (index == 0) { return std::cref(element_); } - return InvalidArgument("index {} is out of range[0, {})", index, 1); + return InvalidArgument("Invalid index {} to get field from list", index); } -Result> ListType::GetFieldByName( +Result> ListType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { if (name == kElementName) { @@ -208,7 +209,8 @@ std::string MapType::ToString() const { return repr; } std::span MapType::fields() const { return fields_; } -Result> MapType::GetFieldById(int32_t field_id) const { +Result> MapType::GetFieldById( + int32_t field_id) const { if (field_id == key().field_id()) { return key(); } else if (field_id == value().field_id()) { @@ -216,15 +218,16 @@ Result> MapType::GetFieldById(int32_t field_i } return std::nullopt; } -Result> MapType::GetFieldByIndex(int32_t index) const { +Result> MapType::GetFieldByIndex( + int32_t index) const { if (index == 0) { return key(); } else if (index == 1) { return value(); } - return InvalidArgument("index {} is out of range[0, {})", index, 2); + return InvalidArgument("Invalid index {} to get field from map", index); } -Result> MapType::GetFieldByName( +Result> MapType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { if (name == kKeyName) { @@ -234,9 +237,10 @@ Result> MapType::GetFieldByName( } return std::nullopt; } - if (StringUtils::ToLower(name) == kKeyName) { + const auto lower_case_name = StringUtils::ToLower(name); + if (lower_case_name == kKeyName) { return key(); - } else if (StringUtils::ToLower(name) == kValueName) { + } else if (lower_case_name == kValueName) { return value(); } return std::nullopt; diff --git a/src/iceberg/type.h b/src/iceberg/type.h index d766b586b..dc510e800 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -68,12 +68,12 @@ class ICEBERG_EXPORT PrimitiveType : public Type { bool is_nested() const override { return false; } }; -using SchemaFieldConstRef = std::reference_wrapper; /// \brief A data type that has child fields. class ICEBERG_EXPORT NestedType : public Type { public: bool is_primitive() const override { return false; } bool is_nested() const override { return true; } + using SchemaFieldConstRef = std::reference_wrapper; /// \brief Get a view of the child fields. [[nodiscard]] virtual std::span fields() const = 0; @@ -106,6 +106,7 @@ class ICEBERG_EXPORT NestedType : public Type { /// \brief A data type representing a struct with nested fields. class ICEBERG_EXPORT StructType : public NestedType { public: + using NestedType::GetFieldByName; constexpr static TypeId kTypeId = TypeId::kStruct; explicit StructType(std::vector fields); ~StructType() override = default; @@ -121,10 +122,10 @@ class ICEBERG_EXPORT StructType : public NestedType { Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; - using NestedType::GetFieldByName; - protected: bool Equals(const Type& other) const override; + // TODO(nullccxsy): Lazy initialization has concurrency issues, need to add proper + // synchronization mechanism Status InitFieldById() const; Status InitFieldByName() const; Status InitFieldByLowerCaseName() const; @@ -139,6 +140,7 @@ class ICEBERG_EXPORT StructType : public NestedType { /// \brief A data type representing a list of values. class ICEBERG_EXPORT ListType : public NestedType { public: + using NestedType::GetFieldByName; constexpr static const TypeId kTypeId = TypeId::kList; constexpr static const std::string_view kElementName = "element"; @@ -160,8 +162,6 @@ class ICEBERG_EXPORT ListType : public NestedType { Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; - using NestedType::GetFieldByName; - protected: bool Equals(const Type& other) const override; @@ -171,6 +171,7 @@ class ICEBERG_EXPORT ListType : public NestedType { /// \brief A data type representing a dictionary of values. class ICEBERG_EXPORT MapType : public NestedType { public: + using NestedType::GetFieldByName; constexpr static const TypeId kTypeId = TypeId::kMap; constexpr static const std::string_view kKeyName = "key"; constexpr static const std::string_view kValueName = "value"; @@ -194,8 +195,6 @@ class ICEBERG_EXPORT MapType : public NestedType { Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; - using NestedType::GetFieldByName; - protected: bool Equals(const Type& other) const override; diff --git a/test/schema_test.cc b/test/schema_test.cc index 1076b826d..550454058 100644 --- a/test/schema_test.cc +++ b/test/schema_test.cc @@ -50,11 +50,11 @@ TEST(SchemaTest, Basics) { auto result = schema.GetFieldByIndex(2); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index 2 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index 2 to get field from struct")); result = schema.GetFieldByIndex(-1); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index -1 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index -1 to get field from struct")); ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); } } diff --git a/test/type_test.cc b/test/type_test.cc index 6189af31f..a3a4aa672 100644 --- a/test/type_test.cc +++ b/test/type_test.cc @@ -318,7 +318,7 @@ TEST(TypeTest, List) { auto result = list.GetFieldByIndex(5); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index 5 is out of range[0, 1)")); + ::testing::HasSubstr("Invalid index 5 to get field from list")); ASSERT_THAT(list.GetFieldByIndex(0), ::testing::Optional(field)); ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(field)); @@ -326,11 +326,11 @@ TEST(TypeTest, List) { result = list.GetFieldByIndex(1); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index 1 is out of range[0, 1)")); + ::testing::HasSubstr("Invalid index 1 to get field from list")); result = list.GetFieldByIndex(-1); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index -1 is out of range[0, 1)")); + ::testing::HasSubstr("Invalid index -1 to get field from list")); ASSERT_EQ(std::nullopt, list.GetFieldByName("foo")); } ASSERT_THAT( @@ -362,11 +362,11 @@ TEST(TypeTest, Map) { auto result = map.GetFieldByIndex(2); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index 2 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index 2 to get field from map")); result = map.GetFieldByIndex(-1); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index -1 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index -1 to get field from map")); ASSERT_EQ(std::nullopt, map.GetFieldByName("element")); } ASSERT_THAT( @@ -407,11 +407,11 @@ TEST(TypeTest, Struct) { auto result = struct_.GetFieldByIndex(2); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index 2 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index 2 to get field from struct")); result = struct_.GetFieldByIndex(-1); ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); ASSERT_THAT(result.error().message, - ::testing::HasSubstr("index -1 is out of range[0, 2)")); + ::testing::HasSubstr("Invalid index -1 to get field from struct")); ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element")); } } From 3ef372a4c39db03bb2314ce8dc2303fcc1878bc6 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Mon, 1 Sep 2025 15:59:14 +0800 Subject: [PATCH 4/6] fix comments --- src/iceberg/type.cc | 6 ++--- src/iceberg/type.h | 14 +++++----- test/schema_test.cc | 13 ++++----- test/type_test.cc | 64 ++++++++++++++++++++++----------------------- 4 files changed, 50 insertions(+), 47 deletions(-) diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index 114a591ba..c50bb7cb5 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -121,9 +121,9 @@ Status StructType::InitFieldByLowerCaseName() const { auto it = field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field); if (!it.second) { - return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr id: {})", - it.first->first, it.first->second.get().field_id(), - field.field_id()); + return InvalidSchema( + "Duplicate lowercase field name found: {} (prev id: {}, curr id: {})", + it.first->first, it.first->second.get().field_id(), field.field_id()); } } return {}; diff --git a/src/iceberg/type.h b/src/iceberg/type.h index dc510e800..a1aac2ebc 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -70,10 +70,12 @@ class ICEBERG_EXPORT PrimitiveType : public Type { /// \brief A data type that has child fields. class ICEBERG_EXPORT NestedType : public Type { + protected: + using SchemaFieldConstRef = std::reference_wrapper; + public: bool is_primitive() const override { return false; } bool is_nested() const override { return true; } - using SchemaFieldConstRef = std::reference_wrapper; /// \brief Get a view of the child fields. [[nodiscard]] virtual std::span fields() const = 0; @@ -91,10 +93,10 @@ class ICEBERG_EXPORT NestedType : public Type { /// the field name is not unique; prefer GetFieldById or GetFieldByIndex /// when possible. /// - /// \note This is currently O(1) complexity. + /// \note This is O(1) complexity. [[nodiscard]] virtual Result> GetFieldByName( std::string_view name, bool case_sensitive) const = 0; - /// \brief Get a field by name(case-sensitive). + /// \brief Get a field by name (case-sensitive). [[nodiscard]] Result> GetFieldByName( std::string_view name) const; }; @@ -106,7 +108,6 @@ class ICEBERG_EXPORT NestedType : public Type { /// \brief A data type representing a struct with nested fields. class ICEBERG_EXPORT StructType : public NestedType { public: - using NestedType::GetFieldByName; constexpr static TypeId kTypeId = TypeId::kStruct; explicit StructType(std::vector fields); ~StructType() override = default; @@ -121,6 +122,7 @@ class ICEBERG_EXPORT StructType : public NestedType { int32_t index) const override; Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; @@ -140,7 +142,6 @@ class ICEBERG_EXPORT StructType : public NestedType { /// \brief A data type representing a list of values. class ICEBERG_EXPORT ListType : public NestedType { public: - using NestedType::GetFieldByName; constexpr static const TypeId kTypeId = TypeId::kList; constexpr static const std::string_view kElementName = "element"; @@ -161,6 +162,7 @@ class ICEBERG_EXPORT ListType : public NestedType { int32_t index) const override; Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; @@ -171,7 +173,6 @@ class ICEBERG_EXPORT ListType : public NestedType { /// \brief A data type representing a dictionary of values. class ICEBERG_EXPORT MapType : public NestedType { public: - using NestedType::GetFieldByName; constexpr static const TypeId kTypeId = TypeId::kMap; constexpr static const std::string_view kKeyName = "key"; constexpr static const std::string_view kValueName = "value"; @@ -194,6 +195,7 @@ class ICEBERG_EXPORT MapType : public NestedType { int32_t index) const override; Result> GetFieldByName( std::string_view name, bool case_sensitive) const override; + using NestedType::GetFieldByName; protected: bool Equals(const Type& other) const override; diff --git a/test/schema_test.cc b/test/schema_test.cc index 550454058..272c6e75a 100644 --- a/test/schema_test.cc +++ b/test/schema_test.cc @@ -27,6 +27,7 @@ #include "iceberg/schema_field.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep +#include "matchers.h" TEST(SchemaTest, Basics) { { @@ -48,13 +49,13 @@ TEST(SchemaTest, Basics) { ASSERT_EQ(std::nullopt, schema.GetFieldById(0)); auto result = schema.GetFieldByIndex(2); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index 2 to get field from struct")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 2 to get field from struct")); result = schema.GetFieldByIndex(-1); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index -1 to get field from struct")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index -1 to get field from struct")); ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); } } diff --git a/test/type_test.cc b/test/type_test.cc index a3a4aa672..9963ab364 100644 --- a/test/type_test.cc +++ b/test/type_test.cc @@ -28,6 +28,7 @@ #include "iceberg/exception.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep +#include "matchers.h" struct TypeTestCase { /// Test case name, must be safe for Googletest (alphanumeric + underscore) @@ -316,21 +317,21 @@ TEST(TypeTest, List) { ASSERT_EQ(1, fields.size()); ASSERT_EQ(field, fields[0]); auto result = list.GetFieldByIndex(5); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index 5 to get field from list")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 5 to get field from list")); ASSERT_THAT(list.GetFieldByIndex(0), ::testing::Optional(field)); ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(field)); ASSERT_EQ(std::nullopt, list.GetFieldById(0)); result = list.GetFieldByIndex(1); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index 1 to get field from list")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 1 to get field from list")); result = list.GetFieldByIndex(-1); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index -1 to get field from list")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index -1 to get field from list")); ASSERT_EQ(std::nullopt, list.GetFieldByName("foo")); } ASSERT_THAT( @@ -360,13 +361,13 @@ TEST(TypeTest, Map) { ASSERT_EQ(std::nullopt, map.GetFieldById(0)); auto result = map.GetFieldByIndex(2); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index 2 to get field from map")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 2 to get field from map")); result = map.GetFieldByIndex(-1); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index -1 to get field from map")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index -1 to get field from map")); ASSERT_EQ(std::nullopt, map.GetFieldByName("element")); } ASSERT_THAT( @@ -405,13 +406,13 @@ TEST(TypeTest, Struct) { ASSERT_EQ(std::nullopt, struct_.GetFieldById(0)); auto result = struct_.GetFieldByIndex(2); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index 2 to get field from struct")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 2 to get field from struct")); result = struct_.GetFieldByIndex(-1); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidArgument); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr("Invalid index -1 to get field from struct")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index -1 to get field from struct")); ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element")); } } @@ -480,9 +481,9 @@ TEST(TypeTest, StructDuplicateId) { auto result = struct_.GetFieldById(5); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); - ASSERT_THAT(result.error().message, - ::testing::HasSubstr( + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidSchema)); + ASSERT_THAT(result, + iceberg::HasErrorMessage( "Duplicate field id found: 5 (prev name: foo, curr name: bar)")); } @@ -493,10 +494,9 @@ TEST(TypeTest, StructDuplicateName) { auto result = struct_.GetFieldByName("foo", true); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); - ASSERT_THAT( - result.error().message, - ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidSchema)); + ASSERT_THAT(result, iceberg::HasErrorMessage( + "Duplicate field name found: foo (prev id: 1, curr id: 2)")); } TEST(TypeTest, StructDuplicateLowerCaseName) { @@ -506,8 +506,8 @@ TEST(TypeTest, StructDuplicateLowerCaseName) { auto result = struct_.GetFieldByName("foo", false); ASSERT_FALSE(result.has_value()); - ASSERT_EQ(result.error().kind, iceberg::ErrorKind::kInvalidSchema); - ASSERT_THAT( - result.error().message, - ::testing::HasSubstr("Duplicate field name found: foo (prev id: 1, curr id: 2)")); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidSchema)); + ASSERT_THAT(result, + iceberg::HasErrorMessage( + "Duplicate lowercase field name found: foo (prev id: 1, curr id: 2)")); } From 2fb136ed18f829e5fc50227929adf14d7b4d3829 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Tue, 2 Sep 2025 14:20:46 +0800 Subject: [PATCH 5/6] refactor: simplify ICEBERG_RETURN_UNEXPECTED macro implementation - Streamlined the macro to reduce nesting and improve readability by using an inline if statement. --- src/iceberg/util/macros.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/iceberg/util/macros.h b/src/iceberg/util/macros.h index 3519c9a63..f11a680cc 100644 --- a/src/iceberg/util/macros.h +++ b/src/iceberg/util/macros.h @@ -19,13 +19,10 @@ #pragma once -#define ICEBERG_RETURN_UNEXPECTED(result) \ - do { \ - auto&& result_name = (result); \ - if (!result_name) [[unlikely]] { \ - return std::unexpected(result_name.error()); \ - } \ - } while (false); +#define ICEBERG_RETURN_UNEXPECTED(result) \ + if (auto&& result_name = result; !result_name) [[unlikely]] { \ + return std::unexpected(result_name.error()); \ + } #define ICEBERG_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \ auto&& result_name = (rexpr); \ From 2ce9d53c0c08fafa4d2d1d811e1fdd8293ea5059 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Wed, 3 Sep 2025 18:29:09 +0800 Subject: [PATCH 6/6] fix comments --- src/iceberg/type.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/iceberg/type.h b/src/iceberg/type.h index a1aac2ebc..4cb405c04 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -70,15 +70,13 @@ class ICEBERG_EXPORT PrimitiveType : public Type { /// \brief A data type that has child fields. class ICEBERG_EXPORT NestedType : public Type { - protected: - using SchemaFieldConstRef = std::reference_wrapper; - public: bool is_primitive() const override { return false; } bool is_nested() const override { return true; } /// \brief Get a view of the child fields. [[nodiscard]] virtual std::span fields() const = 0; + using SchemaFieldConstRef = std::reference_wrapper; /// \brief Get a field by field ID. /// /// \note This is O(1) complexity. @@ -132,7 +130,6 @@ class ICEBERG_EXPORT StructType : public NestedType { Status InitFieldByName() const; Status InitFieldByLowerCaseName() const; - protected: std::vector fields_; mutable std::unordered_map field_by_id_; mutable std::unordered_map field_by_name_;