diff --git a/src/iceberg/expression/residual_evaluator.cc b/src/iceberg/expression/residual_evaluator.cc index 3cdf2914a..821213eea 100644 --- a/src/iceberg/expression/residual_evaluator.cc +++ b/src/iceberg/expression/residual_evaluator.cc @@ -299,16 +299,12 @@ class UnpartitionedResidualEvaluator : public ResidualEvaluator { public: explicit UnpartitionedResidualEvaluator(std::shared_ptr expr) : ResidualEvaluator(std::move(expr), *PartitionSpec::Unpartitioned(), - *kEmptySchema_, true) {} + *Schema::EmptySchema(), true) {} Result> ResidualFor( const StructLike& /*partition_data*/) const override { return expr_; } - - private: - // Store an empty schema to avoid dangling reference when passing to base class - inline static const std::shared_ptr kEmptySchema_ = Schema::EmptySchema(); }; } // namespace diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index e35e36cfd..a52f418e4 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -488,9 +488,8 @@ Result> SchemaFromJson(const nlohmann::json& json) { auto identifier_field_ids, GetJsonValueOrDefault>(json, kIdentifierFieldIds)); - return std::make_unique(std::move(fields), - schema_id_opt.value_or(Schema::kInitialSchemaId), - std::move(identifier_field_ids)); + return Schema::Make(std::move(fields), schema_id_opt.value_or(Schema::kInitialSchemaId), + std::move(identifier_field_ids)); } nlohmann::json ToJson(const PartitionField& partition_field) { diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc index bc325251d..1cb263c44 100644 --- a/src/iceberg/schema.cc +++ b/src/iceberg/schema.cc @@ -21,6 +21,7 @@ #include #include +#include #include "iceberg/result.h" #include "iceberg/row/struct_like.h" @@ -34,11 +35,25 @@ namespace iceberg { -Schema::Schema(std::vector fields, int32_t schema_id, - std::vector identifier_field_ids) - : StructType(std::move(fields)), - schema_id_(schema_id), - identifier_field_ids_(std::move(identifier_field_ids)) {} +Schema::Schema(std::vector fields, int32_t schema_id) + : StructType(std::move(fields)), schema_id_(schema_id) {} + +Result> Schema::Make(std::vector fields, + int32_t schema_id, + std::vector identifier_field_ids) { + auto schema = std::make_unique(std::move(fields), schema_id); + + if (!identifier_field_ids.empty()) { + auto id_to_parent = IndexParents(*schema); + for (auto field_id : identifier_field_ids) { + ICEBERG_RETURN_UNEXPECTED( + ValidateIdentifierFields(field_id, *schema, id_to_parent)); + } + } + + schema->identifier_field_ids_ = std::move(identifier_field_ids); + return schema; +} Result> Schema::Make( std::vector fields, int32_t schema_id, @@ -53,10 +68,69 @@ Result> Schema::Make( } fresh_identifier_ids.push_back(field.value().get().field_id()); } + + if (!fresh_identifier_ids.empty()) { + auto id_to_parent = IndexParents(*schema); + for (auto field_id : fresh_identifier_ids) { + ICEBERG_RETURN_UNEXPECTED( + ValidateIdentifierFields(field_id, *schema, id_to_parent)); + } + } + schema->identifier_field_ids_ = std::move(fresh_identifier_ids); return schema; } +Status Schema::ValidateIdentifierFields( + int32_t field_id, const Schema& schema, + const std::unordered_map& id_to_parent) { + ICEBERG_ASSIGN_OR_RAISE(auto field_opt, schema.FindFieldById(field_id)); + ICEBERG_PRECHECK(field_opt.has_value(), + "Cannot add field {} as an identifier field: field does not exist", + field_id); + + const SchemaField& field = field_opt.value().get(); + ICEBERG_PRECHECK( + field.type()->is_primitive(), + "Cannot add field {} as an identifier field: not a primitive type field", field_id); + ICEBERG_PRECHECK(!field.optional(), + "Cannot add field {} as an identifier field: not a required field", + field_id); + ICEBERG_PRECHECK( + field.type()->type_id() != TypeId::kDouble && + field.type()->type_id() != TypeId::kFloat, + "Cannot add field {} as an identifier field: must not be float or double field", + field_id); + + // check whether the nested field is in a chain of required struct fields + // exploring from root for better error message for list and map types + std::stack ancestors; + auto parent_it = id_to_parent.find(field.field_id()); + while (parent_it != id_to_parent.end()) { + ancestors.push(parent_it->second); + parent_it = id_to_parent.find(parent_it->second); + } + + while (!ancestors.empty()) { + ICEBERG_ASSIGN_OR_RAISE(auto parent_opt, schema.FindFieldById(ancestors.top())); + ICEBERG_PRECHECK( + parent_opt.has_value(), + "Cannot add field {} as an identifier field: parent field id {} does not exist", + field_id, ancestors.top()); + const SchemaField& parent = parent_opt.value().get(); + ICEBERG_PRECHECK( + parent.type()->type_id() == TypeId::kStruct, + "Cannot add field {} as an identifier field: must not be nested in {}", field_id, + *parent.type()); + ICEBERG_PRECHECK(!parent.optional(), + "Cannot add field {} as an identifier field: must not be nested in " + "optional field {}", + field_id, parent.field_id()); + ancestors.pop(); + } + return {}; +} + const std::shared_ptr& Schema::EmptySchema() { static const auto empty_schema = std::make_shared(std::vector{}, kInitialSchemaId); diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index fc0825695..1445a7934 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -52,18 +52,47 @@ class ICEBERG_EXPORT Schema : public StructType { /// \brief Special value to select all columns from manifest files. static constexpr std::string_view kAllColumns = "*"; - explicit Schema(std::vector fields, int32_t schema_id = kInitialSchemaId, - std::vector identifier_field_ids = {}); + explicit Schema(std::vector fields, int32_t schema_id = kInitialSchemaId); + + /// \brief Create a schema. + /// + /// \param fields The fields that make up the schema. + /// \param schema_id The unique identifier for this schema (default:kInitialSchemaId). + /// \param identifier_field_ids Field IDs that uniquely identify rows in the table. + /// \return A new Schema instance or Status if failed. + static Result> Make(std::vector fields, + int32_t schema_id, + std::vector identifier_field_ids); /// \brief Create a schema. /// /// \param fields The fields that make up the schema. /// \param schema_id The unique identifier for this schema (default: kInitialSchemaId). /// \param identifier_field_names Canonical names of fields that uniquely identify rows - /// in the table (default: empty). \return A new Schema instance or Status if failed. + /// in the table. + /// \return A new Schema instance or Status if failed. static Result> Make( - std::vector fields, int32_t schema_id = kInitialSchemaId, - const std::vector& identifier_field_names = {}); + std::vector fields, int32_t schema_id, + const std::vector& identifier_field_names); + + /// \brief Validate that the identifier field with the given ID is valid for the schema + /// + /// This method checks that the specified field ID represents a valid identifier field + /// according to Iceberg's identifier field requirements. It verifies that the field: + /// - exists in the schema + /// - is a primitive type + /// - is not optional (required field) + /// - is not a float or double type + /// - is not nested within optional or non-struct parent fields + /// + /// \param field_id The ID of the field to validate as an identifier field. + /// \param schema The schema containing the field to validate. + /// \param id_to_parent A mapping from field IDs to their parent field IDs for nested + /// field validation. + /// \return Status indicating success or failure of the validation. + static Status ValidateIdentifierFields( + int32_t field_id, const Schema& schema, + const std::unordered_map& id_to_parent); /// \brief Get an empty schema. /// diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index b6ce120f5..851048b30 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -958,9 +958,9 @@ Result TableMetadataBuilder::Impl::AddSchema(const Schema& schema, metadata_.last_column_id = new_last_column_id; - auto new_schema = - std::make_shared(schema.fields() | std::ranges::to(), - new_schema_id, schema.IdentifierFieldIds()); + ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr new_schema, + Schema::Make(schema.fields() | std::ranges::to(), + new_schema_id, schema.IdentifierFieldIds())) if (!schema_found) { metadata_.schemas.push_back(new_schema); diff --git a/src/iceberg/test/assign_id_visitor_test.cc b/src/iceberg/test/assign_id_visitor_test.cc index f9290d7f8..8bec0ad53 100644 --- a/src/iceberg/test/assign_id_visitor_test.cc +++ b/src/iceberg/test/assign_id_visitor_test.cc @@ -74,8 +74,9 @@ std::shared_ptr CreateNestedStruct() { }); } -Schema CreateNestedSchema(std::vector identifier_field_ids = {}) { - return Schema( +Result> CreateNestedSchema( + std::vector identifier_field_ids = {}) { + return Schema::Make( { SchemaField::MakeRequired(/*field_id=*/10, "id", iceberg::int64()), SchemaField::MakeOptional(/*field_id=*/20, "list", CreateListOfStruct()), @@ -108,11 +109,11 @@ TEST(AssignFreshIdVisitorTest, FlatSchema) { } TEST(AssignFreshIdVisitorTest, NestedSchema) { - Schema schema = CreateNestedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateNestedSchema()); std::atomic id = 0; auto next_id = [&id]() { return ++id; }; ICEBERG_UNWRAP_OR_FAIL(auto fresh_schema, - AssignFreshIds(Schema::kInitialSchemaId, schema, next_id)); + AssignFreshIds(Schema::kInitialSchemaId, *schema, next_id)); ASSERT_EQ(4, fresh_schema->fields().size()); for (int32_t i = 0; i < fresh_schema->fields().size(); ++i) { @@ -169,20 +170,16 @@ TEST(AssignFreshIdVisitorTest, NestedSchema) { } TEST(AssignFreshIdVisitorTest, RefreshIdentifierId) { - std::atomic id = 0; + int32_t id = 0; auto next_id = [&id]() { return ++id; }; - Schema invalid_schema = CreateNestedSchema({10, 400}); - // Invalid identified field id - auto result = AssignFreshIds(Schema::kInitialSchemaId, invalid_schema, next_id); - EXPECT_THAT(result, IsError(ErrorKind::kInvalidSchema)); - EXPECT_THAT(result, HasErrorMessage("Cannot find")); - - id = 0; - Schema schema = CreateNestedSchema({10, 301}); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateNestedSchema({10, 301})); ICEBERG_UNWRAP_OR_FAIL(auto fresh_schema, - AssignFreshIds(Schema::kInitialSchemaId, schema, next_id)); + AssignFreshIds(Schema::kInitialSchemaId, *schema, next_id)); EXPECT_THAT(fresh_schema->IdentifierFieldIds(), testing::ElementsAre(1, 12)); + ICEBERG_UNWRAP_OR_FAIL(auto identifier_field_names, + fresh_schema->IdentifierFieldNames()); + EXPECT_THAT(identifier_field_names, testing::ElementsAre("id", "struct.outer_id")); } } // namespace iceberg diff --git a/src/iceberg/test/metadata_serde_test.cc b/src/iceberg/test/metadata_serde_test.cc index 6b8e37ad6..3668817ed 100644 --- a/src/iceberg/test/metadata_serde_test.cc +++ b/src/iceberg/test/metadata_serde_test.cc @@ -139,12 +139,13 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) { /*optional=*/false)}, /*schema_id=*/0); - auto expected_schema_2 = std::make_shared( - std::vector{SchemaField::MakeRequired(1, "x", int64()), - SchemaField::MakeRequired(2, "y", int64()), - SchemaField::MakeRequired(3, "z", int64())}, - /*schema_id=*/1, - /*identifier_field_ids=*/std::vector{1, 2}); + ICEBERG_UNWRAP_OR_FAIL( + std::shared_ptr expected_schema_2, + Schema::Make(std::vector{SchemaField::MakeRequired(1, "x", int64()), + SchemaField::MakeRequired(2, "y", int64()), + SchemaField::MakeRequired(3, "z", int64())}, + /*schema_id=*/1, + /*identifier_field_ids=*/std::vector{1, 2})); auto expected_spec_result = PartitionSpec::Make( /*spec_id=*/0, diff --git a/src/iceberg/test/schema_test.cc b/src/iceberg/test/schema_test.cc index 38fef36c2..9dab35fa7 100644 --- a/src/iceberg/test/schema_test.cc +++ b/src/iceberg/test/schema_test.cc @@ -43,49 +43,36 @@ std::unique_ptr MakeSchema(Args&&... args) { } TEST(SchemaTest, Basics) { - { - iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); - iceberg::SchemaField field2(7, "bar", iceberg::string(), true); - iceberg::Schema schema({field1, field2}, 100); - ASSERT_EQ(schema, schema); - ASSERT_EQ(100, schema.schema_id()); - std::span fields = schema.fields(); - ASSERT_EQ(2, fields.size()); - ASSERT_EQ(field1, fields[0]); - ASSERT_EQ(field2, fields[1]); - ASSERT_THAT(schema.GetFieldById(5), ::testing::Optional(field1)); - ASSERT_THAT(schema.GetFieldById(7), ::testing::Optional(field2)); - ASSERT_THAT(schema.GetFieldByIndex(0), ::testing::Optional(field1)); - ASSERT_THAT(schema.GetFieldByIndex(1), ::testing::Optional(field2)); - ASSERT_THAT(schema.GetFieldByName("foo"), ::testing::Optional(field1)); - ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2)); - - ASSERT_EQ(std::nullopt, schema.GetFieldById(0)); - auto result = schema.GetFieldByIndex(2); - ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); - ASSERT_THAT(result, - iceberg::HasErrorMessage("Invalid index 2 to get field from struct")); - result = schema.GetFieldByIndex(-1); - ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); - ASSERT_THAT(result, - iceberg::HasErrorMessage("Invalid index -1 to get field from struct")); - ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); - ASSERT_EQ(0, schema.IdentifierFieldIds().size()); - auto identifier_field_names = schema.IdentifierFieldNames(); - ASSERT_THAT(identifier_field_names, iceberg::IsOk()); - ASSERT_THAT(identifier_field_names.value(), ::testing::IsEmpty()); - } - - { - // identifier fields not empty - iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); - iceberg::SchemaField field2(7, "bar", iceberg::string(), true); - iceberg::Schema schema({field1, field2}, 100, {5, 7}); - ASSERT_THAT(schema.IdentifierFieldIds(), testing::ElementsAre(5, 7)); - auto result = schema.IdentifierFieldNames(); - ASSERT_THAT(result, iceberg::IsOk()); - ASSERT_THAT(result.value(), testing::ElementsAre("foo", "bar")); - } + iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); + iceberg::SchemaField field2(7, "bar", iceberg::string(), true); + iceberg::Schema schema({field1, field2}, 100); + ASSERT_EQ(schema, schema); + ASSERT_EQ(100, schema.schema_id()); + std::span fields = schema.fields(); + ASSERT_EQ(2, fields.size()); + ASSERT_EQ(field1, fields[0]); + ASSERT_EQ(field2, fields[1]); + ASSERT_THAT(schema.GetFieldById(5), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldById(7), ::testing::Optional(field2)); + ASSERT_THAT(schema.GetFieldByIndex(0), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldByIndex(1), ::testing::Optional(field2)); + ASSERT_THAT(schema.GetFieldByName("foo"), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2)); + + ASSERT_EQ(std::nullopt, schema.GetFieldById(0)); + auto result = schema.GetFieldByIndex(2); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index 2 to get field from struct")); + result = schema.GetFieldByIndex(-1); + ASSERT_THAT(result, IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, + iceberg::HasErrorMessage("Invalid index -1 to get field from struct")); + ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); + ASSERT_EQ(0, schema.IdentifierFieldIds().size()); + auto identifier_field_names = schema.IdentifierFieldNames(); + ASSERT_THAT(identifier_field_names, iceberg::IsOk()); + ASSERT_THAT(identifier_field_names.value(), ::testing::IsEmpty()); } TEST(SchemaTest, Equality) { @@ -97,9 +84,6 @@ TEST(SchemaTest, Equality) { iceberg::Schema schema3({field1}, 101); iceberg::Schema schema4({field3, field2}, 101); iceberg::Schema schema5({field1, field2}, 100); - iceberg::Schema schema6({field1, field2}, 100, {5}); - iceberg::Schema schema7({field1, field2}, 100, {5}); - iceberg::Schema schema8({field1, field2}, 100, {7}); ASSERT_EQ(schema1, schema1); ASSERT_NE(schema1, schema2); @@ -110,10 +94,85 @@ TEST(SchemaTest, Equality) { ASSERT_NE(schema4, schema1); ASSERT_EQ(schema1, schema5); ASSERT_EQ(schema5, schema1); +} - ASSERT_NE(schema5, schema6); - ASSERT_EQ(schema6, schema7); - ASSERT_NE(schema6, schema8); +TEST(SchemaTest, IdentifierFields) { + using iceberg::ErrorKind; + using iceberg::Schema; + using iceberg::SchemaField; + + // identifier fields without identifier fields + SchemaField field1(1, "id", iceberg::int32(), false); + SchemaField field2(2, "name", iceberg::string(), true); + SchemaField field3(3, "age", iceberg::float32(), false); + Schema schema({field1, field2}, 100); + + // Schema with normal identifier fields + ICEBERG_UNWRAP_OR_FAIL(auto schema_with_pk, + Schema::Make({field1, field2}, 100, std::vector{1})); + ASSERT_THAT(schema_with_pk->IdentifierFieldIds(), testing::ElementsAre(1)); + auto result = schema_with_pk->IdentifierFieldNames(); + ASSERT_THAT(result, iceberg::IsOk()); + EXPECT_THAT(result.value(), testing::ElementsAre("id")); + + // Euqality check + EXPECT_NE(schema, *schema_with_pk); + auto schema_with_pk_other = + Schema::Make({field1, field2}, 100, std::vector{1}); + ASSERT_THAT(schema_with_pk_other, iceberg::IsOk()) + << schema_with_pk_other.error().message; + EXPECT_EQ(*schema_with_pk, *schema_with_pk_other.value()); + + // Invalid identifier fields, identifier fields cannot be optional + auto res = Schema::Make({field1, field2}, 100, std::vector{2}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(res, iceberg::HasErrorMessage("not a required field")); + + // Invalid identifier fields, identifier fields invalid type + res = Schema::Make({field1, field2, field3}, 100, std::vector{3}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(res, iceberg::HasErrorMessage("must not be float or double field")); + + SchemaField field4( + 4, "struct", + std::make_shared(std::vector{field1, field2}), + false); + res = Schema::Make({field4}, 100, std::vector{4}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(res, iceberg::HasErrorMessage("not a primitive type field")); + + // Invalid identifier fields, identifier fields cannot be nested in optional field + SchemaField field5( + 4, "struct", + std::make_shared(std::vector{field1, field2}), + true); + res = Schema::Make({field5}, 100, std::vector{1}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(res, iceberg::HasErrorMessage("must not be nested in optional field")); + + // Invalid identifier fields, identifier fields cannot be nested in repeated field + SchemaField field6(5, "element", iceberg::int32(), false); + SchemaField field7(4, "list", std::make_shared(field6), true); + res = Schema::Make({field7}, 100, std::vector{5}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(res, iceberg::HasErrorMessage("must not be nested in list")); + + // Normal nested identifier fields + SchemaField field8( + 4, "struct", + std::make_shared(std::vector{field1, field2}), + false); + res = Schema::Make({field8}, 100, std::vector{1}); + ASSERT_THAT(res, iceberg::IsOk()); + EXPECT_THAT(res.value()->IdentifierFieldIds(), testing::ElementsAre(1)); + ICEBERG_UNWRAP_OR_FAIL(auto identifier_field_names, + res.value()->IdentifierFieldNames()); + EXPECT_THAT(identifier_field_names, testing::ElementsAre("struct.id")); + + // Invalid identifier fields, identifier fields not found + res = Schema::Make({field1, field2}, 100, std::vector{"not_exist"}); + EXPECT_THAT(res, iceberg::IsError(ErrorKind::kInvalidSchema)); + EXPECT_THAT(res, iceberg::HasErrorMessage("Cannot find identifier field")); } class BasicShortNameTest : public ::testing::Test { @@ -237,8 +296,8 @@ class ComplexShortNameTest : public ::testing::Test { field9_ = std::make_unique(9, "Map", maptype, false); - schema_ = std::make_unique( - std::vector{*field9_}, 1, std::vector{1, 2}); + schema_ = + std::make_unique(std::vector{*field9_}, 1); } std::unique_ptr schema_; @@ -358,14 +417,6 @@ TEST_F(ComplexShortNameTest, TestFindByShortNameCaseInsensitive) { ::testing::Optional(std::nullopt)); } -TEST_F(ComplexShortNameTest, TestIdentifierFieldNames) { - auto result = schema_->IdentifierFieldNames(); - ASSERT_THAT(result, iceberg::IsOk()); - ASSERT_THAT(result.value(), - ::testing::ElementsAre("Map.value.Second_child.element.Foo", - "Map.value.Second_child.element.Bar")); -} - class ComplexMapStructShortNameTest : public ::testing::Test { protected: void SetUp() override { diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc index 40b728dac..a0fbe3be7 100644 --- a/src/iceberg/test/table_metadata_builder_test.cc +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -49,24 +49,15 @@ std::shared_ptr CreateTestSchema() { return std::make_shared(std::vector{field1, field2, field3}, 0); } -// Helper function to create a simple schema with invalid identifier fields -std::shared_ptr CreateInvalidSchema() { - auto field1 = SchemaField::MakeRequired(2, "id", int32()); - auto field2 = SchemaField::MakeRequired(5, "part_col", string()); - auto field3 = SchemaField::MakeRequired(8, "sort_col", timestamp()); - return std::make_shared(std::vector{field1, field2, field3}, - /*schema_id=*/1, - /*identifier_field_ids=*/std::vector{10}); -} - // Helper function to create a simple schema with disordered field_ids -std::shared_ptr CreateDisorderedSchema() { +Result> CreateDisorderedSchema() { auto field1 = SchemaField::MakeRequired(2, "id", int32()); auto field2 = SchemaField::MakeRequired(5, "part_col", string()); auto field3 = SchemaField::MakeRequired(8, "sort_col", timestamp()); - return std::make_shared(std::vector{field1, field2, field3}, - /*schema_id=*/1, - /*identifier_field_ids=*/std::vector{2}); + + return Schema::Make(std::vector{field1, field2, field3}, + /*schema_id=*/1, + /*identifier_field_ids=*/std::vector{2}); } // Helper function to create base metadata for tests @@ -95,7 +86,7 @@ std::unique_ptr CreateBaseMetadata() { // test for TableMetadata TEST(TableMetadataTest, Make) { - auto Schema = CreateDisorderedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto Schema, CreateDisorderedSchema()); ICEBERG_UNWRAP_OR_FAIL( auto spec, PartitionSpec::Make(1, std::vector{PartitionField( 5, 1, "part_name", Transform::Identity())})); @@ -140,23 +131,8 @@ TEST(TableMetadataTest, Make) { EXPECT_EQ(NullOrder::kLast, order_fields[0].null_order()); } -TEST(TableMetadataTest, MakeWithInvalidSchema) { - auto schema = CreateInvalidSchema(); - ICEBERG_UNWRAP_OR_FAIL( - auto spec, PartitionSpec::Make(1, std::vector{PartitionField( - 5, 1, "part_name", Transform::Identity())})); - ICEBERG_UNWRAP_OR_FAIL( - auto order, SortOrder::Make(1, std::vector{SortField( - 5, Transform::Identity(), - SortDirection::kAscending, NullOrder::kLast)})); - - auto res = TableMetadata::Make(*schema, *spec, *order, "s3://bucket/test", {}); - EXPECT_THAT(res, IsError(ErrorKind::kInvalidSchema)); - EXPECT_THAT(res, HasErrorMessage("Cannot find identifier field id")); -} - TEST(TableMetadataTest, MakeWithInvalidPartitionSpec) { - auto schema = CreateDisorderedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema()); ICEBERG_UNWRAP_OR_FAIL( auto spec, PartitionSpec::Make(1, std::vector{PartitionField( 6, 1, "part_name", Transform::Identity())})); @@ -171,7 +147,7 @@ TEST(TableMetadataTest, MakeWithInvalidPartitionSpec) { } TEST(TableMetadataTest, MakeWithInvalidSortOrder) { - auto schema = CreateDisorderedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema()); ICEBERG_UNWRAP_OR_FAIL( auto spec, PartitionSpec::Make(1, std::vector{PartitionField( 5, 1, "part_name", Transform::Identity())})); @@ -191,7 +167,7 @@ TEST(TableMetadataTest, InvalidProperties) { { // Invalid metrics config - auto schema = CreateDisorderedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema()); std::unordered_map invlaid_metric_config = { {std::string(TableProperties::kMetricModeColumnConfPrefix) + "unknown_col", "value"}}; @@ -204,7 +180,7 @@ TEST(TableMetadataTest, InvalidProperties) { { // Invaid commit properties - auto schema = CreateDisorderedSchema(); + ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema()); std::unordered_map invlaid_commit_properties = { {TableProperties::kCommitNumRetries.key(), "-1"}};