diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 780ab61da..a90c5b0ca 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -21,9 +21,12 @@ #include #include +#include #include +#include #include #include +#include #include @@ -39,12 +42,11 @@ #include "iceberg/util/gzip_internal.h" #include "iceberg/util/macros.h" #include "iceberg/util/uuid.h" - namespace iceberg { - namespace { const TimePointMs kInvalidLastUpdatedMs = TimePointMs::min(); -} +constexpr int32_t kLastAdded = -1; +} // namespace std::string ToString(const SnapshotLogEntry& entry) { return std::format("SnapshotLogEntry[timestampMillis={},snapshotId={}]", @@ -274,11 +276,19 @@ struct TableMetadataBuilder::Impl { // Change tracking std::vector> changes; + std::optional last_added_schema_id; + std::optional last_added_order_id; + std::optional last_added_spec_id; // Metadata location tracking std::optional metadata_location; std::optional previous_metadata_location; + // indexes for convenience + std::unordered_map> schemas_by_id; + std::unordered_map> specs_by_id; + std::unordered_map> sort_orders_by_id; + // Constructor for new table explicit Impl(int8_t format_version) : base(nullptr), metadata{} { metadata.format_version = format_version; @@ -294,7 +304,22 @@ struct TableMetadataBuilder::Impl { // Constructor from existing metadata explicit Impl(const TableMetadata* base_metadata) - : base(base_metadata), metadata(*base_metadata) {} + : base(base_metadata), metadata(*base_metadata) { + // Initialize index maps from base metadata + for (const auto& schema : metadata.schemas) { + if (schema->schema_id().has_value()) { + schemas_by_id.emplace(schema->schema_id().value(), schema); + } + } + + for (const auto& spec : metadata.partition_specs) { + specs_by_id.emplace(spec->spec_id(), spec); + } + + for (const auto& order : metadata.sort_orders) { + sort_orders_by_id.emplace(order->order_id(), order); + } + } }; TableMetadataBuilder::TableMetadataBuilder(int8_t format_version) @@ -434,16 +459,95 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveSchemas( TableMetadataBuilder& TableMetadataBuilder::SetDefaultSortOrder( std::shared_ptr order) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + BUILDER_ASSIGN_OR_RETURN(auto order_id, AddSortOrderInternal(*order)); + return SetDefaultSortOrder(order_id); } TableMetadataBuilder& TableMetadataBuilder::SetDefaultSortOrder(int32_t order_id) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + if (order_id == -1) { + if (!impl_->last_added_order_id.has_value()) { + return AddError(ErrorKind::kInvalidArgument, + "Cannot set last added sort order: no sort order has been added"); + } + return SetDefaultSortOrder(impl_->last_added_order_id.value()); + } + + if (order_id == impl_->metadata.default_sort_order_id) { + return *this; + } + + impl_->metadata.default_sort_order_id = order_id; + + if (impl_->last_added_order_id == std::make_optional(order_id)) { + impl_->changes.push_back(std::make_unique(kLastAdded)); + } else { + impl_->changes.push_back(std::make_unique(order_id)); + } + return *this; +} + +Result TableMetadataBuilder::AddSortOrderInternal(const SortOrder& order) { + int32_t new_order_id = ReuseOrCreateNewSortOrderId(order); + + if (impl_->sort_orders_by_id.find(new_order_id) != impl_->sort_orders_by_id.end()) { + // update last_added_order_id if the order was added in this set of changes (since it + // is now the last) + bool is_new_order = + impl_->last_added_order_id.has_value() && + std::ranges::find_if(impl_->changes, [new_order_id](const auto& change) { + auto* add_sort_order = dynamic_cast(change.get()); + return add_sort_order && + add_sort_order->sort_order()->order_id() == new_order_id; + }) != impl_->changes.cend(); + impl_->last_added_order_id = + is_new_order ? std::make_optional(new_order_id) : std::nullopt; + return new_order_id; + } + + // Get current schema and validate the sort order against it + ICEBERG_ASSIGN_OR_RAISE(auto schema, impl_->metadata.Schema()); + ICEBERG_RETURN_UNEXPECTED(order.Validate(*schema)); + + std::shared_ptr new_order; + if (order.is_unsorted()) { + new_order = SortOrder::Unsorted(); + } else { + // Unlike freshSortOrder from Java impl, we don't use field name from old bound + // schema to rebuild the sort order. + ICEBERG_ASSIGN_OR_RAISE( + new_order, + SortOrder::Make(new_order_id, std::vector(order.fields().begin(), + order.fields().end()))); + } + + impl_->metadata.sort_orders.push_back(new_order); + impl_->sort_orders_by_id.emplace(new_order_id, new_order); + + impl_->changes.push_back(std::make_unique(new_order)); + impl_->last_added_order_id = new_order_id; + return new_order_id; } TableMetadataBuilder& TableMetadataBuilder::AddSortOrder( std::shared_ptr order) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + BUILDER_ASSIGN_OR_RETURN(auto order_id, AddSortOrderInternal(*order)); + return *this; +} + +int32_t TableMetadataBuilder::ReuseOrCreateNewSortOrderId(const SortOrder& new_order) { + if (new_order.is_unsorted()) { + return SortOrder::kUnsortedOrderId; + } + // determine the next order id + int32_t new_order_id = SortOrder::kInitialSortOrderId; + for (const auto& order : impl_->metadata.sort_orders) { + if (order->SameOrder(new_order)) { + return order->order_id(); + } else if (new_order_id <= order->order_id()) { + new_order_id = order->order_id() + 1; + } + } + return new_order_id; } TableMetadataBuilder& TableMetadataBuilder::AddSnapshot( diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 503b9b143..2d53fcb08 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -436,6 +436,17 @@ class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector { /// \brief Private constructor for building from existing metadata explicit TableMetadataBuilder(const TableMetadata* base); + /// \brief Internal method to add a sort order and return its ID + /// \param order The sort order to add + /// \return The ID of the added or reused sort order + Result AddSortOrderInternal(const SortOrder& order); + + /// \brief Internal method to check for existing sort order and reuse its ID or create a + /// new one + /// \param new_order The sort order to check + /// \return The ID to use for this sort order (reused if exists, new otherwise) + int32_t ReuseOrCreateNewSortOrderId(const SortOrder& new_order); + /// Internal state members struct Impl; std::unique_ptr impl_; diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc index ff41ae18c..a1e46615f 100644 --- a/src/iceberg/test/table_metadata_builder_test.cc +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -19,20 +19,33 @@ #include #include +#include #include #include "iceberg/partition_spec.h" +#include "iceberg/schema.h" #include "iceberg/snapshot.h" +#include "iceberg/sort_field.h" #include "iceberg/sort_order.h" #include "iceberg/table_metadata.h" #include "iceberg/table_update.h" #include "iceberg/test/matchers.h" +#include "iceberg/transform.h" +#include "iceberg/type.h" namespace iceberg { namespace { +// Helper function to create a simple schema for testing +std::shared_ptr CreateTestSchema() { + auto field1 = SchemaField::MakeRequired(1, "id", int32()); + auto field2 = SchemaField::MakeRequired(2, "data", string()); + auto field3 = SchemaField::MakeRequired(3, "ts", timestamp()); + return std::make_shared(std::vector{field1, field2, field3}, 0); +} + // Helper function to create base metadata for tests std::unique_ptr CreateBaseMetadata() { auto metadata = std::make_unique(); @@ -41,11 +54,14 @@ std::unique_ptr CreateBaseMetadata() { metadata->location = "s3://bucket/test"; metadata->last_sequence_number = 0; metadata->last_updated_ms = TimePointMs{std::chrono::milliseconds(1000)}; - metadata->last_column_id = 0; + metadata->last_column_id = 3; + metadata->current_schema_id = 0; + metadata->schemas.push_back(CreateTestSchema()); metadata->default_spec_id = PartitionSpec::kInitialSpecId; metadata->last_partition_id = 0; metadata->current_snapshot_id = Snapshot::kInvalidSnapshotId; metadata->default_sort_order_id = SortOrder::kInitialSortOrderId; + metadata->sort_orders.push_back(SortOrder::Unsorted()); metadata->next_row_id = TableMetadata::kInitialRowId; return metadata; } @@ -82,7 +98,7 @@ TEST(TableMetadataBuilderTest, BuildFromExisting) { EXPECT_EQ(metadata->location, "s3://bucket/test"); } -// Test AssignUUID method +// Test AssignUUID TEST(TableMetadataBuilderTest, AssignUUID) { // Assign UUID for new table auto builder = TableMetadataBuilder::BuildFromEmpty(2); @@ -174,17 +190,149 @@ TEST(TableMetadataBuilderTest, UpgradeFormatVersion) { EXPECT_THAT(builder->Build(), HasErrorMessage("Cannot downgrade")); } -// Test applying TableUpdate to builder -TEST(TableMetadataBuilderTest, ApplyUpdate) { - // Apply AssignUUID update - auto builder = TableMetadataBuilder::BuildFromEmpty(2); - table::AssignUUID update("apply-uuid"); - update.ApplyTo(*builder); - // TODO(Li Feiyang): Add more update and `apply` once other build methods are - // implemented +// Test AddSortOrder +TEST(TableMetadataBuilderTest, AddSortOrderBasic) { + auto base = CreateBaseMetadata(); + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + auto schema = CreateTestSchema(); + // 1. Add unsorted - should reuse existing unsorted order + builder->AddSortOrder(SortOrder::Unsorted()); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->table_uuid, "apply-uuid"); + ASSERT_EQ(metadata->sort_orders.size(), 1); + EXPECT_TRUE(metadata->sort_orders[0]->is_unsorted()); + + // 2. Add basic sort order + builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField field1(1, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order1, + SortOrder::Make(*schema, 1, std::vector{field1})); + builder->AddSortOrder(std::move(order1)); + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + ASSERT_EQ(metadata->sort_orders.size(), 2); + EXPECT_EQ(metadata->sort_orders[1]->order_id(), 1); + + // 3. Add duplicate - should be idempotent + builder = TableMetadataBuilder::BuildFrom(base.get()); + ICEBERG_UNWRAP_OR_FAIL(auto order2, + SortOrder::Make(*schema, 1, std::vector{field1})); + ICEBERG_UNWRAP_OR_FAIL(auto order3, + SortOrder::Make(*schema, 1, std::vector{field1})); + builder->AddSortOrder(std::move(order2)); + builder->AddSortOrder(std::move(order3)); // Duplicate + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + ASSERT_EQ(metadata->sort_orders.size(), 2); // Only one added + + // 4. Add multiple different orders + verify ID reassignment + builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField field2(2, Transform::Identity(), SortDirection::kDescending, + NullOrder::kLast); + // User provides ID=99, Builder should reassign to ID=1 + ICEBERG_UNWRAP_OR_FAIL(auto order4, + SortOrder::Make(*schema, 99, std::vector{field1})); + ICEBERG_UNWRAP_OR_FAIL( + auto order5, SortOrder::Make(*schema, 2, std::vector{field1, field2})); + builder->AddSortOrder(std::move(order4)); + builder->AddSortOrder(std::move(order5)); + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + ASSERT_EQ(metadata->sort_orders.size(), 3); + EXPECT_EQ(metadata->sort_orders[1]->order_id(), 1); // Reassigned from 99 + EXPECT_EQ(metadata->sort_orders[2]->order_id(), 2); +} + +TEST(TableMetadataBuilderTest, AddSortOrderInvalid) { + auto base = CreateBaseMetadata(); + auto schema = CreateTestSchema(); + + // 1. Invalid field ID + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField invalid_field(999, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order1, + SortOrder::Make(1, std::vector{invalid_field})); + builder->AddSortOrder(std::move(order1)); + ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed)); + ASSERT_THAT(builder->Build(), HasErrorMessage("Cannot find source column")); + + // 2. Invalid transform (Day transform on string type) + builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField invalid_transform(2, Transform::Day(), SortDirection::kAscending, + NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order2, + SortOrder::Make(1, std::vector{invalid_transform})); + builder->AddSortOrder(std::move(order2)); + ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed)); + ASSERT_THAT(builder->Build(), HasErrorMessage("Invalid source type")); + + // 3. Without schema + builder = TableMetadataBuilder::BuildFromEmpty(2); + builder->AssignUUID("test-uuid"); + SortField field(1, Transform::Identity(), SortDirection::kAscending, NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order3, + SortOrder::Make(*schema, 1, std::vector{field})); + builder->AddSortOrder(std::move(order3)); + ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed)); + ASSERT_THAT(builder->Build(), HasErrorMessage("Schema with ID")); +} + +// Test SetDefaultSortOrder +TEST(TableMetadataBuilderTest, SetDefaultSortOrderBasic) { + auto base = CreateBaseMetadata(); + auto schema = CreateTestSchema(); + + // 1. Set default sort order by SortOrder object + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField field1(1, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order1_unique, + SortOrder::Make(*schema, 1, std::vector{field1})); + auto order1 = std::shared_ptr(std::move(order1_unique)); + builder->SetDefaultSortOrder(order1); + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + ASSERT_EQ(metadata->sort_orders.size(), 2); + EXPECT_EQ(metadata->default_sort_order_id, 1); + EXPECT_EQ(metadata->sort_orders[1]->order_id(), 1); + + // 2. Set default sort order by order ID + builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField field2(1, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + ICEBERG_UNWRAP_OR_FAIL(auto order2_unique, + SortOrder::Make(*schema, 1, std::vector{field2})); + auto order2 = std::shared_ptr(std::move(order2_unique)); + builder->AddSortOrder(order2); + builder->SetDefaultSortOrder(1); + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + EXPECT_EQ(metadata->default_sort_order_id, 1); + + // 3. Set default sort order using -1 (last added) + builder = TableMetadataBuilder::BuildFrom(base.get()); + SortField field3(2, Transform::Identity(), SortDirection::kDescending, + NullOrder::kLast); + ICEBERG_UNWRAP_OR_FAIL(auto order3_unique, + SortOrder::Make(*schema, 1, std::vector{field3})); + auto order3 = std::shared_ptr(std::move(order3_unique)); + builder->AddSortOrder(order3); + builder->SetDefaultSortOrder(-1); // Use last added + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + EXPECT_EQ(metadata->default_sort_order_id, 1); + + // 4. Setting same order is no-op + builder = TableMetadataBuilder::BuildFrom(base.get()); + builder->SetDefaultSortOrder(0); + ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); + EXPECT_EQ(metadata->default_sort_order_id, 0); +} + +TEST(TableMetadataBuilderTest, SetDefaultSortOrderInvalid) { + auto base = CreateBaseMetadata(); + + // Try to use -1 (last added) when no order has been added + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + builder->SetDefaultSortOrder(-1); + ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed)); + ASSERT_THAT(builder->Build(), HasErrorMessage("no sort order has been added")); } } // namespace iceberg diff --git a/src/iceberg/test/table_update_test.cc b/src/iceberg/test/table_update_test.cc index d250b6d20..afa70894c 100644 --- a/src/iceberg/test/table_update_test.cc +++ b/src/iceberg/test/table_update_test.cc @@ -314,4 +314,56 @@ TEST(TableUpdateTest, AssignUUIDApplyUpdate) { EXPECT_EQ(metadata->table_uuid, "apply-uuid"); } +// Test AddSortOrder ApplyTo +TEST(TableUpdateTest, AddSortOrderApplyUpdate) { + auto base = CreateBaseMetadata(); + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + + // Create a sort order + auto schema = CreateTestSchema(); + SortField sort_field(1, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + auto sort_order = std::shared_ptr( + SortOrder::Make(*schema, 1, std::vector{sort_field}).value().release()); + + // Apply AddSortOrder update + table::AddSortOrder add_sort_order(sort_order); + add_sort_order.ApplyTo(*builder); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + + // Verify the sort order was added + ASSERT_EQ(metadata->sort_orders.size(), 2); // unsorted + new order + auto& added_order = metadata->sort_orders[1]; + EXPECT_EQ(added_order->order_id(), 1); + EXPECT_EQ(added_order->fields().size(), 1); + EXPECT_EQ(added_order->fields()[0].source_id(), 1); + EXPECT_EQ(added_order->fields()[0].direction(), SortDirection::kAscending); + EXPECT_EQ(added_order->fields()[0].null_order(), NullOrder::kFirst); +} + +// Test SetDefaultSortOrder ApplyTo +TEST(TableUpdateTest, SetDefaultSortOrderApplyUpdate) { + auto base = CreateBaseMetadata(); + + // add a sort order to the base metadata + auto schema = CreateTestSchema(); + SortField sort_field(1, Transform::Identity(), SortDirection::kDescending, + NullOrder::kLast); + auto sort_order = std::shared_ptr( + SortOrder::Make(*schema, 1, std::vector{sort_field}).value().release()); + base->sort_orders.push_back(sort_order); + + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + + // Apply SetDefaultSortOrder update to set the new sort order as default + table::SetDefaultSortOrder set_default_sort_order(1); + set_default_sort_order.ApplyTo(*builder); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + + // Verify the default sort order was changed + EXPECT_EQ(metadata->default_sort_order_id, 1); +} + } // namespace iceberg diff --git a/src/iceberg/util/error_collector.h b/src/iceberg/util/error_collector.h index f94967f97..48ea717eb 100644 --- a/src/iceberg/util/error_collector.h +++ b/src/iceberg/util/error_collector.h @@ -30,6 +30,21 @@ namespace iceberg { +#define BUILDER_RETURN_IF_ERROR(result) \ + if (auto&& result_name = result; !result_name) [[unlikely]] { \ + errors_.emplace_back(std::move(result_name.error())); \ + return *this; \ + } + +#define BUILDER_ASSIGN_OR_RETURN_IMPL(result_name, lhs, rexpr) \ + auto&& result_name = (rexpr); \ + BUILDER_RETURN_IF_ERROR(result_name) \ + lhs = std::move(result_name.value()); + +#define BUILDER_ASSIGN_OR_RETURN(lhs, rexpr) \ + BUILDER_ASSIGN_OR_RETURN_IMPL(ICEBERG_ASSIGN_OR_RAISE_NAME(result_, __COUNTER__), lhs, \ + rexpr) + /// \brief Base class for collecting validation errors in builder patterns /// /// This class provides error accumulation functionality for builders that