@@ -522,16 +522,166 @@ TableMetadataBuilder& TableMetadataBuilder::UpgradeFormatVersion(
522522}
523523
524524TableMetadataBuilder& TableMetadataBuilder::SetCurrentSchema (
525- std::shared_ptr<Schema> schema, int32_t new_last_column_id) {
526- throw IcebergError (std::format (" {} not implemented" , __FUNCTION__));
525+ std::shared_ptr<Schema> const & schema, int32_t new_last_column_id) {
526+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto schema_id,
527+ AddSchemaInternal (*schema, new_last_column_id));
528+ return SetCurrentSchema (schema_id);
527529}
528530
529531TableMetadataBuilder& TableMetadataBuilder::SetCurrentSchema (int32_t schema_id) {
530- throw IcebergError (std::format (" {} not implemented" , __FUNCTION__));
532+ if (schema_id == kLastAdded ) {
533+ ICEBERG_BUILDER_CHECK (impl_->last_added_schema_id .has_value (),
534+ " Cannot set last added schema: no schema has been added" );
535+ return SetCurrentSchema (impl_->last_added_schema_id .value ());
536+ }
537+
538+ if (impl_->metadata .current_schema_id == schema_id) {
539+ return *this ;
540+ }
541+
542+ auto it = impl_->schemas_by_id .find (schema_id);
543+ ICEBERG_BUILDER_CHECK (it != impl_->schemas_by_id .end (),
544+ " Cannot set current schema to unknown schema: {}" , schema_id);
545+ const auto & schema = it->second ;
546+
547+ // Rebuild all partition specs for the new current schema
548+ std::vector<std::shared_ptr<PartitionSpec>> updated_specs;
549+ for (const auto & spec : impl_->metadata .partition_specs ) {
550+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto updated_spec, UpdateSpecSchema (*schema, *spec));
551+ updated_specs.push_back (std::move (updated_spec));
552+ }
553+ impl_->metadata .partition_specs = std::move (updated_specs);
554+ impl_->specs_by_id .clear ();
555+ for (const auto & spec : impl_->metadata .partition_specs ) {
556+ impl_->specs_by_id .emplace (spec->spec_id (), spec);
557+ }
558+
559+ // Rebuild all sort orders for the new current schema
560+ std::vector<std::shared_ptr<SortOrder>> updated_orders;
561+ for (const auto & order : impl_->metadata .sort_orders ) {
562+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto updated_order,
563+ UpdateSortOrderSchema (*schema, *order));
564+ updated_orders.push_back (std::move (updated_order));
565+ }
566+ impl_->metadata .sort_orders = std::move (updated_orders);
567+ impl_->sort_orders_by_id .clear ();
568+ for (const auto & order : impl_->metadata .sort_orders ) {
569+ impl_->sort_orders_by_id .emplace (order->order_id (), order);
570+ }
571+
572+ // Set the current schema ID
573+ impl_->metadata .current_schema_id = schema_id;
574+
575+ // Record the change
576+ if (impl_->last_added_schema_id .has_value () &&
577+ impl_->last_added_schema_id .value () == schema_id) {
578+ impl_->changes .push_back (std::make_unique<table::SetCurrentSchema>(kLastAdded ));
579+ } else {
580+ impl_->changes .push_back (std::make_unique<table::SetCurrentSchema>(schema_id));
581+ }
582+
583+ return *this ;
531584}
532585
533- TableMetadataBuilder& TableMetadataBuilder::AddSchema (std::shared_ptr<Schema> schema) {
534- throw IcebergError (std::format (" {} not implemented" , __FUNCTION__));
586+ Result<std::shared_ptr<PartitionSpec>> TableMetadataBuilder::UpdateSpecSchema (
587+ const Schema& schema, const PartitionSpec& partition_spec) {
588+ // UpdateSpecSchema: Update partition spec to use the new schema
589+ // This preserves the partition spec structure but rebinds it to the new schema
590+
591+ // Copy all fields from the partition spec. IDs should not change.
592+ std::vector<PartitionField> fields;
593+ fields.reserve (partition_spec.fields ().size ());
594+ int32_t last_assigned_field_id = PartitionSpec::kLegacyPartitionDataIdStart ;
595+ for (const auto & field : partition_spec.fields ()) {
596+ fields.push_back (field);
597+ last_assigned_field_id = std::max (last_assigned_field_id, field.field_id ());
598+ }
599+
600+ // Build without validation because the schema may have changed in a way that makes
601+ // this spec invalid. The spec should still be preserved so that older metadata can
602+ // be interpreted.
603+ ICEBERG_ASSIGN_OR_RAISE (auto new_partition_spec,
604+ PartitionSpec::Make (partition_spec.spec_id (), std::move (fields),
605+ last_assigned_field_id));
606+
607+ // Validate the new partition name against the new schema
608+ ICEBERG_RETURN_UNEXPECTED (new_partition_spec->ValidatePartitionName (schema));
609+ return new_partition_spec;
610+ }
611+
612+ Result<std::unique_ptr<SortOrder>> TableMetadataBuilder::UpdateSortOrderSchema (
613+ const Schema& schema, const SortOrder& sort_order) {
614+ // Build without validation because the schema may have changed in a way that makes
615+ // this order invalid. The order should still be preserved so that older metadata can
616+ // be interpreted.
617+ auto fields = sort_order.fields ();
618+ std::vector<SortField> new_fields{fields.begin (), fields.end ()};
619+ return SortOrder::Make (sort_order.order_id (), std::move (new_fields));
620+ }
621+
622+ TableMetadataBuilder& TableMetadataBuilder::AddSchema (
623+ std::shared_ptr<Schema> const & schema) {
624+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto highest_field_id, schema->HighestFieldId ());
625+ AddSchemaInternal (*schema, std::max (impl_->metadata .last_column_id , highest_field_id));
626+ return *this ;
627+ }
628+
629+ Result<int32_t > TableMetadataBuilder::AddSchemaInternal (const Schema& schema,
630+ int32_t new_last_column_id) {
631+ if (new_last_column_id < impl_->metadata .last_column_id ) {
632+ return InvalidArgument (" Invalid last column ID: {} < {} (previous last column ID)" ,
633+ new_last_column_id, impl_->metadata .last_column_id );
634+ }
635+
636+ ICEBERG_RETURN_UNEXPECTED (schema.Validate (impl_->metadata .format_version ));
637+
638+ auto new_schema_id = ReuseOrCreateNewSchemaId (schema);
639+ if (impl_->schemas_by_id .find (new_schema_id) != impl_->schemas_by_id .end ()) {
640+ // update last_added_schema_id if the schema was added in this set of changes (since
641+ // it is now the last)
642+ bool is_new_schema =
643+ impl_->last_added_schema_id .has_value () &&
644+ std::ranges::find_if (impl_->changes , [new_schema_id](const auto & change) {
645+ if (change->kind () != TableUpdate::Kind::kAddSchema ) {
646+ return false ;
647+ }
648+ auto * add_schema = dynamic_cast <table::AddSchema*>(change.get ());
649+ return add_schema->schema ()->schema_id ().value_or (Schema::kInitialSchemaId ) ==
650+ new_schema_id;
651+ }) != impl_->changes .cend ();
652+ impl_->last_added_schema_id =
653+ is_new_schema ? std::make_optional (new_schema_id) : std::nullopt ;
654+ return new_schema_id;
655+ }
656+
657+ auto new_schema = std::make_shared<Schema>(
658+ std::vector<SchemaField>(schema.fields ().begin (), schema.fields ().end ()),
659+ new_schema_id);
660+
661+ impl_->metadata .schemas .push_back (new_schema);
662+ impl_->schemas_by_id .emplace (new_schema_id, new_schema);
663+
664+ impl_->changes .push_back (
665+ std::make_unique<table::AddSchema>(new_schema, new_last_column_id));
666+ impl_->metadata .last_column_id = new_last_column_id;
667+ impl_->last_added_schema_id = new_schema_id;
668+
669+ return new_schema_id;
670+ }
671+
672+ int32_t TableMetadataBuilder::ReuseOrCreateNewSchemaId (const Schema& new_schema) const {
673+ // if the schema already exists, use its id; otherwise use the highest id + 1
674+ auto new_schema_id =
675+ impl_->metadata .current_schema_id .value_or (Schema::kInitialSchemaId );
676+ for (auto & schema : impl_->metadata .schemas ) {
677+ auto schema_id = schema->schema_id ().value_or (Schema::kInitialSchemaId );
678+ if (schema->SameSchema (new_schema)) {
679+ return schema_id;
680+ } else if (new_schema_id <= schema_id) {
681+ new_schema_id = schema_id + 1 ;
682+ }
683+ }
684+ return new_schema_id;
535685}
536686
537687TableMetadataBuilder& TableMetadataBuilder::SetDefaultPartitionSpec (
@@ -555,7 +705,23 @@ TableMetadataBuilder& TableMetadataBuilder::RemovePartitionSpecs(
555705
556706TableMetadataBuilder& TableMetadataBuilder::RemoveSchemas (
557707 const std::vector<int32_t >& schema_ids) {
558- throw IcebergError (std::format (" {} not implemented" , __FUNCTION__));
708+ std::unordered_set<int32_t > schema_ids_to_remove (schema_ids.begin (), schema_ids.end ());
709+ auto current_schema_id =
710+ impl_->metadata .current_schema_id .value_or (Schema::kInitialSchemaId );
711+ ICEBERG_BUILDER_CHECK (schema_ids_to_remove.contains (current_schema_id),
712+ " Cannot remove current schema: {}" , current_schema_id);
713+
714+ if (!schema_ids_to_remove.empty ()) {
715+ impl_->metadata .schemas =
716+ impl_->metadata .schemas | std::views::filter ([&](const auto & schema) {
717+ return !schema_ids_to_remove.contains (
718+ schema->schema_id ().value_or (Schema::kInitialSchemaId ));
719+ }) |
720+ std::ranges::to<std::vector<std::shared_ptr<Schema>>>();
721+ impl_->changes .push_back (
722+ std::make_unique<table::RemoveSchemas>(std::move (schema_ids_to_remove)));
723+ }
724+ return *this ;
559725}
560726
561727TableMetadataBuilder& TableMetadataBuilder::SetDefaultSortOrder (
@@ -595,9 +761,11 @@ Result<int32_t> TableMetadataBuilder::AddSortOrderInternal(const SortOrder& orde
595761 bool is_new_order =
596762 impl_->last_added_order_id .has_value () &&
597763 std::ranges::find_if (impl_->changes , [new_order_id](const auto & change) {
764+ if (change->kind () != TableUpdate::Kind::kAddSortOrder ) {
765+ return false ;
766+ }
598767 auto * add_sort_order = dynamic_cast <table::AddSortOrder*>(change.get ());
599- return add_sort_order &&
600- add_sort_order->sort_order ()->order_id () == new_order_id;
768+ return add_sort_order->sort_order ()->order_id () == new_order_id;
601769 }) != impl_->changes .cend ();
602770 impl_->last_added_order_id =
603771 is_new_order ? std::make_optional (new_order_id) : std::nullopt ;
@@ -764,6 +932,27 @@ Result<std::unique_ptr<TableMetadata>> TableMetadataBuilder::Build() {
764932 std::chrono::system_clock::now ().time_since_epoch ())};
765933 }
766934
935+ auto current_schema_id =
936+ impl_->metadata .current_schema_id .value_or (Schema::kInitialSchemaId );
937+ auto schema_it = impl_->schemas_by_id .find (current_schema_id);
938+ ICEBERG_PRECHECK (schema_it != impl_->schemas_by_id .end (),
939+ " Current schema ID {} not found in schemas" , current_schema_id);
940+ const auto & current_schema = schema_it->second ;
941+ {
942+ auto spec_it = impl_->specs_by_id .find (impl_->metadata .default_spec_id );
943+ // FIXME(GuoTao.yu): Default spec must exist after we support update partition spec
944+ if (spec_it != impl_->specs_by_id .end ()) {
945+ ICEBERG_RETURN_UNEXPECTED (
946+ spec_it->second ->Validate (*current_schema, /* allow_missing_fields=*/ false ));
947+ }
948+ auto sort_order_it =
949+ impl_->sort_orders_by_id .find (impl_->metadata .default_sort_order_id );
950+ ICEBERG_PRECHECK (sort_order_it != impl_->sort_orders_by_id .end (),
951+ " Default sort order ID {} not found in sort orders" ,
952+ impl_->metadata .default_sort_order_id );
953+ ICEBERG_RETURN_UNEXPECTED (sort_order_it->second ->Validate (*current_schema));
954+ }
955+
767956 // 4. Buildup metadata_log from base metadata
768957 int32_t max_metadata_log_size =
769958 impl_->metadata .properties .Get (TableProperties::kMetadataPreviousVersionsMax );
0 commit comments