From 20e127be7f5bfdf3c16b57b38a69642b2b776674 Mon Sep 17 00:00:00 2001 From: Zhiyuan Date: Sun, 30 Nov 2025 20:36:34 -0800 Subject: [PATCH 1/3] Implement properties pending update --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/meson.build | 1 + src/iceberg/table.cc | 5 + src/iceberg/table.h | 7 ++ src/iceberg/table_metadata.cc | 24 +++- src/iceberg/table_update.cc | 8 +- src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/update_properties_test.cc | 105 +++++++++++++++++ src/iceberg/type_fwd.h | 1 + src/iceberg/update/update_properties.cc | 130 +++++++++++++++++++++ src/iceberg/update/update_properties.h | 62 ++++++++++ 11 files changed, 339 insertions(+), 6 deletions(-) create mode 100644 src/iceberg/test/update_properties_test.cc create mode 100644 src/iceberg/update/update_properties.cc create mode 100644 src/iceberg/update/update_properties.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index fb0c6ad70..524858c4a 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -63,6 +63,7 @@ set(ICEBERG_SOURCES table_requirements.cc table_scan.cc table_update.cc + update/update_properties.cc transform.cc transform_function.cc type.cc diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index bdb5dbf3e..b5a7d1058 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -88,6 +88,7 @@ iceberg_sources = files( 'transform.cc', 'transform_function.cc', 'type.cc', + 'update/update_properties.cc', 'util/bucket_util.cc', 'util/conversions.cc', 'util/decimal.cc', diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 9116a5e92..b84848971 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -28,6 +28,7 @@ #include "iceberg/table_metadata.h" #include "iceberg/table_properties.h" #include "iceberg/table_scan.h" +#include "iceberg/update/update_properties.h" #include "iceberg/util/macros.h" namespace iceberg { @@ -114,6 +115,10 @@ std::unique_ptr Table::NewTransaction() const { throw NotImplemented("Table::NewTransaction is not implemented"); } +std::unique_ptr Table::UpdateProperties() { + return std::make_unique(this); +} + const std::shared_ptr& Table::io() const { return io_; } std::unique_ptr Table::NewScan() const { diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 1ea5ce873..c575054f2 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -32,6 +32,8 @@ namespace iceberg { +class PropertiesUpdate; + /// \brief Represents an Iceberg table class ICEBERG_EXPORT Table { public: @@ -115,10 +117,15 @@ class ICEBERG_EXPORT Table { /// \return a pointer to the new Transaction virtual std::unique_ptr NewTransaction() const; + /// \brief Create a pending update to modify table properties + std::unique_ptr UpdateProperties(); + /// \brief Returns a FileIO to read and write table data and metadata files const std::shared_ptr& io() const; private: + friend class PropertiesUpdate; + const TableIdentifier identifier_; std::shared_ptr metadata_; std::string metadata_location_; diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index df23e2f92..3f5ad6121 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -476,12 +476,32 @@ TableMetadataBuilder& TableMetadataBuilder::RemovePartitionStatistics( TableMetadataBuilder& TableMetadataBuilder::SetProperties( const std::unordered_map& updated) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + if (updated.empty()) { + return *this; + } + + for (const auto& [key, value] : updated) { + impl_->metadata.properties[key] = value; + } + + impl_->changes.push_back(std::make_unique(updated)); + + return *this; } TableMetadataBuilder& TableMetadataBuilder::RemoveProperties( const std::vector& removed) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + if (removed.empty()) { + return *this; + } + + for (const auto& key : removed) { + impl_->metadata.properties.erase(key); + } + + impl_->changes.push_back(std::make_unique(removed)); + + return *this; } TableMetadataBuilder& TableMetadataBuilder::SetLocation(std::string_view location) { diff --git a/src/iceberg/table_update.cc b/src/iceberg/table_update.cc index fcb7a58c2..9b119af63 100644 --- a/src/iceberg/table_update.cc +++ b/src/iceberg/table_update.cc @@ -182,21 +182,21 @@ Status SetSnapshotRef::GenerateRequirements(TableUpdateContext& context) const { // SetProperties void SetProperties::ApplyTo(TableMetadataBuilder& builder) const { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + builder.SetProperties(updated_); } Status SetProperties::GenerateRequirements(TableUpdateContext& context) const { - return NotImplemented("SetTableProperties::GenerateRequirements not implemented"); + return {}; } // RemoveProperties void RemoveProperties::ApplyTo(TableMetadataBuilder& builder) const { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + builder.RemoveProperties(removed_); } Status RemoveProperties::GenerateRequirements(TableUpdateContext& context) const { - return NotImplemented("RemoveTableProperties::GenerateRequirements not implemented"); + return {}; } // SetLocation diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 21ccd4d66..935e95696 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -87,6 +87,7 @@ add_iceberg_test(table_test table_metadata_builder_test.cc table_requirement_test.cc table_update_test.cc + update_properties_test.cc test_common.cc) add_iceberg_test(expression_test diff --git a/src/iceberg/test/update_properties_test.cc b/src/iceberg/test/update_properties_test.cc new file mode 100644 index 000000000..224a3c0f4 --- /dev/null +++ b/src/iceberg/test/update_properties_test.cc @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/update_properties.h" + +#include +#include +#include + +#include +#include + +#include "iceberg/file_format.h" +#include "iceberg/partition_spec.h" +#include "iceberg/snapshot.h" +#include "iceberg/sort_order.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_properties.h" +#include "iceberg/test/matchers.h" +#include "iceberg/test/mock_catalog.h" + +namespace iceberg { + +namespace { + +std::shared_ptr MakeBaseMetadata( + std::unordered_map properties) { + auto metadata = std::make_shared(); + metadata->format_version = 2; + metadata->table_uuid = "test-uuid"; + metadata->location = "s3://bucket/table"; + metadata->last_sequence_number = TableMetadata::kInitialSequenceNumber; + metadata->last_updated_ms = TimePointMs{}; + metadata->last_column_id = 0; + metadata->default_spec_id = PartitionSpec::kInitialSpecId; + metadata->last_partition_id = 0; + metadata->default_sort_order_id = SortOrder::kInitialSortOrderId; + metadata->current_snapshot_id = Snapshot::kInvalidSnapshotId; + metadata->next_row_id = TableMetadata::kInitialRowId; + metadata->properties = std::move(properties); + return metadata; +} + +TableIdentifier MakeIdentifier() { return TableIdentifier{Namespace{{"ns"}}, "tbl"}; } + +} // namespace + +using ::testing::_; +using ::testing::ByMove; +using ::testing::Return; + +TEST(UpdatePropertiesTest, ApplyMergesUpdatesAndRemovals) { + auto metadata = + MakeBaseMetadata({{"foo", "bar"}, {"keep", "yes"}, {"format-version", "2"}}); + Table table(MakeIdentifier(), metadata, "loc", /*io=*/nullptr, /*catalog=*/nullptr); + + auto updater = table.UpdateProperties(); + updater->Set("foo", "baz").Remove("keep").DefaultFormat(FileFormatType::kOrc); + + auto applied = updater->Apply(); + ASSERT_THAT(applied, IsOk()); + + const auto& props = *applied; + EXPECT_EQ(props.at("foo"), "baz"); + EXPECT_FALSE(props.contains("keep")); + EXPECT_EQ(props.at(TableProperties::kDefaultFileFormat.key()), "orc"); +} + +TEST(UpdatePropertiesTest, CommitUsesCatalogAndRefreshesTable) { + auto catalog = std::make_shared(); + auto base_metadata = MakeBaseMetadata({{"foo", "bar"}}); + Table table(MakeIdentifier(), base_metadata, "loc", /*io=*/nullptr, catalog); + + auto updated_metadata = MakeBaseMetadata({{"foo", "new"}}); // response metadata + + EXPECT_CALL(*catalog, UpdateTable(table.name(), _, _)) + .WillOnce(Return(ByMove(Result>{std::make_unique( + table.name(), updated_metadata, "loc2", nullptr, catalog)}))); + + auto updater = table.UpdateProperties(); + updater->Set("foo", "new"); + + EXPECT_THAT(updater->Commit(), IsOk()); + EXPECT_EQ(table.properties().configs().at("foo"), "new"); + EXPECT_EQ(table.location(), "s3://bucket/table"); +} + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 68a4543fb..d84f61488 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -96,6 +96,7 @@ class Table; class TableProperties; class FileIO; class Transaction; +class PropertiesUpdate; class Transform; class TransformFunction; diff --git a/src/iceberg/update/update_properties.cc b/src/iceberg/update/update_properties.cc new file mode 100644 index 000000000..870c2fffc --- /dev/null +++ b/src/iceberg/update/update_properties.cc @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/update_properties.h" + +#include +#include + +#include "iceberg/catalog.h" +#include "iceberg/exception.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_properties.h" +#include "iceberg/table_requirement.h" +#include "iceberg/table_update.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +PropertiesUpdate::PropertiesUpdate(Table* table) : table_(table) {} + +PropertiesUpdate& PropertiesUpdate::Set(std::string key, std::string value) { + if (key.empty()) { + throw IcebergError("Property key cannot be empty"); + } + if (removals_.contains(key)) { + throw IcebergError(std::format("Cannot remove and update the same key: {}", key)); + } + + updates_[std::move(key)] = std::move(value); + return *this; +} + +PropertiesUpdate& PropertiesUpdate::Remove(std::string key) { + if (key.empty()) { + throw IcebergError("Property key cannot be empty"); + } + if (updates_.contains(key)) { + throw IcebergError(std::format("Cannot remove and update the same key: {}", key)); + } + + removals_.insert(std::move(key)); + return *this; +} + +PropertiesUpdate& PropertiesUpdate::DefaultFormat(FileFormatType format) { + return Set(std::string(TableProperties::kDefaultFileFormat.key()), + std::string(ToString(format))); +} + +Result> PropertiesUpdate::Apply() { + if (table_ == nullptr) { + return InvalidArgument("Cannot apply updates on a null table"); + } + + if (table_->catalog_) { + if (auto status = table_->Refresh(); !status) { + return std::unexpected(status.error()); + } + } + + std::unordered_map new_properties = + table_->properties().configs(); + + for (const auto& key : removals_) { + new_properties.erase(key); + } + for (const auto& [key, value] : updates_) { + new_properties[key] = value; + } + + return new_properties; +} + +Status PropertiesUpdate::Commit() { + if (table_ == nullptr) { + return InvalidArgument("Cannot commit updates on a null table"); + } + + if (updates_.empty() && removals_.empty()) { + return {}; + } + + ICEBERG_ASSIGN_OR_RAISE(auto applied, Apply()); + (void)applied; // apply for validation + + if (!table_->catalog_) { + return NotSupported("Commit requires a catalog-backed table"); + } + + std::vector> requirements; + std::vector> updates; + + if (!updates_.empty()) { + updates.push_back(std::make_unique(updates_)); + } + if (!removals_.empty()) { + std::vector removed(removals_.begin(), removals_.end()); + updates.push_back(std::make_unique(std::move(removed))); + } + + ICEBERG_ASSIGN_OR_RAISE(auto updated_table, table_->catalog_->UpdateTable( + table_->name(), requirements, updates)); + + table_->metadata_ = std::move(updated_table->metadata_); + table_->metadata_location_ = std::move(updated_table->metadata_location_); + table_->io_ = std::move(updated_table->io_); + table_->properties_ = std::move(updated_table->properties_); + table_->metadata_cache_ = std::make_unique(table_->metadata_.get()); + + return {}; +} + +} // namespace iceberg diff --git a/src/iceberg/update/update_properties.h b/src/iceberg/update/update_properties.h new file mode 100644 index 000000000..6173719f9 --- /dev/null +++ b/src/iceberg/update/update_properties.h @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/update/update_properties.h +/// Pending update for modifying table properties. + +#include +#include +#include + +#include "iceberg/file_format.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/pending_update.h" + +namespace iceberg { + +class Table; + +/// \brief Pending update for table properties. +class ICEBERG_EXPORT PropertiesUpdate + : public PendingUpdateTyped> { + public: + explicit PropertiesUpdate(Table* table); + + /// \brief Set or update a property value. + PropertiesUpdate& Set(std::string key, std::string value); + + /// \brief Remove a property key. + PropertiesUpdate& Remove(std::string key); + + /// \brief Set the default data file format. + PropertiesUpdate& DefaultFormat(FileFormatType format); + + Result> Apply() override; + + Status Commit() override; + + private: + Table* table_; // non-owning + std::unordered_map updates_; + std::unordered_set removals_; +}; + +} // namespace iceberg From 2d96e7f5b357659b5ca5107ec3a7d4c4d9e326c7 Mon Sep 17 00:00:00 2001 From: Zhiyuan Date: Sun, 30 Nov 2025 20:39:41 -0800 Subject: [PATCH 2/3] Update update_properties_test.cc --- src/iceberg/test/update_properties_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/iceberg/test/update_properties_test.cc b/src/iceberg/test/update_properties_test.cc index 224a3c0f4..b7f37ee94 100644 --- a/src/iceberg/test/update_properties_test.cc +++ b/src/iceberg/test/update_properties_test.cc @@ -90,6 +90,10 @@ TEST(UpdatePropertiesTest, CommitUsesCatalogAndRefreshesTable) { auto updated_metadata = MakeBaseMetadata({{"foo", "new"}}); // response metadata + EXPECT_CALL(*catalog, LoadTable(table.name())) + .WillOnce(Return(ByMove(Result>{std::make_unique
( + table.name(), MakeBaseMetadata({{"foo", "bar"}}), "loc", nullptr, catalog)}))); + EXPECT_CALL(*catalog, UpdateTable(table.name(), _, _)) .WillOnce(Return(ByMove(Result>{std::make_unique
( table.name(), updated_metadata, "loc2", nullptr, catalog)}))); From bcce0695649136b4672ffa2011fec550120e4fe6 Mon Sep 17 00:00:00 2001 From: Zhiyuan Date: Sun, 30 Nov 2025 21:25:16 -0800 Subject: [PATCH 3/3] Update update_properties_test.cc --- src/iceberg/test/update_properties_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/iceberg/test/update_properties_test.cc b/src/iceberg/test/update_properties_test.cc index b7f37ee94..ef01aeb9c 100644 --- a/src/iceberg/test/update_properties_test.cc +++ b/src/iceberg/test/update_properties_test.cc @@ -58,7 +58,9 @@ std::shared_ptr MakeBaseMetadata( return metadata; } -TableIdentifier MakeIdentifier() { return TableIdentifier{Namespace{{"ns"}}, "tbl"}; } +TableIdentifier MakeIdentifier() { + return TableIdentifier{.ns = Namespace{{"ns"}}, .name = "tbl"}; +} } // namespace