diff --git a/src/iceberg/catalog/memory/in_memory_catalog.cc b/src/iceberg/catalog/memory/in_memory_catalog.cc
index 9e4a485a0..dc6d9d009 100644
--- a/src/iceberg/catalog/memory/in_memory_catalog.cc
+++ b/src/iceberg/catalog/memory/in_memory_catalog.cc
@@ -23,7 +23,10 @@
 #include

 #include "iceberg/table.h"
+#include "iceberg/table_identifier.h"
 #include "iceberg/table_metadata.h"
+#include "iceberg/table_requirement.h"
+#include "iceberg/table_update.h"
 #include "iceberg/util/macros.h"

 namespace iceberg {
@@ -120,6 +123,13 @@ class ICEBERG_EXPORT InMemoryNamespace {
   /// \return The metadata location if the table exists; error otherwise.
   Result<std::string> GetTableMetadataLocation(const TableIdentifier& table_ident) const;

+  /// \brief Updates the metadata location for the specified table.
+  ///
+  /// \param table_ident The identifier of the table.
+  /// \param metadata_location The new metadata location.
+  Status UpdateTableMetadataLocation(const TableIdentifier& table_ident,
+                                     const std::string& metadata_location);
+
   /// \brief Internal utility for retrieving a namespace node pointer from the tree.
   ///
   /// \tparam NamespacePtr The type of the namespace node pointer.
@@ -278,7 +288,7 @@ Result<std::vector<std::string>> InMemoryNamespace::ListTables(
   return table_names;
 }

-Status InMemoryNamespace::RegisterTable(TableIdentifier const& table_ident,
+Status InMemoryNamespace::RegisterTable(const TableIdentifier& table_ident,
                                         const std::string& metadata_location) {
   const auto ns = GetNamespace(this, table_ident.ns);
   ICEBERG_RETURN_UNEXPECTED(ns);
@@ -289,21 +299,21 @@ Status InMemoryNamespace::RegisterTable(TableIdentifier const& table_ident,
   return {};
 }

-Status InMemoryNamespace::UnregisterTable(TableIdentifier const& table_ident) {
+Status InMemoryNamespace::UnregisterTable(const TableIdentifier& table_ident) {
   const auto ns = GetNamespace(this, table_ident.ns);
   ICEBERG_RETURN_UNEXPECTED(ns);
   ns.value()->table_metadata_locations_.erase(table_ident.name);
   return {};
 }

-Result<bool> InMemoryNamespace::TableExists(TableIdentifier const& table_ident) const {
+Result<bool> InMemoryNamespace::TableExists(const TableIdentifier& table_ident) const {
   const auto ns = GetNamespace(this, table_ident.ns);
   ICEBERG_RETURN_UNEXPECTED(ns);
   return ns.value()->table_metadata_locations_.contains(table_ident.name);
 }

 Result<std::string> InMemoryNamespace::GetTableMetadataLocation(
-    TableIdentifier const& table_ident) const {
+    const TableIdentifier& table_ident) const {
   const auto ns = GetNamespace(this, table_ident.ns);
   ICEBERG_RETURN_UNEXPECTED(ns);
   const auto it = ns.value()->table_metadata_locations_.find(table_ident.name);
@@ -313,17 +323,24 @@ Result<std::string> InMemoryNamespace::GetTableMetadataLocation(
   return it->second;
 }

+Status InMemoryNamespace::UpdateTableMetadataLocation(
+    const TableIdentifier& table_ident, const std::string& metadata_location) {
+  ICEBERG_ASSIGN_OR_RAISE(auto ns, GetNamespace(this, table_ident.ns));
+  ns->table_metadata_locations_[table_ident.name] = metadata_location;
+  return {};
+}
+
 std::shared_ptr<InMemoryCatalog> InMemoryCatalog::Make(
-    std::string const& name, std::shared_ptr<FileIO> const& file_io,
-    std::string const& warehouse_location,
-    std::unordered_map<std::string, std::string> const& properties) {
+    const std::string& name, const std::shared_ptr<FileIO>& file_io,
+    const std::string& warehouse_location,
+    const std::unordered_map<std::string, std::string>& properties) {
   return std::make_shared<InMemoryCatalog>(name, file_io, warehouse_location, properties);
 }

 InMemoryCatalog::InMemoryCatalog(
-    std::string const& name, std::shared_ptr<FileIO> const& file_io,
-    std::string const& warehouse_location,
-    std::unordered_map<std::string, std::string> const& properties)
+    const std::string& name, const std::shared_ptr<FileIO>& file_io,
+    const std::string& warehouse_location,
+    const std::unordered_map<std::string, std::string>& properties)
     : catalog_name_(std::move(name)),
       properties_(std::move(properties)),
       file_io_(std::move(file_io)),
@@ -395,7 +412,31 @@ Result<std::unique_ptr<Table>> InMemoryCatalog::UpdateTable(
     const std::vector<std::unique_ptr<TableRequirement>>& requirements,
     const std::vector<std::unique_ptr<TableUpdate>>& updates) {
   std::unique_lock lock(mutex_);
-  return NotImplemented("update table");
+  ICEBERG_ASSIGN_OR_RAISE(auto base_metadata_location,
+                          root_namespace_->GetTableMetadataLocation(identifier));
+
+  ICEBERG_ASSIGN_OR_RAISE(auto base,
+                          TableMetadataUtil::Read(*file_io_, base_metadata_location));
+
+  for (const auto& requirement : requirements) {
+    ICEBERG_RETURN_UNEXPECTED(requirement->Validate(base.get()));
+  }
+
+  auto builder = TableMetadataBuilder::BuildFrom(base.get());
+  for (const auto& update : updates) {
+    update->ApplyTo(*builder);
+  }
+  ICEBERG_ASSIGN_OR_RAISE(auto updated, builder->Build());
+  ICEBERG_ASSIGN_OR_RAISE(
+      auto new_metadata_location,
+      TableMetadataUtil::Write(*file_io_, base.get(), base_metadata_location, *updated));
+  ICEBERG_RETURN_UNEXPECTED(
+      root_namespace_->UpdateTableMetadataLocation(identifier, new_metadata_location));
+  TableMetadataUtil::DeleteRemovedMetadataFiles(*file_io_, base.get(), *updated);
+
+  return std::make_unique<Table>(identifier, std::move(updated),
+                                 std::move(new_metadata_location), file_io_,
+                                 std::static_pointer_cast<Catalog>(shared_from_this()));
 }

 Result> InMemoryCatalog::StageCreateTable(
@@ -438,9 +479,8 @@ Result<std::unique_ptr<Table>> InMemoryCatalog::LoadTable(
   ICEBERG_ASSIGN_OR_RAISE(auto metadata,
                           TableMetadataUtil::Read(*file_io_, metadata_location));
-
-  return std::make_unique<Table>(identifier, std::move(metadata), metadata_location,
-                                 file_io_,
+  return std::make_unique<Table>
(identifier, std::move(metadata), + std::move(metadata_location), file_io_, std::static_pointer_cast(shared_from_this())); } diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index cee309a16..7f25a174c 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -797,9 +797,7 @@ nlohmann::json ToJson(const TableMetadata& table_metadata) { json[kSortOrders] = ToJsonList(table_metadata.sort_orders); // write properties map - if (table_metadata.properties) { - json[kProperties] = table_metadata.properties->configs(); - } + json[kProperties] = table_metadata.properties.configs(); if (std::ranges::find_if(table_metadata.snapshots, [&](const auto& snapshot) { return snapshot->snapshot_id == table_metadata.current_snapshot_id; diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 4a1798c36..45005d8ee 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -51,9 +51,8 @@ Status Table::Refresh() { } ICEBERG_ASSIGN_OR_RAISE(auto refreshed_table, catalog_->LoadTable(identifier_)); - if (metadata_location_ != refreshed_table->metadata_location_) { + if (metadata_location_ != refreshed_table->metadata_file_location()) { metadata_ = std::move(refreshed_table->metadata_); - metadata_location_ = std::move(refreshed_table->metadata_location_); io_ = std::move(refreshed_table->io_); metadata_cache_ = std::make_unique(metadata_.get()); } @@ -87,12 +86,14 @@ Table::sort_orders() const { return metadata_cache_->GetSortOrdersById(); } -const std::shared_ptr& Table::properties() const { - return metadata_->properties; -} +const TableProperties& Table::properties() const { return metadata_->properties; } + +const std::string& Table::metadata_file_location() const { return metadata_location_; } const std::string& Table::location() const { return metadata_->location; } +const TimePointMs& Table::last_updated_ms() const { return metadata_->last_updated_ms; } + Result> Table::current_snapshot() const { return metadata_->Snapshot(); } diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 00c6deac5..0745fb697 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -29,6 +29,7 @@ #include "iceberg/snapshot.h" #include "iceberg/table_identifier.h" #include "iceberg/type_fwd.h" +#include "iceberg/util/timepoint.h" namespace iceberg { @@ -82,11 +83,19 @@ class ICEBERG_EXPORT Table { sort_orders() const; /// \brief Return a map of string properties for this table - const std::shared_ptr& properties() const; + const TableProperties& properties() const; + + /// \brief Return the table's metadata file location + const std::string& metadata_file_location() const; /// \brief Return the table's base location const std::string& location() const; + /// \brief Get the time when this table was last updated + /// + /// \return the time when this table was last updated + const TimePointMs& last_updated_ms() const; + /// \brief Return the table's current snapshot, return NotFoundError if not found Result> current_snapshot() const; diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 5d6cec1d9..4e814e027 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -20,13 +20,16 @@ #include "iceberg/table_metadata.h" #include +#include #include #include #include +#include #include #include #include #include +#include #include @@ -41,12 +44,14 @@ #include "iceberg/table_properties.h" #include "iceberg/table_update.h" #include "iceberg/util/gzip_internal.h" +#include "iceberg/util/location_util.h" #include 
"iceberg/util/macros.h" #include "iceberg/util/uuid.h" namespace iceberg { namespace { const TimePointMs kInvalidLastUpdatedMs = TimePointMs::min(); constexpr int32_t kLastAdded = -1; +constexpr std::string_view kMetadataFolderName = "metadata"; } // namespace std::string ToString(const SnapshotLogEntry& entry) { @@ -142,19 +147,6 @@ bool SnapshotRefEquals( return true; } -bool TablePropertiesEquals(const std::shared_ptr& lhs, - const std::shared_ptr& rhs) { - bool left_is_empty = lhs == nullptr || lhs->configs().empty(); - bool right_is_empty = rhs == nullptr || rhs->configs().empty(); - if (left_is_empty && right_is_empty) { - return true; - } - if (left_is_empty || right_is_empty) { - return false; - } - return lhs->configs() == rhs->configs(); -} - } // namespace bool operator==(const TableMetadata& lhs, const TableMetadata& rhs) { @@ -167,7 +159,7 @@ bool operator==(const TableMetadata& lhs, const TableMetadata& rhs) { SharedPtrVectorEquals(lhs.schemas, rhs.schemas) && lhs.default_spec_id == rhs.default_spec_id && lhs.last_partition_id == rhs.last_partition_id && - TablePropertiesEquals(lhs.properties, rhs.properties) && + lhs.properties == rhs.properties && lhs.current_snapshot_id == rhs.current_snapshot_id && SharedPtrVectorEquals(lhs.snapshots, rhs.snapshots) && lhs.snapshot_log == rhs.snapshot_log && lhs.metadata_log == rhs.metadata_log && @@ -234,30 +226,57 @@ Result TableMetadataCache::InitSnapshotMap( std::ranges::to(); } -// TableMetadataUtil implementation +Result TableMetadataUtil::Codec::FromString( + std::string_view name) { + std::string name_upper = StringUtils::ToUpper(name); + if (name_upper == kCodecTypeGzip) { + return MetadataFileCodecType::kGzip; + } else if (name_upper == kCodecTypeNone) { + return MetadataFileCodecType::kNone; + } + return InvalidArgument("Invalid codec name: {}", name); +} -Result TableMetadataUtil::CodecFromFileName( +Result TableMetadataUtil::Codec::FromFileName( std::string_view file_name) { - auto pos = file_name.find_last_of(".metadata.json"); + auto pos = file_name.find_last_of(kTableMetadataFileSuffix); if (pos == std::string::npos) { return InvalidArgument("{} is not a valid metadata file", file_name); } // We have to be backward-compatible with .metadata.json.gz files - if (file_name.ends_with(".metadata.json.gz")) { + if (file_name.ends_with(kCompGzipTableMetadataFileSuffix)) { return MetadataFileCodecType::kGzip; } std::string_view file_name_without_suffix = file_name.substr(0, pos); - if (file_name_without_suffix.ends_with(".gz")) { + if (file_name_without_suffix.ends_with(kGzipTableMetadataFileExtension)) { return MetadataFileCodecType::kGzip; } return MetadataFileCodecType::kNone; } +Result TableMetadataUtil::Codec::NameToFileExtension( + std::string_view codec) { + ICEBERG_ASSIGN_OR_RAISE(MetadataFileCodecType codec_type, FromString(codec)); + return TypeToFileExtension(codec_type); +} + +std::string TableMetadataUtil::Codec::TypeToFileExtension(MetadataFileCodecType codec) { + switch (codec) { + case MetadataFileCodecType::kGzip: + return std::string(kGzipTableMetadataFileSuffix); + case MetadataFileCodecType::kNone: + return std::string(kTableMetadataFileSuffix); + } + std::unreachable(); +} + +// TableMetadataUtil implementation + Result> TableMetadataUtil::Read( FileIO& io, const std::string& location, std::optional length) { - ICEBERG_ASSIGN_OR_RAISE(auto codec_type, CodecFromFileName(location)); + ICEBERG_ASSIGN_OR_RAISE(auto codec_type, Codec::FromFileName(location)); ICEBERG_ASSIGN_OR_RAISE(auto content, 
io.ReadFile(location, length)); if (codec_type == MetadataFileCodecType::kGzip) { @@ -272,6 +291,16 @@ Result> TableMetadataUtil::Read( return TableMetadataFromJson(json); } +Result TableMetadataUtil::Write(FileIO& io, const TableMetadata* base, + const std::string& base_metadata_location, + TableMetadata& metadata) { + int32_t version = ParseVersionFromLocation(base_metadata_location); + ICEBERG_ASSIGN_OR_RAISE(auto new_file_location, + NewTableMetadataFilePath(metadata, version + 1)); + ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, metadata)); + return new_file_location; +} + Status TableMetadataUtil::Write(FileIO& io, const std::string& location, const TableMetadata& metadata) { auto json = ToJson(metadata); @@ -279,6 +308,57 @@ Status TableMetadataUtil::Write(FileIO& io, const std::string& location, return io.WriteFile(location, json_string); } +void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const TableMetadata* base, + const TableMetadata& metadata) { + if (!base) { + return; + } + + bool delete_after_commit = + metadata.properties.Get(TableProperties::kMetadataDeleteAfterCommitEnabled); + if (delete_after_commit) { + auto current_files = + metadata.metadata_log | std::ranges::to>(); + std::ranges::for_each( + base->metadata_log | std::views::filter([¤t_files](const auto& entry) { + return !current_files.contains(entry); + }), + [&io](const auto& entry) { std::ignore = io.DeleteFile(entry.metadata_file); }); + } +} + +int32_t TableMetadataUtil::ParseVersionFromLocation(std::string_view metadata_location) { + size_t version_start = metadata_location.find_last_of('/') + 1; + size_t version_end = metadata_location.find('-', version_start); + + int32_t version = -1; + if (version_end != std::string::npos) { + std::from_chars(metadata_location.data() + version_start, + metadata_location.data() + version_end, version); + } + return version; +} + +Result TableMetadataUtil::NewTableMetadataFilePath(const TableMetadata& meta, + int32_t new_version) { + auto codec_name = + meta.properties.Get(TableProperties::kMetadataCompression); + ICEBERG_ASSIGN_OR_RAISE(std::string file_extension, + Codec::NameToFileExtension(codec_name)); + + std::string uuid = Uuid::GenerateV7().ToString(); + std::string filename = std::format("{:05d}-{}{}", new_version, uuid, file_extension); + + auto metadata_location = + meta.properties.Get(TableProperties::kWriteMetadataLocation); + if (!metadata_location.empty()) { + return std::format("{}/{}", LocationUtil::StripTrailingSlash(metadata_location), + filename); + } else { + return std::format("{}/{}/{}", meta.location, kMetadataFolderName, filename); + } +} + // TableMetadataBuilder implementation struct TableMetadataBuilder::Impl { @@ -295,8 +375,8 @@ struct TableMetadataBuilder::Impl { std::optional last_added_spec_id; // Metadata location tracking - std::optional metadata_location; - std::optional previous_metadata_location; + std::string metadata_location; + std::string previous_metadata_location; // indexes for convenience std::unordered_map> schemas_by_id; @@ -314,11 +394,11 @@ struct TableMetadataBuilder::Impl { metadata.current_snapshot_id = Snapshot::kInvalidSnapshotId; metadata.default_sort_order_id = SortOrder::kInitialSortOrderId; metadata.next_row_id = TableMetadata::kInitialRowId; - metadata.properties = TableProperties::default_properties(); } // Constructor from existing metadata - explicit Impl(const TableMetadata* base_metadata) + explicit Impl(const TableMetadata* base_metadata, + std::string base_metadata_location = "") : 
base(base_metadata), metadata(*base_metadata) { // Initialize index maps from base metadata for (const auto& schema : metadata.schemas) { @@ -334,6 +414,8 @@ struct TableMetadataBuilder::Impl { for (const auto& order : metadata.sort_orders) { sort_orders_by_id.emplace(order->order_id(), order); } + + metadata.last_updated_ms = kInvalidLastUpdatedMs; } }; @@ -363,12 +445,22 @@ std::unique_ptr TableMetadataBuilder::BuildFrom( TableMetadataBuilder& TableMetadataBuilder::SetMetadataLocation( std::string_view metadata_location) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + impl_->metadata_location = std::string(metadata_location); + if (impl_->base != nullptr) { + // Carry over lastUpdatedMillis from base and set previousFileLocation to null to + // avoid writing a new metadata log entry. + // This is safe since setting metadata location doesn't cause any changes and no other + // changes can be added when metadata location is configured + impl_->previous_metadata_location = std::string(); + impl_->metadata.last_updated_ms = impl_->base->last_updated_ms; + } + return *this; } TableMetadataBuilder& TableMetadataBuilder::SetPreviousMetadataLocation( std::string_view previous_metadata_location) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + impl_->previous_metadata_location = std::string(previous_metadata_location); + return *this; } TableMetadataBuilder& TableMetadataBuilder::AssignUUID() { @@ -626,7 +718,7 @@ TableMetadataBuilder& TableMetadataBuilder::SetProperties( // Add all updated properties to the metadata properties for (const auto& [key, value] : updated) { - impl_->metadata.properties->mutable_configs()[key] = value; + impl_->metadata.properties.mutable_configs()[key] = value; } // Record the change @@ -644,7 +736,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveProperties( // Remove each property from the metadata properties for (const auto& key : removed) { - impl_->metadata.properties->mutable_configs().erase(key); + impl_->metadata.properties.mutable_configs().erase(key); } // Record the change @@ -679,10 +771,23 @@ Result> TableMetadataBuilder::Build() { std::chrono::system_clock::now().time_since_epoch())}; } - // 4. Create and return the TableMetadata - auto result = std::make_unique(std::move(impl_->metadata)); + // 4. Buildup metadata_log from base metadata + int32_t max_metadata_log_size = + impl_->metadata.properties.Get(TableProperties::kMetadataPreviousVersionsMax); + if (impl_->base != nullptr && !impl_->previous_metadata_location.empty()) { + impl_->metadata.metadata_log.emplace_back(impl_->base->last_updated_ms, + impl_->previous_metadata_location); + } + if (impl_->metadata.metadata_log.size() > max_metadata_log_size) { + impl_->metadata.metadata_log.erase( + impl_->metadata.metadata_log.begin(), + impl_->metadata.metadata_log.end() - max_metadata_log_size); + } + + // TODO(anyone): 5. update snapshot_log - return result; + // 6. 
Create and return the TableMetadata + return std::make_unique(std::move(impl_->metadata)); } } // namespace iceberg diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index bc73a0baf..f428fd34f 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -29,6 +29,7 @@ #include #include "iceberg/iceberg_export.h" +#include "iceberg/table_properties.h" #include "iceberg/type_fwd.h" #include "iceberg/util/error_collector.h" #include "iceberg/util/lazy.h" @@ -99,7 +100,7 @@ struct ICEBERG_EXPORT TableMetadata { /// The highest assigned partition field ID across all partition specs for the table int32_t last_partition_id; /// A string to string map of table properties - std::shared_ptr properties; + TableProperties properties; /// ID of the current table snapshot int64_t current_snapshot_id; /// A list of valid snapshots @@ -460,11 +461,37 @@ enum class ICEBERG_EXPORT MetadataFileCodecType { /// \brief Utility class for table metadata struct ICEBERG_EXPORT TableMetadataUtil { - /// \brief Get the codec type from the table metadata file name. - /// - /// \param file_name The name of the table metadata file. - /// \return The codec type of the table metadata file. - static Result CodecFromFileName(std::string_view file_name); + struct ICEBERG_EXPORT Codec { + /// \brief Returns the MetadataFileCodecType corresponding to the given string. + /// + /// \param name The string to parse. + /// \return The MetadataFileCodecType corresponding to the given string. + static Result FromString(std::string_view name); + + /// \brief Get the codec type from the table metadata file name. + /// + /// \param file_name The name of the table metadata file. + /// \return The codec type of the table metadata file. + static Result FromFileName(std::string_view file_name); + + /// \brief Get the file extension from the codec type. + /// \param codec The codec name. + /// \return The file extension of the codec. + static Result NameToFileExtension(std::string_view codec); + + /// \brief Get the file extension from the codec type. + /// \param codec The codec type. + /// \return The file extension of the codec. + static std::string TypeToFileExtension(MetadataFileCodecType codec); + + static constexpr std::string_view kTableMetadataFileSuffix = ".metadata.json"; + static constexpr std::string_view kCompGzipTableMetadataFileSuffix = + ".metadata.json.gz"; + static constexpr std::string_view kGzipTableMetadataFileSuffix = ".gz.metadata.json"; + static constexpr std::string_view kGzipTableMetadataFileExtension = ".gz"; + static constexpr std::string_view kCodecTypeGzip = "GZIP"; + static constexpr std::string_view kCodecTypeNone = "NONE"; + }; /// \brief Read the table metadata file. /// @@ -476,6 +503,32 @@ struct ICEBERG_EXPORT TableMetadataUtil { class FileIO& io, const std::string& location, std::optional length = std::nullopt); + /// \brief Write a new metadata file to storage. + /// + /// Serializes the table metadata to JSON and writes it to a new metadata + /// file. If no location is specified in the metadata, generates a new + /// file path based on the version number. 
+ /// + /// \param io The FileIO instance for writing files + /// \param base The base metadata (can be null for new tables) + /// \param metadata The metadata to write, which will be updated with the new location + /// \return The new metadata location + static Result Write(FileIO& io, const TableMetadata* base, + const std::string& base_metadata_location, + TableMetadata& metadata); + + /// \brief Delete removed metadata files based on retention policy. + /// + /// Removes obsolete metadata files that are no longer referenced in the + /// current metadata log, based on the metadata.delete-after-commit.enabled + /// property. + /// + /// \param io The FileIO instance for deleting files + /// \param base The previous metadata version + /// \param metadata The current metadata containing the updated log + static void DeleteRemovedMetadataFiles(FileIO& io, const TableMetadata* base, + const TableMetadata& metadata); + /// \brief Write the table metadata to a file. /// /// \param io The file IO to use to write the table metadata. @@ -483,6 +536,36 @@ struct ICEBERG_EXPORT TableMetadataUtil { /// \param metadata The table metadata to write. static Status Write(FileIO& io, const std::string& location, const TableMetadata& metadata); + + private: + /// \brief Parse the version number from a metadata file location. + /// + /// Extracts the version number from a metadata file path which follows + /// the format: vvvvv-uuid.metadata.json where vvvvv is the zero-padded + /// version number. + /// + /// \param metadata_location The metadata file location string + /// \return The parsed version number, or -1 if parsing fails or the + /// location doesn't contain a version + static int32_t ParseVersionFromLocation(std::string_view metadata_location); + + /// \brief Generate a new metadata file path for a table. + /// + /// \param metadata The table metadata. + /// \param version The version number for the new metadata file. + /// \return The generated metadata file path. 
+ static Result NewTableMetadataFilePath(const TableMetadata& metadata, + int32_t version); }; } // namespace iceberg + +namespace std { +template <> +struct hash { + size_t operator()(const iceberg::MetadataLogEntry& m) const noexcept { + return std::hash{}(m.metadata_file); + } +}; + +} // namespace std diff --git a/src/iceberg/table_properties.cc b/src/iceberg/table_properties.cc index 035a6dda8..96633bc24 100644 --- a/src/iceberg/table_properties.cc +++ b/src/iceberg/table_properties.cc @@ -31,14 +31,12 @@ const std::unordered_set& TableProperties::reserved_properties() { return kReservedProperties; } -std::unique_ptr TableProperties::default_properties() { - return std::unique_ptr(new TableProperties()); -} +TableProperties TableProperties::default_properties() { return {}; } -std::unique_ptr TableProperties::FromMap( +TableProperties TableProperties::FromMap( std::unordered_map properties) { - auto table_properties = std::unique_ptr(new TableProperties()); - table_properties->configs_ = std::move(properties); + TableProperties table_properties; + table_properties.configs_ = std::move(properties); return table_properties; } diff --git a/src/iceberg/table_properties.h b/src/iceberg/table_properties.h index e9131cd96..debe61da2 100644 --- a/src/iceberg/table_properties.h +++ b/src/iceberg/table_properties.h @@ -252,7 +252,7 @@ class ICEBERG_EXPORT TableProperties : public ConfigBase { inline static Entry kMinSnapshotsToKeep{"history.expire.min-snapshots-to-keep", 1}; inline static Entry kMaxRefAgeMs{"history.expire.max-ref-age-ms", - std::numeric_limits::max()}; + (std::numeric_limits::max)()}; // Delete/Update/Merge properties @@ -289,17 +289,17 @@ class ICEBERG_EXPORT TableProperties : public ConfigBase { /// \brief Create a default TableProperties instance. /// /// \return A unique pointer to a TableProperties instance with default values - static std::unique_ptr default_properties(); + static TableProperties default_properties(); /// \brief Create a TableProperties instance from a map of key-value pairs. 
/// /// \param properties The map containing property key-value pairs /// \return A unique pointer to a TableProperties instance - static std::unique_ptr FromMap( - std::unordered_map properties); + static TableProperties FromMap(std::unordered_map properties); - private: - TableProperties() = default; + bool operator==(const TableProperties& other) const { + return configs_ == other.configs_; + } }; } // namespace iceberg diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index a48567132..3cb1b0114 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -106,6 +106,7 @@ add_iceberg_test(util_test decimal_test.cc endian_test.cc formatter_test.cc + location_util_test.cc string_util_test.cc truncate_util_test.cc uuid_test.cc diff --git a/src/iceberg/test/config_test.cc b/src/iceberg/test/config_test.cc index ec3b39156..b7f378bf8 100644 --- a/src/iceberg/test/config_test.cc +++ b/src/iceberg/test/config_test.cc @@ -96,13 +96,26 @@ TEST(ConfigTest, BasicOperations) { ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE2); ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 2.99); + // Test setting values again + config->Set(TestConfig::kStringConfig, std::string("newer_value")); + config->Set(TestConfig::kIntConfig, 200); + config->Set(TestConfig::kBoolConfig, false); + config->Set(TestConfig::kEnumConfig, TestEnum::VALUE1); + config->Set(TestConfig::kDoubleConfig, 3.99); + + ASSERT_EQ(config->Get(TestConfig::kStringConfig), "newer_value"); + ASSERT_EQ(config->Get(TestConfig::kIntConfig), 200); + ASSERT_EQ(config->Get(TestConfig::kBoolConfig), false); + ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE1); + ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 3.99); + // Test unsetting a value config->Unset(TestConfig::kIntConfig); config->Unset(TestConfig::kEnumConfig); config->Unset(TestConfig::kDoubleConfig); ASSERT_EQ(config->Get(TestConfig::kIntConfig), 25); - ASSERT_EQ(config->Get(TestConfig::kStringConfig), "new_value"); - ASSERT_EQ(config->Get(TestConfig::kBoolConfig), true); + ASSERT_EQ(config->Get(TestConfig::kStringConfig), "newer_value"); + ASSERT_EQ(config->Get(TestConfig::kBoolConfig), false); ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE1); ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 3.14); diff --git a/src/iceberg/test/in_memory_catalog_test.cc b/src/iceberg/test/in_memory_catalog_test.cc index f7e2f50a9..d1a8ccef9 100644 --- a/src/iceberg/test/in_memory_catalog_test.cc +++ b/src/iceberg/test/in_memory_catalog_test.cc @@ -20,6 +20,8 @@ #include "iceberg/catalog/memory/in_memory_catalog.h" #include +#include +#include #include #include @@ -28,10 +30,14 @@ #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/schema.h" #include "iceberg/table.h" +#include "iceberg/table_identifier.h" #include "iceberg/table_metadata.h" +#include "iceberg/table_requirement.h" +#include "iceberg/table_update.h" #include "iceberg/test/matchers.h" #include "iceberg/test/mock_catalog.h" #include "iceberg/test/test_resource.h" +#include "iceberg/util/uuid.h" namespace iceberg { @@ -63,7 +69,7 @@ class InMemoryCatalogTest : public ::testing::Test { info->test_suite_name(), info->name(), table_name); // generate a unique directory for the table std::error_code ec; - std::filesystem::create_directories(table_location, ec); + std::filesystem::create_directories(table_location + "metadata", ec); if (ec) { throw std::runtime_error( std::format("Failed to create temporary directory: {}, error 
message: {}", @@ -166,6 +172,65 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { ASSERT_EQ(loaded_table->current_snapshot().value()->snapshot_id, 2); } +TEST_F(InMemoryCatalogTest, UpdateTable) { + // First, create and register a table + TableIdentifier table_ident{.ns = {}, .name = "t1"}; + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, + ReadTableMetadataFromResource("TableMetadataV2Valid.json")); + + auto table_location = GenerateTestTableLocation(table_ident.name); + auto metadata_location = std::format("{}/metadata/00001-{}.metadata.json", + table_location, Uuid::GenerateV7().ToString()); + metadata->location = table_location; + auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); + EXPECT_THAT(status, IsOk()); + + auto table = catalog_->RegisterTable(table_ident, metadata_location); + EXPECT_THAT(table, IsOk()); + ASSERT_EQ(table.value()->name().name, "t1"); + ASSERT_EQ(table.value()->metadata_file_location(), metadata_location); + + // Prepare updates - add a new property + std::vector> updates; + auto property_update = std::make_unique( + std::unordered_map{{"property2", "value2"}}); + updates.push_back(std::move(property_update)); + + // Prepare requirements - assert table must exist + std::vector> requirements; + requirements.push_back(std::make_unique(metadata->table_uuid)); + + // Perform the update on a nonexist table + TableIdentifier nonexist_table_ident{.ns = {}, .name = "nonexist_table"}; + auto res = catalog_->UpdateTable(nonexist_table_ident, std::move(requirements), + std::move(updates)); + EXPECT_THAT(res, IsError(ErrorKind::kNotFound)); + + // Verify requirements failed on an exist table + std::vector> bad_requirements; + bad_requirements.push_back(std::make_unique("invalid-uuid")); + res = + catalog_->UpdateTable(table_ident, std::move(bad_requirements), std::move(updates)); + EXPECT_THAT(res, IsError(ErrorKind::kCommitFailed)); + + // Perform the update + auto update_result = catalog_->UpdateTable(table_ident, requirements, updates); + EXPECT_THAT(update_result, IsOk()); + + // Verify the update by loading the table and checking properties + auto load_result = catalog_->LoadTable(table_ident); + EXPECT_THAT(load_result, IsOk()); + + auto updated_table = std::move(load_result.value()); + + // Verify that metadata file was updated (should have a new version) + EXPECT_EQ(table.value()->uuid(), updated_table->uuid()); + EXPECT_GT(updated_table->last_updated_ms(), table.value()->last_updated_ms()); + EXPECT_THAT(updated_table->metadata_file_location(), + testing::HasSubstr("metadata/00002-")); +} + TEST_F(InMemoryCatalogTest, DropTable) { TableIdentifier tableIdent{.ns = {}, .name = "t1"}; auto result = catalog_->DropTable(tableIdent, false); diff --git a/src/iceberg/test/location_util_test.cc b/src/iceberg/test/location_util_test.cc new file mode 100644 index 000000000..7098868a9 --- /dev/null +++ b/src/iceberg/test/location_util_test.cc @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/location_util.h" + +#include + +namespace iceberg { + +TEST(LocationUtilTest, StripTrailingSlash) { + // Test normal paths with trailing slashes + ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir/")); + ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir//")); + ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir///")); + + // Test paths without trailing slashes + ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir")); + ASSERT_EQ("/path/to/file.txt", LocationUtil::StripTrailingSlash("/path/to/file.txt")); + + // Test root path + ASSERT_EQ("", LocationUtil::StripTrailingSlash("/")); + ASSERT_EQ("", LocationUtil::StripTrailingSlash("//")); + + // Test empty string + ASSERT_EQ("", LocationUtil::StripTrailingSlash("")); + + // Test URLs with protocols + ASSERT_EQ("http://example.com", + LocationUtil::StripTrailingSlash("http://example.com/")); + ASSERT_EQ("https://example.com/path", + LocationUtil::StripTrailingSlash("https://example.com/path/")); + + // Test that protocol endings are preserved + ASSERT_EQ("http://", LocationUtil::StripTrailingSlash("http://")); + ASSERT_EQ("https://", LocationUtil::StripTrailingSlash("https://")); + ASSERT_EQ("s3://", LocationUtil::StripTrailingSlash("s3://")); + + // Test paths with protocol-like substrings in the middle + ASSERT_EQ("/path/http://test", LocationUtil::StripTrailingSlash("/path/http://test/")); + ASSERT_EQ("/path/https://test", + LocationUtil::StripTrailingSlash("/path/https://test/")); + + // Test multiple slashes not at the end + ASSERT_EQ("/path//to/dir", LocationUtil::StripTrailingSlash("/path//to/dir/")); + ASSERT_EQ("/path///to/dir", LocationUtil::StripTrailingSlash("/path///to/dir/")); +} + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 6a2a9e9ab..f12b43afa 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -85,6 +85,7 @@ iceberg_tests = { 'decimal_test.cc', 'endian_test.cc', 'formatter_test.cc', + 'location_util_test.cc', 'string_util_test.cc', 'truncate_util_test.cc', 'uuid_test.cc', diff --git a/src/iceberg/test/metadata_io_test.cc b/src/iceberg/test/metadata_io_test.cc index a7ce162dd..ea4555d3d 100644 --- a/src/iceberg/test/metadata_io_test.cc +++ b/src/iceberg/test/metadata_io_test.cc @@ -17,16 +17,20 @@ * under the License. 
*/ +#include + #include #include #include #include +#include #include #include #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/file_io.h" #include "iceberg/json_internal.h" +#include "iceberg/result.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" #include "iceberg/table_metadata.h" @@ -42,7 +46,10 @@ class MetadataIOTest : public TempFileTestBase { TempFileTestBase::SetUp(); io_ = std::make_shared( std::make_shared<::arrow::fs::LocalFileSystem>()); - temp_filepath_ = CreateNewTempFilePathWithSuffix(".metadata.json"); + location_ = CreateTempDirectory(); + temp_filepath_ = std::format("{}/{}", location_, "metadata/00000-xxx.metadata.json"); + ASSERT_TRUE( + std::filesystem::create_directories(std::format("{}/metadata", location_))); } TableMetadata PrepareMetadata() { @@ -53,7 +60,7 @@ class MetadataIOTest : public TempFileTestBase { return TableMetadata{.format_version = 1, .table_uuid = "1234567890", - .location = "s3://bucket/path", + .location = location_, .last_sequence_number = 0, .schemas = {schema}, .current_schema_id = 1, @@ -73,6 +80,7 @@ class MetadataIOTest : public TempFileTestBase { } std::shared_ptr io_; + std::string location_; std::string temp_filepath_; }; @@ -126,4 +134,74 @@ TEST_F(MetadataIOTest, ReadWriteCompressedMetadata) { EXPECT_EQ(*metadata_read, metadata); } +TEST_F(MetadataIOTest, WriteMetadataWithBase) { + TableMetadata base = PrepareMetadata(); + + { + // Invalid base metadata_file_location, set version to 0 + TableMetadata new_metadata = PrepareMetadata(); + ICEBERG_UNWRAP_OR_FAIL( + auto new_metadata_location, + TableMetadataUtil::Write(*io_, &base, "invalid_location", new_metadata)); + EXPECT_THAT(new_metadata_location, testing::HasSubstr("/metadata/00000-")); + } + + // Reset base metadata_file_location + // base.metadata_file_location = temp_filepath_; + + { + // Specify write location property + TableMetadata new_metadata = PrepareMetadata(); + new_metadata.properties.Set(TableProperties::kWriteMetadataLocation, location_); + ICEBERG_UNWRAP_OR_FAIL( + auto new_metadata_location, + TableMetadataUtil::Write(*io_, &base, temp_filepath_, new_metadata)); + EXPECT_THAT(new_metadata_location, + testing::HasSubstr(std::format("{}/00001-", location_))); + } + + { + // Default write location + TableMetadata new_metadata = PrepareMetadata(); + ICEBERG_UNWRAP_OR_FAIL( + auto new_metadata_location, + TableMetadataUtil::Write(*io_, &base, temp_filepath_, new_metadata)); + EXPECT_THAT(new_metadata_location, + testing::HasSubstr(std::format("{}/metadata/00001-", location_))); + } +} + +TEST_F(MetadataIOTest, RemoveDeletedMetadataFiles) { + TableMetadata base1 = PrepareMetadata(); + base1.properties.Set(TableProperties::kMetadataPreviousVersionsMax, 1); + ICEBERG_UNWRAP_OR_FAIL(auto base1_metadata_location, + TableMetadataUtil::Write(*io_, nullptr, "", base1)); + + ICEBERG_UNWRAP_OR_FAIL(auto base2, + TableMetadataBuilder::BuildFrom(&base1) + ->SetPreviousMetadataLocation(base1_metadata_location) + .Build()); + ICEBERG_UNWRAP_OR_FAIL( + auto base2_metadata_location, + TableMetadataUtil::Write(*io_, &base1, base1_metadata_location, *base2)); + + ICEBERG_UNWRAP_OR_FAIL(auto new_metadata, + TableMetadataBuilder::BuildFrom(base2.get()) + ->SetPreviousMetadataLocation(base2_metadata_location) + .Build()); + ICEBERG_UNWRAP_OR_FAIL(auto new_metadata_location, + TableMetadataUtil::Write( + *io_, base2.get(), base2_metadata_location, *new_metadata)); + + // The first metadata file should not be deleted + 
new_metadata->properties.Set(TableProperties::kMetadataDeleteAfterCommitEnabled, false); + TableMetadataUtil::DeleteRemovedMetadataFiles(*io_, base2.get(), *new_metadata); + EXPECT_TRUE(std::filesystem::exists(base1_metadata_location)); + + // The first metadata file should be deleted + new_metadata->properties.Set(TableProperties::kMetadataDeleteAfterCommitEnabled, true); + TableMetadataUtil::DeleteRemovedMetadataFiles(*io_, base2.get(), *new_metadata); + EXPECT_FALSE(std::filesystem::exists(base1_metadata_location)); +} + } // namespace iceberg diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc index 5a270c495..bb9ce8c01 100644 --- a/src/iceberg/test/table_metadata_builder_test.cc +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -21,9 +21,11 @@ #include #include +#include #include #include "iceberg/partition_spec.h" +#include "iceberg/result.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" #include "iceberg/sort_field.h" @@ -85,11 +87,14 @@ TEST(TableMetadataBuilderTest, BuildFromEmpty) { EXPECT_EQ(metadata->default_spec_id, PartitionSpec::kInitialSpecId); EXPECT_EQ(metadata->default_sort_order_id, SortOrder::kInitialSortOrderId); EXPECT_EQ(metadata->current_snapshot_id, Snapshot::kInvalidSnapshotId); + EXPECT_TRUE(metadata->metadata_log.empty()); } TEST(TableMetadataBuilderTest, BuildFromExisting) { auto base = CreateBaseMetadata(); + std::string base_metadata_location = "s3://bucket/test/00010-xxx.metadata.json"; auto builder = TableMetadataBuilder::BuildFrom(base.get()); + builder->SetPreviousMetadataLocation(base_metadata_location); ASSERT_NE(builder, nullptr); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); @@ -98,6 +103,50 @@ TEST(TableMetadataBuilderTest, BuildFromExisting) { EXPECT_EQ(metadata->format_version, 2); EXPECT_EQ(metadata->table_uuid, "test-uuid-1234"); EXPECT_EQ(metadata->location, "s3://bucket/test"); + ASSERT_EQ(1, metadata->metadata_log.size()); + EXPECT_EQ(base_metadata_location, metadata->metadata_log[0].metadata_file); + EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[0].timestamp_ms); +} + +TEST(TableMetadataBuilderTest, BuildupMetadataLog) { + auto base = CreateBaseMetadata(); + std::string base_metadata_location = "s3://bucket/test/00010-xxx.metadata.json"; + base->metadata_log = { + {.timestamp_ms = TimePointMs{std::chrono::milliseconds(100)}, + .metadata_file = "s3://bucket/test/00000-aaa.metadata.json"}, + {.timestamp_ms = TimePointMs{std::chrono::milliseconds(200)}, + .metadata_file = "s3://bucket/test/00001-bbb.metadata.json"}, + }; + + { + // Base metadata_log size less than max size + base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 3); + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + builder->SetPreviousMetadataLocation(base_metadata_location); + ASSERT_NE(builder, nullptr); + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(3, metadata->metadata_log.size()); + EXPECT_EQ(base->metadata_log[0].metadata_file, + metadata->metadata_log[0].metadata_file); + EXPECT_EQ(base->metadata_log[1].metadata_file, + metadata->metadata_log[1].metadata_file); + EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[2].timestamp_ms); + EXPECT_EQ(base_metadata_location, metadata->metadata_log[2].metadata_file); + } + + { + // Base metadata_log size greater than max size + base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 2); + auto builder = TableMetadataBuilder::BuildFrom(base.get()); + 
builder->SetPreviousMetadataLocation(base_metadata_location); + ASSERT_NE(builder, nullptr); + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(2, metadata->metadata_log.size()); + EXPECT_EQ(base->metadata_log[1].metadata_file, + metadata->metadata_log[0].metadata_file); + EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[1].timestamp_ms); + EXPECT_EQ(base_metadata_location, metadata->metadata_log[1].metadata_file); + } } // Test AssignUUID @@ -147,9 +196,9 @@ TEST(TableMetadataBuilderTest, SetProperties) { builder->SetProperties({{"key1", "value1"}, {"key2", "value2"}}); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->properties->configs().size(), 2); - EXPECT_EQ(metadata->properties->configs().at("key1"), "value1"); - EXPECT_EQ(metadata->properties->configs().at("key2"), "value2"); + EXPECT_EQ(metadata->properties.configs().size(), 2); + EXPECT_EQ(metadata->properties.configs().at("key1"), "value1"); + EXPECT_EQ(metadata->properties.configs().at("key2"), "value2"); // Update existing property and add new one builder = TableMetadataBuilder::BuildFromEmpty(2); @@ -157,9 +206,9 @@ TEST(TableMetadataBuilderTest, SetProperties) { builder->SetProperties({{"key1", "new_value1"}, {"key3", "value3"}}); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->properties->configs().size(), 2); - EXPECT_EQ(metadata->properties->configs().at("key1"), "new_value1"); - EXPECT_EQ(metadata->properties->configs().at("key3"), "value3"); + EXPECT_EQ(metadata->properties.configs().size(), 2); + EXPECT_EQ(metadata->properties.configs().at("key1"), "new_value1"); + EXPECT_EQ(metadata->properties.configs().at("key3"), "value3"); } TEST(TableMetadataBuilderTest, RemoveProperties) { @@ -168,9 +217,9 @@ TEST(TableMetadataBuilderTest, RemoveProperties) { builder->RemoveProperties({"key2", "key4"}); // key4 does not exist ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->properties->configs().size(), 2); - EXPECT_EQ(metadata->properties->configs().at("key1"), "value1"); - EXPECT_EQ(metadata->properties->configs().at("key3"), "value3"); + EXPECT_EQ(metadata->properties.configs().size(), 2); + EXPECT_EQ(metadata->properties.configs().at("key1"), "value1"); + EXPECT_EQ(metadata->properties.configs().at("key3"), "value3"); } TEST(TableMetadataBuilderTest, UpgradeFormatVersion) { diff --git a/src/iceberg/util/config.h b/src/iceberg/util/config.h index 7e6742e9b..5adbc96d9 100644 --- a/src/iceberg/util/config.h +++ b/src/iceberg/util/config.h @@ -89,7 +89,7 @@ class ConfigBase { template ConfigBase& Set(const Entry& entry, const T& val) { - configs_.emplace(entry.key_, entry.to_str_(val)); + configs_[entry.key_] = entry.to_str_(val); return *this; } diff --git a/src/iceberg/util/location_util.h b/src/iceberg/util/location_util.h new file mode 100644 index 000000000..547dd5cef --- /dev/null +++ b/src/iceberg/util/location_util.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "iceberg/iceberg_export.h"
+
+namespace iceberg {
+
+class ICEBERG_EXPORT LocationUtil {
+ public:
+  static std::string StripTrailingSlash(const std::string& path) {
+    if (path.empty()) {
+      return "";
+    }
+
+    std::string_view result = path;
+    while (result.ends_with("/") && !result.ends_with("://")) {
+      result.remove_suffix(1);
+    }
+    return std::string(result);
+  }
+};
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 0e0508476..9f3277533 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -28,6 +28,7 @@ install_headers(
     'formatter.h',
     'int128.h',
     'lazy.h',
+    'location_util.h',
     'macros.h',
    'partition_value_util.h',
     'string_util.h',
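
Reviewer note, not part of the patch: a minimal sketch of how the pieces added above compose when a catalog commits a metadata update, using only APIs visible in this diff (TableMetadataBuilder, TableMetadataUtil, TableProperties). The helper name CommitNewMetadata, its parameter names, and the ownership choices are illustrative assumptions, not code from the repository; locking, namespace bookkeeping, and TableRequirement validation (handled by InMemoryCatalog::UpdateTable) are intentionally omitted.

// Illustrative sketch only: stage the next metadata version, persist it, then prune old files.
#include "iceberg/file_io.h"
#include "iceberg/result.h"
#include "iceberg/table_metadata.h"
#include "iceberg/util/macros.h"

namespace iceberg {

Result<std::string> CommitNewMetadata(FileIO& io, const TableMetadata* base,
                                      const std::string& base_location) {
  // Recording the previous location makes Build() append a metadata_log entry,
  // trimmed to TableProperties::kMetadataPreviousVersionsMax.
  auto builder = TableMetadataBuilder::BuildFrom(base);
  builder->SetPreviousMetadataLocation(base_location);
  ICEBERG_ASSIGN_OR_RAISE(auto updated, builder->Build());

  // Writes <table-location>/metadata/<version+1>-<uuid><ext>, or under the
  // TableProperties::kWriteMetadataLocation directory when that property is set.
  ICEBERG_ASSIGN_OR_RAISE(auto new_location,
                          TableMetadataUtil::Write(io, base, base_location, *updated));

  // Deletes log entries dropped from base only when
  // TableProperties::kMetadataDeleteAfterCommitEnabled is true.
  TableMetadataUtil::DeleteRemovedMetadataFiles(io, base, *updated);
  return new_location;
}

}  // namespace iceberg

This mirrors the flow exercised by InMemoryCatalogTest.UpdateTable and MetadataIOTest.RemoveDeletedMetadataFiles: each successful commit produces a new 00001-, 00002-, ... metadata file and the retention properties decide how many predecessors survive.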