Skip to content

Commit 7223810

Browse files
committed
feat: add SnapshotUpdate interface for snapshot-producing operations
Add a SnapshotUpdate<Derived> interface that extends PendingUpdateTyped<Snapshot> to provide common methods for all updates that create a new table snapshot. This follows the Java Iceberg API, where SnapshotUpdate<ThisT> provides a fluent API for operations like AppendFiles, DeleteFiles, and OverwriteFiles.

Key features:
- Uses CRTP for a type-safe fluent API with method chaining
- Set() method to add summary properties to snapshots
- StageOnly() method to stage snapshots without updating the table's current snapshot
- Extends PendingUpdateTyped<Snapshot> to inherit Apply() and Commit()
- Protected members allow derived classes to access the summary and stage_only state

This interface will be extended by concrete snapshot operations such as:
- AppendFiles: add new data files to the table
- DeleteFiles: remove data files from the table
- OverwriteFiles: replace data files in the table
- RewriteFiles: compact and optimize data files

Changes:
- Add src/iceberg/snapshot_update.h with the SnapshotUpdate interface
- Add a forward declaration to type_fwd.h
- Add comprehensive unit tests in snapshot_update_test.cc (10 test cases)
- Update CMakeLists.txt to include the new test file (alphabetically sorted)
1 parent 3855012 commit 7223810

File tree

4 files changed

+253
-0
lines changed

4 files changed

+253
-0
lines changed

src/iceberg/snapshot_update.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/snapshot_update.h
23+
/// API for table updates that produce snapshots
24+
25+
#include <string>
26+
#include <string_view>
27+
#include <unordered_map>
28+
29+
#include "iceberg/iceberg_export.h"
30+
#include "iceberg/pending_update.h"
31+
#include "iceberg/type_fwd.h"
32+
33+
namespace iceberg {
34+
35+
/// \brief Interface for updates that produce a new table snapshot
36+
///
37+
/// SnapshotUpdate extends PendingUpdate to provide common methods for all
38+
/// updates that create a new table Snapshot. Implementations include operations
39+
/// like AppendFiles, DeleteFiles, OverwriteFiles, and RewriteFiles.
40+
///
41+
/// This interface uses CRTP (Curiously Recurring Template Pattern) to enable
42+
/// fluent API method chaining in derived classes, matching the Java pattern
43+
/// where SnapshotUpdate<ThisT> allows methods to return the actual derived type.
44+
///
45+
/// \tparam Derived The actual implementation class (e.g., AppendFiles)
46+
template <typename Derived>
47+
class ICEBERG_EXPORT SnapshotUpdate : public PendingUpdateTyped<Snapshot> {
48+
public:
49+
~SnapshotUpdate() override = default;
50+
51+
/// \brief Set a summary property on the snapshot
52+
///
53+
/// Summary properties provide metadata about the changes in the snapshot,
54+
/// such as the operation type, number of files added/deleted, etc.
55+
///
56+
/// \param property The property name
57+
/// \param value The property value
58+
/// \return Reference to derived class for method chaining
59+
Derived& Set(std::string_view property, std::string_view value) {
60+
summary_[std::string(property)] = std::string(value);
61+
return static_cast<Derived&>(*this);
62+
}
63+
64+
/// \brief Stage the snapshot without updating the table's current snapshot
65+
///
66+
/// When StageOnly() is called, the snapshot will be committed to table metadata
67+
/// but will not update the current snapshot ID. The snapshot will not be added
68+
/// to the table's snapshot log. This is useful for creating wap branches or
69+
/// validating changes before making them current.
70+
///
71+
/// \return Reference to derived class for method chaining
72+
Derived& StageOnly() {
73+
stage_only_ = true;
74+
return static_cast<Derived&>(*this);
75+
}
76+
77+
protected:
78+
SnapshotUpdate() = default;
79+
80+
/// \brief Summary properties to set on the snapshot
81+
std::unordered_map<std::string, std::string> summary_;
82+
83+
/// \brief Whether to stage only without updating current snapshot
84+
bool stage_only_ = false;
85+
};
86+
87+
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ add_iceberg_test(table_test
8282
json_internal_test.cc
8383
pending_update_test.cc
8484
schema_json_test.cc
85+
snapshot_update_test.cc
8586
table_test.cc
8687
table_metadata_builder_test.cc
8788
table_requirement_test.cc
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/snapshot_update.h"
21+
22+
#include <gtest/gtest.h>
23+
24+
#include "iceberg/result.h"
25+
#include "iceberg/snapshot.h"
26+
#include "iceberg/test/matchers.h"
27+
28+
namespace iceberg {
29+
30+
// Mock implementation of SnapshotUpdate for testing
31+
class MockSnapshotUpdate : public SnapshotUpdate<MockSnapshotUpdate> {
32+
public:
33+
MockSnapshotUpdate() = default;
34+
35+
Result<Snapshot> Apply() override {
36+
if (should_fail_) {
37+
return ValidationFailed("Mock validation failed");
38+
}
39+
apply_called_ = true;
40+
// In a real implementation, this would build an actual Snapshot
41+
// For testing, return a minimal valid Snapshot
42+
return Snapshot{
43+
.snapshot_id = 1,
44+
.parent_snapshot_id = std::nullopt,
45+
.sequence_number = 1,
46+
.timestamp_ms = TimePointMs{std::chrono::milliseconds{1000}},
47+
.manifest_list = "s3://bucket/metadata/snap-1-manifest-list.avro",
48+
.summary = summary_,
49+
.schema_id = std::nullopt,
50+
};
51+
}
52+
53+
Status Commit() override {
54+
if (should_fail_commit_) {
55+
return CommitFailed("Mock commit failed");
56+
}
57+
commit_called_ = true;
58+
return {};
59+
}
60+
61+
void SetShouldFail(bool fail) { should_fail_ = fail; }
62+
void SetShouldFailCommit(bool fail) { should_fail_commit_ = fail; }
63+
bool ApplyCalled() const { return apply_called_; }
64+
bool CommitCalled() const { return commit_called_; }
65+
66+
// Expose protected members for testing
67+
const std::unordered_map<std::string, std::string>& GetSummary() const {
68+
return summary_;
69+
}
70+
bool GetStageOnly() const { return stage_only_; }
71+
72+
private:
73+
bool should_fail_ = false;
74+
bool should_fail_commit_ = false;
75+
bool apply_called_ = false;
76+
bool commit_called_ = false;
77+
};
78+
79+
// A single Set() call stores exactly one summary entry with the given value.
TEST(SnapshotUpdateTest, SetSummaryProperty) {
  MockSnapshotUpdate update;
  update.Set("operation", "append");

  const auto& summary = update.GetSummary();
  // Unsigned literal: size() returns size_t, and a signed literal would trigger
  // -Wsign-compare inside gtest's comparison template.
  EXPECT_EQ(summary.size(), 1U);
  EXPECT_EQ(summary.at("operation"), "append");
}
86+
87+
// Two chained Set() calls with distinct keys store two independent entries.
TEST(SnapshotUpdateTest, SetMultipleSummaryProperties) {
  MockSnapshotUpdate update;
  update.Set("operation", "append").Set("added-files-count", "5");

  const auto& summary = update.GetSummary();
  // Unsigned literal: size() returns size_t, and a signed literal would trigger
  // -Wsign-compare inside gtest's comparison template.
  EXPECT_EQ(summary.size(), 2U);
  EXPECT_EQ(summary.at("operation"), "append");
  EXPECT_EQ(summary.at("added-files-count"), "5");
}
95+
96+
// StageOnly() turns the stage-only flag on.
TEST(SnapshotUpdateTest, StageOnly) {
  MockSnapshotUpdate staged_update;
  staged_update.StageOnly();

  EXPECT_TRUE(staged_update.GetStageOnly());
}
102+
103+
// Set() and StageOnly() return the derived type, so they can be mixed in one chain
// and every call in the chain takes effect on the same object.
TEST(SnapshotUpdateTest, MethodChaining) {
  MockSnapshotUpdate update;
  update.Set("operation", "append")
      .Set("added-files-count", "5")
      .Set("added-records", "1000")
      .StageOnly();

  const auto& summary = update.GetSummary();
  // Unsigned literal: size() returns size_t, and a signed literal would trigger
  // -Wsign-compare inside gtest's comparison template.
  EXPECT_EQ(summary.size(), 3U);
  EXPECT_EQ(summary.at("operation"), "append");
  EXPECT_EQ(summary.at("added-files-count"), "5");
  EXPECT_EQ(summary.at("added-records"), "1000");
  EXPECT_TRUE(update.GetStageOnly());
}
116+
117+
// With no failure requested, Apply() returns a value and records that it ran.
TEST(SnapshotUpdateTest, ApplySuccess) {
  MockSnapshotUpdate update;

  auto applied = update.Apply();

  EXPECT_THAT(applied, IsOk());
  EXPECT_TRUE(update.ApplyCalled());
}
123+
124+
// When failure is requested, Apply() surfaces a validation error with the mock's
// message.
TEST(SnapshotUpdateTest, ApplyValidationFailed) {
  MockSnapshotUpdate update;
  update.SetShouldFail(true);

  auto applied = update.Apply();

  EXPECT_THAT(applied, IsError(ErrorKind::kValidationFailed));
  EXPECT_THAT(applied, HasErrorMessage("Mock validation failed"));
}
131+
132+
// With no failure requested, Commit() returns OK and records that it ran.
TEST(SnapshotUpdateTest, CommitSuccess) {
  MockSnapshotUpdate update;

  auto committed = update.Commit();

  EXPECT_THAT(committed, IsOk());
  EXPECT_TRUE(update.CommitCalled());
}
138+
139+
// When commit failure is requested, Commit() surfaces a commit-failed error with
// the mock's message.
TEST(SnapshotUpdateTest, CommitFailed) {
  MockSnapshotUpdate update;
  update.SetShouldFailCommit(true);

  auto committed = update.Commit();

  EXPECT_THAT(committed, IsError(ErrorKind::kCommitFailed));
  EXPECT_THAT(committed, HasErrorMessage("Mock commit failed"));
}
146+
147+
// A SnapshotUpdate can be owned and committed through a PendingUpdate pointer.
TEST(SnapshotUpdateTest, InheritanceFromPendingUpdate) {
  std::unique_ptr<PendingUpdate> pending = std::make_unique<MockSnapshotUpdate>();

  auto committed = pending->Commit();

  EXPECT_THAT(committed, IsOk());
}
152+
153+
// A SnapshotUpdate can be owned through a PendingUpdateTyped<Snapshot> pointer, and
// both Commit() and Apply() dispatch to the derived implementation.
TEST(SnapshotUpdateTest, InheritanceFromPendingUpdateTyped) {
  std::unique_ptr<PendingUpdateTyped<Snapshot>> typed_update =
      std::make_unique<MockSnapshotUpdate>();

  auto committed = typed_update->Commit();
  EXPECT_THAT(committed, IsOk());

  auto applied = typed_update->Apply();
  EXPECT_THAT(applied, IsOk());
}
162+
163+
} // namespace iceberg

src/iceberg/type_fwd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ class TableUpdateContext;
159159
class PendingUpdate;
160160
template <typename T>
161161
class PendingUpdateTyped;
162+
template <typename Derived>
163+
class SnapshotUpdate;
162164

163165
/// ----------------------------------------------------------------------------
164166
/// TODO: Forward declarations below are not added yet.

0 commit comments

Comments
 (0)