Skip to content

Commit 797e888

Browse files
committed
fix: add identifier-field-ids JSON serialization/deserialization
- Add serialization of identifier-field-ids in ToJson(Schema) - Add deserialization of identifier-field-ids in SchemaFromJson() - Add comprehensive test coverage for: * Schema with single identifier field * Schema without identifier fields * Schema with multiple identifier fields * Round-trip serialization/deserialization Fixes the TODO in json_internal.cc for identifier-field-ids support.
1 parent 61a7de5 commit 797e888

File tree

3 files changed

+64
-3
lines changed

3 files changed

+64
-3
lines changed

src/iceberg/json_internal.cc

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,9 @@ nlohmann::json ToJson(const Type& type) {
309309
nlohmann::json ToJson(const Schema& schema) {
310310
nlohmann::json json = ToJson(static_cast<const Type&>(schema));
311311
SetOptionalField(json, kSchemaId, schema.schema_id());
312-
// TODO(gangwu): add identifier-field-ids.
312+
if (!schema.IdentifierFieldIds().empty()) {
313+
json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
314+
}
313315
return json;
314316
}
315317

@@ -473,8 +475,19 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json) {
473475
return JsonParseError("Schema must be a struct type, but got {}", SafeDumpJson(json));
474476
}
475477

478+
// Parse identifier-field-ids if present
479+
ICEBERG_ASSIGN_OR_RAISE(
480+
auto identifier_field_ids,
481+
GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));
482+
476483
auto& struct_type = static_cast<StructType&>(*type);
477-
return FromStructType(std::move(struct_type), schema_id);
484+
std::vector<SchemaField> fields;
485+
fields.reserve(struct_type.fields().size());
486+
for (auto& field : struct_type.fields()) {
487+
fields.emplace_back(std::move(field));
488+
}
489+
return std::make_unique<Schema>(std::move(fields), schema_id,
490+
std::move(identifier_field_ids));
478491
}
479492

480493
nlohmann::json ToJson(const PartitionField& partition_field) {

src/iceberg/test/metadata_serde_test.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
144144
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
145145
SchemaField::MakeRequired(2, "y", int64()),
146146
SchemaField::MakeRequired(3, "z", int64())},
147-
/*schema_id=*/1);
147+
/*schema_id=*/1,
148+
/*identifier_field_ids=*/std::vector<int32_t>{1, 2});
148149

149150
auto expected_spec_result = PartitionSpec::Make(
150151
/*spec_id=*/0,

src/iceberg/test/schema_json_test.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,51 @@ TEST(SchemaJsonTest, RoundTrip) {
133133
ASSERT_EQ(dumped_json, json);
134134
}
135135

136+
TEST(SchemaJsonTest, IdentifierFieldIds) {
137+
// Test schema with identifier-field-ids
138+
constexpr std::string_view json_with_identifiers =
139+
R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"data","required":false,"type":"string"}],"identifier-field-ids":[1],"schema-id":1,"type":"struct"})";
140+
141+
// Deserialize
142+
auto from_json_result = SchemaFromJson(nlohmann::json::parse(json_with_identifiers));
143+
ASSERT_TRUE(from_json_result.has_value());
144+
auto schema = std::move(from_json_result.value());
145+
ASSERT_EQ(schema->fields().size(), 2);
146+
ASSERT_EQ(schema->schema_id(), 1);
147+
ASSERT_EQ(schema->IdentifierFieldIds().size(), 1);
148+
ASSERT_EQ(schema->IdentifierFieldIds()[0], 1);
149+
150+
// Serialize back
151+
auto serialized_json = ToJson(*schema).dump();
152+
ASSERT_EQ(serialized_json, json_with_identifiers);
153+
154+
// Test schema without identifier-field-ids
155+
constexpr std::string_view json_without_identifiers =
156+
R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},{"id":2,"name":"name","required":false,"type":"string"}],"schema-id":1,"type":"struct"})";
157+
158+
auto from_json_result2 =
159+
SchemaFromJson(nlohmann::json::parse(json_without_identifiers));
160+
ASSERT_TRUE(from_json_result2.has_value());
161+
auto schema2 = std::move(from_json_result2.value());
162+
ASSERT_TRUE(schema2->IdentifierFieldIds().empty());
163+
164+
// Serialize - should not include identifier-field-ids field
165+
auto serialized_json2 = ToJson(*schema2).dump();
166+
ASSERT_EQ(serialized_json2, json_without_identifiers);
167+
168+
// Test schema with multiple identifier fields
169+
constexpr std::string_view json_multi_identifiers =
170+
R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},{"id":2,"name":"org_id","required":true,"type":"long"},{"id":3,"name":"data","required":false,"type":"string"}],"identifier-field-ids":[1,2],"schema-id":2,"type":"struct"})";
171+
172+
auto from_json_result3 = SchemaFromJson(nlohmann::json::parse(json_multi_identifiers));
173+
ASSERT_TRUE(from_json_result3.has_value());
174+
auto schema3 = std::move(from_json_result3.value());
175+
ASSERT_EQ(schema3->IdentifierFieldIds().size(), 2);
176+
ASSERT_EQ(schema3->IdentifierFieldIds()[0], 1);
177+
ASSERT_EQ(schema3->IdentifierFieldIds()[1], 2);
178+
179+
auto serialized_json3 = ToJson(*schema3).dump();
180+
ASSERT_EQ(serialized_json3, json_multi_identifiers);
181+
}
182+
136183
} // namespace iceberg

0 commit comments

Comments
 (0)