Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions src/iceberg/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "iceberg/table_properties.h"
#include "iceberg/transform.h"
#include "iceberg/type.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/json_util_internal.h"
#include "iceberg/util/macros.h"
Expand Down Expand Up @@ -309,7 +310,9 @@ nlohmann::json ToJson(const Type& type) {
nlohmann::json ToJson(const Schema& schema) {
nlohmann::json json = ToJson(static_cast<const Type&>(schema));
json[kSchemaId] = schema.schema_id();
// TODO(gangwu): add identifier-field-ids.
if (!schema.IdentifierFieldIds().empty()) {
json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
}
return json;
}

Expand Down Expand Up @@ -473,10 +476,21 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json) {
if (type->type_id() != TypeId::kStruct) [[unlikely]] {
return JsonParseError("Schema must be a struct type, but got {}", SafeDumpJson(json));
}
auto& struct_type = internal::checked_cast<StructType&>(*type);

std::vector<SchemaField> fields;
fields.reserve(struct_type.fields().size());
for (auto& field : struct_type.fields()) {
fields.emplace_back(std::move(field));
}

ICEBERG_ASSIGN_OR_RAISE(
auto identifier_field_ids,
GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));

auto& struct_type = static_cast<StructType&>(*type);
auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId);
return FromStructType(std::move(struct_type), schema_id);
return std::make_unique<Schema>(std::move(fields),
schema_id_opt.value_or(Schema::kInitialSchemaId),
std::move(identifier_field_ids));
}

nlohmann::json ToJson(const PartitionField& partition_field) {
Expand Down
3 changes: 2 additions & 1 deletion src/iceberg/test/metadata_serde_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
SchemaField::MakeRequired(2, "y", int64()),
SchemaField::MakeRequired(3, "z", int64())},
/*schema_id=*/1);
/*schema_id=*/1,
/*identifier_field_ids=*/std::vector<int32_t>{1, 2});

auto expected_spec_result = PartitionSpec::Make(
/*spec_id=*/0,
Expand Down
49 changes: 49 additions & 0 deletions src/iceberg/test/schema_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "iceberg/json_internal.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"

namespace iceberg {
Expand Down Expand Up @@ -133,4 +134,52 @@ TEST(SchemaJsonTest, RoundTrip) {
ASSERT_EQ(dumped_json, json);
}

TEST(SchemaJsonTest, IdentifierFieldIds) {
// Test schema with identifier-field-ids
constexpr std::string_view json_with_identifier_str =
R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},
{"id":2,"name":"data","required":false,"type":"string"}],
"identifier-field-ids":[1],
"schema-id":1,
"type":"struct"})";

auto json_with_identifiers = nlohmann::json::parse(json_with_identifier_str);
ICEBERG_UNWRAP_OR_FAIL(auto schema_with_identifers,
SchemaFromJson(json_with_identifiers));
ASSERT_EQ(schema_with_identifers->fields().size(), 2);
ASSERT_EQ(schema_with_identifers->schema_id(), 1);
ASSERT_EQ(schema_with_identifers->IdentifierFieldIds().size(), 1);
ASSERT_EQ(schema_with_identifers->IdentifierFieldIds()[0], 1);
ASSERT_EQ(ToJson(*schema_with_identifers), json_with_identifiers);

// Test schema without identifier-field-ids
constexpr std::string_view json_without_identifiers_str =
R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},
{"id":2,"name":"name","required":false,"type":"string"}],
"schema-id":1,
"type":"struct"})";

auto json_without_identifiers = nlohmann::json::parse(json_without_identifiers_str);
ICEBERG_UNWRAP_OR_FAIL(auto schema_without_identifiers,
SchemaFromJson(json_without_identifiers));
ASSERT_TRUE(schema_without_identifiers->IdentifierFieldIds().empty());
ASSERT_EQ(ToJson(*schema_without_identifiers), json_without_identifiers);

// Test schema with multiple identifier fields
constexpr std::string_view json_multi_identifiers_str =
R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},
{"id":2,"name":"org_id","required":true,"type":"long"},
{"id":3,"name":"data","required":false,"type":"string"}],
"identifier-field-ids":[1,2],
"schema-id":2,
"type":"struct"})";
auto json_multi_identifiers = nlohmann::json::parse(json_multi_identifiers_str);
ICEBERG_UNWRAP_OR_FAIL(auto schema_multi_identifiers,
SchemaFromJson(json_multi_identifiers));
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds().size(), 2);
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[0], 1);
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[1], 2);
ASSERT_EQ(ToJson(*schema_multi_identifiers), json_multi_identifiers);
}

} // namespace iceberg
Loading