Skip to content

Commit 7d9c840

Browse files
committed
Schema::ValidateIdentifierFields as a public static func
1 parent 6450898 commit 7d9c840

File tree

2 files changed

+66
-52
lines changed

2 files changed

+66
-52
lines changed

src/iceberg/schema.cc

Lines changed: 45 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,53 @@
3535

3636
namespace iceberg {
3737

38+
Schema::Schema(std::vector<SchemaField> fields, int32_t schema_id)
39+
: StructType(std::move(fields)), schema_id_(schema_id) {}
40+
41+
Result<std::unique_ptr<Schema>> Schema::Make(std::vector<SchemaField> fields,
42+
int32_t schema_id,
43+
std::vector<int32_t> identifier_field_ids) {
44+
auto schema = std::make_unique<Schema>(std::move(fields), schema_id);
3845

46+
if (!identifier_field_ids.empty()) {
47+
auto id_to_parent = IndexParents(*schema);
48+
for (auto field_id : identifier_field_ids) {
49+
ICEBERG_RETURN_UNEXPECTED(
50+
ValidateIdentifierFields(field_id, *schema, id_to_parent));
51+
}
52+
}
53+
54+
schema->identifier_field_ids_ = std::move(identifier_field_ids);
55+
return schema;
56+
}
57+
58+
Result<std::unique_ptr<Schema>> Schema::Make(
59+
std::vector<SchemaField> fields, int32_t schema_id,
60+
const std::vector<std::string>& identifier_field_names) {
61+
auto schema = std::make_unique<Schema>(std::move(fields), schema_id);
3962

40-
namespace {
41-
Status ValidateIdentifierFields(
63+
std::vector<int32_t> fresh_identifier_ids;
64+
for (const auto& name : identifier_field_names) {
65+
ICEBERG_ASSIGN_OR_RAISE(auto field, schema->FindFieldByName(name));
66+
if (!field) {
67+
return InvalidSchema("Cannot find identifier field: {}", name);
68+
}
69+
fresh_identifier_ids.push_back(field.value().get().field_id());
70+
}
71+
72+
if (!fresh_identifier_ids.empty()) {
73+
auto id_to_parent = IndexParents(*schema);
74+
for (auto field_id : fresh_identifier_ids) {
75+
ICEBERG_RETURN_UNEXPECTED(
76+
ValidateIdentifierFields(field_id, *schema, id_to_parent));
77+
}
78+
}
79+
80+
schema->identifier_field_ids_ = std::move(fresh_identifier_ids);
81+
return schema;
82+
}
83+
84+
Status Schema::ValidateIdentifierFields(
4285
int32_t field_id, const Schema& schema,
4386
const std::unordered_map<int32_t, int32_t>& id_to_parent) {
4487
ICEBERG_ASSIGN_OR_RAISE(auto field_opt, schema.FindFieldById(field_id));
@@ -87,53 +130,6 @@ Status ValidateIdentifierFields(
87130
}
88131
return {};
89132
}
90-
} // namespace
91-
92-
Schema::Schema(std::vector<SchemaField> fields, int32_t schema_id)
93-
: StructType(std::move(fields)), schema_id_(schema_id) {}
94-
95-
Result<std::unique_ptr<Schema>> Schema::Make(std::vector<SchemaField> fields,
96-
int32_t schema_id,
97-
std::vector<int32_t> identifier_field_ids) {
98-
auto schema = std::make_unique<Schema>(std::move(fields), schema_id);
99-
100-
if (!identifier_field_ids.empty()) {
101-
auto id_to_parent = IndexParents(*schema);
102-
for (auto field_id : identifier_field_ids) {
103-
ICEBERG_RETURN_UNEXPECTED(
104-
ValidateIdentifierFields(field_id, *schema, id_to_parent));
105-
}
106-
}
107-
108-
schema->identifier_field_ids_ = std::move(identifier_field_ids);
109-
return schema;
110-
}
111-
112-
Result<std::unique_ptr<Schema>> Schema::Make(
113-
std::vector<SchemaField> fields, int32_t schema_id,
114-
const std::vector<std::string>& identifier_field_names) {
115-
auto schema = std::make_unique<Schema>(std::move(fields), schema_id);
116-
117-
std::vector<int32_t> fresh_identifier_ids;
118-
for (const auto& name : identifier_field_names) {
119-
ICEBERG_ASSIGN_OR_RAISE(auto field, schema->FindFieldByName(name));
120-
if (!field) {
121-
return InvalidSchema("Cannot find identifier field: {}", name);
122-
}
123-
fresh_identifier_ids.push_back(field.value().get().field_id());
124-
}
125-
126-
if (!fresh_identifier_ids.empty()) {
127-
auto id_to_parent = IndexParents(*schema);
128-
for (auto field_id : fresh_identifier_ids) {
129-
ICEBERG_RETURN_UNEXPECTED(
130-
ValidateIdentifierFields(field_id, *schema, id_to_parent));
131-
}
132-
}
133-
134-
schema->identifier_field_ids_ = std::move(fresh_identifier_ids);
135-
return schema;
136-
}
137133

138134
const std::shared_ptr<Schema>& Schema::EmptySchema() {
139135
static const auto empty_schema =

src/iceberg/schema.h

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,7 @@ class ICEBERG_EXPORT Schema : public StructType {
5858
///
5959
/// \param fields The fields that make up the schema.
6060
/// \param schema_id The unique identifier for this schema (default: kInitialSchemaId).
61-
/// \param identifier_field_ids Field IDs that uniquely identify
62-
/// rows in the table (default: empty).
61+
/// \param identifier_field_ids Field IDs that uniquely identify rows in the table.
6362
/// \return A new Schema instance or Status if failed.
6463
static Result<std::unique_ptr<Schema>> Make(std::vector<SchemaField> fields,
6564
int32_t schema_id,
@@ -70,12 +69,31 @@ class ICEBERG_EXPORT Schema : public StructType {
7069
/// \param fields The fields that make up the schema.
7170
/// \param schema_id The unique identifier for this schema (default: kInitialSchemaId).
7271
/// \param identifier_field_names Canonical names of fields that uniquely identify rows
73-
/// in the table (default: empty).
72+
/// in the table.
7473
/// \return A new Schema instance or Status if failed.
7574
static Result<std::unique_ptr<Schema>> Make(
7675
std::vector<SchemaField> fields, int32_t schema_id,
7776
const std::vector<std::string>& identifier_field_names);
7877

78+
/// \brief Validate that the identifier field with the given ID is valid for the schema
79+
///
80+
/// This method checks that the specified field ID represents a valid identifier field
81+
/// according to Iceberg's identifier field requirements. It verifies that the field:
82+
/// - exists in the schema
83+
/// - is a primitive type
84+
/// - is not optional (required field)
85+
/// - is not a float or double type
86+
/// - is not nested within optional or non-struct parent fields
87+
///
88+
/// \param field_id The ID of the field to validate as an identifier field.
89+
/// \param schema The schema containing the field to validate.
90+
/// \param id_to_parent A mapping from field IDs to their parent field IDs for nested
91+
/// field validation.
92+
/// \return Status indicating success or failure of the validation.
93+
static Status ValidateIdentifierFields(
94+
int32_t field_id, const Schema& schema,
95+
const std::unordered_map<int32_t, int32_t>& id_to_parent);
96+
7997
/// \brief Get an empty schema.
8098
///
8199
/// An empty schema has no fields and a schema ID of 0.

0 commit comments

Comments
 (0)