2121
2222#include < format>
2323#include < functional>
24+ #include < stack>
2425
2526#include " iceberg/result.h"
2627#include " iceberg/row/struct_like.h"
3435
3536namespace iceberg {
3637
37- Schema::Schema (std::vector<SchemaField> fields, int32_t schema_id,
38- std::vector<int32_t > identifier_field_ids)
39- : StructType(std::move(fields)),
40- schema_id_ (schema_id),
41- identifier_field_ids_(std::move(identifier_field_ids)) {}
38+ Schema::Schema (std::vector<SchemaField> fields, int32_t schema_id)
39+ : StructType(std::move(fields)), schema_id_(schema_id) {}
40+
41+ Result<std::unique_ptr<Schema>> Schema::Make (std::vector<SchemaField> fields,
42+ int32_t schema_id,
43+ std::vector<int32_t > identifier_field_ids) {
44+ auto schema = std::make_unique<Schema>(std::move (fields), schema_id);
45+
46+ if (!identifier_field_ids.empty ()) {
47+ auto id_to_parent = IndexParents (*schema);
48+ for (auto field_id : identifier_field_ids) {
49+ ICEBERG_RETURN_UNEXPECTED (
50+ ValidateIdentifierFields (field_id, *schema, id_to_parent));
51+ }
52+ }
53+
54+ schema->identifier_field_ids_ = std::move (identifier_field_ids);
55+ return schema;
56+ }
4257
4358Result<std::unique_ptr<Schema>> Schema::Make (
4459 std::vector<SchemaField> fields, int32_t schema_id,
@@ -53,10 +68,69 @@ Result<std::unique_ptr<Schema>> Schema::Make(
5368 }
5469 fresh_identifier_ids.push_back (field.value ().get ().field_id ());
5570 }
71+
72+ if (!fresh_identifier_ids.empty ()) {
73+ auto id_to_parent = IndexParents (*schema);
74+ for (auto field_id : fresh_identifier_ids) {
75+ ICEBERG_RETURN_UNEXPECTED (
76+ ValidateIdentifierFields (field_id, *schema, id_to_parent));
77+ }
78+ }
79+
5680 schema->identifier_field_ids_ = std::move (fresh_identifier_ids);
5781 return schema;
5882}
5983
84+ Status Schema::ValidateIdentifierFields (
85+ int32_t field_id, const Schema& schema,
86+ const std::unordered_map<int32_t , int32_t >& id_to_parent) {
87+ ICEBERG_ASSIGN_OR_RAISE (auto field_opt, schema.FindFieldById (field_id));
88+ ICEBERG_PRECHECK (field_opt.has_value (),
89+ " Cannot add field {} as an identifier field: field does not exist" ,
90+ field_id);
91+
92+ const SchemaField& field = field_opt.value ().get ();
93+ ICEBERG_PRECHECK (
94+ field.type ()->is_primitive (),
95+ " Cannot add field {} as an identifier field: not a primitive type field" , field_id);
96+ ICEBERG_PRECHECK (!field.optional (),
97+ " Cannot add field {} as an identifier field: not a required field" ,
98+ field_id);
99+ ICEBERG_PRECHECK (
100+ field.type ()->type_id () != TypeId::kDouble &&
101+ field.type ()->type_id () != TypeId::kFloat ,
102+ " Cannot add field {} as an identifier field: must not be float or double field" ,
103+ field_id);
104+
105+ // check whether the nested field is in a chain of required struct fields
106+ // exploring from root for better error message for list and map types
107+ std::stack<int32_t > ancestors;
108+ auto parent_it = id_to_parent.find (field.field_id ());
109+ while (parent_it != id_to_parent.end ()) {
110+ ancestors.push (parent_it->second );
111+ parent_it = id_to_parent.find (parent_it->second );
112+ }
113+
114+ while (!ancestors.empty ()) {
115+ ICEBERG_ASSIGN_OR_RAISE (auto parent_opt, schema.FindFieldById (ancestors.top ()));
116+ ICEBERG_PRECHECK (
117+ parent_opt.has_value (),
118+ " Cannot add field {} as an identifier field: parent field id {} does not exist" ,
119+ field_id, ancestors.top ());
120+ const SchemaField& parent = parent_opt.value ().get ();
121+ ICEBERG_PRECHECK (
122+ parent.type ()->type_id () == TypeId::kStruct ,
123+ " Cannot add field {} as an identifier field: must not be nested in {}" , field_id,
124+ *parent.type ());
125+ ICEBERG_PRECHECK (!parent.optional (),
126+ " Cannot add field {} as an identifier field: must not be nested in "
127+ " optional field {}" ,
128+ field_id, parent.field_id ());
129+ ancestors.pop ();
130+ }
131+ return {};
132+ }
133+
60134const std::shared_ptr<Schema>& Schema::EmptySchema () {
61135 static const auto empty_schema =
62136 std::make_shared<Schema>(std::vector<SchemaField>{}, kInitialSchemaId );
0 commit comments