2121
2222#include < format>
2323#include < functional>
24+ #include < stack>
2425
2526#include " iceberg/result.h"
2627#include " iceberg/row/struct_like.h"
3435
3536namespace iceberg {
3637
37- Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id,
38- std::vector<int32_t > identifier_field_ids)
39- : StructType(std::move(fields)),
40- schema_id_ (schema_id),
41- identifier_field_ids_(std::move(identifier_field_ids)) {}
38+ namespace {
39+ Status ValidateIdentifierFields (
40+ int32_t field_id, const Schema& schema,
41+ const std::unordered_map<int32_t , int32_t >& id_to_parent) {
42+ ICEBERG_ASSIGN_OR_RAISE (auto field_opt, schema.FindFieldById (field_id));
43+ ICEBERG_PRECHECK (field_opt.has_value (),
44+ " Cannot add field {} as an identifier field: field does not exist" ,
45+ field_id);
46+
47+ const SchemaField& field = field_opt.value ().get ();
48+ ICEBERG_PRECHECK (
49+ field.type ()->is_primitive (),
50+ " Cannot add field {} as an identifier field: not a primitive type field" , field_id);
51+ ICEBERG_PRECHECK (!field.optional (),
52+ " Cannot add field {} as an identifier field: not a required field" ,
53+ field_id);
54+ ICEBERG_PRECHECK (
55+ field.type ()->type_id () != TypeId::kDouble &&
56+ field.type ()->type_id () != TypeId::kFloat ,
57+ " Cannot add field {} as an identifier field: must not be float or double field" ,
58+ field_id);
59+
60+ // check whether the nested field is in a chain of required struct fields
61+ // exploring from root for better error message for list and map types
62+ std::stack<int32_t > ancestors;
63+ auto parent_it = id_to_parent.find (field.field_id ());
64+ while (parent_it != id_to_parent.end ()) {
65+ ancestors.push (parent_it->second );
66+ parent_it = id_to_parent.find (parent_it->second );
67+ }
68+
69+ while (!ancestors.empty ()) {
70+ ICEBERG_ASSIGN_OR_RAISE (auto parent_opt, schema.FindFieldById (ancestors.top ()));
71+ ICEBERG_PRECHECK (
72+ parent_opt.has_value (),
73+ " Cannot add field {} as an identifier field: parent field id {} does not exist" ,
74+ field_id, ancestors.top ());
75+ const SchemaField& parent = parent_opt.value ().get ();
76+ ICEBERG_PRECHECK (
77+ parent.type ()->type_id () == TypeId::kStruct ,
78+ " Cannot add field {} as an identifier field: must not be nested in {}" , field_id,
79+ *parent.type ());
80+ ICEBERG_PRECHECK (!parent.optional (),
81+ " Cannot add field {} as an identifier field: must not be nested in "
82+ " optional field {}" ,
83+ field_id, parent.field_id ());
84+ ancestors.pop ();
85+ }
86+ return {};
87+ }
88+ } // namespace
89+
90+ const std::shared_ptr<Schema> Schema::kEmptySchema =
91+ std::make_shared<Schema>(std::vector<SchemaField>{}, std::nullopt );
92+
93+ Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
94+ : StructType(std::move(fields)), schema_id_(schema_id) {}
95+
96+ Result<std::unique_ptr<Schema>> Schema::Make (std::vector<SchemaField> fields,
97+ std::optional<int32_t > schema_id,
98+ std::vector<int32_t > identifier_field_ids) {
99+ auto schema = std::make_unique<Schema>(std::move (fields), schema_id);
100+
101+ if (!identifier_field_ids.empty ()) {
102+ auto id_to_parent = IndexParents (*schema);
103+ for (auto field_id : identifier_field_ids) {
104+ ICEBERG_RETURN_UNEXPECTED (
105+ ValidateIdentifierFields (field_id, *schema, id_to_parent));
106+ }
107+ }
108+
109+ schema->identifier_field_ids_ = std::move (identifier_field_ids);
110+ return schema;
111+ }
42112
43113Result<std::unique_ptr<Schema>> Schema::Make (
44114 std::vector<SchemaField> fields, std::optional<int32_t > schema_id,
@@ -53,6 +123,15 @@ Result<std::unique_ptr<Schema>> Schema::Make(
53123 }
54124 fresh_identifier_ids.push_back (field.value ().get ().field_id ());
55125 }
126+
127+ if (!fresh_identifier_ids.empty ()) {
128+ auto id_to_parent = IndexParents (*schema);
129+ for (auto field_id : fresh_identifier_ids) {
130+ ICEBERG_RETURN_UNEXPECTED (
131+ ValidateIdentifierFields (field_id, *schema, id_to_parent));
132+ }
133+ }
134+
56135 schema->identifier_field_ids_ = std::move (fresh_identifier_ids);
57136 return schema;
58137}
0 commit comments