diff --git a/DEPENDENCIES b/DEPENDENCIES index a888d113..08be966f 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 6db6600fed02d8b7e1de54e7df340bb1a16ab551 +core https://github.com/sourcemeta/core 4e9d280a8a452885c7cd2bc488799a4f6410f4d8 bootstrap https://github.com/twbs/bootstrap 1a6fdfae6be09b09eaced8f0e442ca6f7680a61e diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h index b9837b63..7682ae53 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h @@ -638,39 +638,4 @@ auto from_json(const JSON &value) -> std::optional { } // namespace sourcemeta::core -// This hash specialisation is intentationally constant with a decent tolerance -// to collisions -namespace std { -template -struct hash>> { - auto - operator()(const sourcemeta::core::GenericPointer< - PropertyT, - sourcemeta::core::PropertyHashJSON> - &pointer) const noexcept -> std::size_t { - const auto size{pointer.size()}; - if (size == 0) { - return size; - } - - const auto &first{pointer.at(0)}; - const auto &middle{pointer.at(size / 2)}; - const auto &last{pointer.at(size - 1)}; - - return size + - (first.is_property() - ? static_cast(first.property_hash().a) - : first.to_index()) + - (middle.is_property() - ? static_cast(middle.property_hash().a) - : middle.to_index()) + - (last.is_property() - ? static_cast(last.property_hash().a) - : last.to_index()); - } -}; -} // namespace std - #endif diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index f5fbdf5c..1f968fcd 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -705,6 +705,13 @@ template class GenericPointer { return this->data == other.data; } + /// Compare with a reference wrapper + [[nodiscard]] auto + operator==(const std::reference_wrapper> + &other) const noexcept -> bool { + return this->data == other.get().data; + } + /// Overload to support ordering of JSON Pointers. Typically for sorting /// reasons. [[nodiscard]] auto @@ -713,6 +720,81 @@ template class GenericPointer { return this->data < other.data; } + /// Compare with a reference wrapper for ordering + [[nodiscard]] auto + operator<(const std::reference_wrapper> + &other) const noexcept -> bool { + return this->data < other.get().data; + } + + /// Hash functor for use with containers + struct Hasher { + using is_transparent = void; + + auto + operator()(const GenericPointer &pointer) const noexcept + -> std::size_t { + const auto size{pointer.size()}; + if (size == 0) { + return size; + } + + const auto &first{pointer.at(0)}; + const auto &middle{pointer.at(size / 2)}; + const auto &last{pointer.at(size - 1)}; + + return size + + (first.is_property() + ? static_cast(first.property_hash().a) + : first.to_index()) + + (middle.is_property() + ? static_cast(middle.property_hash().a) + : middle.to_index()) + + (last.is_property() + ? static_cast(last.property_hash().a) + : last.to_index()); + } + + auto operator()( + const std::reference_wrapper> + &reference) const noexcept -> std::size_t { + return (*this)(reference.get()); + } + }; + + /// Comparator for use with containers + struct Comparator { + using is_transparent = void; + + auto operator()(const GenericPointer &left, + const GenericPointer &right) const noexcept + -> bool { + return left == right; + } + + auto operator()( + const std::reference_wrapper> + &left, + const std::reference_wrapper> + &right) const noexcept -> bool { + return left.get() == right.get(); + } + + auto operator()( + const std::reference_wrapper> + &left, + const GenericPointer &right) const noexcept -> bool { + return left.get() == right; + } + + auto operator()( + const GenericPointer &left, + const std::reference_wrapper> + &right) const noexcept -> bool { + return left == right.get(); + } + }; + private: Container data; }; diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 64fe89ac..63fac3e4 100644 --- a/vendor/core/src/core/jsonschema/frame.cc +++ b/vendor/core/src/core/jsonschema/frame.cc @@ -281,7 +281,8 @@ auto store(sourcemeta::core::SchemaFrame::Locations &frame, const std::string_view dialect, const sourcemeta::core::SchemaBaseDialect base_dialect, const std::optional &parent, - const bool property_name, const bool ignore_if_present = false, + const bool property_name, const bool orphan, + const bool ignore_if_present = false, const bool already_canonical = false) -> void { auto canonical{already_canonical ? std::move(uri) : sourcemeta::core::URI::canonicalize(uri)}; @@ -294,7 +295,8 @@ auto store(sourcemeta::core::SchemaFrame::Locations &frame, .relative_pointer = relative_pointer_offset, .dialect = dialect, .base_dialect = base_dialect, - .property_name = property_name}}); + .property_name = property_name, + .orphan = orphan}}); if (!ignore_if_present && !inserted) { throw_already_exists(iterator->first.second); } @@ -380,6 +382,7 @@ auto SchemaFrame::to_json( JSON{JSON::String{to_string(location.second.base_dialect)}}); entry.assign_assume_new("propertyName", JSON{location.second.property_name}); + entry.assign_assume_new("orphan", JSON{location.second.orphan}); switch (location.first.first) { case SchemaReferenceType::Static: @@ -437,8 +440,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, std::string_view default_id, const SchemaFrame::Paths &paths) -> void { this->reset(); - assert(std::unordered_set(paths.cbegin(), paths.cend()).size() == - paths.size()); + assert((std::unordered_set(paths.cbegin(), + paths.cend()) + .size() == paths.size())); std::vector subschema_entries; std::map subschemas; std::map> base_uris; @@ -494,7 +498,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, store(this->locations_, SchemaReferenceType::Static, SchemaFrame::LocationType::Resource, default_id_canonical, this->root_, path, path.size(), root_dialect, - root_base_dialect.value(), std::nullopt, false); + root_base_dialect.value(), std::nullopt, false, false); base_uris.insert({path, {root_id.value(), default_id_canonical}}); } @@ -612,7 +616,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Resource, new_id, new_id, common_pointer_weak, common_pointer_weak.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name); + common_parent, entry.common.property_name, + entry.common.orphan); } auto base_uri_match{base_uris.find(common_pointer_weak)}; @@ -682,7 +687,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name); + common_parent, entry.common.property_name, + entry.common.orphan); } if (type == AnchorType::Dynamic || type == AnchorType::All) { @@ -690,7 +696,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name); + common_parent, entry.common.property_name, + entry.common.orphan); // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( @@ -699,7 +706,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name, true); + common_parent, entry.common.property_name, + entry.common.orphan, true); } } } else { @@ -729,7 +737,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, anchor_uri, base_view, common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name); + common_parent, entry.common.property_name, + entry.common.orphan); } if (type == AnchorType::Dynamic || type == AnchorType::All) { @@ -738,9 +747,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, anchor_uri, base_view, common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name); + common_parent, entry.common.property_name, + entry.common.orphan); - // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( Vocabularies::Known::JSON_Schema_2020_12_Core)) { store(this->locations_, @@ -748,7 +757,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, SchemaFrame::LocationType::Anchor, anchor_uri, base_view, common_pointer_weak, bases.second.size(), entry.common.dialect, entry.common.base_dialect.value(), - common_parent, entry.common.property_name, true); + common_parent, entry.common.property_name, + entry.common.orphan, true); } } @@ -851,7 +861,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, base_view, pointer_weak, nearest_base_depth, dialect_for_pointer, current_base_dialect, subschema_it->second.parent, - subschema_it->second.property_name, false, true); + subschema_it->second.property_name, + subschema_it->second.orphan, false, true); } else { const auto &parent_pointer{combined.dialect_match.has_value() ? combined.dialect_match->second @@ -860,12 +871,14 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const bool parent_property_name{ parent_subschema_it != subschemas.cend() && parent_subschema_it->second.property_name}; + const bool parent_orphan{parent_subschema_it != subschemas.cend() && + parent_subschema_it->second.orphan}; store(this->locations_, SchemaReferenceType::Static, SchemaFrame::LocationType::Pointer, std::move(result), base_view, pointer_weak, nearest_base_depth, dialect_for_pointer, current_base_dialect, parent_pointer, - parent_property_name, false, true); + parent_property_name, parent_orphan, false, true); } } } @@ -1170,11 +1183,28 @@ auto SchemaFrame::traverse(const std::string_view uri) const auto SchemaFrame::traverse(const WeakPointer &pointer) const -> std::optional> { - // TODO: This is slow. Consider adding a pointer-indexed secondary - // lookup structure to SchemaFrame - for (const auto &entry : this->locations_) { - if (entry.second.pointer == pointer) { - return entry.second; + this->populate_pointer_to_location(); + const auto iterator{this->pointer_to_location_.find(std::cref(pointer))}; + if (iterator == this->pointer_to_location_.cend() || + iterator->second.empty()) { + return std::nullopt; + } + + return *(iterator->second.front()); +} + +auto SchemaFrame::traverse(const WeakPointer &pointer, + const LocationType type) const + -> std::optional> { + this->populate_pointer_to_location(); + const auto iterator{this->pointer_to_location_.find(std::cref(pointer))}; + if (iterator == this->pointer_to_location_.cend()) { + return std::nullopt; + } + + for (const auto *location : iterator->second) { + if (location->type == type) { + return *location; } } @@ -1183,9 +1213,24 @@ auto SchemaFrame::traverse(const WeakPointer &pointer) const auto SchemaFrame::uri(const WeakPointer &pointer) const -> std::optional> { - for (const auto &entry : this->locations_) { - if (entry.second.pointer == pointer) { - return entry.first.second; + this->populate_pointer_to_location(); + const auto iterator{this->pointer_to_location_.find(std::cref(pointer))}; + if (iterator == this->pointer_to_location_.cend()) { + return std::nullopt; + } + + const Location *best{nullptr}; + for (const auto *location : iterator->second) { + if (best == nullptr || location->type < best->type) { + best = location; + } + } + + if (best != nullptr) { + for (const auto &entry : this->locations_) { + if (&entry.second == best) { + return entry.first.second; + } } } @@ -1341,9 +1386,274 @@ auto SchemaFrame::empty() const noexcept -> bool { } auto SchemaFrame::reset() -> void { + // Note that order of removal is important to avoid undefined behaviour + this->pointer_to_location_.clear(); + this->reachability_.clear(); this->root_.clear(); this->locations_.clear(); this->references_.clear(); } +auto SchemaFrame::populate_pointer_to_location() const -> void { + if (!this->pointer_to_location_.empty()) { + return; + } + + this->pointer_to_location_.reserve(this->locations_.size()); + for (const auto &entry : this->locations_) { + this->pointer_to_location_[std::cref(entry.second.pointer)].push_back( + &entry.second); + } +} + +// TODO: Find a way to split or simplify this monster while preserving +// its performance? +auto SchemaFrame::populate_reachability(const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> void { + if (!this->reachability_.empty()) { + return; + } + + // --------------------------------------------------------------------------- + // (1) Find all unreachable pointers + // --------------------------------------------------------------------------- + + std::vector> unreachable_pointers; + + if (this->pointer_to_location_.empty()) { + std::unordered_set, + WeakPointer::Hasher, WeakPointer::Comparator> + has_non_pointer_location; + std::unordered_set, + WeakPointer::Hasher, WeakPointer::Comparator> + has_non_orphan; + + for (const auto &entry : this->locations_) { + auto [iterator, inserted] = this->pointer_to_location_.try_emplace( + std::cref(entry.second.pointer), std::vector{}); + iterator->second.push_back(&entry.second); + if (entry.second.type != LocationType::Pointer) { + has_non_pointer_location.insert(iterator->first); + if (!entry.second.orphan) { + has_non_orphan.insert(iterator->first); + } + } + } + + for (const auto &pointer_reference : has_non_pointer_location) { + const bool is_reachable = has_non_orphan.contains(pointer_reference); + this->reachability_.emplace(pointer_reference, is_reachable); + if (!is_reachable) { + unreachable_pointers.push_back(pointer_reference); + } + } + } else { + for (const auto &[pointer_reference, locations] : + this->pointer_to_location_) { + const auto has_non_pointer{ + std::ranges::any_of(locations, [](const Location *location) { + return location->type != LocationType::Pointer; + })}; + if (!has_non_pointer) { + continue; + } + + const auto any_non_orphan{ + std::ranges::any_of(locations, [](const Location *location) { + return location->type != LocationType::Pointer && !location->orphan; + })}; + this->reachability_.emplace(pointer_reference, any_non_orphan); + if (!any_non_orphan) { + unreachable_pointers.push_back(pointer_reference); + } + } + } + + // --------------------------------------------------------------------------- + // (2) Build a reverse mapping from reference destinations to their sources + // --------------------------------------------------------------------------- + + std::vector> + reference_destinations; + reference_destinations.reserve(this->references_.size()); + + for (const auto &reference : this->references_) { + const auto &source_pointer{reference.first.second}; + if (source_pointer.empty()) { + continue; + } + + const WeakPointer *destination_pointer{nullptr}; + const auto destination_location{this->locations_.find( + {SchemaReferenceType::Static, reference.second.destination})}; + if (destination_location != this->locations_.cend()) { + destination_pointer = &destination_location->second.pointer; + } else { + const auto dynamic_destination{this->locations_.find( + {SchemaReferenceType::Dynamic, reference.second.destination})}; + if (dynamic_destination != this->locations_.cend()) { + destination_pointer = &dynamic_destination->second.pointer; + } + } + + if (destination_pointer != nullptr) { + reference_destinations.emplace_back(&source_pointer, destination_pointer); + } + } + + std::unordered_map, + std::vector, WeakPointer::Hasher, + WeakPointer::Comparator> + references_by_destination; + for (const auto &[source, destination] : reference_destinations) { + references_by_destination[std::cref(*destination)].push_back(source); + } + + // --------------------------------------------------------------------------- + // (3) Precompute which references could make each orphan reachable + // --------------------------------------------------------------------------- + + struct PotentialSource { + const WeakPointer *source_pointer; + bool crosses; + }; + struct PotentialReach { + std::reference_wrapper pointer; + std::vector potential_sources; + }; + std::vector unreachable_with_sources; + unreachable_with_sources.reserve(unreachable_pointers.size()); + + std::unordered_map vocabularies_cache; + + for (const auto &pointer_reference : unreachable_pointers) { + const auto &pointer{pointer_reference.get()}; + PotentialReach entry{.pointer = pointer_reference, .potential_sources = {}}; + + WeakPointer ancestor = pointer; + while (!ancestor.empty()) { + auto destination_iterator = + references_by_destination.find(std::cref(ancestor)); + if (destination_iterator != references_by_destination.end()) { + bool crosses{false}; + if (ancestor != pointer) { + auto check_location{this->traverse(pointer)}; + while (check_location.has_value()) { + const auto &location{check_location->get()}; + if (location.pointer == ancestor) { + break; + } + + if (!location.parent.has_value()) { + break; + } + + const auto parent_location{this->traverse(location.parent.value())}; + if (!parent_location.has_value()) { + break; + } + + const auto relative{ + location.pointer.slice(location.parent.value().size())}; + if (!relative.empty() && relative.at(0).is_property()) { + const auto &parent_loc{parent_location->get()}; + auto vocab_iterator = + vocabularies_cache.find(parent_loc.base_dialect); + if (vocab_iterator == vocabularies_cache.end()) { + auto [inserted_iterator, inserted] = vocabularies_cache.emplace( + parent_loc.base_dialect, + this->vocabularies(parent_loc, resolver)); + vocab_iterator = inserted_iterator; + } + + const auto &keyword_result{ + walker(relative.at(0).to_property(), vocab_iterator->second)}; + if (keyword_result.type == SchemaKeywordType::LocationMembers) { + crosses = true; + break; + } + } + + check_location = parent_location; + } + } + + for (const auto *source_pointer : destination_iterator->second) { + entry.potential_sources.push_back(PotentialSource{ + .source_pointer = source_pointer, .crosses = crosses}); + } + } + ancestor = ancestor.initial(); + } + + if (!entry.potential_sources.empty()) { + unreachable_with_sources.push_back(std::move(entry)); + } + } + + std::ranges::sort(unreachable_with_sources, [](const PotentialReach &left, + const PotentialReach &right) { + return left.pointer.get().size() < right.pointer.get().size(); + }); + + // --------------------------------------------------------------------------- + // (4) Propagate reachability through references using fixpoint iteration + // --------------------------------------------------------------------------- + + bool changed{true}; + while (changed) { + changed = false; + + auto write_iterator = unreachable_with_sources.begin(); + for (auto read_iterator = unreachable_with_sources.begin(); + read_iterator != unreachable_with_sources.end(); ++read_iterator) { + bool became_reachable = false; + + for (const auto &potential_source : read_iterator->potential_sources) { + if (potential_source.crosses) { + continue; + } + + const auto &source_parent{potential_source.source_pointer->initial()}; + bool source_parent_reachable{source_parent.empty()}; + if (!source_parent_reachable) { + const auto reachability_iterator{ + this->reachability_.find(std::cref(source_parent))}; + source_parent_reachable = + reachability_iterator != this->reachability_.end() && + reachability_iterator->second; + } + + if (source_parent_reachable) { + became_reachable = true; + break; + } + } + + if (became_reachable) { + this->reachability_[read_iterator->pointer] = true; + changed = true; + } else { + if (write_iterator != read_iterator) { + *write_iterator = std::move(*read_iterator); + } + ++write_iterator; + } + } + unreachable_with_sources.erase(write_iterator, + unreachable_with_sources.end()); + } +} + +auto SchemaFrame::is_reachable(const Location &location, + const SchemaWalker &walker, + const SchemaResolver &resolver) const -> bool { + assert(location.type != LocationType::Pointer); + this->populate_reachability(walker, resolver); + const auto iterator{this->reachability_.find(std::cref(location.pointer))}; + assert(iterator != this->reachability_.end()); + return iterator->second; +} + } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index 50dffa54..1061f872 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -17,6 +17,7 @@ #include // std::optional #include // std::set #include // std::tuple +#include // std::unordered_map #include // std::unordered_set #include // std::pair #include // std::vector @@ -61,6 +62,12 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { SchemaFrame(const Mode mode) : mode_{mode} {} + // We rely on internal caches that would be dangling otherwise + SchemaFrame(const SchemaFrame &) = delete; + auto operator=(const SchemaFrame &) -> SchemaFrame & = delete; + SchemaFrame(SchemaFrame &&) = delete; + auto operator=(SchemaFrame &&) -> SchemaFrame & = delete; + // Query the current mode that the schema frame was configured with [[nodiscard]] auto mode() const noexcept -> Mode { return this->mode_; } @@ -114,6 +121,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { std::string_view dialect; SchemaBaseDialect base_dialect; bool property_name; + bool orphan; }; /// A JSON Schema reference frame is a mapping of URIs to schema identifiers, @@ -184,6 +192,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { [[nodiscard]] auto traverse(const WeakPointer &pointer) const -> std::optional>; + /// Get the location of a specific type associated with a given pointer + [[nodiscard]] auto traverse(const WeakPointer &pointer, + const LocationType type) const + -> std::optional>; + /// Turn an absolute pointer into a location URI [[nodiscard]] auto uri(const WeakPointer &pointer) const -> std::optional>; @@ -228,7 +241,16 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Reset the frame, clearing all analysed data auto reset() -> void; + /// Determines if a location could be evaluated during validation + [[nodiscard]] auto is_reachable(const Location &location, + const SchemaWalker &walker, + const SchemaResolver &resolver) const -> bool; + private: + auto populate_pointer_to_location() const -> void; + auto populate_reachability(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + Mode mode_; // Exporting symbols that depends on the standard C++ library is considered // safe. @@ -239,6 +261,13 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { JSON::String root_; Locations locations_; References references_; + mutable std::unordered_map, + std::vector, WeakPointer::Hasher, + WeakPointer::Comparator> + pointer_to_location_; + mutable std::unordered_map, bool, + WeakPointer::Hasher, WeakPointer::Comparator> + reachability_; #if defined(_MSC_VER) #pragma warning(default : 4251 4275) #endif diff --git a/vendor/core/src/core/jsonschema/transformer.cc b/vendor/core/src/core/jsonschema/transformer.cc index 9fa9c61d..2ff1010e 100644 --- a/vendor/core/src/core/jsonschema/transformer.cc +++ b/vendor/core/src/core/jsonschema/transformer.cc @@ -47,7 +47,9 @@ auto check_rules( const sourcemeta::core::SchemaTransformer::Callback &callback, const sourcemeta::core::JSON::String &exclude_keyword, const bool non_mutating_only) -> std::pair { - std::unordered_set visited; + std::unordered_set + visited; bool result{true}; std::size_t subschema_count{0}; std::size_t subschema_failures{0}; @@ -213,7 +215,7 @@ auto SchemaTransformer::apply(JSON &schema, const SchemaWalker &walker, default_id); } - std::unordered_set visited; + std::unordered_set visited; bool applied{false}; for (const auto &entry : frame.locations()) { diff --git a/vendor/core/src/extension/alterschema/common/empty_object_as_true.h b/vendor/core/src/extension/alterschema/common/empty_object_as_true.h index 34019077..288cab7d 100644 --- a/vendor/core/src/extension/alterschema/common/empty_object_as_true.h +++ b/vendor/core/src/extension/alterschema/common/empty_object_as_true.h @@ -1,7 +1,7 @@ class EmptyObjectAsTrue final : public SchemaTransformRule { public: using mutates = std::true_type; - using reframe_after_transform = std::true_type; + using reframe_after_transform = std::false_type; EmptyObjectAsTrue() : SchemaTransformRule{ "empty_object_as_true", diff --git a/vendor/core/src/extension/alterschema/common/orphan_definitions.h b/vendor/core/src/extension/alterschema/common/orphan_definitions.h index f0863276..047156fe 100644 --- a/vendor/core/src/extension/alterschema/common/orphan_definitions.h +++ b/vendor/core/src/extension/alterschema/common/orphan_definitions.h @@ -14,8 +14,8 @@ class OrphanDefinitions final : public SchemaTransformRule { const sourcemeta::core::Vocabularies &vocabularies, const sourcemeta::core::SchemaFrame &frame, const sourcemeta::core::SchemaFrame::Location &location, - const sourcemeta::core::SchemaWalker &, - const sourcemeta::core::SchemaResolver &) const + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver) const -> sourcemeta::core::SchemaTransformRule::Result override { ONLY_CONTINUE_IF(schema.is_object()); const bool has_modern_core{ @@ -30,35 +30,11 @@ class OrphanDefinitions final : public SchemaTransformRule { schema.defines("definitions")}; ONLY_CONTINUE_IF(has_defs || has_definitions); - bool has_external_to_defs{false}; - bool has_external_to_definitions{false}; - std::unordered_set outside_referenced_defs; - std::unordered_set outside_referenced_definitions; - - for (const auto &[key, reference] : frame.references()) { - const auto destination_location{frame.traverse(reference.destination)}; - if (destination_location.has_value()) { - const auto &destination_pointer{destination_location->get().pointer}; - if (has_defs) { - process_reference(key.second, destination_pointer, location.pointer, - "$defs", has_external_to_defs, - outside_referenced_defs); - } - - if (has_definitions) { - process_reference(key.second, destination_pointer, location.pointer, - "definitions", has_external_to_definitions, - outside_referenced_definitions); - } - } - } - std::vector orphans; - collect_orphans(schema, "$defs", has_defs, has_external_to_defs, - outside_referenced_defs, orphans); - collect_orphans(schema, "definitions", has_definitions, - has_external_to_definitions, outside_referenced_definitions, - orphans); + collect_orphans(frame, walker, resolver, location.pointer, schema, "$defs", + has_defs, orphans); + collect_orphans(frame, walker, resolver, location.pointer, schema, + "definitions", has_definitions, orphans); ONLY_CONTINUE_IF(!orphans.empty()); return APPLIES_TO_POINTERS(std::move(orphans)); @@ -73,55 +49,72 @@ class OrphanDefinitions final : public SchemaTransformRule { schema.at(container).erase(pointer.at(1).to_property()); } - remove_empty_container(schema, "$defs"); - remove_empty_container(schema, "definitions"); + if (schema.defines("$defs") && schema.at("$defs").empty()) { + schema.erase("$defs"); + } + + if (schema.defines("definitions") && schema.at("definitions").empty()) { + schema.erase("definitions"); + } } private: - static auto process_reference( - const WeakPointer &source_pointer, const WeakPointer &destination_pointer, - const WeakPointer &prefix, std::string_view container, bool &has_external, - std::unordered_set &referenced) -> void { - if (!destination_pointer.starts_with(prefix, container) || - destination_pointer.size() <= prefix.size() + 1) { - return; - } + static auto has_reachable_reference_through( + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + const WeakPointer &pointer) -> bool { + for (const auto &reference : frame.references()) { + const auto destination{frame.traverse(reference.second.destination)}; + if (!destination.has_value()) { + continue; + } - const auto &entry_token{destination_pointer.at(prefix.size() + 1)}; - if (entry_token.is_property()) { - const auto &entry_name{entry_token.to_property()}; - if (!source_pointer.starts_with(prefix, container)) { - has_external = true; - referenced.insert(entry_name); - } else if (!source_pointer.starts_with(prefix, container, entry_name)) { - referenced.insert(entry_name); + if (!destination->get().pointer.starts_with(pointer)) { + continue; + } + + const auto &source_pointer{reference.first.second}; + if (source_pointer.empty()) { + return true; } - } - } - static auto - collect_orphans(const JSON &schema, const JSON::String &container, - const bool has_container, const bool has_external_reference, - const std::unordered_set &referenced, - std::vector &orphans) -> void { - if (has_container) { - const auto &maybe_object{schema.at(container)}; - if (maybe_object.is_object()) { - // If no external references to container, all definitions are orphans - // Otherwise, only unreferenced definitions are orphans - for (const auto &entry : maybe_object.as_object()) { - if (!has_external_reference || !referenced.contains(entry.first)) { - orphans.push_back(Pointer{container, entry.first}); - } - } + const auto source_location{frame.traverse( + source_pointer.initial(), + sourcemeta::core::SchemaFrame::LocationType::Subschema)}; + if (source_location.has_value() && + frame.is_reachable(source_location->get(), walker, resolver)) { + return true; } } + + return false; } - static auto remove_empty_container(JSON &schema, const JSON::String &name) - -> void { - if (schema.defines(name) && schema.at(name).empty()) { - schema.erase(name); + static auto collect_orphans(const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + const WeakPointer &prefix, const JSON &schema, + const JSON::String &container, + const bool has_container, + std::vector &orphans) -> void { + if (!has_container || !schema.at(container).is_object()) { + return; + } + + for (const auto &entry : schema.at(container).as_object()) { + const WeakPointer entry_pointer{std::cref(container), + std::cref(entry.first)}; + const auto absolute_entry_pointer{prefix.concat(entry_pointer)}; + const auto entry_location{frame.traverse( + absolute_entry_pointer, + sourcemeta::core::SchemaFrame::LocationType::Subschema)}; + if (entry_location.has_value() && + !frame.is_reachable(entry_location->get(), walker, resolver) && + !has_reachable_reference_through(frame, walker, resolver, + absolute_entry_pointer)) { + orphans.push_back(Pointer{container, entry.first}); + } } } };