From a3fc1459a4b8952f0ab6869817c29e8b36330f34 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 4 Feb 2026 15:16:55 -0400 Subject: [PATCH 1/2] Heavily optimise `SchemaFrame::is_reachable` across bases Signed-off-by: Juan Cruz Viotti --- src/core/jsonschema/frame.cc | 487 +++++++++--------- .../sourcemeta/core/jsonschema_frame.h | 54 +- 2 files changed, 303 insertions(+), 238 deletions(-) diff --git a/src/core/jsonschema/frame.cc b/src/core/jsonschema/frame.cc index e1ee47838..f0b7dea3b 100644 --- a/src/core/jsonschema/frame.cc +++ b/src/core/jsonschema/frame.cc @@ -1397,6 +1397,11 @@ auto SchemaFrame::reset() -> void { this->pointers_with_non_orphan_.clear(); this->pointer_to_location_.clear(); this->reachability_.clear(); + this->references_by_destination_.clear(); + this->location_members_children_.clear(); + this->descendants_by_pointer_.clear(); + this->potential_sources_by_location_.clear(); + this->reachability_graph_.clear(); this->root_.clear(); this->locations_.clear(); this->references_.clear(); @@ -1415,115 +1420,135 @@ auto SchemaFrame::populate_pointer_to_location() const -> void { } } -// TODO: Find a way to split or simplify this monster while preserving -// its performance? -auto SchemaFrame::populate_reachability(const Location &base, - const SchemaWalker &walker, - const SchemaResolver &resolver) const - -> const ReachabilityCache & { - for (auto &entry : this->reachability_) { - if (entry.first == &base) { - return entry.second; - } +auto SchemaFrame::populate_location_members( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->location_members_children_.empty()) { + return; } - auto &cache = - this->reachability_.emplace_back(&base, ReachabilityCache{}).second; - - // --------------------------------------------------------------------------- - // (1) Find all unreachable locations - // --------------------------------------------------------------------------- + this->populate_pointer_to_location(); - if (this->pointers_with_non_orphan_.empty()) { - for (const auto &entry : this->locations_) { - if (entry.second.type != LocationType::Pointer && !entry.second.orphan) { - this->pointers_with_non_orphan_.insert(std::cref(entry.second.pointer)); - } + for (const auto &entry : this->locations_) { + if (entry.second.type != LocationType::Subschema) { + continue; + } + if (!entry.second.parent.has_value()) { + continue; + } + const auto &parent_pointer{entry.second.parent.value()}; + const auto relative{entry.second.pointer.slice(parent_pointer.size())}; + if (relative.empty() || !relative.at(0).is_property()) { + continue; + } + const auto parent_location{this->traverse(parent_pointer)}; + if (!parent_location.has_value()) { + continue; } + const auto vocabs{this->vocabularies(parent_location->get(), resolver)}; + const auto &keyword_result{walker(relative.at(0).to_property(), vocabs)}; + if (keyword_result.type == SchemaKeywordType::LocationMembers) { + this->location_members_children_.insert(std::cref(entry.second.pointer)); + } + } +} + +auto SchemaFrame::populate_descendants() const -> void { + if (!this->descendants_by_pointer_.empty()) { + return; } - std::vector unreachable_locations; + this->populate_pointer_to_location(); + for (const auto &entry : this->locations_) { if (entry.second.type == LocationType::Pointer) { continue; } const auto &pointer{entry.second.pointer}; - auto cache_iter = cache.find(std::cref(pointer)); - if (cache_iter != cache.end()) { - continue; + const auto *location{&entry.second}; + + WeakPointer prefix; + for (std::size_t index = 0; index <= pointer.size(); ++index) { + auto prefix_iter = this->pointer_to_location_.find(std::cref(prefix)); + if (prefix_iter != this->pointer_to_location_.end() && + !prefix_iter->second.empty()) { + const auto &key_pointer{prefix_iter->second.front()->pointer}; + this->descendants_by_pointer_[std::cref(key_pointer)].push_back( + location); + } + if (index < pointer.size()) { + const auto &token{pointer.at(index)}; + if (token.is_property()) { + prefix.emplace_back(token.to_property(), token.property_hash()); + } else { + prefix.push_back(token.to_index()); + } + } } + } +} - bool is_reachable{false}; - if (pointer == base.pointer) { - is_reachable = true; - } else if (pointer.starts_with(base.pointer)) { - is_reachable = base.orphan || this->pointers_with_non_orphan_.contains( - std::cref(pointer)); - } +auto SchemaFrame::populate_potential_sources( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->potential_sources_by_location_.empty()) { + return; + } - cache.emplace(std::cref(pointer), is_reachable); - if (!is_reachable) { - unreachable_locations.push_back(&entry.second); + this->populate_reference_graph(); + this->populate_location_members(walker, resolver); + + for (const auto &entry : this->locations_) { + if (entry.second.type == LocationType::Pointer) { + continue; } - } - // --------------------------------------------------------------------------- - // (2) Filter out descendants that cross a container boundary - // --------------------------------------------------------------------------- + const auto &pointer{entry.second.pointer}; + const auto *location{&entry.second}; + std::vector sources; - if (base.orphan) { - std::vector> nested_entries; - for (const auto &entry : this->locations_) { - if (entry.second.type != LocationType::Subschema) { - continue; - } - const auto &pointer{entry.second.pointer}; - if (pointer == base.pointer || !pointer.starts_with(base.pointer)) { - continue; - } - if (!entry.second.parent.has_value()) { - continue; - } - const auto &parent_pointer{entry.second.parent.value()}; - const auto relative{pointer.slice(parent_pointer.size())}; - if (relative.empty() || !relative.at(0).is_property()) { - continue; - } - const auto parent_location{this->traverse(parent_pointer)}; - if (!parent_location.has_value()) { - continue; + WeakPointer ancestor = pointer; + bool first_iteration{true}; + while (first_iteration || !ancestor.empty()) { + auto destination_iterator = + this->references_by_destination_.find(std::cref(ancestor)); + if (destination_iterator != this->references_by_destination_.end()) { + bool crosses{false}; + if (ancestor != pointer) { + for (const auto &boundary_ref : this->location_members_children_) { + const auto &boundary{boundary_ref.get()}; + if (pointer.starts_with(boundary) && + !ancestor.starts_with(boundary)) { + crosses = true; + break; + } + } + } + + for (const auto *source_pointer : destination_iterator->second) { + sources.push_back( + PotentialSource{.source_pointer = source_pointer, + .source_parent = source_pointer->initial(), + .crosses = crosses}); + } } - const auto vocabularies{ - this->vocabularies(parent_location->get(), resolver)}; - const auto &keyword_result{ - walker(relative.at(0).to_property(), vocabularies)}; - if (keyword_result.type == SchemaKeywordType::LocationMembers) { - nested_entries.push_back(std::cref(pointer)); + + if (ancestor.empty()) { + break; } + ancestor = ancestor.initial(); + first_iteration = false; } - for (const auto &entry : this->locations_) { - if (entry.second.type == LocationType::Pointer) { - continue; - } - auto cache_iter = cache.find(std::cref(entry.second.pointer)); - if (cache_iter == cache.end() || !cache_iter->second) { - continue; - } - for (const auto &nested : nested_entries) { - if (entry.second.pointer.starts_with(nested.get())) { - cache_iter->second = false; - unreachable_locations.push_back(&entry.second); - break; - } - } + if (!sources.empty()) { + this->potential_sources_by_location_[location] = std::move(sources); } } +} - // --------------------------------------------------------------------------- - // (3) Build a reverse mapping from reference destinations to their sources - // --------------------------------------------------------------------------- +auto SchemaFrame::populate_reference_graph() const -> void { + if (!this->references_by_destination_.empty()) { + return; + } std::unordered_map> dynamic_anchors_by_fragment; @@ -1573,192 +1598,179 @@ auto SchemaFrame::populate_reachability(const Location &base, } } - std::unordered_map, - std::vector, WeakPointer::Hasher, - WeakPointer::Comparator> - references_by_destination; for (const auto &[source, destination] : reference_destinations) { - references_by_destination[std::cref(*destination)].push_back(source); + this->references_by_destination_[std::cref(*destination)].push_back(source); } +} - // --------------------------------------------------------------------------- - // (4) Precompute which references could make each orphan reachable - // --------------------------------------------------------------------------- - - std::unordered_set needed_pointers; - for (const auto *unreachable_location : unreachable_locations) { - needed_pointers.insert(unreachable_location->pointer); - const Location *loc{unreachable_location}; - while (loc != nullptr && loc->parent.has_value()) { - auto [iter, inserted] = needed_pointers.insert(loc->parent.value()); - if (!inserted) { - break; - } - loc = nullptr; - for (const auto &entry : this->locations_) { - if (entry.second.pointer == *iter) { - loc = &entry.second; - break; - } - } - } +auto SchemaFrame::populate_reachability_graph( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->reachability_graph_.empty()) { + return; } + this->populate_pointer_to_location(); + this->populate_location_members(walker, resolver); + this->populate_reference_graph(); + + // Build parent -> child edges for ALL location types (including Pointer) for (const auto &entry : this->locations_) { - if (needed_pointers.contains(entry.second.pointer)) { - this->pointer_to_location_[std::cref(entry.second.pointer)].push_back( - &entry.second); + if (entry.second.pointer.empty()) { + continue; } - } - - struct PotentialSource { - const WeakPointer *source_pointer; - bool crosses; - }; - struct PotentialReach { - const Location *location; - std::vector potential_sources; - }; - std::vector unreachable_with_sources; - unreachable_with_sources.reserve(unreachable_locations.size()); - - std::unordered_map vocabularies_cache; - - for (const auto *unreachable_location : unreachable_locations) { - const auto &pointer{unreachable_location->pointer}; - PotentialReach entry{.location = unreachable_location, - .potential_sources = {}}; - - WeakPointer ancestor = pointer; - bool first_iteration{true}; - while (first_iteration || !ancestor.empty()) { - auto destination_iterator = - references_by_destination.find(std::cref(ancestor)); - if (destination_iterator != references_by_destination.end()) { - bool crosses{false}; - if (ancestor != pointer) { - const Location *check_location{unreachable_location}; - while (check_location != nullptr) { - if (check_location->pointer == ancestor) { - break; - } - if (!check_location->parent.has_value()) { - break; - } - - const auto parent_location{ - this->traverse(check_location->parent.value())}; - if (!parent_location.has_value()) { - break; - } + const auto parent_pointer{entry.second.pointer.initial()}; + auto parent_iter = + this->pointer_to_location_.find(std::cref(parent_pointer)); + if (parent_iter == this->pointer_to_location_.end()) { + continue; + } - const auto relative{check_location->pointer.slice( - check_location->parent.value().size())}; - if (!relative.empty() && relative.at(0).is_property()) { - const auto &parent_loc{parent_location->get()}; - auto vocab_iterator = - vocabularies_cache.find(parent_loc.base_dialect); - if (vocab_iterator == vocabularies_cache.end()) { - auto [inserted_iterator, inserted] = vocabularies_cache.emplace( - parent_loc.base_dialect, - this->vocabularies(parent_loc, resolver)); - vocab_iterator = inserted_iterator; - } + // Add edge from ALL parent locations at this pointer path + // This ensures that regardless of which Location* we start BFS with, + // we'll find the edges + for (const Location *parent_location : parent_iter->second) { + this->reachability_graph_[parent_location].push_back( + ReachabilityEdge{.target = &entry.second, + .orphan_context_only = entry.second.orphan, + .is_reference = false}); + } + } - const auto &keyword_result{ - walker(relative.at(0).to_property(), vocab_iterator->second)}; - if (keyword_result.type == SchemaKeywordType::LocationMembers) { - crosses = true; - break; - } - } + // Build reference edges: source_parent -> destination + for (const auto &[destination_ref, sources] : + this->references_by_destination_) { + auto dest_locations_iter = this->pointer_to_location_.find(destination_ref); + if (dest_locations_iter == this->pointer_to_location_.end()) { + continue; + } - check_location = &parent_location->get(); - } - } + // Find any non-Pointer destination location (prefer Subschema/Resource) + const Location *dest_location{nullptr}; + for (const auto *loc : dest_locations_iter->second) { + if (loc->type != LocationType::Pointer) { + dest_location = loc; + break; + } + } + // If no non-Pointer location, use any location + if (!dest_location && !dest_locations_iter->second.empty()) { + dest_location = dest_locations_iter->second.front(); + } + if (!dest_location) { + continue; + } - for (const auto *source_pointer : destination_iterator->second) { - entry.potential_sources.push_back(PotentialSource{ - .source_pointer = source_pointer, .crosses = crosses}); - } + for (const auto *source_pointer : sources) { + if (source_pointer->empty()) { + continue; + } + const auto source_parent_pointer{source_pointer->initial()}; + auto source_parent_iter = + this->pointer_to_location_.find(std::cref(source_parent_pointer)); + if (source_parent_iter == this->pointer_to_location_.end()) { + continue; } - if (ancestor.empty()) { - break; + // Add reference edges from ALL source parent locations + for (const Location *source_parent_location : + source_parent_iter->second) { + this->reachability_graph_[source_parent_location].push_back( + ReachabilityEdge{.target = dest_location, + .orphan_context_only = false, + .is_reference = true}); } - ancestor = ancestor.initial(); - first_iteration = false; } + } +} - if (!entry.potential_sources.empty()) { - unreachable_with_sources.push_back(std::move(entry)); - } +auto SchemaFrame::populate_reachability(const Location &base, + const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> const ReachabilityCache & { + const ReachabilityKey key{.pointer = &base.pointer, .orphan = base.orphan}; + auto cache_iter = this->reachability_.find(key); + if (cache_iter != this->reachability_.end()) { + return cache_iter->second; } - std::ranges::sort(unreachable_with_sources, [](const PotentialReach &left, - const PotentialReach &right) { - return left.location->pointer.size() < right.location->pointer.size(); - }); + auto &cache = this->reachability_[key]; - // --------------------------------------------------------------------------- - // (5) Propagate reachability through references using fixpoint iteration - // --------------------------------------------------------------------------- + // Build the reachability graph (once per frame, shared across all bases) + this->populate_reachability_graph(walker, resolver); - bool changed{true}; - while (changed) { - changed = false; - std::vector> newly_reachable; + // Use the base location directly (it's a reference to an entry in locations_) + const Location *base_location{&base}; - auto write_iterator = unreachable_with_sources.begin(); - for (auto read_iterator = unreachable_with_sources.begin(); - read_iterator != unreachable_with_sources.end(); ++read_iterator) { - bool became_reachable = false; + // BFS queue and visited set + std::vector queue; + std::unordered_set visited; - for (const auto &potential_source : read_iterator->potential_sources) { - if (potential_source.crosses) { - continue; + // Helper lambda to mark all locations at a pointer path as reachable + auto mark_pointer_reachable = [this, &cache](const WeakPointer &pointer) { + auto locations_iter = this->pointer_to_location_.find(std::cref(pointer)); + if (locations_iter != this->pointer_to_location_.end()) { + for (const auto *loc : locations_iter->second) { + if (loc->type != LocationType::Pointer) { + cache.emplace(std::cref(loc->pointer), true); } + } + } + }; - const auto &source_parent{potential_source.source_pointer->initial()}; - const auto reachability_iterator{cache.find(std::cref(source_parent))}; - const bool source_parent_reachable{reachability_iterator != - cache.end() && - reachability_iterator->second}; + queue.push_back(base_location); + visited.insert(base_location); + mark_pointer_reachable(base_location->pointer); - if (source_parent_reachable) { - became_reachable = true; - break; - } - } + std::size_t queue_index{0}; + while (queue_index < queue.size()) { + const Location *current = queue[queue_index++]; - if (became_reachable) { - cache[std::cref(read_iterator->location->pointer)] = true; - newly_reachable.push_back(std::cref(read_iterator->location->pointer)); - changed = true; - } else { - if (write_iterator != read_iterator) { - *write_iterator = std::move(*read_iterator); - } - ++write_iterator; - } + auto edges_iter = this->reachability_graph_.find(current); + if (edges_iter == this->reachability_graph_.end()) { + continue; } - unreachable_with_sources.erase(write_iterator, - unreachable_with_sources.end()); - for (auto &[cache_pointer, cache_reachable] : cache) { - if (cache_reachable) { + for (const auto &edge : edges_iter->second) { + // Skip if already visited + if (visited.contains(edge.target)) { continue; } - if (!this->pointers_with_non_orphan_.contains(cache_pointer)) { + + // For orphan_context_only edges (hierarchical edges to orphan children): + // - From non-orphan base AND non-orphan current: block (orphans should be + // reached via references first) + // - From non-orphan base AND orphan current: allow (we've entered orphan + // context via reference, can reach non-nested orphan descendants) + // - From orphan base: allow (but filtered by nested boundary check below) + if (edge.orphan_context_only && !base.orphan && !current->orphan) { continue; } - for (const auto &reached : newly_reachable) { - if (cache_pointer.get().starts_with(reached.get())) { - cache_reachable = true; - break; + + // When traversing via a HIERARCHICAL edge to an orphan that's a direct + // child of a LocationMembers keyword ($defs entry), check if we're + // crossing into a nested orphan context. + // + // A target in location_members_children_ is under a LocationMembers + // keyword. Block if the keyword is at or under current (meaning we're + // either at the $defs trying to enter, or entering a nested $defs). + if (!edge.is_reference && edge.orphan_context_only) { + auto target_ref = this->location_members_children_.find( + std::cref(edge.target->pointer)); + if (target_ref != this->location_members_children_.end()) { + // Target is a child of a LocationMembers keyword + // The keyword is at target.pointer.initial() + const auto keyword_path{edge.target->pointer.initial()}; + // Block if keyword is at or under current + if (keyword_path.starts_with(current->pointer)) { + continue; + } } } + + visited.insert(edge.target); + queue.push_back(edge.target); + mark_pointer_reachable(edge.target->pointer); } } @@ -1771,8 +1783,9 @@ auto SchemaFrame::is_reachable(const Location &base, const Location &location, assert(location.type != LocationType::Pointer); const auto &cache{this->populate_reachability(base, walker, resolver)}; const auto iterator{cache.find(std::cref(location.pointer))}; - assert(iterator != cache.end()); - return iterator->second; + // With the new graph-based approach, only reachable locations are in the + // cache. If not found, the location is unreachable. + return iterator != cache.end() && iterator->second; } } // namespace sourcemeta::core diff --git a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index 954860f69..cd3b52bfd 100644 --- a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -268,11 +268,63 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { using ReachabilityCache = std::unordered_map, bool, WeakPointer::Hasher, WeakPointer::Comparator>; - mutable std::vector> + struct ReachabilityKey { + const WeakPointer *pointer; + bool orphan; + auto operator==(const ReachabilityKey &other) const noexcept -> bool { + return this->pointer == other.pointer && this->orphan == other.orphan; + } + }; + struct ReachabilityKeyHasher { + auto operator()(const ReachabilityKey &key) const noexcept -> std::size_t { + return std::hash{}(key.pointer) ^ + (std::hash{}(key.orphan) << 1); + } + }; + mutable std::unordered_map reachability_; + mutable std::unordered_map, + std::vector, + WeakPointer::Hasher, WeakPointer::Comparator> + references_by_destination_; + mutable std::unordered_set, + WeakPointer::Hasher, WeakPointer::Comparator> + location_members_children_; + mutable std::unordered_map, + std::vector, WeakPointer::Hasher, + WeakPointer::Comparator> + descendants_by_pointer_; + struct PotentialSource { + const WeakPointer *source_pointer; + WeakPointer source_parent; + bool crosses; + }; + mutable std::unordered_map> + potential_sources_by_location_; + // Reachability graph: edges represent "if source is reachable, target + // becomes reachable" + struct ReachabilityEdge { + const Location *target; + // If true, this edge leads to an orphan location (child of LocationMembers) + bool orphan_context_only; + // If true, this is a reference edge (not a hierarchical parent-child edge) + bool is_reference; + }; + mutable std::unordered_map> + reachability_graph_; bool standalone_{false}; auto populate_pointer_to_location() const -> void; + auto populate_reference_graph() const -> void; + auto populate_location_members(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_descendants() const -> void; + auto populate_potential_sources(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_reachability_graph(const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> void; auto populate_reachability(const Location &base, const SchemaWalker &walker, const SchemaResolver &resolver) const -> const ReachabilityCache &; From 38fe8f2c6c7316da60c9e07bb3545e52b2aabe33 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 4 Feb 2026 16:29:43 -0400 Subject: [PATCH 2/2] Cleanup Signed-off-by: Juan Cruz Viotti --- src/core/jsonschema/frame.cc | 100 +++++++----------- .../sourcemeta/core/jsonschema_frame.h | 4 - 2 files changed, 36 insertions(+), 68 deletions(-) diff --git a/src/core/jsonschema/frame.cc b/src/core/jsonschema/frame.cc index f0b7dea3b..72ef00cd6 100644 --- a/src/core/jsonschema/frame.cc +++ b/src/core/jsonschema/frame.cc @@ -1613,23 +1613,19 @@ auto SchemaFrame::populate_reachability_graph( this->populate_location_members(walker, resolver); this->populate_reference_graph(); - // Build parent -> child edges for ALL location types (including Pointer) for (const auto &entry : this->locations_) { if (entry.second.pointer.empty()) { continue; } const auto parent_pointer{entry.second.pointer.initial()}; - auto parent_iter = + auto parent_iterator = this->pointer_to_location_.find(std::cref(parent_pointer)); - if (parent_iter == this->pointer_to_location_.end()) { + if (parent_iterator == this->pointer_to_location_.end()) { continue; } - // Add edge from ALL parent locations at this pointer path - // This ensures that regardless of which Location* we start BFS with, - // we'll find the edges - for (const Location *parent_location : parent_iter->second) { + for (const Location *parent_location : parent_iterator->second) { this->reachability_graph_[parent_location].push_back( ReachabilityEdge{.target = &entry.second, .orphan_context_only = entry.second.orphan, @@ -1637,27 +1633,28 @@ auto SchemaFrame::populate_reachability_graph( } } - // Build reference edges: source_parent -> destination - for (const auto &[destination_ref, sources] : + for (const auto &[destination_reference, sources] : this->references_by_destination_) { - auto dest_locations_iter = this->pointer_to_location_.find(destination_ref); - if (dest_locations_iter == this->pointer_to_location_.end()) { + auto destination_locations_iterator = + this->pointer_to_location_.find(destination_reference); + if (destination_locations_iterator == this->pointer_to_location_.end()) { continue; } - // Find any non-Pointer destination location (prefer Subschema/Resource) - const Location *dest_location{nullptr}; - for (const auto *loc : dest_locations_iter->second) { - if (loc->type != LocationType::Pointer) { - dest_location = loc; + const Location *destination_location{nullptr}; + for (const auto *location : destination_locations_iterator->second) { + if (location->type != LocationType::Pointer) { + destination_location = location; break; } } - // If no non-Pointer location, use any location - if (!dest_location && !dest_locations_iter->second.empty()) { - dest_location = dest_locations_iter->second.front(); + + if (!destination_location && + !destination_locations_iterator->second.empty()) { + destination_location = destination_locations_iterator->second.front(); } - if (!dest_location) { + + if (!destination_location) { continue; } @@ -1665,18 +1662,18 @@ auto SchemaFrame::populate_reachability_graph( if (source_pointer->empty()) { continue; } + const auto source_parent_pointer{source_pointer->initial()}; - auto source_parent_iter = + auto source_parent_iterator = this->pointer_to_location_.find(std::cref(source_parent_pointer)); - if (source_parent_iter == this->pointer_to_location_.end()) { + if (source_parent_iterator == this->pointer_to_location_.end()) { continue; } - // Add reference edges from ALL source parent locations for (const Location *source_parent_location : - source_parent_iter->second) { + source_parent_iterator->second) { this->reachability_graph_[source_parent_location].push_back( - ReachabilityEdge{.target = dest_location, + ReachabilityEdge{.target = destination_location, .orphan_context_only = false, .is_reference = true}); } @@ -1689,30 +1686,24 @@ auto SchemaFrame::populate_reachability(const Location &base, const SchemaResolver &resolver) const -> const ReachabilityCache & { const ReachabilityKey key{.pointer = &base.pointer, .orphan = base.orphan}; - auto cache_iter = this->reachability_.find(key); - if (cache_iter != this->reachability_.end()) { - return cache_iter->second; + auto cache_iterator = this->reachability_.find(key); + if (cache_iterator != this->reachability_.end()) { + return cache_iterator->second; } auto &cache = this->reachability_[key]; - - // Build the reachability graph (once per frame, shared across all bases) this->populate_reachability_graph(walker, resolver); - - // Use the base location directly (it's a reference to an entry in locations_) const Location *base_location{&base}; - - // BFS queue and visited set std::vector queue; std::unordered_set visited; - // Helper lambda to mark all locations at a pointer path as reachable auto mark_pointer_reachable = [this, &cache](const WeakPointer &pointer) { - auto locations_iter = this->pointer_to_location_.find(std::cref(pointer)); - if (locations_iter != this->pointer_to_location_.end()) { - for (const auto *loc : locations_iter->second) { - if (loc->type != LocationType::Pointer) { - cache.emplace(std::cref(loc->pointer), true); + auto locations_iterator = + this->pointer_to_location_.find(std::cref(pointer)); + if (locations_iterator != this->pointer_to_location_.end()) { + for (const auto *location : locations_iterator->second) { + if (location->type != LocationType::Pointer) { + cache.emplace(std::cref(location->pointer), true); } } } @@ -1726,42 +1717,25 @@ auto SchemaFrame::populate_reachability(const Location &base, while (queue_index < queue.size()) { const Location *current = queue[queue_index++]; - auto edges_iter = this->reachability_graph_.find(current); - if (edges_iter == this->reachability_graph_.end()) { + auto edges_iterator = this->reachability_graph_.find(current); + if (edges_iterator == this->reachability_graph_.end()) { continue; } - for (const auto &edge : edges_iter->second) { - // Skip if already visited + for (const auto &edge : edges_iterator->second) { if (visited.contains(edge.target)) { continue; } - // For orphan_context_only edges (hierarchical edges to orphan children): - // - From non-orphan base AND non-orphan current: block (orphans should be - // reached via references first) - // - From non-orphan base AND orphan current: allow (we've entered orphan - // context via reference, can reach non-nested orphan descendants) - // - From orphan base: allow (but filtered by nested boundary check below) if (edge.orphan_context_only && !base.orphan && !current->orphan) { continue; } - // When traversing via a HIERARCHICAL edge to an orphan that's a direct - // child of a LocationMembers keyword ($defs entry), check if we're - // crossing into a nested orphan context. - // - // A target in location_members_children_ is under a LocationMembers - // keyword. Block if the keyword is at or under current (meaning we're - // either at the $defs trying to enter, or entering a nested $defs). if (!edge.is_reference && edge.orphan_context_only) { - auto target_ref = this->location_members_children_.find( + auto target_iterator = this->location_members_children_.find( std::cref(edge.target->pointer)); - if (target_ref != this->location_members_children_.end()) { - // Target is a child of a LocationMembers keyword - // The keyword is at target.pointer.initial() + if (target_iterator != this->location_members_children_.end()) { const auto keyword_path{edge.target->pointer.initial()}; - // Block if keyword is at or under current if (keyword_path.starts_with(current->pointer)) { continue; } @@ -1783,8 +1757,6 @@ auto SchemaFrame::is_reachable(const Location &base, const Location &location, assert(location.type != LocationType::Pointer); const auto &cache{this->populate_reachability(base, walker, resolver)}; const auto iterator{cache.find(std::cref(location.pointer))}; - // With the new graph-based approach, only reachable locations are in the - // cache. If not found, the location is unreachable. return iterator != cache.end() && iterator->second; } diff --git a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index cd3b52bfd..f18a5f1c5 100644 --- a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -302,13 +302,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { }; mutable std::unordered_map> potential_sources_by_location_; - // Reachability graph: edges represent "if source is reachable, target - // becomes reachable" struct ReachabilityEdge { const Location *target; - // If true, this edge leads to an orphan location (child of LocationMembers) bool orphan_context_only; - // If true, this is a reference edge (not a hierarchical parent-child edge) bool is_reference; }; mutable std::unordered_map>