From 1bc96034c88d7399269d5c7f2f7d359c1499ae39 Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:32:33 +0300 Subject: [PATCH 1/2] feat: add parser of simplified sparql --- AGENTS.md | 84 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index c20001e..a8d713b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,7 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Public modules: formats, graph, sparql, lagraph_sys, utils +│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) @@ -121,15 +121,15 @@ regenerates it with `--features regenerate-bindings`. **Do not hand-edit this fi ### Edge -[`Edge`](src/graph/mod.rs:154) is the universal currency between format parsers and graph +[`Edge`](src/graph/mod.rs:158) is the universal currency between format parsers and graph builders: `{ source: String, target: String, label: String }`. ### GraphSource trait -[`GraphSource`](src/graph/mod.rs:164) is implemented by any data source that knows how to +[`GraphSource`](src/graph/mod.rs:168) is implemented by any data source that knows how to feed itself into a specific [`GraphBuilder`]: -- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:165) — consumes the +- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:169) — consumes the source and returns the populated builder. [`Csv`](src/formats/csv.rs:52) implements `GraphSource` directly, so it @@ -137,24 +137,24 @@ can be passed to [`GraphBuilder::load`]. 
### GraphBuilder trait -[`GraphBuilder`](src/graph/mod.rs:169) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:188): +[`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a +[`GraphDecomposition`](src/graph/mod.rs:192): -- [`load>(self, source: S)`](src/graph/mod.rs:179) — primary entry point; +- [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. -- [`build(self)`](src/graph/mod.rs:184) — finalise into an immutable graph. +- [`build(self)`](src/graph/mod.rs:188) — finalise into an immutable graph. `InMemoryBuilder` also exposes lower-level helpers outside the trait: -- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:62) — ingest one edge. -- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:72) — consume an +- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:83) — ingest one edge. +- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:93) — consume an `IntoIterator>`. -- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:85) — accept +- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:106) — accept a pre-built `GrB_Matrix` for a label, wrapping it in an `LAGraph_Graph` immediately. ### Backend trait & Graph\ handle -[`Backend`](src/graph/mod.rs:217) associates a marker type with a concrete builder/graph pair: +[`Backend`](src/graph/mod.rs:221) associates a marker type with a concrete builder/graph pair: ```rust pub trait Backend { @@ -163,28 +163,28 @@ pub trait Backend { } ``` -[`Graph`](src/graph/mod.rs:229) is a zero-sized handle parameterised by a `Backend`: +[`Graph`](src/graph/mod.rs:233) is a zero-sized handle parameterised by a `Backend`: -- [`Graph::::builder()`](src/graph/mod.rs:234) — returns a fresh `InMemoryBuilder`. 
-- [`Graph::::try_from(source)`](src/graph/mod.rs:238) — builds a graph from a single +- [`Graph::::builder()`](src/graph/mod.rs:238) — returns a fresh `InMemoryBuilder`. +- [`Graph::::try_from(source)`](src/graph/mod.rs:242) — builds a graph from a single source in one call. -[`InMemory`](src/graph/inmemory.rs:26) is the concrete backend marker type. +[`InMemory`](src/graph/inmemory.rs:27) is the concrete backend marker type. ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:188) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:192) — returns `Arc` for a given edge label. -- [`get_node_id(string_id)`](src/graph/mod.rs:195) / [`get_node_name(mapped_id)`](src/graph/mod.rs:198) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:199) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. +- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. +- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. ### InMemoryBuilder / InMemoryGraph -[`InMemoryBuilder`](src/graph/inmemory.rs:35) is the primary `GraphBuilder` implementation. +[`InMemoryBuilder`](src/graph/inmemory.rs:36) is the primary `GraphBuilder` implementation. It collects edges in RAM, then [`build()`](src/graph/inmemory.rs:131) calls GraphBLAS to create one `GrB_Matrix` per label via COO format, wraps each in an -`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:173). +`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:174). Multiple CSV sources can be chained with repeated `.load()` calls; all edges are merged into a single graph. @@ -196,7 +196,7 @@ which is used by the MatrixMarket loader. 
### Format parsers -Two built-in parsers are available: +CSV and MatrixMarket edge loaders are available: #### CSV format @@ -272,6 +272,42 @@ The module also handles spargebra's desugaring of sequence paths (`?x // ?y`) from a chain of BGP triples back into a single [`PropertyPathExpression::Sequence`]. +### SPARQL parsing (`src/sparql/mod.rs`) + +The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) +crate to parse SPARQL 1.1 query strings and extract the single property-path +triple pattern that pathrex's RPQ evaluators operate on. + +**Supported query form:** `SELECT` queries with exactly one triple or property +path pattern in the `WHERE` clause, e.g.: + +```sparql +SELECT ?x ?y WHERE { ?x /* ?y . } +``` + +Key public items: + +- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a + [`spargebra::Query`]. +- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a + `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). +- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining + `parse_query` + `extract_path` in one call. +- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` + ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` + ([`TermPattern`]). +- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures + (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, + `VariablePredicate`). +- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] + wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. +- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the + default base IRI constant. + +The module also handles spargebra's desugaring of sequence paths +(`?x // ?y`) from a chain of BGP triples back into a single +[`PropertyPathExpression::Sequence`]. 
+ ### FFI layer [`lagraph_sys`](src/lagraph_sys.rs) exposes raw C bindings for GraphBLAS and @@ -280,7 +316,7 @@ LAGraph. Safe Rust wrappers live in [`graph::mod`](src/graph/mod.rs): - [`LagraphGraph`](src/graph/mod.rs:48) — RAII wrapper around `LAGraph_Graph` (calls `LAGraph_Delete` on drop). Also provides [`LagraphGraph::from_coo()`](src/graph/mod.rs:85) to build directly from COO arrays. -- [`GraphblasVector`](src/graph/mod.rs:124) — RAII wrapper around `GrB_Vector`. +- [`GraphblasVector`](src/graph/mod.rs:128) — RAII wrapper around `GrB_Vector`. - [`ensure_grb_init()`](src/graph/mod.rs:39) — one-time `LAGraph_Init` via `std::sync::Once`. ### Macros (`src/utils.rs`) From df016c2d378a91955865f35e6718a2676f0497d4 Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:49:25 +0300 Subject: [PATCH 2/2] feat: add rpq evaluation abstraction, implement with LAGraph_RegularPathQuery --- AGENTS.md | 114 ++++++---- build.rs | 1 + src/graph/mod.rs | 1 + src/lagraph_sys_generated.rs | 34 +++ src/lib.rs | 3 +- src/rpq/mod.rs | 54 +++++ src/rpq/nfarpq.rs | 409 +++++++++++++++++++++++++++++++++++ src/utils.rs | 20 ++ tests/inmemory_tests.rs | 23 +- tests/nfarpq_tests.rs | 363 +++++++++++++++++++++++++++++++ 10 files changed, 952 insertions(+), 70 deletions(-) create mode 100644 src/rpq/mod.rs create mode 100644 src/rpq/nfarpq.rs create mode 100644 tests/nfarpq_tests.rs diff --git a/AGENTS.md b/AGENTS.md index a8d713b..10bd347 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,16 +14,19 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys +│ ├── lib.rs # Modules: formats, graph, rpq, sparql, utils, lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) 
-│ ├── utils.rs # Internal helpers: CountingBuilder, CountOutput, VecSource, -│ │ # grb_ok! and la_ok! macros +│ ├── utils.rs # Public helpers: CountingBuilder, CountOutput, VecSource, +│ │ # grb_ok! and la_ok! macros, build_graph │ ├── graph/ │ │ ├── mod.rs # Core traits (GraphBuilder, GraphDecomposition, GraphSource, │ │ │ # Backend, Graph), error types, RAII wrappers, GrB init │ │ └── inmemory.rs # InMemory marker, InMemoryBuilder, InMemoryGraph +│ ├── rpq/ +│ │ ├── mod.rs # RPQ evaluation trait (RpqEvaluator), RpqResult, RpqError +│ │ └── nfarpq.rs # NFA-based RPQ evaluator using LAGraph_RegularPathQuery │ ├── sparql/ │ │ └── mod.rs # SPARQL parsing (spargebra), PathTriple extraction, parse_rpq │ └── formats/ @@ -32,7 +35,8 @@ pathrex/ │ └── mm.rs # MatrixMarket directory loader (vertices.txt, edges.txt, *.txt) ├── tests/ │ ├── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph -│ └── mm_tests.rs # Integration tests for MatrixMarket format +│ ├── mm_tests.rs # Integration tests for MatrixMarket format +│ └── nfarpq_tests.rs # Integration tests for NfaRpqEvaluator ├── deps/ │ └── LAGraph/ # Git submodule (SparseLinearAlgebra/LAGraph) └── .github/workflows/ci.yml # CI: build GraphBLAS + LAGraph, cargo build & test @@ -138,7 +142,7 @@ can be passed to [`GraphBuilder::load`]. ### GraphBuilder trait [`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:192): +[`GraphDecomposition`](src/graph/mod.rs:193): - [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. @@ -173,11 +177,11 @@ pub trait Backend { ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:193) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. 
-- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:197) — returns `Arc` for a given edge label. +- [`get_node_id(string_id)`](src/graph/mod.rs:200) / [`get_node_name(mapped_id)`](src/graph/mod.rs:203) — bidirectional string ↔ integer dictionary. +- [`num_nodes()`](src/graph/mod.rs:204) — total unique nodes. ### InMemoryBuilder / InMemoryGraph @@ -220,7 +224,7 @@ Name-based lookup requires `has_header: true`. [`MatrixMarket`](src/formats/mm.rs:159) loads an edge-labeled graph from a directory with: -- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:199) returns the matching **0-based** matrix index +- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:200) returns the matching **0-based** matrix index - `edges.txt` — one line per label: ` <1-based-index>` (selects `n.txt`) - `.txt` — MatrixMarket adjacency matrix for label with index `n` @@ -272,41 +276,42 @@ The module also handles spargebra's desugaring of sequence paths (`?x // ?y`) from a chain of BGP triples back into a single [`PropertyPathExpression::Sequence`]. -### SPARQL parsing (`src/sparql/mod.rs`) +### RPQ evaluation (`src/rpq/`) -The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) -crate to parse SPARQL 1.1 query strings and extract the single property-path -triple pattern that pathrex's RPQ evaluators operate on. +The [`rpq`](src/rpq/mod.rs) module provides an abstraction for evaluating +Regular Path Queries (RPQs) over edge-labeled graphs using GraphBLAS/LAGraph. -**Supported query form:** `SELECT` queries with exactly one triple or property -path pattern in the `WHERE` clause, e.g.: +Key public items: -```sparql -SELECT ?x ?y WHERE { ?x /* ?y . 
} -``` +- [`RpqEvaluator`](src/rpq/mod.rs:47) — trait with a single method + [`evaluate(subject, path, object, graph)`](src/rpq/mod.rs:48) that takes + SPARQL [`TermPattern`] endpoints, a [`PropertyPathExpression`] path, and a + [`GraphDecomposition`], returning an [`RpqResult`](src/rpq/mod.rs:42). +- [`RpqResult`](src/rpq/mod.rs:42) — wraps a [`GraphblasVector`] of reachable + vertices. +- [`RpqError`](src/rpq/mod.rs:21) — error enum covering parse errors, extraction + errors, unsupported paths, missing labels/vertices, and GraphBLAS failures. -Key public items: +#### `NfaRpqEvaluator` (`src/rpq/nfarpq.rs`) -- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a - [`spargebra::Query`]. -- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a - `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). -- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining - `parse_query` + `extract_path` in one call. -- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` - ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` - ([`TermPattern`]). -- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures - (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, - `VariablePredicate`). -- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] - wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. -- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the - default base IRI constant. +[`NfaRpqEvaluator`](src/rpq/nfarpq.rs:265) implements [`RpqEvaluator`] by: -The module also handles spargebra's desugaring of sequence paths -(`?x // ?y`) from a chain of BGP triples back into a single -[`PropertyPathExpression::Sequence`]. +1. 
Converting a [`PropertyPathExpression`] into an [`Nfa`](src/rpq/nfarpq.rs:27) + via Thompson's construction ([`Nfa::from_property_path()`](src/rpq/nfarpq.rs:35)). +2. Eliminating ε-transitions via epsilon closure + ([`NfaBuilder::epsilon_closure()`](src/rpq/nfarpq.rs:198)). +3. Building one `LAGraph_Graph` per NFA label transition + ([`Nfa::build_lagraph_matrices()`](src/rpq/nfarpq.rs:43)). +4. Calling [`LAGraph_RegularPathQuery`] with the NFA matrices, data-graph + matrices, start/final states, and source vertices. + +Supported path operators: `NamedNode`, `Sequence`, `Alternative`, +`ZeroOrMore`, `OneOrMore`, `ZeroOrOne`. `Reverse` and `NegatedPropertySet` +return [`RpqError::UnsupportedPath`]. + +Subject/object resolution: a [`TermPattern::Variable`] means "all vertices"; +a [`TermPattern::NamedNode`] resolves to a single vertex via +[`GraphDecomposition::get_node_id()`](src/graph/mod.rs:200). Only the subject restricts the query: a named subject becomes the single source vertex, whereas the object is only validated and does not yet filter the returned vector. ### FFI layer @@ -316,10 +321,11 @@ LAGraph. Safe Rust wrappers live in [`graph::mod`](src/graph/mod.rs): - [`LagraphGraph`](src/graph/mod.rs:48) — RAII wrapper around `LAGraph_Graph` (calls `LAGraph_Delete` on drop). Also provides [`LagraphGraph::from_coo()`](src/graph/mod.rs:85) to build directly from COO arrays. -- [`GraphblasVector`](src/graph/mod.rs:128) — RAII wrapper around `GrB_Vector`. +- [`GraphblasVector`](src/graph/mod.rs:128) — RAII wrapper around `GrB_Vector` + (derives `Debug`). - [`ensure_grb_init()`](src/graph/mod.rs:39) — one-time `LAGraph_Init` via `std::sync::Once`. -### Macros (`src/utils.rs`) +### Macros & helpers (`src/utils.rs`) Two `#[macro_export]` macros handle FFI error mapping: @@ -329,20 +335,28 @@ Two `#[macro_export]` macros handle FFI error mapping: appending the required `*mut i8` message buffer, and maps failure to `GraphError::LAGraph(info, msg)`. +A convenience function is also provided: + +- [`build_graph(edges)`](src/utils.rs:184) — builds an `InMemoryGraph` from a + slice of `(&str, &str, &str)` triples (source, target, label). 
Used by + integration tests. + ## Coding Conventions - **Rust edition 2024**. -- Error handling via `thiserror` derive macros; two main error enums: - [`GraphError`](src/graph/mod.rs:15) and [`FormatError`](src/formats/mod.rs:24). +- Error handling via `thiserror` derive macros; three main error enums: + [`GraphError`](src/graph/mod.rs:15), [`FormatError`](src/formats/mod.rs:24), + and [`RpqError`](src/rpq/mod.rs:21). - `FormatError` converts into `GraphError` via `#[from] FormatError` on the `GraphError::Format` variant. -- Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, and - `graph/inmemory.rs`. All raw pointers are wrapped in RAII types that free - resources on drop. +- Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, + `graph/inmemory.rs`, and `rpq/nfarpq.rs`. All raw pointers are wrapped in + RAII types that free resources on drop. - `unsafe impl Send + Sync` is provided for `LagraphGraph` and `GraphblasVector` because GraphBLAS handles are thread-safe after init. - Unit tests live in `#[cfg(test)] mod tests` blocks inside each module. - Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs). + Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), + [`tests/mm_tests.rs`](tests/mm_tests.rs), and [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs). ## Testing @@ -362,7 +376,11 @@ Tests in `src/formats/csv.rs` are pure Rust and need no native dependencies. Tests in `src/sparql/mod.rs` are pure Rust and need no native dependencies. -Tests in `src/graph/inmemory.rs` and [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs) +Tests in `src/rpq/nfarpq.rs` (NFA construction unit tests) are pure Rust and need no +native dependencies. 
+ +Tests in `src/graph/inmemory.rs`, [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), +[`tests/mm_tests.rs`](tests/mm_tests.rs), and [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs) call real GraphBLAS/LAGraph and require the native libraries to be present. ## CI diff --git a/build.rs b/build.rs index 475046e..4e85f0d 100644 --- a/build.rs +++ b/build.rs @@ -83,6 +83,7 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_Delete") .allowlist_function("LAGraph_Cached_AT") .allowlist_function("LAGraph_MMRead") + .allowlist_function("LAGraph_RegularPathQuery") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/mod.rs b/src/graph/mod.rs index d096b6f..05b0125 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -125,6 +125,7 @@ impl Drop for LagraphGraph { unsafe impl Send for LagraphGraph {} unsafe impl Sync for LagraphGraph {} +#[derive(Debug)] pub struct GraphblasVector { pub inner: GrB_Vector, } diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index 3201d28..601acc5 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -261,3 +261,37 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +unsafe extern "C" { + pub fn LAGraph_RegularPathQuery( + reachable: *mut GrB_Vector, + R: *mut LAGraph_Graph, + nl: usize, + QS: *const GrB_Index, + nqs: usize, + QF: *const GrB_Index, + nqf: usize, + G: *mut LAGraph_Graph, + S: *const GrB_Index, + ns: usize, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum RPQMatrixOp { + RPQ_MATRIX_OP_LABEL = 0, + RPQ_MATRIX_OP_LOR = 1, + RPQ_MATRIX_OP_CONCAT = 2, + RPQ_MATRIX_OP_KLEENE = 3, + RPQ_MATRIX_OP_KLEENE_L = 4, + RPQ_MATRIX_OP_KLEENE_R = 5, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct RPQMatrixPlan { + pub op: RPQMatrixOp, + pub lhs: *mut RPQMatrixPlan, + pub rhs: *mut RPQMatrixPlan, + 
pub mat: GrB_Matrix, + pub res_mat: GrB_Matrix, +} diff --git a/src/lib.rs b/src/lib.rs index 0d11b1f..0f89008 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,8 @@ pub mod formats; pub mod graph; +pub mod rpq; pub mod sparql; #[allow(unused_unsafe, dead_code)] -pub(crate) mod utils; +pub mod utils; pub mod lagraph_sys; diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs new file mode 100644 index 0000000..df17765 --- /dev/null +++ b/src/rpq/mod.rs @@ -0,0 +1,54 @@ +//! Regular Path Query (RPQ) evaluation over edge-labeled graphs. +//! ```rust,ignore +//! use pathrex::sparql::parse_rpq; +//! use pathrex::rpq::{RpqEvaluator, nfarpq::NfaRpqEvaluator}; +//! +//! let triple = parse_rpq("SELECT ?x ?y WHERE { ?x /* ?y . }")?; +//! let result = NfaRpqEvaluator.evaluate(&triple.subject, &triple.path, &triple.object, &graph)?; +//! ``` + +pub mod nfarpq; + +use crate::graph::GraphDecomposition; +use crate::graph::GraphblasVector; +use crate::sparql::ExtractError; +use spargebra::SparqlSyntaxError; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum RpqError { + #[error("SPARQL syntax error: {0}")] + Parse(#[from] SparqlSyntaxError), + + #[error("query extraction error: {0}")] + Extract(#[from] ExtractError), + + #[error("unsupported path expression: {0}")] + UnsupportedPath(String), + + #[error("label not found in graph: '{0}'")] + LabelNotFound(String), + + #[error("vertex not found in graph: '{0}'")] + VertexNotFound(String), + + #[error("GraphBLAS/LAGraph error: {0}")] + GraphBlas(String), +} + +#[derive(Debug)] +pub struct RpqResult { + pub reachable: GraphblasVector, +} + +pub trait RpqEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result; +} diff --git a/src/rpq/nfarpq.rs b/src/rpq/nfarpq.rs new file mode 100644 index 0000000..b50625f --- /dev/null +++ b/src/rpq/nfarpq.rs @@ -0,0 +1,409 @@ 
+//! NFA-based RPQ evaluation using `LAGraph_RegularPathQuery`. + +use crate::graph::{ + GraphDecomposition, GraphError, GraphblasVector, LagraphGraph, ensure_grb_init, +}; +use crate::grb_ok; +use crate::la_ok; +use crate::lagraph_sys::*; +use crate::lagraph_sys::{GrB_BOOL, GrB_LOR, GrB_Matrix_build_BOOL, GrB_Matrix_new, LAGraph_Kind}; +use crate::rpq::{RpqError, RpqEvaluator, RpqResult}; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; +use std::collections::{HashMap, HashSet, VecDeque}; + +/// Transitions for a single edge label in the NFA. +/// +/// `rows[i]` and `cols[i]` form a parallel pair: there is a transition from +/// state `rows[i]` to state `cols[i]` on `label`. +#[derive(Debug, Clone)] +pub struct NfaLabelTransitions { + pub label: String, + pub rows: Vec, + pub cols: Vec, +} + +#[derive(Debug, Clone)] +pub struct Nfa { + pub num_states: usize, + pub start_states: Vec, + pub final_states: Vec, + pub transitions: Vec, +} + +impl Nfa { + pub fn from_property_path(path: &PropertyPathExpression) -> Result { + let mut builder = NfaBuilder::new(); + let (start, end) = builder.build(path)?; + builder.mark_start(start); + builder.mark_final(end); + Ok(builder.into_nfa()) + } + + pub fn build_lagraph_matrices(&self) -> Result, RpqError> { + ensure_grb_init().map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + let n = self.num_states as GrB_Index; + let mut result = Vec::with_capacity(self.transitions.len()); + + for trans in &self.transitions { + let mut mat: GrB_Matrix = std::ptr::null_mut(); + grb_ok!(GrB_Matrix_new(&mut mat, GrB_BOOL, n, n)) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + + if !trans.rows.is_empty() { + let vals: Vec = vec![true; trans.rows.len()]; + grb_ok!(GrB_Matrix_build_BOOL( + mat, + trans.rows.as_ptr(), + trans.cols.as_ptr(), + vals.as_ptr(), + trans.rows.len() as u64, + GrB_LOR, + )) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + } + + let lg = 
LagraphGraph::new(mat, LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED) + .map_err(|e| RpqError::GraphBlas(format!("{e}")))?; + result.push((trans.label.clone(), lg)); + } + + Ok(result) + } +} + +#[derive(Debug, Clone)] +struct Transition { + from: usize, + to: usize, + label: Option, +} + +struct NfaBuilder { + num_states: usize, + transitions: Vec, + start_states: Vec, + final_states: Vec, +} + +impl NfaBuilder { + fn new() -> Self { + Self { + num_states: 0, + transitions: Vec::new(), + start_states: Vec::new(), + final_states: Vec::new(), + } + } + + fn new_state(&mut self) -> usize { + let s = self.num_states; + self.num_states += 1; + s + } + + fn add_epsilon(&mut self, from: usize, to: usize) { + self.transitions.push(Transition { + from, + to, + label: None, + }); + } + + fn add_label(&mut self, from: usize, to: usize, label: String) { + self.transitions.push(Transition { + from, + to, + label: Some(label), + }); + } + + fn mark_start(&mut self, s: usize) { + self.start_states.push(s); + } + + fn mark_final(&mut self, s: usize) { + self.final_states.push(s); + } + + fn build(&mut self, path: &PropertyPathExpression) -> Result<(usize, usize), RpqError> { + match path { + PropertyPathExpression::NamedNode(nn) => { + let s = self.new_state(); + let e = self.new_state(); + self.add_label(s, e, nn.as_str().to_owned()); + Ok((s, e)) + } + + PropertyPathExpression::Sequence(lhs, rhs) => { + let (ls, le) = self.build(lhs)?; + let (rs, re) = self.build(rhs)?; + self.add_epsilon(le, rs); + Ok((ls, re)) + } + + PropertyPathExpression::Alternative(lhs, rhs) => { + let s = self.new_state(); + let e = self.new_state(); + let (ls, le) = self.build(lhs)?; + let (rs, re) = self.build(rhs)?; + self.add_epsilon(s, ls); + self.add_epsilon(s, rs); + self.add_epsilon(le, e); + self.add_epsilon(re, e); + Ok((s, e)) + } + + PropertyPathExpression::ZeroOrMore(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, 
is); + self.add_epsilon(ie, is); + self.add_epsilon(ie, e); + self.add_epsilon(s, e); + Ok((s, e)) + } + + PropertyPathExpression::OneOrMore(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, is); + self.add_epsilon(ie, is); + self.add_epsilon(ie, e); + Ok((s, e)) + } + + PropertyPathExpression::ZeroOrOne(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, is); + self.add_epsilon(ie, e); + self.add_epsilon(s, e); + Ok((s, e)) + } + + PropertyPathExpression::Reverse(_) => Err(RpqError::UnsupportedPath( + "Reverse paths are not supported".into(), + )), + + PropertyPathExpression::NegatedPropertySet(_) => Err(RpqError::UnsupportedPath( + "NegatedPropertySet paths are not supported".into(), + )), + } + } + + fn epsilon_closure(&self, states: &[usize]) -> HashSet { + let mut closure: HashSet = states.iter().copied().collect(); + let mut queue: VecDeque = states.iter().copied().collect(); + while let Some(s) = queue.pop_front() { + for t in &self.transitions { + if t.from == s && t.label.is_none() && !closure.contains(&t.to) { + closure.insert(t.to); + queue.push_back(t.to); + } + } + } + closure + } + + fn into_nfa(self) -> Nfa { + let n = self.num_states; + + let closures: Vec> = (0..n).map(|s| self.epsilon_closure(&[s])).collect(); + + let mut label_map: HashMap> = HashMap::new(); + for from in 0..n { + for t in &self.transitions { + if t.from == from { + if let Some(label) = &t.label { + for &cf in &closures[from] { + for &ct in &closures[t.to] { + label_map.entry(label.clone()).or_default().push((cf, ct)); + } + } + } + } + } + } + + let start_closure = self.epsilon_closure(&self.start_states); + let start_states: Vec = + start_closure.into_iter().map(|s| s as GrB_Index).collect(); + + let final_set: HashSet = self.final_states.iter().copied().collect(); + let final_states: Vec = (0..n) + .filter(|s| 
closures[*s].iter().any(|c| final_set.contains(c))) + .map(|s| s as GrB_Index) + .collect(); + + let transitions: Vec = label_map + .into_iter() + .map(|(label, pairs)| { + let mut rows = Vec::with_capacity(pairs.len()); + let mut cols = Vec::with_capacity(pairs.len()); + for (r, c) in pairs { + rows.push(r as GrB_Index); + cols.push(c as GrB_Index); + } + NfaLabelTransitions { label, rows, cols } + }) + .collect(); + + Nfa { + num_states: n, + start_states, + final_states, + transitions, + } + } +} + +/// Evaluates RPQs using `LAGraph_RegularPathQuery`. +pub struct NfaRpqEvaluator; + +impl RpqEvaluator for NfaRpqEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result { + let nfa = Nfa::from_property_path(path)?; + let nfa_matrices = nfa.build_lagraph_matrices()?; + + let src_id = resolve_vertex(subject, graph, true)?; + let _dst_id = resolve_vertex(object, graph, false)?; + + let n = graph.num_nodes(); + + let source_vertices: Vec = match src_id { + Some(id) => vec![id as GrB_Index], + None => (0..n as GrB_Index).collect(), + }; + + let mut nfa_graph_ptrs: Vec = + nfa_matrices.iter().map(|(_, lg)| lg.inner).collect(); + + let mut data_graph_ptrs: Vec = Vec::with_capacity(nfa_matrices.len()); + for (label, _) in &nfa_matrices { + let lg = graph + .get_graph(label) + .map_err(|_| RpqError::LabelNotFound(label.clone()))?; + data_graph_ptrs.push(lg.inner); + } + + let mut reachable: GrB_Vector = std::ptr::null_mut(); + + la_ok!(LAGraph_RegularPathQuery( + &mut reachable, + nfa_graph_ptrs.as_mut_ptr(), + nfa_matrices.len(), + nfa.start_states.as_ptr(), + nfa.start_states.len(), + nfa.final_states.as_ptr(), + nfa.final_states.len(), + data_graph_ptrs.as_mut_ptr(), + source_vertices.as_ptr(), + source_vertices.len(), + )) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + + let result_vec = GraphblasVector { inner: reachable }; + + Ok(RpqResult { + reachable: 
result_vec, + }) + } +} + +fn resolve_vertex( + term: &TermPattern, + graph: &G, + is_subject: bool, +) -> Result, RpqError> { + match term { + TermPattern::Variable(_) => Ok(None), + TermPattern::NamedNode(nn) => { + let iri = nn.as_str(); + graph + .get_node_id(iri) + .map(Some) + .ok_or_else(|| RpqError::VertexNotFound(iri.to_owned())) + } + other => { + let msg = format!("{other}"); + if is_subject { + Err(RpqError::VertexNotFound(format!( + "unsupported subject term: {msg}" + ))) + } else { + Err(RpqError::VertexNotFound(format!( + "unsupported object term: {msg}" + ))) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use spargebra::algebra::PropertyPathExpression; + use spargebra::term::NamedNode; + + fn named(iri: &str) -> PropertyPathExpression { + PropertyPathExpression::NamedNode(NamedNode::new_unchecked(iri)) + } + + #[test] + fn test_single_label() { + let nfa = Nfa::from_property_path(&named("knows")).unwrap(); + assert_eq!(nfa.num_states, 2); + assert_eq!(nfa.start_states.len(), 1); + assert_eq!(nfa.final_states.len(), 1); + assert_eq!(nfa.transitions.len(), 1); + assert_eq!(nfa.transitions[0].label, "knows"); + assert_eq!(nfa.transitions[0].rows.len(), 1); + } + + #[test] + fn test_sequence() { + let path = PropertyPathExpression::Sequence(Box::new(named("a")), Box::new(named("b"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_alternative() { + let path = PropertyPathExpression::Alternative(Box::new(named("a")), Box::new(named("b"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_zero_or_more() { + let path = 
PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + // Start state should also be a final state (zero matches). + let start_set: HashSet = nfa.start_states.iter().copied().collect(); + let final_set: HashSet = nfa.final_states.iter().copied().collect(); + assert!(!start_set.is_disjoint(&final_set)); + } + + #[test] + fn test_reverse_unsupported() { + let path = PropertyPathExpression::Reverse(Box::new(named("knows"))); + assert!(matches!( + Nfa::from_property_path(&path), + Err(RpqError::UnsupportedPath(_)) + )); + } +} diff --git a/src/utils.rs b/src/utils.rs index 7cb37d3..e4add86 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -180,3 +180,23 @@ macro_rules! la_ok { } }}; } + +pub fn build_graph(edges: &[(&str, &str, &str)]) -> ::Graph { + let builder = InMemoryBuilder::new(); + let edges = edges + .iter() + .cloned() + .map(|(s, t, l)| { + Ok(Edge { + source: s.to_string(), + label: l.to_string(), + target: t.to_string(), + }) + }) + .collect::>>(); + builder + .with_stream(edges.into_iter()) + .expect("Should insert edges stream") + .build() + .expect("build must succeed") +} diff --git a/tests/inmemory_tests.rs b/tests/inmemory_tests.rs index d30c305..dccd256 100644 --- a/tests/inmemory_tests.rs +++ b/tests/inmemory_tests.rs @@ -1,28 +1,9 @@ use pathrex::formats::csv::Csv; +use pathrex::utils::build_graph; use pathrex::graph::{ - Backend, Edge, Graph, GraphBuilder, GraphDecomposition, GraphError, InMemory, InMemoryBuilder, + Edge, Graph, GraphBuilder, GraphDecomposition, GraphError, InMemory, InMemoryBuilder, }; -fn build_graph(edges: &[(&str, &str, &str)]) -> ::Graph { - let builder = InMemoryBuilder::new(); - let edges = edges - .iter() - .cloned() - .map(|(s, t, l)| { - Ok(Edge { - source: s.to_string(), - label: l.to_string(), - target: t.to_string(), - }) - }) - .collect::>>(); - builder - .with_stream(edges.into_iter()) - .expect("Should insert edges stream") - .build() - .expect("build 
must succeed") -} - #[test] fn node_ids_are_unique() { let graph = build_graph(&[ diff --git a/tests/nfarpq_tests.rs b/tests/nfarpq_tests.rs new file mode 100644 index 0000000..53029c3 --- /dev/null +++ b/tests/nfarpq_tests.rs @@ -0,0 +1,363 @@ +use pathrex::graph::GraphDecomposition; +use pathrex::lagraph_sys::{GrB_Index, GrB_Vector_extractTuples_BOOL, GrB_Vector_nvals}; +use pathrex::rpq::nfarpq::NfaRpqEvaluator; +use pathrex::rpq::{RpqError, RpqEvaluator, RpqResult}; +use pathrex::utils::build_graph; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::{NamedNode, TermPattern, Variable}; + +fn named(iri: &str) -> PropertyPathExpression { + PropertyPathExpression::NamedNode(NamedNode::new_unchecked(iri)) +} + +fn var(name: &str) -> TermPattern { + TermPattern::Variable(Variable::new_unchecked(name)) +} + +fn named_term(iri: &str) -> TermPattern { + TermPattern::NamedNode(NamedNode::new_unchecked(iri)) +} + +fn reachable_indices(result: &RpqResult) -> Vec { + unsafe { + let mut nvals: GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + if nvals == 0 { + return Vec::new(); + } + let mut indices = vec![0u64; nvals as usize]; + let mut values = vec![false; nvals as usize]; + GrB_Vector_extractTuples_BOOL( + indices.as_mut_ptr(), + values.as_mut_ptr(), + &mut nvals, + result.reachable.inner, + ); + indices.truncate(nvals as usize); + indices + } +} + +fn reachable_count(result: &RpqResult) -> u64 { + unsafe { + let mut nvals: GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + nvals + } +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?x ?y +#[test] +fn test_single_label_variable_variable() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&var("x"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 2); +} + +/// Graph: 
A --knows--> B --knows--> C +/// Query: ?y +#[test] +fn test_single_label_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&named_term("A"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B (id={b_id}) should be reachable from A via 'knows', got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: ?x / ?y (two-hop sequence) +#[test] +fn test_sequence_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&var("x"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 1); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: / ?y +#[test] +fn test_sequence_path_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C (id={c_id}) should be reachable from A via knows/likes, got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B, A --likes--> C +/// Query: ?x | ?y +#[test] +fn test_alternative_path() { + let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + 
let path = + PropertyPathExpression::Alternative(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable via knows|likes" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable via knows|likes" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: * ?y +#[test] +fn test_zero_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: + ?y +#[test] +fn test_one_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::OneOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = 
graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + !indices.contains(&(a_id as GrB_Index)), + "A shouldn't be reachable" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ? ?y +#[test] +fn test_zero_or_one_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrOne(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + !indices.contains(&(c_id as GrB_Index)), + "C should NOT be reachable (two hops, but path is ?)" + ); +} + +#[test] +fn test_label_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&var("x"), &named("nonexistent"), &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "nonexistent"), + "expected LabelNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&named_term("Z"), &named("knows"), &var("y"), &graph); + + assert!( + 
matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_object_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&var("x"), &named("knows"), &named_term("Z"), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error for object, got: {result:?}" + ); +} + +#[test] +fn test_reverse_path_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Reverse(Box::new(named("knows"))); + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath error, got: {result:?}" + ); +} + +/// Graph: A --knows--> B --knows--> C --knows--> A (cycle) +/// Query: * ?y +#[test] +fn test_cycle_graph_star() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("C", "A", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 3, "all 3 nodes should be reachable in a cycle"); +} + +/// Graph: A --knows--> B --likes--> C --knows--> D +/// Query: ?x /*/ ?y +#[test] +fn test_complex_path() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "likes"), + ("C", "D", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + // knows / likes* / knows + let path = PropertyPathExpression::Sequence( + Box::new(PropertyPathExpression::Sequence( + Box::new(named("knows")), + Box::new(PropertyPathExpression::ZeroOrMore(Box::new(named("likes")))), + )), + Box::new(named("knows")), + ); 
+ + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let d_id = graph.get_node_id("D").expect("D should exist"); + assert!( + indices.contains(&(d_id as GrB_Index)), + "D should be reachable via knows/likes*/knows, got indices: {indices:?}" + ); +} + +#[test] +fn test_no_matching_path() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "likes"), + "expected LabelNotFound for 'likes', got: {result:?}" + ); +}