From 1103c6bc74d38cdd968d1437a63c973c19e58a16 Mon Sep 17 00:00:00 2001 From: Belyanin Georgiy Date: Thu, 4 Jul 2024 15:51:30 +0300 Subject: [PATCH 01/11] Add regular path query algorithm This commit adds an implementation of the regular path query algorithm based on the linear-algebra graph processing approach. The algorithm finds a set of nodes in an edge-labelled directed graph. These nodes are reachable by paths that start from one of the source nodes and whose edge labels form a word from the specified regular language. This algorithm is based on the breadth-first search algorithm over the adjacency matrices. Regular languages are defined by non-deterministic finite automata. The algorithm considers the paths on which "label words" are accepted by the specified NFA. The algorithm is used with the following inputs: * A regular automaton adjacency matrix decomposition. * A graph adjacency matrix decomposition. * An array of the starting node indices. It results in a vector with v[i] = 1 iff node i is reachable by a path satisfying the provided regular constraints. 
--- experimental/algorithm/LAGraph_RegularPathQuery.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/experimental/algorithm/LAGraph_RegularPathQuery.c b/experimental/algorithm/LAGraph_RegularPathQuery.c index 8173daadb6..6a528f18e5 100644 --- a/experimental/algorithm/LAGraph_RegularPathQuery.c +++ b/experimental/algorithm/LAGraph_RegularPathQuery.c @@ -255,7 +255,11 @@ int LAGraph_RegularPathQuery } // Check source nodes in the graph +<<<<<<< HEAD for (size_t i = 0 ; i < ns ; i++) +======= + for (GrB_Index i = 0; i < ns; i++) +>>>>>>> 30e086c8 (Add regular path query algorithm) { GrB_Index s = S [i] ; LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; From 14192f6c4e18c5d64afb10df1737e3abbd0e4e5e Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sat, 7 Dec 2024 18:36:50 +0300 Subject: [PATCH 02/11] Make the RPQ algorithm evaluate 2-RPQs This patch is used to make the regular path query algorithm work with 2-RPQs. 2-RPQs represent RPQs extended with possibility of traversing graphs into the directions opposite to the presented edges. E.g. SPARQL 2-RPQ `Alice ^ ?x` could be used to find Alice and all of her sisters by getting all Alice mother's daughters. 2-RPQ support is provided by adding two extra parameters to the RPQ algorithm. One of them is used to specify some of the provided labels as inversed. The second one inverses the whole query allowing to execute single-destination RPQs (e.g. `?x Bob` gets Bob's parents). 
--- .../algorithm/LAGraph_RegularPathQuery.c | 74 ++++++++++++++----- experimental/test/test_RegularPathQuery.c | 42 +++++++++-- include/LAGraphX.h | 4 +- 3 files changed, 97 insertions(+), 23 deletions(-) diff --git a/experimental/algorithm/LAGraph_RegularPathQuery.c b/experimental/algorithm/LAGraph_RegularPathQuery.c index 6a528f18e5..8e117f7fa8 100644 --- a/experimental/algorithm/LAGraph_RegularPathQuery.c +++ b/experimental/algorithm/LAGraph_RegularPathQuery.c @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// LAGraph_RegularPathQuery.c: regular path query +// LAGraph_2RegularPathQuery.c: 2-way regular path query //------------------------------------------------------------------------------ // // LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. @@ -10,6 +10,9 @@ //------------------------------------------------------------------------------ +// TODO: This is a copy-pasted description of the original RPQ algorithm with +// support for 2-RPQs. +// // For an edge-labelled directed graph the algorithm computes the set of nodes // for which these conditions are held: // * The node is reachable by a path from one of the source nodes. 
@@ -96,7 +99,7 @@ #include "LG_internal.h" #include "LAGraphX.h" -int LAGraph_RegularPathQuery +int LAGraph_2RegularPathQuery ( // output: GrB_Vector *reachable, // reachable(i) = true if node i is reachable @@ -105,6 +108,7 @@ int LAGraph_RegularPathQuery // input: LAGraph_Graph *R, // input non-deterministic finite automaton // adjacency matrix decomposition + bool *inverse_labels, // inversed labels size_t nl, // total label count, # of matrices graph and // NFA adjacency matrix decomposition const GrB_Index *QS, // starting states in NFA @@ -114,6 +118,7 @@ int LAGraph_RegularPathQuery LAGraph_Graph *G, // input graph adjacency matrix decomposition const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices + bool inverse, // inverse the whole query char *msg // LAGraph output message ) { @@ -137,6 +142,7 @@ int LAGraph_RegularPathQuery GrB_Index ng = 0 ; // # nodes in the graph GrB_Index nr = 0 ; // # states in the NFA + GrB_Index nv = 0 ; // # pair count in the frontier GrB_Index states = ns ; // # pairs in the current // correspondence between the graph and // the NFA @@ -146,6 +152,7 @@ int LAGraph_RegularPathQuery GrB_Index vals = 0 ; // utility matrix value count GrB_Matrix *A = NULL ; + GrB_Matrix *AT = NULL ; GrB_Matrix *B = NULL ; GrB_Matrix *BT = NULL ; @@ -169,16 +176,29 @@ int LAGraph_RegularPathQuery } LG_TRY (LAGraph_Malloc ((void **) &A, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &AT, nl, sizeof (GrB_Matrix), msg)) ; for (size_t i = 0 ; i < nl ; i++) { if (G[i] == NULL) { A[i] = NULL ; + AT[i] = NULL ; continue ; } A[i] = G[i]->A ; + if (G[i]->kind == LAGraph_ADJACENCY_UNDIRECTED || + G[i]->is_symmetric_structure == LAGraph_TRUE) + { + AT[i] = A[i] ; + } + else + { + // AT[i] could be NULL and the matrix will be transposed by a + // descriptor + AT[i] = G[i]->AT ; + } } LG_TRY (LAGraph_Malloc ((void **) &B, nl, sizeof (GrB_Matrix), msg)) ; @@ -191,6 +211,7 @@ int 
LAGraph_RegularPathQuery if (R[i] == NULL) { B[i] = NULL ; + BT[i] = NULL ; continue ; } @@ -255,11 +276,7 @@ int LAGraph_RegularPathQuery } // Check source nodes in the graph -<<<<<<< HEAD for (size_t i = 0 ; i < ns ; i++) -======= - for (GrB_Index i = 0; i < ns; i++) ->>>>>>> 30e086c8 (Add regular path query algorithm) { GrB_Index s = S [i] ; LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; @@ -318,20 +335,43 @@ int LAGraph_RegularPathQuery // Traverse the NFA // Try to use a provided transposed matrix or use the descriptor - if (BT[i] != NULL) - { - GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, - GrB_LOR_LAND_SEMIRING_BOOL, BT[i], frontier, GrB_DESC_R)) ; - } - else - { - GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, - GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_RT0)) ; + if (!inverse) { + if (BT[i] != NULL) + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + GrB_LOR_LAND_SEMIRING_BOOL, BT[i], frontier, GrB_DESC_R)) ; + } + else + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_RT0)) ; + } + } else { + GRB_TRY (GrB_mxm (symbol_frontier, NULL, NULL, GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_R )) ; } + GrB_Matrix_nvals( &nv, symbol_frontier); + if (nv == 0) + continue; + // Traverse the graph - GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, - GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + if (!inverse_labels[i]) { + if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + } else if (AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, AT[i], GrB_DESC_SC)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SCT1)) ; + } + } else { + if (!inverse && AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, 
visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, AT[i], GrB_DESC_SC)) ; + } else if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SCT1)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + } + } } // Accumulate the new state <-> node correspondence diff --git a/experimental/test/test_RegularPathQuery.c b/experimental/test/test_RegularPathQuery.c index f5e04c20a3..f2e0981fd0 100644 --- a/experimental/test/test_RegularPathQuery.c +++ b/experimental/test/test_RegularPathQuery.c @@ -211,11 +211,43 @@ void test_RegularPathQueryBasic (void) OK (LAGraph_Delete (&(G[i]), msg)) ; } - for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) - { - if (R[i] == NULL) continue ; - OK (LAGraph_Delete (&(R[i]), msg)) ; - } + // TODO: Use 2RPQ HERE. + //OK (LAGraph_RegularPathQuery (&r, R, MAX_LABELS, QS, nqs, + // QF, nqf, G, S, ns, msg)) ; + + // Extract results from the output vector + GrB_Index *reachable ; + bool *values ; + + GrB_Index nvals ; + GrB_Vector_nvals (&nvals, r) ; + + OK (LAGraph_Malloc ((void **) &reachable, MAX_RESULTS, sizeof (GrB_Index), msg)) ; + OK (LAGraph_Malloc ((void **) &values, MAX_RESULTS, sizeof (GrB_Index), msg)) ; + + GrB_Vector_extractTuples (reachable, values, &nvals, r) ; + + // Compare the results with expected values + TEST_CHECK (nvals == files[k].expected_count) ; + for (uint64_t i = 0 ; i < nvals ; i++) + TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + // Cleanup + OK (LAGraph_Free ((void **) &values, NULL)) ; + OK (LAGraph_Free ((void **) &reachable, NULL)) ; + + OK (GrB_free (&r)) ; + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++) + { + if (G[i] == NULL) continue ; + OK (LAGraph_Delete (&(G[i]), msg)) ; + } + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) + { + if (R[i] == NULL) continue ; + OK (LAGraph_Delete (&(R[i]), msg)) ; } } diff --git a/include/LAGraphX.h 
b/include/LAGraphX.h index 3355addb2e..202d19a67e 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -834,7 +834,7 @@ int LAGraph_scc ( //**************************************************************************** LAGRAPHX_PUBLIC -int LAGraph_RegularPathQuery // nodes reachable from the starting by the +int LAGraph_2RegularPathQuery // nodes reachable from the starting by the // path satisfying regular expression ( // output: @@ -844,6 +844,7 @@ int LAGraph_RegularPathQuery // nodes reachable from the starting by the // input: LAGraph_Graph *R, // input non-deterministic finite automaton // adjacency matrix decomposition + bool *inverse_labels, // inversed labels size_t nl, // total label count, # of matrices graph and // NFA adjacency matrix decomposition const GrB_Index *QS, // starting states in NFA @@ -853,6 +854,7 @@ int LAGraph_RegularPathQuery // nodes reachable from the starting by the LAGraph_Graph *G, // input graph adjacency matrix decomposition const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices + bool inverse, // inverse the whole query char *msg // LAGraph output message ); //**************************************************************************** From 1e937d91a9c90a1eed4b6fe5e2f0db3cfadb8f20 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sun, 8 Dec 2024 11:04:53 +0300 Subject: [PATCH 03/11] Make MM reader for BOOL matrices ignore dups This patch provides a workaround for benchmarking 2-RPQ algorithm on a few real-world datasets like Wikidata or yago-2s by allowing duplicates in MatrixMarket files corresponding to boolean matrices since most of the publicly available graphs likely to have duplicates. 
--- src/utility/LAGraph_MMRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utility/LAGraph_MMRead.c b/src/utility/LAGraph_MMRead.c index 5aa9e390cb..e524570f49 100644 --- a/src/utility/LAGraph_MMRead.c +++ b/src/utility/LAGraph_MMRead.c @@ -989,7 +989,7 @@ int LAGraph_MMRead if (type == GrB_BOOL) { - GRB_TRY (GrB_Matrix_build_BOOL (*A, I, J, (bool *) X, nvals2, NULL)) ; + GRB_TRY (GrB_Matrix_build_BOOL (*A, I, J, (bool *) X, nvals2, GxB_IGNORE_DUP)) ; } else if (type == GrB_INT8) { From d48ebb2a4bd00f2469adb5d24dc579ae09649713 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 3 Jan 2025 13:52:15 +0300 Subject: [PATCH 04/11] Add regular path query algorithm for all paths [WIP] Full description TBD. --- experimental/algorithm/LAGraph_2Rpq.c | 770 ++++++++++++++++++++++++++ experimental/test/test_2Rpq.c | 249 +++++++++ include/LAGraphX.h | 93 ++++ 3 files changed, 1112 insertions(+) create mode 100644 experimental/algorithm/LAGraph_2Rpq.c create mode 100644 experimental/test/test_2Rpq.c diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c new file mode 100644 index 0000000000..6d01d89d01 --- /dev/null +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -0,0 +1,770 @@ +// +// Different RPQ semantics +// + +#define LG_FREE_WORK \ +{ \ +} +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ +} + +#include "LG_internal.h" +#include "LAGraphX.h" +#include +#include + +#define PATH_LIMIT 100000 + +typedef struct { + Path paths[QUICK_PATH_COUNT]; + size_t path_count; + Path *extra_paths; +} MultiplePaths ; + +MultiplePaths multiple_paths_identity ; + +void Path_print (const Path *x) +{ + if (x->vertex_count == 0) + { + printf ("empty path \n") ; + return ; + } + + for (size_t i = 0 ; i < x->vertex_count ; i++) + { + // Increase the vertex by 1 since usually user expects the same + // numbering as in the input determined by MTX file in which the + // entries are enumerated starting from 1. 
+ printf ("(%ld)", (i < QUICK_PATH_LENGTH ? x->vertices[i] : x->extra_vertices[i - QUICK_PATH_LENGTH]) + 1) ; + + if (i != x->vertex_count - 1) + { + printf ("-") ; + } + } + + printf ("\n") ; +} + +static void MultiplePaths_print (const MultiplePaths *x) +{ + printf("Multiple paths:\n") ; + for (size_t i = 0 ; i < x->path_count ; i++) + { + + printf("\t Path %ld: ", i) ; + Path_print (&x->paths[i]) ; + } + printf("\n") ; +} + +GrB_Type multiple_paths ; +GrB_BinaryOp combine_multiple_paths_op ; +GrB_Monoid combine_multiple_paths ; +GrB_BinaryOp first_multiple_paths ; +GrB_BinaryOp second_multiple_paths ; +GrB_Semiring first_combine_multiple_paths ; +GrB_Semiring second_combine_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_simple ; +GrB_IndexUnaryOp extend_multiple_trails ; + +void first_multiple_paths_f(MultiplePaths *z, MultiplePaths *x, bool *_y) +{ + *z = *x; +} +void second_multiple_paths_f(MultiplePaths *z, bool *_x, MultiplePaths *y) +{ + *z = *y; +} + +void combine_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, const MultiplePaths *y) +{ + z->path_count = x->path_count + y->path_count ; + assert (z->path_count < QUICK_PATH_COUNT) ; + + for (size_t i = 0 ; i < x->path_count ; i++) + { + z->paths[i] = x->paths[i] ; + } + + for (size_t i = 0 ; i < y->path_count ; i++) + { + z->paths[x->path_count + i] = y->paths[i] ; + } + + // TODO: Support more than QUICK_PATH_COUNT paths. +} + +static inline void path_extend(Path *path, Vertex vertex) +{ + if (path->vertex_count == 0) + { + return ; + } + + if (path->vertex_count < QUICK_PATH_LENGTH) + { + path->vertices[path->vertex_count++] = vertex ; + } + else + { + if (path->extra_vertices == NULL) + { + LG_TRY (LAGraph_Calloc ((void **) &path->extra_vertices, 64, sizeof (Vertex), NULL)) ; + } + path->extra_vertices [(path->vertex_count++) - QUICK_PATH_LENGTH] = vertex ; + } + + // TODO: Support more than QUICK_PATH_LENGTH vertices. 
+} + +static inline bool path_is_empty(Path *path) +{ + return path->vertex_count == 0; +} + + +static inline void multiple_paths_append(MultiplePaths *multiple_paths, const Path *path) +{ + multiple_paths->paths[multiple_paths->path_count++] = *path ; + + // TODO: Support more than QUICK_PATH_COUNT paths. +} + +// +// ALL PATHS. +// + +// NB: Using this semantic without a length limit makes the code behave like a +// procedure for searching all paths satisfying the constraints. +// It means it may not finish if there is loops. + +void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) +{ + /*if (z != x) + for (size_t i = 0 ; i < x->path_count ; i++) + { + multiple_paths_append(z, &x->paths[i]) ; + path_extend (&z->paths[i], col) ; + } + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + path_extend (&z->paths[i], col) ; + } + //} +} + +// +// ALL SIMPLE +// + +static inline bool path_extending_will_add_repeated_non_starting_vertex(const Path *path, Vertex vertex) +{ + if (path->vertex_count <= 1) + { + return false ; + } + + for (size_t i = 1 ; i < path->vertex_count ; i++) + { + if (path->vertices[i] == vertex) + { + return true ; + } + } + + Vertex last_vertex = path->vertices[path->vertex_count - 1] ; + + return path->vertices[0] == last_vertex; +} + +void extend_multiple_simple_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) +{ + /*if (z != x) + { + for (size_t i = 0 ; i < x->path_count ; i++) + { + const Path *path = &x->paths[i] ; + if (path_has_loop_at_end (path)) + { + continue; + } + + multiple_paths_append(z, path) ; + path_extend (&z->paths[i], col) ; + } + } + else + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + if (path_extending_will_add_repeated_non_starting_vertex (path, col)) + { + path->vertex_count = 0 ; + continue ; + } + + path_extend (&z->paths[i], col) ; + } + 
//} +} + +// +// ALL TRAILS +// + +static inline bool path_extending_will_add_repeated_edge(const Path *path, Vertex vertex_2) +{ + if (path->vertex_count == 0) + { + return false ; + } + + // We identify edges as pairs of vertices. + Vertex vertex_1 = path->vertices[path->vertex_count - 1] ; + + for (size_t i = 0 ; i < path->vertex_count - 1; i++) + { + if (path->vertices[i] == vertex_1 && path->vertices[i + 1] == vertex_2) + { + return true ; + } + } + + return false ; +} +void extend_multiple_trails_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *y) +{ + /*if (z != x) + { + z->path_count = x->path_count ; + + for (size_t i = 0 ; i < x->path_count ; i++) + { + const Path *path = &x->paths[i] ; + if (path_extending_will_add_repeated_edge (path, col)) + { + continue ; + } + + multiple_paths_append(z, path) ; + path_extend (&z->paths[i], col) ; + } + } + else + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + if (path_extending_will_add_repeated_edge (path, col)) + { + path->vertex_count = 0 ; + continue ; + } + + path_extend (&z->paths[i], col) ; + } + //} +} + +#define LG_FREE_WORK \ +{ \ + GrB_free (&frontier) ; \ + GrB_free (&next_frontier) ; \ + GrB_free (&symbol_frontier) ; \ + GrB_free (&final_reducer) ; \ + LAGraph_Free ((void **) &A, NULL) ; \ + LAGraph_Free ((void **) &B, NULL) ; \ + LAGraph_Free ((void **) &BT, NULL) ; \ +} + +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ + LAGraph_Free ((void **) paths, NULL) ; \ +} + +static int LAGraph_2Rpq +( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // 
starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg, // LAGraph output message + GrB_IndexUnaryOp op // index unary op for a specific semantic +) +{ + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + LG_CLEAR_MSG ; + + GrB_Matrix frontier = NULL ; // traversal frontier representing + // correspondence between NFA states + // and graph vertices + GrB_Matrix symbol_frontier = NULL ; // part of the new frontier for the + // specific label + GrB_Matrix next_frontier = NULL ; // frontier value on the next + // traversal step + GrB_Vector final_reducer = NULL ; // auxiliary vector for reducing the + // visited matrix to an answer + + GrB_Index ng = 0 ; // # nodes in the graph + GrB_Index nr = 0 ; // # states in the NFA + GrB_Index nv = 0 ; // # pair count in the frontier + GrB_Index states = ns ; // # pairs in the current + // correspondence between the graph and + // the NFA + + GrB_Index rows = 0 ; // utility matrix row count + GrB_Index cols = 0 ; // utility matrix column count + GrB_Index vals = 0 ; // utility matrix value count + + // TODO: This names might be too short. 
+ GrB_Semiring sr1 = first_combine_multiple_paths ; + GrB_Semiring sr2 = second_combine_multiple_paths ; + GrB_BinaryOp acc = combine_multiple_paths_op ; + + GrB_Matrix *A = NULL ; + GrB_Matrix *AT = NULL ; + GrB_Matrix *B = NULL ; + GrB_Matrix *BT = NULL ; + + LG_ASSERT (paths != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (path_count != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (G != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (R != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (S != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (op != NULL, GrB_NULL_POINTER) ; + + (*paths) = NULL ; + (*path_count) = 0 ; + + for (size_t i = 0 ; i < nl ; i++) + { + if (G[i] == NULL) continue ; + LG_TRY (LAGraph_CheckGraph (G[i], msg)) ; + } + + for (size_t i = 0 ; i < nl ; i++) + { + if (R[i] == NULL) continue ; + LG_TRY (LAGraph_CheckGraph (R[i], msg)) ; + } + + LG_TRY (LAGraph_Malloc ((void **) &A, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &AT, nl, sizeof (GrB_Matrix), msg)) ; + + for (size_t i = 0 ; i < nl ; i++) + { + if (G[i] == NULL) + { + A[i] = NULL ; + AT[i] = NULL ; + continue ; + } + + A[i] = G[i]->A ; + if (G[i]->kind == LAGraph_ADJACENCY_UNDIRECTED || + G[i]->is_symmetric_structure == LAGraph_TRUE) + { + AT[i] = A[i] ; + } + else + { + // AT[i] could be NULL and the matrix will be transposed by a + // descriptor + AT[i] = G[i]->AT ; + } + } + + LG_TRY (LAGraph_Malloc ((void **) &B, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &BT, nl, sizeof (GrB_Matrix), msg)) ; + + for (size_t i = 0 ; i < nl ; i++) + { + BT[i] = NULL ; + + if (R[i] == NULL) + { + B[i] = NULL ; + BT[i] = NULL ; + continue ; + } + + B[i] = R[i]->A ; + if (R[i]->is_symmetric_structure == LAGraph_TRUE) + { + BT[i] = B[i] ; + } + else + { + // BT[i] could be NULL and the matrix will be transposed by a + // descriptor + BT[i] = R[i]->AT ; + } + } + + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&ng, A[i])) ; + break ; + } + + for 
(size_t i = 0 ; i < nl ; i++) + { + if (B[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&nr, B[i])) ; + break ; + } + + // Check all the matrices in graph adjacency matrix decomposition are + // square and of the same dimensions + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&rows, A[i])) ; + GRB_TRY (GrB_Matrix_ncols (&cols, A[i])) ; + + LG_ASSERT_MSG (rows == ng && cols == ng, LAGRAPH_NOT_CACHED, + "all the matrices in the graph adjacency matrix decomposition " + "should have the same dimensions and be square") ; + } + + // Check all the matrices in NFA adjacency matrix decomposition are + // square and of the same dimensions + for (size_t i = 0 ; i < nl ; i++) + { + if (B[i] == NULL) continue ; + + GrB_Index rows = 0 ; + GrB_Index cols = 0 ; + + GRB_TRY (GrB_Matrix_nrows (&rows, B[i])) ; + GRB_TRY (GrB_Matrix_ncols (&cols, B[i])) ; + + LG_ASSERT_MSG (rows == nr && cols == nr, LAGRAPH_NOT_CACHED, + "all the matrices in the NFA adjacency matrix decomposition " + "should have the same dimensions and be square") ; + } + + // Check source nodes in the graph + for (size_t i = 0 ; i < ns ; i++) + { + GrB_Index s = S [i] ; + LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; + } + + // Check starting states of the NFA + for (size_t i = 0 ; i < nqs ; i++) + { + GrB_Index qs = QS [i] ; + LG_ASSERT_MSG (qs < nr, GrB_INVALID_INDEX, + "invalid NFA starting state") ; + } + + // Check final states of the NFA + for (size_t i = 0 ; i < nqf ; i++) + { + GrB_Index qf = QF [i] ; + LG_ASSERT_MSG (qf < nr, GrB_INVALID_INDEX, "invalid NFA final state") ; + } + + // ------------------------------------------------------------------------- + // initialization + // ------------------------------------------------------------------------- + + LG_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, sizeof (Path), msg)) ; + + GRB_TRY (GrB_Vector_new (&final_reducer, GrB_BOOL, nr)) ; + + // Initialize matrix 
for reducing the result + GRB_TRY (GrB_assign (final_reducer, NULL, NULL, true, QF, nqf, NULL)) ; + + GRB_TRY (GrB_Matrix_new (&next_frontier, multiple_paths, nr, ng)) ; + + // Initialize frontier with the source nodes + + for (size_t i = 0 ; i < ns ; i++) + { + GrB_Index s = S[i] ; + MultiplePaths value = { + .paths = { + { + .vertices = { s }, + .vertex_count = 1 + } + }, + .path_count = 1 + }; + + for (size_t j = 0 ; j < nqs ; j++) + { + GrB_Index qs = QS[j] ; + + GRB_TRY (GrB_Matrix_setElement_UDT (next_frontier, &value, qs, s)) ; + } + } + + // Initialize a few utility matrices + GRB_TRY (GrB_Matrix_new (&frontier, multiple_paths, nr, ng)) ; + GRB_TRY (GrB_Matrix_new (&symbol_frontier, multiple_paths, nr, ng)) ; + + // Main loop + while (true) + { + //printf("Iteration\n"); + GrB_Index nvals = 0 ; + GRB_TRY (GrB_Matrix_nvals (&nvals, next_frontier)) ; + + MultiplePaths *X ; + GrB_Index *I ; + bool had_non_empty_path = false ; + + //MultiplePaths *X; + LG_TRY (LAGraph_Calloc ((void **) &X, nvals, sizeof (MultiplePaths), msg)) ; + LG_TRY (LAGraph_Calloc ((void **) &I, nvals, sizeof (GrB_Index), msg)) ; + + // TODO: Change to a generic call. 
+ GRB_TRY (GrB_Matrix_extractTuples_UDT (I, GrB_NULL, (void**) X, &nvals, next_frontier)) ; + //printf("Next frontier with %d entries\n", nvals); + + for (size_t i = 0 ; i < nvals ; i++) + { + for (size_t j = 0 ; j < X[i].path_count ; j++) + { + if (!path_is_empty(&X[i].paths[j])) + { + had_non_empty_path = true; + break; + } + } + + //MultiplePaths_print (&X[i]) ; + bool final = false ; + for (size_t j = 0 ; j < nqf ; j++) + { + if (I[i] == QF[j]) + { + final = true ; + break ; + } + } + //printf("Path at %ld final is %b", I[i], final) ; + + if (!final) + { + continue ; + } + + //printf("Found final paths!\n"); + for (size_t j = 0 ; j < X[i].path_count && (*path_count) < limit ; j++) + { + const Path *path = &X[i].paths[j] ; + if (!path_is_empty(path)) + { + (*paths)[(*path_count)++] = *path ; + } + } + } + + if (!had_non_empty_path || (*path_count) == limit) + { + //printf("breaking\n"); + break; + } + + GrB_Matrix old_frontier = frontier ; + frontier = next_frontier ; + next_frontier = old_frontier ; + + GRB_TRY (GrB_Matrix_clear(next_frontier)) ; + + // Obtain a new relation between the NFA states and the graph nodes + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL || B[i] == NULL) continue ; + + // Traverse the NFA + // Try to use a provided transposed matrix or use the descriptor + if (!inverse) { + if (BT[i] != NULL) + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + sr2, BT[i], frontier, GrB_DESC_R)) ; + } + else + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + sr2, B[i], frontier, GrB_DESC_RT0)) ; + } + } else { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, sr2, B[i], frontier, GrB_DESC_R )) ; + } + + // TODO: Skip the iteration if symbol_frontier is already empty. 
+ + // Traverse the graph + if (!inverse_labels[i]) { + if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + } else if (AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + } + } else { + if (!inverse && AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + } else if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + } + } + } + + GRB_TRY (GrB_apply (next_frontier, GrB_NULL, GrB_NULL, op, next_frontier, false, GrB_NULL)) ; + + } + + //LG_FREE_WORK ; + return (GrB_SUCCESS) ; +} + + +int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular + // expression. Simple paths are paths without + // loops or the ones with the same starting + // and final nodes. 
+( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +) +{ + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_simple) ; +} + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllTrails // All trails satisfying regular expression. + // Trails are paths without repeated edges. 
+( + // output: + Path **paths, // trails from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +) +{ + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_trails) ; +} + +int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg // LAGraph output message + ) +{ + return 
LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, limit, msg, extend_multiple_paths) ; +} + +#define LG_FREE_WORK \ +{ \ +} +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ +} + +// Create the GraphBLAS user-defined type, the binary operators, the monoid, +// the two semirings and the three index-unary operators used by the 2-RPQ +// path enumeration. Every call is wrapped in GRB_TRY, which is expected to +// return early on a GraphBLAS error (see LG_internal.h). +// Returns GrB_SUCCESS once all objects have been created. +int LAGraph_Rpq_initialize(char *msg) +{ + GRB_TRY (GrB_Type_new (&multiple_paths, sizeof(MultiplePaths))) ; + + GRB_TRY (GrB_BinaryOp_new (&combine_multiple_paths_op, (GxB_binary_function) &combine_multiple_paths_f, multiple_paths, multiple_paths, multiple_paths)) ; + GRB_TRY (GrB_BinaryOp_new (&first_multiple_paths, (GxB_binary_function) &first_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ; + GRB_TRY (GrB_BinaryOp_new (&second_multiple_paths, (GxB_binary_function) &second_multiple_paths_f, multiple_paths, GrB_BOOL, multiple_paths)) ; + GRB_TRY (GrB_Monoid_new (&combine_multiple_paths, combine_multiple_paths_op, (void*) &multiple_paths_identity)) ; + + GRB_TRY (GrB_Semiring_new (&first_combine_multiple_paths, combine_multiple_paths, first_multiple_paths)) ; + GRB_TRY (GrB_Semiring_new (&second_combine_multiple_paths, combine_multiple_paths, second_multiple_paths)) ; + + GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_paths, (GxB_index_unary_function) &extend_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ; + GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_simple, (GxB_index_unary_function) &extend_multiple_simple_f, multiple_paths, multiple_paths, GrB_BOOL)) ; + GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_trails, (GxB_index_unary_function) &extend_multiple_trails_f, multiple_paths, multiple_paths, GrB_BOOL)) ; + + // The function is declared int: without this return it flowed off its end + // and handed the caller an indeterminate status. + return (GrB_SUCCESS) ; +} diff --git a/experimental/test/test_2Rpq.c b/experimental/test/test_2Rpq.c new file mode 100644 index 0000000000..173f88a9cf --- /dev/null +++ b/experimental/test/test_2Rpq.c @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include + +#define LEN 512 +#define MAX_LABELS 3 +#define MAX_RESULTS 2000000 + +char msg [LAGRAPH_MSG_LEN] ; +LAGraph_Graph G[MAX_LABELS] ; +LAGraph_Graph
R[MAX_LABELS] ; +GrB_Matrix A ; + +char testcase_name [LEN+1] ; +char filename [LEN+1] ; + +typedef struct +{ + const char* name ; + const char* graphs[MAX_LABELS] ; + const char* fas[MAX_LABELS] ; + const char* fa_meta ; + const char* sources ; + const GrB_Index expected[MAX_RESULTS] ; + const size_t expected_count ; +} +matrix_info ; + +const matrix_info files [ ] = +{ + {"simple 1 or more", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/1_a.mtx", NULL }, // Regex: a+ + "rpq_data/1_meta.txt", + "rpq_data/1_sources.txt", + {2, 4, 6, 7}, 4}, + {"simple kleene star", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/2_a.mtx", "rpq_data/2_b.mtx", NULL}, // Regex: (a b)* + "rpq_data/2_meta.txt", + "rpq_data/2_sources.txt", + {2, 6, 8}, 3}, + {"kleene star of the conjunction", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/3_a.mtx", "rpq_data/3_b.mtx", NULL}, // Regex: (a | b)* + "rpq_data/3_meta.txt", + "rpq_data/3_sources.txt", + {3, 6}, 2}, + {"simple repeat from n to m times", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"", "rpq_data/4_b.mtx", NULL}, // Regex: b b b (b b)? 
+ "rpq_data/4_meta.txt", + "rpq_data/4_sources.txt", + {3, 4, 6}, 3}, + {NULL, NULL, NULL, NULL}, +} ; + +//**************************************************************************** +void test_Rpq_Simple (void) +{ + LAGraph_Init (msg) ; + LAGraph_Rpq_initialize (msg) ; + + for (int k = 0 ; ; k++) + { + if (files[k].sources == NULL) break ; + + snprintf (testcase_name, LEN, "basic regular path query %s", files[k].name) ; + TEST_CASE (testcase_name) ; + + // Load graph from MTX files representing its adjacency matrix + // decomposition + for (int i = 0 ; ; i++) + { + const char *name = files[k].graphs[i] ; + + if (name == NULL) break ; + if (strlen(name) == 0) continue ; + + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + OK (LAGraph_MMRead (&A, f, msg)) ; + OK (fclose (f)); + + OK (LAGraph_New (&(G[i]), &A, LAGraph_ADJACENCY_DIRECTED, msg)) ; + + TEST_CHECK (A == NULL) ; + } + + // Load NFA from MTX files representing its adjacency matrix + // decomposition + for (int i = 0 ; ; i++) + { + const char *name = files[k].fas[i] ; + + if (name == NULL) break ; + if (strlen(name) == 0) continue ; + + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + OK (LAGraph_MMRead (&A, f, msg)) ; + OK (fclose (f)) ; + + OK (LAGraph_New (&(R[i]), &A, LAGraph_ADJACENCY_DIRECTED, msg)) ; + OK (LAGraph_Cached_AT (R[i], msg)) ; + + TEST_CHECK (A == NULL) ; + } + + // Note the matrix rows/cols are enumerated from 0 to n-1. Meanwhile, in + // MTX format they are enumerated from 1 to n. Thus, when + // loading/comparing the results these values should be + // decremented/incremented correspondingly. 
+ + // Load graph source nodes from the sources file. + // Values are read through unsigned long long: "%ld" does not match + // GrB_Index (uint64_t) and is only 32 bits wide on LLP64 targets. + unsigned long long s ; + GrB_Index S[16] ; + size_t ns = 0 ; + + const char *name = files[k].sources ; + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + + // Stop on the first token that is not a number (fscanf then returns 0, not + // EOF, and would never advance) and never write past the end of S. + while (ns < sizeof (S) / sizeof (S[0]) && fscanf(f, "%llu", &s) == 1) + S[ns++] = (GrB_Index) (s - 1) ; + + OK (fclose(f)) ; + + // Load NFA starting states from the meta file + unsigned long long qs ; + GrB_Index QS[16] ; + size_t nqs = 0 ; + + name = files[k].fa_meta ; + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + + // nqs is a size_t, so it is read with "%zu"; the count is clamped to the + // capacity of QS so that a bad meta file cannot overflow the array. + TEST_CHECK (fscanf(f, "%zu", &nqs) == 1) ; + TEST_CHECK (nqs <= sizeof (QS) / sizeof (QS[0])) ; + if (nqs > sizeof (QS) / sizeof (QS[0])) nqs = sizeof (QS) / sizeof (QS[0]) ; + + for (uint64_t i = 0; i < nqs; i++) { + TEST_CHECK (fscanf(f, "%llu", &qs) == 1) ; + QS[i] = (GrB_Index) (qs - 1) ; + } + + // Load NFA final states from the same file + unsigned long long qf ; + uint64_t QF[16] ; + size_t nqf = 0 ; + + TEST_CHECK (fscanf(f, "%zu", &nqf) == 1) ; + TEST_CHECK (nqf <= sizeof (QF) / sizeof (QF[0])) ; + if (nqf > sizeof (QF) / sizeof (QF[0])) nqf = sizeof (QF) / sizeof (QF[0]) ; + + for (uint64_t i = 0; i < nqf; i++) { + TEST_CHECK (fscanf(f, "%llu", &qf) == 1) ; + QF[i] = (uint64_t) (qf - 1) ; + } + + OK (fclose(f)) ; + + // Evaluate the algorithm + GrB_Vector r = NULL ; + + bool inverse_labels[] = {false, false, false, false, false, false, false, false, false, false, false, false, false}; + bool inverse = false; + + // Start from an empty result so that the print loop and the cleanup below + // stay well defined even if the query returns before filling the outputs. + Path *paths = NULL ; + size_t path_count = 0 ; + int res = LAGraph_2Rpq_AllSimple (&paths, &path_count, R, inverse_labels, + MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, + inverse, msg) ; + + // Compare the results with expected values + //TEST_CHECK (nvals == files[k].expected_count) ; + //for (uint64_t i = 0 ; i < nvals ; i++) + // TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + printf("ALL SIMPLE:\n"); + for (size_t i = 0 ; i < path_count ; i++) + { + Path_print (&paths[i]); + } + printf("\n"); + + // Cleanup + OK (LAGraph_Free ((void **) &paths, NULL)) ; + + res = LAGraph_2Rpq_AllTrails (&paths, &path_count, R, inverse_labels, + MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, + inverse, msg) ; + + // Compare the results with expected values + //TEST_CHECK
(nvals == files[k].expected_count) ; + //for (uint64_t i = 0 ; i < nvals ; i++) + // TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + printf("ALL TRAILS:\n"); + for (size_t i = 0 ; i < path_count ; i++) + { + Path_print (&paths[i]); + } + printf("\n"); + + // Cleanup + OK (LAGraph_Free ((void **) &paths, NULL)) ; + + res = LAGraph_2Rpq_AllPaths (&paths, &path_count, R, inverse_labels, + MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, + inverse, 10, msg) ; + + // Compare the results with expected values + //TEST_CHECK (nvals == files[k].expected_count) ; + //for (uint64_t i = 0 ; i < nvals ; i++) + // TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + printf("ALL PATHS (LIMIT = 10):\n"); + for (size_t i = 0 ; i < path_count ; i++) + { + Path_print (&paths[i]); + } + printf("\n"); + + OK (LAGraph_Free ((void **) &paths, NULL)) ; + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++) + { + if (G[i] == NULL) continue ; + OK (LAGraph_Delete (&(G[i]), msg)) ; + } + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) + { + if (R[i] == NULL) continue ; + OK (LAGraph_Delete (&(R[i]), msg)) ; + } + } + + LAGraph_Finalize (msg) ; +} + +TEST_LIST = { + {"Rpq_Simple", test_Rpq_Simple}, + {NULL, NULL} +}; diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 202d19a67e..3f9a6dadfa 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -858,6 +858,99 @@ int LAGraph_2RegularPathQuery // nodes reachable from the starting by the char *msg // LAGraph output message ); //**************************************************************************** +#define QUICK_PATH_LENGTH 14 +#define QUICK_PATH_COUNT 2 + +typedef uint64_t Vertex ; + +typedef struct { + Vertex vertices[QUICK_PATH_LENGTH]; + size_t vertex_count; + Vertex *extra_vertices; +} Path ; + +void Path_print (const Path *x); + +LAGRAPHX_PUBLIC +int LAGraph_Rpq_initialize (char *msg); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular + // expression. 
Simple paths are paths without + // loops or the ones with the same starting + // and final nodes. +( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllTrails // All trails satisfying regular expression. + // Trails are paths without repeated edges. 
+( + // output: + Path **paths, // trails from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg // LAGraph output message +); +//**************************************************************************** LAGRAPHX_PUBLIC int LAGraph_VertexCentrality_Triangle // vertex 
triangle-centrality ( From d5d616140da23f82c2e8bb4c3ecada02bb376ef2 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 24 Apr 2026 18:12:56 +0300 Subject: [PATCH 05/11] Add linear allocator in 2RPQ for many paths [WIP] Handle too many paths via custom arena-based linear allocator that is cleared at the end of the 2RPQ ALL PATHS procedure. It is used to construct elements of matrices having too many paths in them. It also offers OOM detection. --- experimental/algorithm/LAGraph_2Rpq.c | 108 ++++++++++++++++++++++---- include/LAGraphX.h | 11 ++- 2 files changed, 102 insertions(+), 17 deletions(-) diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c index 6d01d89d01..39bdda633f 100644 --- a/experimental/algorithm/LAGraph_2Rpq.c +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -15,12 +15,44 @@ #include #include + +#define MAX_MEM (1 * 1024 * 1024 * 1024) + +const char memory_arena[MAX_MEM]; +size_t cntr = 0; +bool oom = false; +void *xalloc(size_t size) { + assert(size % sizeof(int64_t) == 0); + if (cntr + size > MAX_MEM) { + // OOM: loop back to the beggining and flag the answer as wrong. + oom = true; + cntr = 0; + } + + void *ptr = (void*) &(memory_arena[cntr]); + cntr += size; + return ptr; +} +void xfree(void) { +#ifdef NDEBUG + printf("2RPQ arena allocator stats: allocated=%llu memory_limit=%llu\n", cntr, MAX_MEM); +#endif + oom = false; + cntr = 0; +} + + #define PATH_LIMIT 100000 +typedef struct MultiplePathsExtra { + size_t count; + Path paths[0]; +} MultiplePathsExtra; + typedef struct { Path paths[QUICK_PATH_COUNT]; size_t path_count; - Path *extra_paths; + MultiplePathsExtra *extra; } MultiplePaths ; MultiplePaths multiple_paths_identity ; @@ -38,7 +70,7 @@ void Path_print (const Path *x) // Increase the vertex by 1 since usually user expects the same // numbering as in the input determined by MTX file in which the // entries are enumerated starting from 1. - printf ("(%ld)", (i < QUICK_PATH_LENGTH ? 
x->vertices[i] : x->extra_vertices[i - QUICK_PATH_LENGTH]) + 1) ; + printf ("(%llu)", (i < QUICK_PATH_LENGTH ? x->vertices[i] : x->extra->vertices[i - QUICK_PATH_LENGTH]) + 1) ; if (i != x->vertex_count - 1) { @@ -81,24 +113,66 @@ void second_multiple_paths_f(MultiplePaths *z, bool *_x, MultiplePaths *y) *z = *y; } +static inline MultiplePathsExtra *multiple_paths_extra_alloc(size_t count) { + MultiplePathsExtra *extra = xalloc(sizeof(MultiplePathsExtra) + count * sizeof(Path)); + extra->count = count; + return extra; +} + +#define nth_path(x, j) (((j) < QUICK_PATH_COUNT) ? ((x)->paths[j]) : ((x)->extra->paths[j - QUICK_PATH_COUNT])) +#define set_nth_path(x, j, v) \ + do { \ + if (j < QUICK_PATH_COUNT) { \ + x->paths[j] = v; \ + } else { \ + x->extra->paths[j - QUICK_PATH_COUNT] = v; \ + } \ + } while (0) + void combine_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, const MultiplePaths *y) { - z->path_count = x->path_count + y->path_count ; - assert (z->path_count < QUICK_PATH_COUNT) ; + size_t path_count = x->path_count + y->path_count ; + z->path_count = path_count ; - for (size_t i = 0 ; i < x->path_count ; i++) - { - z->paths[i] = x->paths[i] ; - } - for (size_t i = 0 ; i < y->path_count ; i++) - { - z->paths[x->path_count + i] = y->paths[i] ; + if (path_count <= QUICK_PATH_COUNT) { + size_t i = 0; + for (size_t j = 0 ; j < x->path_count ; j++) + { + z->paths[i++] = x->paths[j] ; + } + + for (size_t j = 0 ; j < y->path_count ; j++) + { + z->paths[i++] = y->paths[j] ; + } + return; + } else { + MultiplePathsExtra *extra = multiple_paths_extra_alloc (path_count - QUICK_PATH_COUNT); + z->extra = extra; + + size_t i = 0; + for (size_t j = 0 ; j < x->path_count ; j++) + { + Path path = nth_path(x, j); + set_nth_path(z, i, path); + i++; + } + + for (size_t j = 0 ; j < y->path_count ; j++) + { + Path path = nth_path(y, j) ; + set_nth_path(z, i, path); + i++; + } } + + // TODO: Support more than QUICK_PATH_COUNT paths. 
} + static inline void path_extend(Path *path, Vertex vertex) { if (path->vertex_count == 0) @@ -112,11 +186,12 @@ static inline void path_extend(Path *path, Vertex vertex) } else { - if (path->extra_vertices == NULL) + if (path->extra == NULL) { - LG_TRY (LAGraph_Calloc ((void **) &path->extra_vertices, 64, sizeof (Vertex), NULL)) ; + LAGraph_Calloc ((void **) &path->extra, 64, sizeof (Vertex), NULL) ; } - path->extra_vertices [(path->vertex_count++) - QUICK_PATH_LENGTH] = vertex ; + + path->extra->vertices [(path->vertex_count++) - QUICK_PATH_LENGTH] = vertex ; } // TODO: Support more than QUICK_PATH_LENGTH vertices. @@ -655,7 +730,12 @@ static int LAGraph_2Rpq } + if (oom) { + // RODION, PLEASE HANDLE SOMEHOW + } + //LG_FREE_WORK ; + xfree(); return (GrB_SUCCESS) ; } diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 3f9a6dadfa..a3d8bfc2a5 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -858,15 +858,20 @@ int LAGraph_2RegularPathQuery // nodes reachable from the starting by the char *msg // LAGraph output message ); //**************************************************************************** -#define QUICK_PATH_LENGTH 14 -#define QUICK_PATH_COUNT 2 +#define QUICK_PATH_LENGTH 20 +#define QUICK_PATH_COUNT 1 typedef uint64_t Vertex ; +typedef struct PathExtra { + size_t len; + Vertex vertices[0]; +} PathExtra ; + typedef struct { Vertex vertices[QUICK_PATH_LENGTH]; size_t vertex_count; - Vertex *extra_vertices; + PathExtra *extra; } Path ; void Path_print (const Path *x); From 545f540ab2e7d93c2892221289f9aa2315ff6f8c Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 2 May 2026 23:08:59 +0300 Subject: [PATCH 06/11] feat: add allocator --- experimental/algorithm/LAGraph_2Rpq.c | 1214 ++++++++++++++++++++----- include/LAGraphX.h | 3 + 2 files changed, 999 insertions(+), 218 deletions(-) diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c index 39bdda633f..36fef4bc71 100644 --- 
a/experimental/algorithm/LAGraph_2Rpq.c +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -12,37 +12,162 @@ #include "LG_internal.h" #include "LAGraphX.h" + #include #include +#include +#include +#include +#include +#include +#include + +#define PATH_LIMIT 100000 + +// This define and three functions below need for YAGO dataset. +// Because we have OOM on it +// +#define PATHS_PER_POINT_LIMIT 15000 + +static atomic_int path_limit_exceeded ; + +static void path_limit_reset (void) +{ + atomic_store (&path_limit_exceeded, 0) ; +} + +static void path_limit_mark_exceeded (void) +{ + atomic_store (&path_limit_exceeded, 1) ; +} + +static bool path_limit_failed (void) +{ + return atomic_load (&path_limit_exceeded) != 0 ; +} +// -#define MAX_MEM (1 * 1024 * 1024 * 1024) +// Temporary storage for data referenced by GraphBLAS UDT values. +// GraphBLAS copies MultiplePaths by value, so pointers inside it must point to +// memory that remains alive while frontier matrices are alive. +#define TEMP_ARENA_BYTES ((size_t) 32 * 1024 * 1024 * 1024) + +static unsigned char *temp_arena_data = NULL ; +static size_t temp_arena_capacity = 0 ; +static atomic_size_t temp_arena_offset ; +static atomic_int temp_arena_oom ; + +static size_t align_up_size (size_t value, size_t align) +{ + size_t rem = value % align ; -const char memory_arena[MAX_MEM]; -size_t cntr = 0; -bool oom = false; -void *xalloc(size_t size) { - assert(size % sizeof(int64_t) == 0); - if (cntr + size > MAX_MEM) { - // OOM: loop back to the beggining and flag the answer as wrong. 
- oom = true; - cntr = 0; + if (rem == 0) + { + return value ; } - void *ptr = (void*) &(memory_arena[cntr]); - cntr += size; - return ptr; + return value + (align - rem) ; } -void xfree(void) { -#ifdef NDEBUG - printf("2RPQ arena allocator stats: allocated=%llu memory_limit=%llu\n", cntr, MAX_MEM); -#endif - oom = false; - cntr = 0; + +static int temp_arena_init (char *msg) +{ + int info ; + + temp_arena_data = NULL ; + temp_arena_capacity = 0 ; + atomic_store (&temp_arena_offset, 0) ; + atomic_store (&temp_arena_oom, 0) ; + + info = LAGraph_Malloc ((void **) &temp_arena_data, + TEMP_ARENA_BYTES, sizeof (unsigned char), msg) ; + if (info != GrB_SUCCESS) + { + atomic_store (&temp_arena_oom, 1) ; + return info ; + } + + temp_arena_capacity = TEMP_ARENA_BYTES ; + return GrB_SUCCESS ; } +static void temp_arena_destroy (void) +{ + LAGraph_Free ((void **) &temp_arena_data, NULL) ; + temp_arena_capacity = 0 ; + atomic_store (&temp_arena_offset, 0) ; + atomic_store (&temp_arena_oom, 0) ; +} -#define PATH_LIMIT 100000 +// Reserve size bytes from the temporary arena, rounded up to the alignment of +// max_align_t. Returns NULL for size == 0. Returns NULL and raises the +// temp_arena_oom flag when the request does not fit in the remaining space. +// The returned memory is not cleared; see temp_calloc_bytes. +static void *temp_alloc (size_t size) +{ + size_t align ; + size_t aligned_size ; + size_t start ; + + if (size == 0) + { + return NULL ; + } + + align = _Alignof (max_align_t) ; + + // Rounding up must not wrap around: a wrapped aligned_size would reserve + // fewer bytes than the caller is about to write. + if (size > SIZE_MAX - (align - 1)) + { + atomic_store (&temp_arena_oom, 1) ; + return NULL ; + } + + aligned_size = align_up_size (size, align) ; + + // Reserve with compare-exchange instead of an unconditional fetch-add so + // that a rejected request leaves the offset untouched: the offset never + // exceeds the capacity and cannot wrap into already handed-out memory. + start = atomic_load (&temp_arena_offset) ; + do + { + if (start > temp_arena_capacity || + aligned_size > temp_arena_capacity - start) + { + atomic_store (&temp_arena_oom, 1) ; + return NULL ; + } + } + while (!atomic_compare_exchange_weak (&temp_arena_offset, &start, + start + aligned_size)) ; + + return (void *) (temp_arena_data + start) ; +} + +static void *temp_calloc_bytes (size_t size) +{ + void *ptr ; + + ptr = temp_alloc (size) ; + if (ptr != NULL) + { + memset (ptr, 0, size) ; + } + + return ptr ; +} + +static bool temp_alloc_failed (void) +{ + return atomic_load (&temp_arena_oom) != 0 ; +} + +static size_t temp_arena_used (void) +{ + return atomic_load (&temp_arena_offset) ; +} +// + + + +// JIT kernels are compiled into a separate shared object.
They cannot call +// static functions from this translation unit, so expose tiny wrappers for +// the stateful parts: arena allocation and path-limit reporting. +// +// These functions must be visible from the dynamic symbol table. If LAGraphX +// builds with hidden visibility, LAGRAPHX_PUBLIC is important here. +LAGRAPHX_PUBLIC +void *LAGraph_Rpq_jit_temp_calloc_bytes (size_t size) +{ + return temp_calloc_bytes (size) ; +} + +LAGRAPHX_PUBLIC +void LAGraph_Rpq_jit_path_limit_mark_exceeded (void) +{ + path_limit_mark_exceeded () ; +} +// typedef struct MultiplePathsExtra { size_t count; @@ -57,6 +182,488 @@ typedef struct { MultiplePaths multiple_paths_identity ; +GrB_Type multiple_paths ; +GrB_BinaryOp combine_multiple_paths_op ; +GrB_Monoid combine_multiple_paths ; +GrB_BinaryOp first_multiple_paths ; +GrB_BinaryOp second_multiple_paths ; +GrB_Semiring first_combine_multiple_paths ; +GrB_Semiring second_combine_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_simple ; +GrB_IndexUnaryOp extend_multiple_trails ; + + + + +// JIT definitions +// The type definition string has to be self-contained C source, so it pulls +// in the standard headers it relies on itself: uint64_t, size_t, +// bool and memcpy/memset. +// NOTE(review): the header names were lost from this copy of the patch and are +// reconstructed from what the code below uses; confirm against the original. +#define MULTIPLE_PATHS_TYPE_DEFN \ +"#include <stdint.h>\n" \ +"#include <stddef.h>\n" \ +"#include <stdbool.h>\n" \ +"#include <string.h>\n" \ +"#define QUICK_PATH_LENGTH 20\n" \ +"#define QUICK_PATH_COUNT 1\n" \ +"#define PATHS_PER_POINT_LIMIT 15000\n" \ +"typedef uint64_t Vertex;\n" \ +"typedef struct PathExtra {\n" \ +" size_t len;\n" \ +" Vertex vertices[];\n" \ +"} PathExtra;\n" \ +"typedef struct Path {\n" \ +" Vertex vertices[QUICK_PATH_LENGTH];\n" \ +" size_t vertex_count;\n" \ +" PathExtra *extra;\n" \ +"} Path;\n" \ +"typedef struct MultiplePathsExtra {\n" \ +" size_t count;\n" \ +" Path paths[];\n" \ +"} MultiplePathsExtra;\n" \ +"typedef struct MultiplePaths {\n" \ +" Path paths[QUICK_PATH_COUNT];\n" \ +" size_t path_count;\n" \ +" MultiplePathsExtra *extra;\n" \ +"} MultiplePaths;\n" \ +"extern void *LAGraph_Rpq_jit_temp_calloc_bytes(size_t size);\n" \ +"extern void
LAGraph_Rpq_jit_path_limit_mark_exceeded(void);\n" \ +"static size_t path_extra_len_jit(const Path *path)\n" \ +"{\n" \ +" return path->vertex_count > QUICK_PATH_LENGTH ?\n" \ +" path->vertex_count - QUICK_PATH_LENGTH : 0;\n" \ +"}\n" \ +"static Vertex path_get_vertex_jit(const Path *path, size_t i)\n" \ +"{\n" \ +" if (i < QUICK_PATH_LENGTH) return path->vertices[i];\n" \ +" return path->extra->vertices[i - QUICK_PATH_LENGTH];\n" \ +"}\n" \ +"static bool path_is_empty_jit(const Path *path)\n" \ +"{\n" \ +" return path->vertex_count == 0;\n" \ +"}\n" \ +"static Vertex path_start_vertex_jit(const Path *path)\n" \ +"{\n" \ +" return path_get_vertex_jit(path, 0);\n" \ +"}\n" \ +"static Vertex path_last_vertex_jit(const Path *path)\n" \ +"{\n" \ +" return path_get_vertex_jit(path, path->vertex_count - 1);\n" \ +"}\n" \ +"static bool path_is_closed_cycle_jit(const Path *path)\n" \ +"{\n" \ +" return path->vertex_count > 1 &&\n" \ +" path_start_vertex_jit(path) == path_last_vertex_jit(path);\n" \ +"}\n" \ +"static PathExtra *path_extra_temp_alloc_copy_plus_one_jit(\n" \ +" const Path *src, Vertex vertex)\n" \ +"{\n" \ +" size_t old_extra_len = path_extra_len_jit(src);\n" \ +" size_t new_extra_len = old_extra_len + 1;\n" \ +" size_t nbytes = sizeof(PathExtra) +\n" \ +" new_extra_len * sizeof(Vertex);\n" \ +" PathExtra *extra = (PathExtra *)\n" \ +" LAGraph_Rpq_jit_temp_calloc_bytes(nbytes);\n" \ +" if (extra == NULL) return NULL;\n" \ +" extra->len = new_extra_len;\n" \ +" if (old_extra_len > 0)\n" \ +" {\n" \ +" memcpy(extra->vertices, src->extra->vertices,\n" \ +" old_extra_len * sizeof(Vertex));\n" \ +" }\n" \ +" extra->vertices[old_extra_len] = vertex;\n" \ +" return extra;\n" \ +"}\n" \ +"static void path_extend_jit(Path *path, Vertex vertex)\n" \ +"{\n" \ +" if (path->vertex_count == 0) return;\n" \ +" if (path->vertex_count < QUICK_PATH_LENGTH)\n" \ +" {\n" \ +" path->vertices[path->vertex_count] = vertex;\n" \ +" path->vertex_count++;\n" \ +" return;\n" \ +" }\n" 
\ +" PathExtra *new_extra =\n" \ +" path_extra_temp_alloc_copy_plus_one_jit(path, vertex);\n" \ +" if (new_extra == NULL)\n" \ +" {\n" \ +" path->vertex_count = 0;\n" \ +" path->extra = NULL;\n" \ +" return;\n" \ +" }\n" \ +" path->extra = new_extra;\n" \ +" path->vertex_count++;\n" \ +"}\n" \ +"static size_t multiple_paths_capacity_jit(const MultiplePaths *x)\n" \ +"{\n" \ +" size_t cap = QUICK_PATH_COUNT;\n" \ +" if (x->extra != NULL) cap += x->extra->count;\n" \ +" return cap;\n" \ +"}\n" \ +"static const Path *multiple_paths_nth_const_jit(\n" \ +" const MultiplePaths *x, size_t j)\n" \ +"{\n" \ +" if (j < QUICK_PATH_COUNT) return &x->paths[j];\n" \ +" return &x->extra->paths[j - QUICK_PATH_COUNT];\n" \ +"}\n" \ +"static Path *multiple_paths_nth_mut_jit(MultiplePaths *x, size_t j)\n" \ +"{\n" \ +" if (j < QUICK_PATH_COUNT) return &x->paths[j];\n" \ +" return &x->extra->paths[j - QUICK_PATH_COUNT];\n" \ +"}\n" \ +"static MultiplePathsExtra *multiple_paths_extra_temp_alloc_jit(\n" \ +" size_t extra_count)\n" \ +"{\n" \ +" if (extra_count == 0) return NULL;\n" \ +" size_t nbytes = sizeof(MultiplePathsExtra) +\n" \ +" extra_count * sizeof(Path);\n" \ +" MultiplePathsExtra *extra = (MultiplePathsExtra *)\n" \ +" LAGraph_Rpq_jit_temp_calloc_bytes(nbytes);\n" \ +" if (extra == NULL) return NULL;\n" \ +" extra->count = extra_count;\n" \ +" return extra;\n" \ +"}\n" \ +"static bool multiple_paths_prepare_jit(MultiplePaths *x,\n" \ +" size_t capacity)\n" \ +"{\n" \ +" memset(x, 0, sizeof(*x));\n" \ +" if (capacity > PATHS_PER_POINT_LIMIT)\n" \ +" {\n" \ +" LAGraph_Rpq_jit_path_limit_mark_exceeded();\n" \ +" return false;\n" \ +" }\n" \ +" if (capacity > QUICK_PATH_COUNT)\n" \ +" {\n" \ +" x->extra = multiple_paths_extra_temp_alloc_jit(\n" \ +" capacity - QUICK_PATH_COUNT);\n" \ +" if (x->extra == NULL)\n" \ +" {\n" \ +" x->path_count = 0;\n" \ +" return false;\n" \ +" }\n" \ +" }\n" \ +" return true;\n" \ +"}\n" \ +"static bool multiple_paths_append_jit(MultiplePaths 
*x,\n" \ +" const Path *path)\n" \ +"{\n" \ +" if (x->path_count >= PATHS_PER_POINT_LIMIT)\n" \ +" {\n" \ +" LAGraph_Rpq_jit_path_limit_mark_exceeded();\n" \ +" return false;\n" \ +" }\n" \ +" if (x->path_count >= multiple_paths_capacity_jit(x))\n" \ +" {\n" \ +" return false;\n" \ +" }\n" \ +" *multiple_paths_nth_mut_jit(x, x->path_count) = *path;\n" \ +" x->path_count++;\n" \ +" return true;\n" \ +"}\n" \ +"static void multiple_paths_append_unchecked_jit(MultiplePaths *x,\n" \ +" const Path *path)\n" \ +"{\n" \ +" *multiple_paths_nth_mut_jit(x, x->path_count) = *path;\n" \ +" x->path_count++;\n" \ +"}\n" \ +"static bool path_extending_will_add_repeated_non_starting_vertex_jit(\n"\ +" const Path *path, Vertex vertex)\n" \ +"{\n" \ +" if (path->vertex_count == 0) return false;\n" \ +" if (path_is_closed_cycle_jit(path)) return true;\n" \ +" if (vertex == path_start_vertex_jit(path)) return false;\n" \ +" for (size_t i = 1; i < path->vertex_count; i++)\n" \ +" {\n" \ +" if (path_get_vertex_jit(path, i) == vertex) return true;\n" \ +" }\n" \ +" return false;\n" \ +"}\n" \ +"static bool path_extending_will_add_repeated_edge_jit(\n" \ +" const Path *path, Vertex vertex_2)\n" \ +"{\n" \ +" if (path->vertex_count == 0) return false;\n" \ +" Vertex vertex_1 = path_last_vertex_jit(path);\n" \ +" for (size_t i = 0; i + 1 < path->vertex_count; i++)\n" \ +" {\n" \ +" if (path_get_vertex_jit(path, i) == vertex_1 &&\n" \ +" path_get_vertex_jit(path, i + 1) == vertex_2)\n" \ +" {\n" \ +" return true;\n" \ +" }\n" \ +" }\n" \ +" return false;\n" \ +"}\n" + +#define FIRST_MULTIPLE_PATHS_DEFN \ +"void first_multiple_paths_f(MultiplePaths *z, MultiplePaths *x,\n" \ +" bool *_y)\n" \ +"{\n" \ +" (void) _y;\n" \ +" *z = *x;\n" \ +"}\n" + +#define SECOND_MULTIPLE_PATHS_DEFN \ +"void second_multiple_paths_f(MultiplePaths *z, bool *_x,\n" \ +" MultiplePaths *y)\n" \ +"{\n" \ +" (void) _x;\n" \ +" *z = *y;\n" \ +"}\n" + +#define COMBINE_MULTIPLE_PATHS_DEFN \ +"void 
combine_multiple_paths_f(MultiplePaths *z,\n" \ +" const MultiplePaths *x, const MultiplePaths *y)\n" \ +"{\n" \ +" MultiplePaths x_copy = *x;\n" \ +" MultiplePaths y_copy = *y;\n" \ +" size_t path_count = x_copy.path_count + y_copy.path_count;\n" \ +" if (!multiple_paths_prepare_jit(z, path_count)) return;\n" \ +" for (size_t j = 0; j < x_copy.path_count; j++)\n" \ +" {\n" \ +" multiple_paths_append_unchecked_jit(z,\n" \ +" multiple_paths_nth_const_jit(&x_copy, j));\n" \ +" }\n" \ +" for (size_t j = 0; j < y_copy.path_count; j++)\n" \ +" {\n" \ +" multiple_paths_append_unchecked_jit(z,\n" \ +" multiple_paths_nth_const_jit(&y_copy, j));\n" \ +" }\n" \ +"}\n" + +#define EXTEND_MULTIPLE_PATHS_DEFN \ +"void extend_multiple_paths_f(MultiplePaths *z,\n" \ +" const MultiplePaths *x, GrB_Index _row, GrB_Index col,\n" \ +" const void *_y)\n" \ +"{\n" \ +" MultiplePaths src = *x;\n" \ +" (void) _row;\n" \ +" (void) _y;\n" \ +" if (!multiple_paths_prepare_jit(z, src.path_count)) return;\n" \ +" for (size_t i = 0; i < src.path_count; i++)\n" \ +" {\n" \ +" Path path = *multiple_paths_nth_const_jit(&src, i);\n" \ +" path_extend_jit(&path, (Vertex) col);\n" \ +" if (!path_is_empty_jit(&path))\n" \ +" {\n" \ +" multiple_paths_append_unchecked_jit(z, &path);\n" \ +" }\n" \ +" }\n" \ +"}\n" + +#define EXTEND_MULTIPLE_SIMPLE_DEFN \ +"void extend_multiple_simple_f(MultiplePaths *z,\n" \ +" const MultiplePaths *x, GrB_Index _row, GrB_Index col,\n" \ +" const void *_y)\n" \ +"{\n" \ +" MultiplePaths src = *x;\n" \ +" (void) _row;\n" \ +" (void) _y;\n" \ +" if (!multiple_paths_prepare_jit(z, src.path_count)) return;\n" \ +" for (size_t i = 0; i < src.path_count; i++)\n" \ +" {\n" \ +" Path path = *multiple_paths_nth_const_jit(&src, i);\n" \ +" if (path_extending_will_add_repeated_non_starting_vertex_jit(\n"\ +" &path, (Vertex) col))\n" \ +" {\n" \ +" continue;\n" \ +" }\n" \ +" path_extend_jit(&path, (Vertex) col);\n" \ +" if (!path_is_empty_jit(&path))\n" \ +" {\n" \ +" 
multiple_paths_append_unchecked_jit(z, &path);\n" \ +" }\n" \ +" }\n" \ +"}\n" + +#define EXTEND_MULTIPLE_TRAILS_DEFN \ +"void extend_multiple_trails_f(MultiplePaths *z,\n" \ +" const MultiplePaths *x, GrB_Index _row, GrB_Index col,\n" \ +" const void *y)\n" \ +"{\n" \ +" MultiplePaths src = *x;\n" \ +" (void) _row;\n" \ +" (void) y;\n" \ +" if (!multiple_paths_prepare_jit(z, src.path_count)) return;\n" \ +" for (size_t i = 0; i < src.path_count; i++)\n" \ +" {\n" \ +" Path path = *multiple_paths_nth_const_jit(&src, i);\n" \ +" if (path_extending_will_add_repeated_edge_jit(&path,\n" \ +" (Vertex) col))\n" \ +" {\n" \ +" continue;\n" \ +" }\n" \ +" path_extend_jit(&path, (Vertex) col);\n" \ +" if (!path_is_empty_jit(&path))\n" \ +" {\n" \ +" multiple_paths_append_unchecked_jit(z, &path);\n" \ +" }\n" \ +" }\n" \ +"}\n" +// + +// Helpers for Path with inline vertices + heap/temp overflow vertices. +static size_t path_extra_len (const Path *path) +{ + if (path->vertex_count > QUICK_PATH_LENGTH) + { + return path->vertex_count - QUICK_PATH_LENGTH ; + } + + return 0 ; +} + +static Vertex path_get_vertex (const Path *path, size_t i) +{ + assert (path != NULL) ; + assert (i < path->vertex_count) ; + + if (i < QUICK_PATH_LENGTH) + { + return path->vertices[i] ; + } + + assert (path->extra != NULL) ; + return path->extra->vertices[i - QUICK_PATH_LENGTH] ; +} + +static bool path_is_empty (const Path *path) +{ + return path->vertex_count == 0 ; +} + +static Vertex path_start_vertex (const Path *path) +{ + return path_get_vertex (path, 0) ; +} + +static Vertex path_last_vertex (const Path *path) +{ + return path_get_vertex (path, path->vertex_count - 1) ; +} + +static bool path_is_closed_cycle (const Path *path) +{ + return path->vertex_count > 1 && + path_start_vertex (path) == path_last_vertex (path) ; +} + +static PathExtra *path_extra_temp_alloc_copy_plus_one +( + const Path *src, + Vertex vertex +) +{ + size_t old_extra_len ; + size_t new_extra_len ; + size_t nbytes ; + 
PathExtra *extra ; + + old_extra_len = path_extra_len (src) ; + new_extra_len = old_extra_len + 1 ; + nbytes = sizeof (PathExtra) + new_extra_len * sizeof (Vertex) ; + + extra = (PathExtra *) temp_calloc_bytes (nbytes) ; + if (extra == NULL) + { + return NULL ; + } + + extra->len = new_extra_len ; + + if (old_extra_len > 0) + { + assert (src->extra != NULL) ; + memcpy (extra->vertices, src->extra->vertices, + old_extra_len * sizeof (Vertex)) ; + } + + extra->vertices[old_extra_len] = vertex ; + return extra ; +} + +static void path_extend (Path *path, Vertex vertex) +{ + PathExtra *new_extra ; + + if (path->vertex_count == 0) + { + return ; + } + + if (path->vertex_count < QUICK_PATH_LENGTH) + { + path->vertices[path->vertex_count] = vertex ; + path->vertex_count++ ; + return ; + } + + new_extra = path_extra_temp_alloc_copy_plus_one (path, vertex) ; + if (new_extra == NULL) + { + path->vertex_count = 0 ; + path->extra = NULL ; + return ; + } + + path->extra = new_extra ; + path->vertex_count++ ; +} + +static int path_clone_heap (Path *dst, const Path *src, char *msg) +{ + size_t extra_len ; + size_t nbytes ; + int info ; + + memset (dst, 0, sizeof (*dst)) ; + dst->vertex_count = src->vertex_count ; + memcpy (dst->vertices, src->vertices, sizeof (dst->vertices)) ; + + extra_len = path_extra_len (src) ; + if (extra_len == 0) + { + dst->extra = NULL ; + return GrB_SUCCESS ; + } + + nbytes = sizeof (PathExtra) + extra_len * sizeof (Vertex) ; + info = LAGraph_Malloc ((void **) &dst->extra, nbytes, sizeof (char), msg) ; + if (info != GrB_SUCCESS) + { + return info ; + } + + dst->extra->len = extra_len ; + memcpy (dst->extra->vertices, src->extra->vertices, + extra_len * sizeof (Vertex)) ; + return GrB_SUCCESS ; +} + +static void path_destroy_heap (Path *path) +{ + if (path == NULL) + { + return ; + } + + LAGraph_Free ((void **) &path->extra, NULL) ; + path->vertex_count = 0 ; +} + +static void free_all (Path **paths, size_t path_count) +{ + if (paths == NULL || *paths == 
NULL) + { + return ; + } + + for (size_t i = 0 ; i < path_count ; i++) + { + path_destroy_heap (&((*paths)[i])) ; + } + + LAGraph_Free ((void **) paths, NULL) ; +} +// + void Path_print (const Path *x) { if (x->vertex_count == 0) @@ -70,7 +677,7 @@ void Path_print (const Path *x) // Increase the vertex by 1 since usually user expects the same // numbering as in the input determined by MTX file in which the // entries are enumerated starting from 1. - printf ("(%llu)", (i < QUICK_PATH_LENGTH ? x->vertices[i] : x->extra->vertices[i - QUICK_PATH_LENGTH]) + 1) ; + printf ("(%llu)", (unsigned long long) (path_get_vertex (x, i) + 1)) ; if (i != x->vertex_count - 1) { @@ -81,133 +688,175 @@ void Path_print (const Path *x) printf ("\n") ; } -static void MultiplePaths_print (const MultiplePaths *x) +// Block with helper function for handling cases +// when we have more paths than QUICK_PATH_COUNT +static size_t multiple_paths_capacity (const MultiplePaths *x) { - printf("Multiple paths:\n") ; - for (size_t i = 0 ; i < x->path_count ; i++) - { + size_t cap = QUICK_PATH_COUNT ; - printf("\t Path %ld: ", i) ; - Path_print (&x->paths[i]) ; + if (x->extra != NULL) + { + cap += x->extra->count ; } - printf("\n") ; -} -GrB_Type multiple_paths ; -GrB_BinaryOp combine_multiple_paths_op ; -GrB_Monoid combine_multiple_paths ; -GrB_BinaryOp first_multiple_paths ; -GrB_BinaryOp second_multiple_paths ; -GrB_Semiring first_combine_multiple_paths ; -GrB_Semiring second_combine_multiple_paths ; -GrB_IndexUnaryOp extend_multiple_paths ; -GrB_IndexUnaryOp extend_multiple_simple ; -GrB_IndexUnaryOp extend_multiple_trails ; + return cap ; +} -void first_multiple_paths_f(MultiplePaths *z, MultiplePaths *x, bool *_y) +static const Path *multiple_paths_nth_const +( + const MultiplePaths *x, + size_t j +) { - *z = *x; + assert (j < x->path_count) ; + + if (j < QUICK_PATH_COUNT) + { + return &x->paths[j] ; + } + + assert (x->extra != NULL) ; + return &x->extra->paths[j - QUICK_PATH_COUNT] ; } -void 
second_multiple_paths_f(MultiplePaths *z, bool *_x, MultiplePaths *y) + +static Path *multiple_paths_nth_mut +( + MultiplePaths *x, + size_t j +) { - *z = *y; -} + assert (j < multiple_paths_capacity (x)) ; -static inline MultiplePathsExtra *multiple_paths_extra_alloc(size_t count) { - MultiplePathsExtra *extra = xalloc(sizeof(MultiplePathsExtra) + count * sizeof(Path)); - extra->count = count; - return extra; -} + if (j < QUICK_PATH_COUNT) + { + return &x->paths[j] ; + } -#define nth_path(x, j) (((j) < QUICK_PATH_COUNT) ? ((x)->paths[j]) : ((x)->extra->paths[j - QUICK_PATH_COUNT])) -#define set_nth_path(x, j, v) \ - do { \ - if (j < QUICK_PATH_COUNT) { \ - x->paths[j] = v; \ - } else { \ - x->extra->paths[j - QUICK_PATH_COUNT] = v; \ - } \ - } while (0) + assert (x->extra != NULL) ; + return &x->extra->paths[j - QUICK_PATH_COUNT] ; +} -void combine_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, const MultiplePaths *y) +static MultiplePathsExtra *multiple_paths_extra_temp_alloc (size_t extra_count) { - size_t path_count = x->path_count + y->path_count ; - z->path_count = path_count ; + size_t nbytes ; + MultiplePathsExtra *extra ; + if (extra_count == 0) + { + return NULL ; + } - if (path_count <= QUICK_PATH_COUNT) { - size_t i = 0; - for (size_t j = 0 ; j < x->path_count ; j++) - { - z->paths[i++] = x->paths[j] ; - } + nbytes = sizeof (MultiplePathsExtra) + extra_count * sizeof (Path) ; + extra = (MultiplePathsExtra *) temp_calloc_bytes (nbytes) ; + if (extra == NULL) + { + return NULL ; + } - for (size_t j = 0 ; j < y->path_count ; j++) - { - z->paths[i++] = y->paths[j] ; - } - return; - } else { - MultiplePathsExtra *extra = multiple_paths_extra_alloc (path_count - QUICK_PATH_COUNT); - z->extra = extra; + extra->count = extra_count ; + return extra ; +} - size_t i = 0; - for (size_t j = 0 ; j < x->path_count ; j++) - { - Path path = nth_path(x, j); - set_nth_path(z, i, path); - i++; - } +static bool multiple_paths_prepare (MultiplePaths *x, size_t 
capacity) +{ + memset (x, 0, sizeof (*x)) ; + + // Preserve the per-point path cap. If one frontier cell + // would contain too many paths, GraphBLAS UDF cannot return an error + // directly, so we mark a global flag and check it after GrB_mxm/GrB_apply. + if (capacity > PATHS_PER_POINT_LIMIT) + { + path_limit_mark_exceeded () ; + return false ; + } - for (size_t j = 0 ; j < y->path_count ; j++) + if (capacity > QUICK_PATH_COUNT) + { + x->extra = multiple_paths_extra_temp_alloc (capacity - QUICK_PATH_COUNT) ; + if (x->extra == NULL) { - Path path = nth_path(y, j) ; - set_nth_path(z, i, path); - i++; + x->path_count = 0 ; + return false ; } } - - - // TODO: Support more than QUICK_PATH_COUNT paths. + return true ; } - -static inline void path_extend(Path *path, Vertex vertex) +static bool multiple_paths_append (MultiplePaths *x, const Path *path) { - if (path->vertex_count == 0) + // handle per-point cap + if (x->path_count >= PATHS_PER_POINT_LIMIT) { - return ; + path_limit_mark_exceeded () ; + return false ; } - if (path->vertex_count < QUICK_PATH_LENGTH) + if (x->path_count >= multiple_paths_capacity (x)) { - path->vertices[path->vertex_count++] = vertex ; + return false ; } - else + + *multiple_paths_nth_mut (x, x->path_count) = *path ; + x->path_count++ ; + return true ; +} + +static void multiple_paths_append_unchecked (MultiplePaths *x, const Path *path) +{ + *multiple_paths_nth_mut (x, x->path_count) = *path ; + x->path_count++ ; +} +// + +static void MultiplePaths_print (const MultiplePaths *x) +{ + printf("Multiple paths:\n") ; + for (size_t i = 0 ; i < x->path_count ; i++) { - if (path->extra == NULL) - { - LAGraph_Calloc ((void **) &path->extra, 64, sizeof (Vertex), NULL) ; - } - path->extra->vertices [(path->vertex_count++) - QUICK_PATH_LENGTH] = vertex ; + printf("\t Path %zu: ", i) ; + Path_print (multiple_paths_nth_const (x, i)) ; } - - // TODO: Support more than QUICK_PATH_LENGTH vertices. 
+ printf("\n") ; } -static inline bool path_is_empty(Path *path) +// All functions below reworked. +// Due to graphblas api, we must handle z param like it's empty. +// It should just store result (z = f(x)). +// So we can't use it for any checks, or use its fields for something. +void first_multiple_paths_f(MultiplePaths *z, MultiplePaths *x, bool *_y) { - return path->vertex_count == 0; + (void) _y ; + *z = *x ; } +void second_multiple_paths_f(MultiplePaths *z, bool *_x, MultiplePaths *y) +{ + (void) _x ; + *z = *y ; +} -static inline void multiple_paths_append(MultiplePaths *multiple_paths, const Path *path) +void combine_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, const MultiplePaths *y) { - multiple_paths->paths[multiple_paths->path_count++] = *path ; + MultiplePaths x_copy = *x ; + MultiplePaths y_copy = *y ; + size_t path_count = x_copy.path_count + y_copy.path_count ; - // TODO: Support more than QUICK_PATH_COUNT paths. + if (!multiple_paths_prepare (z, path_count)) + { + return ; + } + + for (size_t j = 0 ; j < x_copy.path_count ; j++) + { + multiple_paths_append_unchecked (z, multiple_paths_nth_const (&x_copy, j)) ; + } + + for (size_t j = 0 ; j < y_copy.path_count ; j++) + { + multiple_paths_append_unchecked (z, multiple_paths_nth_const (&y_copy, j)) ; + } } // @@ -220,19 +869,26 @@ static inline void multiple_paths_append(MultiplePaths *multiple_paths, const Pa void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) { - /*if (z != x) - for (size_t i = 0 ; i < x->path_count ; i++) - { - multiple_paths_append(z, &x->paths[i]) ; - path_extend (&z->paths[i], col) ; - } - {*/ - for (size_t i = 0 ; i < z->path_count ; i++) + MultiplePaths src = *x ; + + (void) _row ; + (void) _y ; + + if (!multiple_paths_prepare (z, src.path_count)) + { + return ; + } + + for (size_t i = 0 ; i < src.path_count ; i++) + { + Path path = *multiple_paths_nth_const (&src, i) ; + path_extend (&path, 
(Vertex) col) ; + + if (!path_is_empty (&path)) { - Path *path = &z->paths[i] ; - path_extend (&z->paths[i], col) ; + multiple_paths_append_unchecked (z, &path) ; } - //} + } } // @@ -241,54 +897,60 @@ void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index static inline bool path_extending_will_add_repeated_non_starting_vertex(const Path *path, Vertex vertex) { - if (path->vertex_count <= 1) + if (path->vertex_count == 0) + { + return false ; + } + + if (path_is_closed_cycle (path)) + { + return true ; + } + + if (vertex == path_start_vertex (path)) { return false ; } for (size_t i = 1 ; i < path->vertex_count ; i++) { - if (path->vertices[i] == vertex) + if (path_get_vertex (path, i) == vertex) { return true ; } } - Vertex last_vertex = path->vertices[path->vertex_count - 1] ; - - return path->vertices[0] == last_vertex; + return false ; } void extend_multiple_simple_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) { - /*if (z != x) - { - for (size_t i = 0 ; i < x->path_count ; i++) - { - const Path *path = &x->paths[i] ; - if (path_has_loop_at_end (path)) - { - continue; - } + MultiplePaths src = *x ; - multiple_paths_append(z, path) ; - path_extend (&z->paths[i], col) ; - } + (void) _row ; + (void) _y ; + + if (!multiple_paths_prepare (z, src.path_count)) + { + return ; } - else - {*/ - for (size_t i = 0 ; i < z->path_count ; i++) + + for (size_t i = 0 ; i < src.path_count ; i++) + { + Path path = *multiple_paths_nth_const (&src, i) ; + + if (path_extending_will_add_repeated_non_starting_vertex (&path, + (Vertex) col)) { - Path *path = &z->paths[i] ; - if (path_extending_will_add_repeated_non_starting_vertex (path, col)) - { - path->vertex_count = 0 ; - continue ; - } + continue ; + } - path_extend (&z->paths[i], col) ; + path_extend (&path, (Vertex) col) ; + if (!path_is_empty (&path)) + { + multiple_paths_append_unchecked (z, &path) ; } - //} + } } // @@ -303,11 +965,12 @@ static inline bool 
path_extending_will_add_repeated_edge(const Path *path, Verte } // We identify edges as pairs of vertices. - Vertex vertex_1 = path->vertices[path->vertex_count - 1] ; + Vertex vertex_1 = path_last_vertex (path) ; - for (size_t i = 0 ; i < path->vertex_count - 1; i++) + for (size_t i = 0 ; i + 1 < path->vertex_count ; i++) { - if (path->vertices[i] == vertex_1 && path->vertices[i + 1] == vertex_2) + if (path_get_vertex (path, i) == vertex_1 && + path_get_vertex (path, i + 1) == vertex_2) { return true ; } @@ -315,40 +978,96 @@ static inline bool path_extending_will_add_repeated_edge(const Path *path, Verte return false ; } + void extend_multiple_trails_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *y) { - /*if (z != x) + MultiplePaths src = *x ; + + (void) _row ; + (void) y ; + + if (!multiple_paths_prepare (z, src.path_count)) + { + return ; + } + + for (size_t i = 0 ; i < src.path_count ; i++) { - z->path_count = x->path_count ; + Path path = *multiple_paths_nth_const (&src, i) ; - for (size_t i = 0 ; i < x->path_count ; i++) + if (path_extending_will_add_repeated_edge (&path, (Vertex) col)) { - const Path *path = &x->paths[i] ; - if (path_extending_will_add_repeated_edge (path, col)) - { - continue ; - } + continue ; + } - multiple_paths_append(z, path) ; - path_extend (&z->paths[i], col) ; + path_extend (&path, (Vertex) col) ; + if (!path_is_empty (&path)) + { + multiple_paths_append_unchecked (z, &path) ; } } - else - {*/ - for (size_t i = 0 ; i < z->path_count ; i++) +} + +// Result array can grow past PATH_LIMIT if the caller passes a larger limit. 
+static int ensure_result_capacity +( + Path **paths, + size_t *capacity, + size_t need, + char *msg +) +{ + Path *new_paths ; + size_t new_capacity ; + int info ; + + if (need <= *capacity) + { + return GrB_SUCCESS ; + } + + new_capacity = *capacity ; + if (new_capacity == 0) + { + new_capacity = PATH_LIMIT ; + if (new_capacity == 0) { - Path *path = &z->paths[i] ; - if (path_extending_will_add_repeated_edge (path, col)) - { - path->vertex_count = 0 ; - continue ; - } + new_capacity = 1 ; + } + } - path_extend (&z->paths[i], col) ; + while (new_capacity < need) + { + if (new_capacity > SIZE_MAX / 2) + { + new_capacity = need ; + break ; } - //} + + new_capacity = new_capacity * 2 ; + } + + info = LAGraph_Malloc ((void **) &new_paths, new_capacity, + sizeof (Path), msg) ; + if (info != GrB_SUCCESS) + { + return info ; + } + + memset (new_paths, 0, new_capacity * sizeof (Path)) ; + + if (*paths != NULL) + { + memcpy (new_paths, *paths, (*capacity) * sizeof (Path)) ; + LAGraph_Free ((void **) paths, NULL) ; + } + + *paths = new_paths ; + *capacity = new_capacity ; + return GrB_SUCCESS ; } + #define LG_FREE_WORK \ { \ GrB_free (&frontier) ; \ @@ -356,14 +1075,20 @@ void extend_multiple_trails_f(MultiplePaths *z, const MultiplePaths *x, GrB_Inde GrB_free (&symbol_frontier) ; \ GrB_free (&final_reducer) ; \ LAGraph_Free ((void **) &A, NULL) ; \ + LAGraph_Free ((void **) &AT, NULL) ; \ LAGraph_Free ((void **) &B, NULL) ; \ LAGraph_Free ((void **) &BT, NULL) ; \ + LAGraph_Free ((void **) &X, NULL) ; \ + LAGraph_Free ((void **) &I, NULL) ; \ + temp_arena_destroy () ; \ } -#define LG_FREE_ALL \ -{ \ - LG_FREE_WORK ; \ - LAGraph_Free ((void **) paths, NULL) ; \ +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ + free_all (paths, *path_count) ; \ + if (paths != NULL) *paths = NULL ; \ + if (path_count != NULL) *path_count = 0 ; \ } static int LAGraph_2Rpq @@ -409,14 +1134,9 @@ static int LAGraph_2Rpq GrB_Index ng = 0 ; // # nodes in the graph GrB_Index nr = 0 ; // # states in 
the NFA - GrB_Index nv = 0 ; // # pair count in the frontier - GrB_Index states = ns ; // # pairs in the current - // correspondence between the graph and - // the NFA GrB_Index rows = 0 ; // utility matrix row count GrB_Index cols = 0 ; // utility matrix column count - GrB_Index vals = 0 ; // utility matrix value count // TODO: This names might be too short. GrB_Semiring sr1 = first_combine_multiple_paths ; @@ -428,16 +1148,24 @@ static int LAGraph_2Rpq GrB_Matrix *B = NULL ; GrB_Matrix *BT = NULL ; - LG_ASSERT (paths != NULL, GrB_NULL_POINTER) ; - LG_ASSERT (path_count != NULL, GrB_NULL_POINTER) ; - LG_ASSERT (G != NULL, GrB_NULL_POINTER) ; - LG_ASSERT (R != NULL, GrB_NULL_POINTER) ; - LG_ASSERT (S != NULL, GrB_NULL_POINTER) ; - LG_ASSERT (op != NULL, GrB_NULL_POINTER) ; + MultiplePaths *X = NULL ; + GrB_Index *I = NULL ; + size_t result_capacity = 0 ; + + if (paths == NULL || path_count == NULL || G == NULL || R == NULL || + S == NULL || op == NULL) + { + return GrB_NULL_POINTER ; + } (*paths) = NULL ; (*path_count) = 0 ; + path_limit_reset () ; + + // init arenas for pathExtra + LG_TRY (temp_arena_init (msg)) ; + for (size_t i = 0 ; i < nl ; i++) { if (G[i] == NULL) continue ; @@ -539,13 +1267,13 @@ static int LAGraph_2Rpq { if (B[i] == NULL) continue ; - GrB_Index rows = 0 ; - GrB_Index cols = 0 ; + GrB_Index rrows = 0 ; + GrB_Index rcols = 0 ; - GRB_TRY (GrB_Matrix_nrows (&rows, B[i])) ; - GRB_TRY (GrB_Matrix_ncols (&cols, B[i])) ; + GRB_TRY (GrB_Matrix_nrows (&rrows, B[i])) ; + GRB_TRY (GrB_Matrix_ncols (&rcols, B[i])) ; - LG_ASSERT_MSG (rows == nr && cols == nr, LAGRAPH_NOT_CACHED, + LG_ASSERT_MSG (rrows == nr && rcols == nr, LAGRAPH_NOT_CACHED, "all the matrices in the NFA adjacency matrix decomposition " "should have the same dimensions and be square") ; } @@ -576,7 +1304,8 @@ static int LAGraph_2Rpq // initialization // ------------------------------------------------------------------------- - LG_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, sizeof 
(Path), msg)) ; + GRB_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, sizeof (Path), msg)) ; + result_capacity = PATH_LIMIT ; GRB_TRY (GrB_Vector_new (&final_reducer, GrB_BOOL, nr)) ; @@ -594,10 +1323,12 @@ static int LAGraph_2Rpq .paths = { { .vertices = { s }, - .vertex_count = 1 + .vertex_count = 1, + .extra = NULL } }, - .path_count = 1 + .path_count = 1, + .extra = NULL }; for (size_t j = 0 ; j < nqs ; j++) @@ -617,13 +1348,10 @@ static int LAGraph_2Rpq { //printf("Iteration\n"); GrB_Index nvals = 0 ; - GRB_TRY (GrB_Matrix_nvals (&nvals, next_frontier)) ; - - MultiplePaths *X ; - GrB_Index *I ; bool had_non_empty_path = false ; - //MultiplePaths *X; + GRB_TRY (GrB_Matrix_nvals (&nvals, next_frontier)) ; + LG_TRY (LAGraph_Calloc ((void **) &X, nvals, sizeof (MultiplePaths), msg)) ; LG_TRY (LAGraph_Calloc ((void **) &I, nvals, sizeof (GrB_Index), msg)) ; @@ -635,7 +1363,8 @@ static int LAGraph_2Rpq { for (size_t j = 0 ; j < X[i].path_count ; j++) { - if (!path_is_empty(&X[i].paths[j])) + // Required beacause we need to handle not only quick paths + if (!path_is_empty (multiple_paths_nth_const (&X[i], j))) { had_non_empty_path = true; break; @@ -660,16 +1389,34 @@ static int LAGraph_2Rpq } //printf("Found final paths!\n"); + if ((*path_count) >= limit) + { + continue ; + } + + size_t result_need = (*path_count) + X[i].path_count ; + if (result_need > result_capacity) + { + LG_TRY (ensure_result_capacity (paths, &result_capacity, + result_need, msg)) ; + } + for (size_t j = 0 ; j < X[i].path_count && (*path_count) < limit ; j++) { - const Path *path = &X[i].paths[j] ; - if (!path_is_empty(path)) + // Deep copy of result + const Path *path = multiple_paths_nth_const (&X[i], j) ; + if (!path_is_empty (path)) { - (*paths)[(*path_count)++] = *path ; + LG_TRY (path_clone_heap (&((*paths)[(*path_count)]), path, + msg)) ; + (*path_count)++ ; } } } + LAGraph_Free ((void **) &X, NULL) ; + LAGraph_Free ((void **) &I, NULL) ; + if (!had_non_empty_path || (*path_count) == 
limit) { //printf("breaking\n"); @@ -704,7 +1451,10 @@ static int LAGraph_2Rpq GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, sr2, B[i], frontier, GrB_DESC_R )) ; } - // TODO: Skip the iteration if symbol_frontier is already empty. + // Skip the iteration if symbol_frontier is already empty. + GrB_Index symbol_nvals = 0 ; + GRB_TRY (GrB_Matrix_nvals (&symbol_nvals, symbol_frontier)) ; + if (symbol_nvals == 0) continue ; // Traverse the graph if (!inverse_labels[i]) { @@ -724,18 +1474,26 @@ static int LAGraph_2Rpq GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; } } + + // Handle oom + LG_ASSERT_MSGF (!temp_alloc_failed (), GrB_OUT_OF_MEMORY, + "out of memory in temporary RPQ allocator: used=%zu capacity=%zu", + temp_arena_used (), temp_arena_capacity) ; + LG_ASSERT_MSG (!path_limit_failed (), GrB_OUT_OF_MEMORY, + "path limit per point exceeded") ; } GRB_TRY (GrB_apply (next_frontier, GrB_NULL, GrB_NULL, op, next_frontier, false, GrB_NULL)) ; + // Handle oom + LG_ASSERT_MSGF (!temp_alloc_failed (), GrB_OUT_OF_MEMORY, + "out of memory in temporary RPQ allocator: used=%zu capacity=%zu", + temp_arena_used (), temp_arena_capacity) ; + LG_ASSERT_MSG (!path_limit_failed (), GrB_OUT_OF_MEMORY, + "path limit per point exceeded") ; } - if (oom) { - // RODION, PLEASE HANDLE SOMEHOW - } - - //LG_FREE_WORK ; - xfree(); + LG_FREE_WORK ; return (GrB_SUCCESS) ; } @@ -824,6 +1582,20 @@ int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, limit, msg, extend_multiple_paths) ; } +// Required because returned Path objects may own heap-allocated PathExtra. 
+int LAGraph_2Rpq_FreePaths +( + Path **paths, + size_t path_count, + char *msg +) +{ + (void) msg ; + + free_all (paths, path_count) ; + return GrB_SUCCESS ; +} + #define LG_FREE_WORK \ { \ } @@ -834,17 +1606,23 @@ int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression int LAGraph_Rpq_initialize(char *msg) { - GRB_TRY (GrB_Type_new (&multiple_paths, sizeof(MultiplePaths))) ; + (void) msg ; - GRB_TRY (GrB_BinaryOp_new (&combine_multiple_paths_op, (GxB_binary_function) &combine_multiple_paths_f, multiple_paths, multiple_paths, multiple_paths)) ; - GRB_TRY (GrB_BinaryOp_new (&first_multiple_paths, (GxB_binary_function) &first_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ; - GRB_TRY (GrB_BinaryOp_new (&second_multiple_paths, (GxB_binary_function) &second_multiple_paths_f, multiple_paths, GrB_BOOL, multiple_paths)) ; - GRB_TRY (GrB_Monoid_new (&combine_multiple_paths, combine_multiple_paths_op, (void*) &multiple_paths_identity)) ; + memset (&multiple_paths_identity, 0, sizeof (multiple_paths_identity)) ; + + GRB_TRY (GxB_Type_new (&multiple_paths, sizeof (MultiplePaths), "MultiplePaths", MULTIPLE_PATHS_TYPE_DEFN)) ; + + GRB_TRY (GxB_BinaryOp_new (&combine_multiple_paths_op, (GxB_binary_function) &combine_multiple_paths_f, multiple_paths, multiple_paths, multiple_paths, "combine_multiple_paths_f", COMBINE_MULTIPLE_PATHS_DEFN)) ; + GRB_TRY (GxB_BinaryOp_new (&first_multiple_paths, (GxB_binary_function) &first_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL, "first_multiple_paths_f", FIRST_MULTIPLE_PATHS_DEFN)) ; + GRB_TRY (GxB_BinaryOp_new (&second_multiple_paths, (GxB_binary_function) &second_multiple_paths_f, multiple_paths, GrB_BOOL, multiple_paths, "second_multiple_paths_f", SECOND_MULTIPLE_PATHS_DEFN)) ; + GRB_TRY (GrB_Monoid_new (&combine_multiple_paths, combine_multiple_paths_op, (void*) &multiple_paths_identity)) ; GRB_TRY (GrB_Semiring_new (&first_combine_multiple_paths, combine_multiple_paths, first_multiple_paths)) ; 
GRB_TRY (GrB_Semiring_new (&second_combine_multiple_paths, combine_multiple_paths, second_multiple_paths)) ; - GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_paths, (GxB_index_unary_function) &extend_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ; - GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_simple, (GxB_index_unary_function) &extend_multiple_simple_f, multiple_paths, multiple_paths, GrB_BOOL)) ; - GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_trails, (GxB_index_unary_function) &extend_multiple_trails_f, multiple_paths, multiple_paths, GrB_BOOL)) ; + GRB_TRY (GxB_IndexUnaryOp_new (&extend_multiple_paths, (GxB_index_unary_function) &extend_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL, "extend_multiple_paths_f", EXTEND_MULTIPLE_PATHS_DEFN)) ; + GRB_TRY (GxB_IndexUnaryOp_new (&extend_multiple_simple, (GxB_index_unary_function) &extend_multiple_simple_f, multiple_paths, multiple_paths, GrB_BOOL, "extend_multiple_simple_f", EXTEND_MULTIPLE_SIMPLE_DEFN)) ; + GRB_TRY (GxB_IndexUnaryOp_new (&extend_multiple_trails, (GxB_index_unary_function) &extend_multiple_trails_f, multiple_paths, multiple_paths, GrB_BOOL, "extend_multiple_trails_f", EXTEND_MULTIPLE_TRAILS_DEFN)) ; + + return GrB_SUCCESS ; } diff --git a/include/LAGraphX.h b/include/LAGraphX.h index a3d8bfc2a5..3c014e5948 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -874,6 +874,9 @@ typedef struct { PathExtra *extra; } Path ; +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_FreePaths(Path **paths, size_t path_count, char *msg); + void Path_print (const Path *x); LAGRAPHX_PUBLIC From 7497ab5f3c3f45db7eff96180adc489ecaa41714 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sun, 3 May 2026 15:00:50 +0300 Subject: [PATCH 07/11] Add ALL SHORTEST PATH 2RPQ support This patch introduces ALL SHORTEST PATH semantics in the regular path query algorithm. The key insight is really similar to the reachability (i.e. ENDPOINTS) semantics described in detail in [^1]. 
The idea of SINGLE SOURCE ALL SHORTEST PATH semantics is, for a given query $Q$, a graph $G$, and a vertex $s$, to find for every vertex $v$ all minimum-length paths from $s$ to $v$. The implementation combines the custom semirings used for ALL PATHS with filtering of already-visited pairs of NFA states and graph vertices. [^1] https://arxiv.org/abs/2412.10287 --- experimental/algorithm/LAGraph_2Rpq.c | 86 +++++++++++++++++++++++---- experimental/test/test_2Rpq.c | 20 +++++++ include/LAGraphX.h | 24 ++++++++ 3 files changed, 120 insertions(+), 10 deletions(-) diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c index 36fef4bc71..9028a61aeb 100644 --- a/experimental/algorithm/LAGraph_2Rpq.c +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -1080,6 +1080,7 @@ static int ensure_result_capacity LAGraph_Free ((void **) &BT, NULL) ; \ LAGraph_Free ((void **) &X, NULL) ; \ LAGraph_Free ((void **) &I, NULL) ; \ + LAGraph_Free ((void **) &J, NULL) ; \ temp_arena_destroy () ; \ } @@ -1111,6 +1112,7 @@ static int LAGraph_2Rpq const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices bool inverse, // inverse the whole query + bool ignore_visited, // use mask to avoid processing the same (q, v) uint64_t limit, // maximum path count char *msg, // LAGraph output message GrB_IndexUnaryOp op // index unary op for a specific semantic @@ -1129,6 +1131,7 @@ static int LAGraph_2Rpq // specific label GrB_Matrix next_frontier = NULL ; // frontier value on the next // traversal step + GrB_Matrix visited = NULL ; // visited pairs (state, vertex) GrB_Vector final_reducer = NULL ; // auxiliary vector for reducing the // visited matrix to an answer @@ -1150,6 +1153,7 @@ static int LAGraph_2Rpq MultiplePaths *X = NULL ; GrB_Index *I = NULL ; + GrB_Index *J = NULL ; size_t result_capacity = 0 ; if (paths == NULL || path_count == NULL || G == NULL || R == NULL || @@ -1314,6 +1318,11 @@ static int LAGraph_2Rpq GRB_TRY 
(GrB_Matrix_new (&next_frontier, multiple_paths, nr, ng)) ; + if (ignore_visited) + { + GRB_TRY (GrB_Matrix_new (&visited, GrB_BOOL, nr, ng)) ; + } + // Initialize frontier with the source nodes for (size_t i = 0 ; i < ns ; i++) @@ -1339,6 +1348,11 @@ static int LAGraph_2Rpq } } + if (ignore_visited) + { + GrB_assign (visited, NULL, NULL, true, QS, nqs, S, ns, NULL) ; + } + // Initialize a few utility matrices GRB_TRY (GrB_Matrix_new (&frontier, multiple_paths, nr, ng)) ; GRB_TRY (GrB_Matrix_new (&symbol_frontier, multiple_paths, nr, ng)) ; @@ -1354,9 +1368,10 @@ static int LAGraph_2Rpq LG_TRY (LAGraph_Calloc ((void **) &X, nvals, sizeof (MultiplePaths), msg)) ; LG_TRY (LAGraph_Calloc ((void **) &I, nvals, sizeof (GrB_Index), msg)) ; + LG_TRY (LAGraph_Calloc ((void **) &J, nvals, sizeof (GrB_Index), msg)) ; // TODO: Change to a generic call. - GRB_TRY (GrB_Matrix_extractTuples_UDT (I, GrB_NULL, (void**) X, &nvals, next_frontier)) ; + GRB_TRY (GrB_Matrix_extractTuples_UDT (I, J, (void**) X, &nvals, next_frontier)) ; //printf("Next frontier with %d entries\n", nvals); for (size_t i = 0 ; i < nvals ; i++) @@ -1383,6 +1398,20 @@ static int LAGraph_2Rpq } //printf("Path at %ld final is %b", I[i], final) ; + // HACK: only for all_shortest_paths + if (ignore_visited) { + GrB_Vector w; + GRB_TRY (GrB_Vector_new (&w, GrB_BOOL, nr)) ; + GrB_Col_extract(w, GrB_NULL, GrB_NULL, visited, QF, nqf, J[i], GrB_NULL); + GrB_Index col_nvals = 0 ; + GrB_Vector_nvals (&col_nvals, w) ; + GrB_free (&w) ; + + if (col_nvals > 0) { + continue ; + } + } + if (!final) { continue ; @@ -1414,8 +1443,16 @@ static int LAGraph_2Rpq } } + if (ignore_visited) + { + //GRB_TRY (GrB_assign (visited, visited, GrB_NULL, next_frontier, + // GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_SC)) ; + GrB_assign (visited, next_frontier, GrB_NULL, true, GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_S) ; + } + LAGraph_Free ((void **) &X, NULL) ; LAGraph_Free ((void **) &I, NULL) ; + LAGraph_Free ((void **) &J, NULL) ; if 
(!had_non_empty_path || (*path_count) == limit) { @@ -1456,22 +1493,25 @@ static int LAGraph_2Rpq GRB_TRY (GrB_Matrix_nvals (&symbol_nvals, symbol_frontier)) ; if (symbol_nvals == 0) continue ; + GrB_Descriptor desc_forward = ignore_visited ? GrB_DESC_SC : GrB_NULL ; + GrB_Descriptor desc_backward = ignore_visited ? GrB_DESC_SCT1 : GrB_DESC_T1 ; + // Traverse the graph if (!inverse_labels[i]) { if (!inverse) { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_forward)) ; } else if (AT[i]) { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; } else { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_backward)) ; } } else { if (!inverse && AT[i]) { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; } else if (!inverse) { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_backward)) ; } else { - GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_forward)) ; } } @@ -1524,7 +1564,7 @@ int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular char *msg // LAGraph output message ) { - return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_simple) ; + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, 
ns, inverse, false, ULLONG_MAX, msg, extend_multiple_simple) ; } LAGRAPHX_PUBLIC @@ -1552,7 +1592,7 @@ int LAGraph_2Rpq_AllTrails // All trails satisfying regular expression. char *msg // LAGraph output message ) { - return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_trails) ; + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, false, ULLONG_MAX, msg, extend_multiple_trails) ; } int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression @@ -1579,7 +1619,33 @@ int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression char *msg // LAGraph output message ) { - return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, limit, msg, extend_multiple_paths) ; + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, false, limit, msg, extend_multiple_paths) ; +} + +int LAGraph_2Rpq_AllShortestPaths // All shortest paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message + ) +{ + return LAGraph_2Rpq(paths, path_count, R, 
inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, true, ULLONG_MAX, msg, extend_multiple_paths) ; } // Required because returned Path objects may own heap-allocated PathExtra. diff --git a/experimental/test/test_2Rpq.c b/experimental/test/test_2Rpq.c index 173f88a9cf..1d81ddb521 100644 --- a/experimental/test/test_2Rpq.c +++ b/experimental/test/test_2Rpq.c @@ -225,6 +225,26 @@ void test_Rpq_Simple (void) } printf("\n"); + // Cleanup + OK (LAGraph_Free ((void **) &paths, NULL)) ; + + res = LAGraph_2Rpq_AllShortestPaths (&paths, &path_count, R, inverse_labels, + MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, + inverse, msg) ; + + // Compare the results with expected values + //TEST_CHECK (nvals == files[k].expected_count) ; + //for (uint64_t i = 0 ; i < nvals ; i++) + // TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + printf("ALL SHORTEST PATHS:\n"); + for (size_t i = 0 ; i < path_count ; i++) + { + Path_print (&paths[i]); + } + printf("\n"); + + // Cleanup OK (LAGraph_Free ((void **) &paths, NULL)) ; for (uint64_t i = 0 ; i < MAX_LABELS ; i++) diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 3c014e5948..0838a65214 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -958,6 +958,30 @@ int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression uint64_t limit, // maximum path count char *msg // LAGraph output message ); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllShortestPaths // All shortest paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const 
GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +); //**************************************************************************** LAGRAPHX_PUBLIC int LAGraph_VertexCentrality_Triangle // vertex triangle-centrality From e01a1322c5c6c8279b5503ce4757f844f49cc0f8 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 11 May 2026 15:55:16 +0300 Subject: [PATCH 08/11] feat: [wip] fix all shortest and other things --- experimental/algorithm/LAGraph_2Rpq.c | 230 ++++++++++++++++---------- include/LAGraphX.h | 4 +- 2 files changed, 145 insertions(+), 89 deletions(-) diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c index 9028a61aeb..0c55e25b12 100644 --- a/experimental/algorithm/LAGraph_2Rpq.c +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -21,14 +21,20 @@ #include #include #include +#include #include #define PATH_LIMIT 100000 +#define RPQ_MAX_PATH_LENGTH 60000 + +#define RPQ_STRINGIFY_HELPER(x) #x +#define RPQ_STRINGIFY(x) RPQ_STRINGIFY_HELPER(x) + // This define and three functions below need for YAGO dataset. // Because we have OOM on it // -#define PATHS_PER_POINT_LIMIT 15000 +#define PATHS_PER_POINT_LIMIT 3000000 static atomic_int path_limit_exceeded ; @@ -153,9 +159,6 @@ static size_t temp_arena_used (void) // JIT kernels are compiled into a separate shared object. They cannot call // static functions from this translation unit, so expose tiny wrappers for // the stateful parts: arena allocation and path-limit reporting. -// -// These functions must be visible from the dynamic symbol table. If LAGraphX -// builds with hidden visibility, LAGRAPHX_PUBLIC is important here. 
LAGRAPHX_PUBLIC void *LAGraph_Rpq_jit_temp_calloc_bytes (size_t size) { @@ -202,9 +205,10 @@ GrB_IndexUnaryOp extend_multiple_trails ; "#include \n" \ "#include \n" \ "#include \n" \ -"#define QUICK_PATH_LENGTH 20\n" \ -"#define QUICK_PATH_COUNT 1\n" \ -"#define PATHS_PER_POINT_LIMIT 15000\n" \ +"#define QUICK_PATH_LENGTH " RPQ_STRINGIFY(QUICK_PATH_LENGTH) "\n" \ +"#define QUICK_PATH_COUNT " RPQ_STRINGIFY(QUICK_PATH_COUNT) "\n" \ +"#define PATHS_PER_POINT_LIMIT " RPQ_STRINGIFY(PATHS_PER_POINT_LIMIT) "\n"\ +"#define RPQ_MAX_PATH_LENGTH " RPQ_STRINGIFY(RPQ_MAX_PATH_LENGTH) "\n" \ "typedef uint64_t Vertex;\n" \ "typedef struct PathExtra {\n" \ " size_t len;\n" \ @@ -253,6 +257,11 @@ GrB_IndexUnaryOp extend_multiple_trails ; " return path->vertex_count > 1 &&\n" \ " path_start_vertex_jit(path) == path_last_vertex_jit(path);\n" \ "}\n" \ +"static bool all_paths_can_extend_jit(const Path *path)\n" \ +"{\n" \ +" return RPQ_MAX_PATH_LENGTH == 0 ||\n" \ +" path->vertex_count < RPQ_MAX_PATH_LENGTH;\n" \ +"}\n" \ "static PathExtra *path_extra_temp_alloc_copy_plus_one_jit(\n" \ " const Path *src, Vertex vertex)\n" \ "{\n" \ @@ -364,13 +373,12 @@ GrB_IndexUnaryOp extend_multiple_trails ; " *multiple_paths_nth_mut_jit(x, x->path_count) = *path;\n" \ " x->path_count++;\n" \ "}\n" \ -"static bool path_extending_will_add_repeated_non_starting_vertex_jit(\n"\ +"static bool path_extending_will_add_repeated_vertex_jit(\n" \ " const Path *path, Vertex vertex)\n" \ "{\n" \ " if (path->vertex_count == 0) return false;\n" \ " if (path_is_closed_cycle_jit(path)) return true;\n" \ -" if (vertex == path_start_vertex_jit(path)) return false;\n" \ -" for (size_t i = 1; i < path->vertex_count; i++)\n" \ +" for (size_t i = 0 ; i < path->vertex_count ; i++)\n" \ " {\n" \ " if (path_get_vertex_jit(path, i) == vertex) return true;\n" \ " }\n" \ @@ -440,6 +448,7 @@ GrB_IndexUnaryOp extend_multiple_trails ; " for (size_t i = 0; i < src.path_count; i++)\n" \ " {\n" \ " Path path = 
*multiple_paths_nth_const_jit(&src, i);\n" \ +" if (!all_paths_can_extend_jit(&path)) continue;\n" \ " path_extend_jit(&path, (Vertex) col);\n" \ " if (!path_is_empty_jit(&path))\n" \ " {\n" \ @@ -460,7 +469,7 @@ GrB_IndexUnaryOp extend_multiple_trails ; " for (size_t i = 0; i < src.path_count; i++)\n" \ " {\n" \ " Path path = *multiple_paths_nth_const_jit(&src, i);\n" \ -" if (path_extending_will_add_repeated_non_starting_vertex_jit(\n"\ +" if (path_extending_will_add_repeated_vertex_jit(\n" \ " &path, (Vertex) col))\n" \ " {\n" \ " continue;\n" \ @@ -545,6 +554,12 @@ static bool path_is_closed_cycle (const Path *path) path_start_vertex (path) == path_last_vertex (path) ; } +static bool all_paths_can_extend (const Path *path) +{ + return RPQ_MAX_PATH_LENGTH == 0 || + path->vertex_count < RPQ_MAX_PATH_LENGTH ; +} + static PathExtra *path_extra_temp_alloc_copy_plus_one ( const Path *src, @@ -809,18 +824,6 @@ static void multiple_paths_append_unchecked (MultiplePaths *x, const Path *path) } // -static void MultiplePaths_print (const MultiplePaths *x) -{ - printf("Multiple paths:\n") ; - for (size_t i = 0 ; i < x->path_count ; i++) - { - - printf("\t Path %zu: ", i) ; - Path_print (multiple_paths_nth_const (x, i)) ; - } - printf("\n") ; -} - // All functions below reworked. // Due to graphblas api, we must handle z param like it's empty. // It should just store result (z = f(x)). 
@@ -882,6 +885,11 @@ void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index for (size_t i = 0 ; i < src.path_count ; i++) { Path path = *multiple_paths_nth_const (&src, i) ; + if (!all_paths_can_extend (&path)) + { + continue ; + } + path_extend (&path, (Vertex) col) ; if (!path_is_empty (&path)) @@ -895,7 +903,9 @@ void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index // ALL SIMPLE // -static inline bool path_extending_will_add_repeated_non_starting_vertex(const Path *path, Vertex vertex) + +// AllSimple rejects any repeated vertex, including a return to the start vertex (no closed cycles). +static inline bool path_extending_will_add_repeated_vertex(const Path *path, Vertex vertex) { if (path->vertex_count == 0) { @@ -907,12 +917,7 @@ static inline bool path_extending_will_add_repeated_non_starting_vertex(const Pa return true ; } - if (vertex == path_start_vertex (path)) - { - return false ; - } - - for (size_t i = 1 ; i < path->vertex_count ; i++) + for (size_t i = 0 ; i < path->vertex_count ; i++) { if (path_get_vertex (path, i) == vertex) { @@ -939,7 +944,7 @@ void extend_multiple_simple_f(MultiplePaths *z, const MultiplePaths *x, GrB_Inde { Path path = *multiple_paths_nth_const (&src, i) ; - if (path_extending_will_add_repeated_non_starting_vertex (&path, + if (path_extending_will_add_repeated_vertex (&path, (Vertex) col)) { continue ; @@ -1067,13 +1072,52 @@ static int ensure_result_capacity return GrB_SUCCESS ; } +static int final_state_was_visited +( + GrB_Matrix visited, + const GrB_Index *QF, + size_t nqf, + GrB_Index vertex, + bool *seen +) +{ + GrB_Info info ; + bool value ; + + *seen = false ; + + for (size_t i = 0 ; i < nqf ; i++) + { + value = false ; + info = GrB_Matrix_extractElement_BOOL (&value, visited, QF [i], + vertex) ; + if (info == GrB_NO_VALUE) + { + continue ; + } + if (info != GrB_SUCCESS) + { + return info ; + } + if (value) + { + *seen = true ; + return GrB_SUCCESS ; + } + } + + return GrB_SUCCESS ; +} + +#undef LG_FREE_WORK +#undef
LG_FREE_ALL #define LG_FREE_WORK \ { \ GrB_free (&frontier) ; \ GrB_free (&next_frontier) ; \ GrB_free (&symbol_frontier) ; \ - GrB_free (&final_reducer) ; \ + GrB_free (&visited) ; \ LAGraph_Free ((void **) &A, NULL) ; \ LAGraph_Free ((void **) &AT, NULL) ; \ LAGraph_Free ((void **) &B, NULL) ; \ @@ -1132,8 +1176,6 @@ static int LAGraph_2Rpq GrB_Matrix next_frontier = NULL ; // frontier value on the next // traversal step GrB_Matrix visited = NULL ; // visited pairs (state, vertex) - GrB_Vector final_reducer = NULL ; // auxiliary vector for reducing the - // visited matrix to an answer GrB_Index ng = 0 ; // # nodes in the graph GrB_Index nr = 0 ; // # states in the NFA @@ -1167,6 +1209,9 @@ static int LAGraph_2Rpq path_limit_reset () ; + LG_ASSERT_MSG (!ignore_visited || ns == 1, GrB_INVALID_VALUE, + "AllShortestPaths requires exactly one source vertex") ; + // init arenas for pathExtra LG_TRY (temp_arena_init (msg)) ; @@ -1308,14 +1353,10 @@ static int LAGraph_2Rpq // initialization // ------------------------------------------------------------------------- - GRB_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, sizeof (Path), msg)) ; + GRB_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, + sizeof (Path), msg)) ; result_capacity = PATH_LIMIT ; - GRB_TRY (GrB_Vector_new (&final_reducer, GrB_BOOL, nr)) ; - - // Initialize matrix for reducing the result - GRB_TRY (GrB_assign (final_reducer, NULL, NULL, true, QF, nqf, NULL)) ; - GRB_TRY (GrB_Matrix_new (&next_frontier, multiple_paths, nr, ng)) ; if (ignore_visited) @@ -1348,11 +1389,6 @@ static int LAGraph_2Rpq } } - if (ignore_visited) - { - GrB_assign (visited, NULL, NULL, true, QS, nqs, S, ns, NULL) ; - } - // Initialize a few utility matrices GRB_TRY (GrB_Matrix_new (&frontier, multiple_paths, nr, ng)) ; GRB_TRY (GrB_Matrix_new (&symbol_frontier, multiple_paths, nr, ng)) ; @@ -1360,7 +1396,6 @@ static int LAGraph_2Rpq // Main loop while (true) { - //printf("Iteration\n"); GrB_Index nvals = 0 ; bool 
had_non_empty_path = false ; @@ -1368,25 +1403,31 @@ static int LAGraph_2Rpq LG_TRY (LAGraph_Calloc ((void **) &X, nvals, sizeof (MultiplePaths), msg)) ; LG_TRY (LAGraph_Calloc ((void **) &I, nvals, sizeof (GrB_Index), msg)) ; - LG_TRY (LAGraph_Calloc ((void **) &J, nvals, sizeof (GrB_Index), msg)) ; + if (ignore_visited) + { + LG_TRY (LAGraph_Calloc ((void **) &J, nvals, + sizeof (GrB_Index), msg)) ; + } // TODO: Change to a generic call. - GRB_TRY (GrB_Matrix_extractTuples_UDT (I, J, (void**) X, &nvals, next_frontier)) ; - //printf("Next frontier with %d entries\n", nvals); + GRB_TRY (GrB_Matrix_extractTuples_UDT (I, + ignore_visited ? J : GrB_NULL, (void**) X, &nvals, + next_frontier)) ; for (size_t i = 0 ; i < nvals ; i++) { for (size_t j = 0 ; j < X[i].path_count ; j++) { + const Path *path = multiple_paths_nth_const (&X[i], j) ; + // Required beacause we need to handle not only quick paths - if (!path_is_empty (multiple_paths_nth_const (&X[i], j))) + if (!path_is_empty (path)) { had_non_empty_path = true; break; } } - //MultiplePaths_print (&X[i]) ; bool final = false ; for (size_t j = 0 ; j < nqf ; j++) { @@ -1396,34 +1437,36 @@ static int LAGraph_2Rpq break ; } } - //printf("Path at %ld final is %b", I[i], final) ; - - // HACK: only for all_shortest_paths - if (ignore_visited) { - GrB_Vector w; - GRB_TRY (GrB_Vector_new (&w, GrB_BOOL, nr)) ; - GrB_Col_extract(w, GrB_NULL, GrB_NULL, visited, QF, nqf, J[i], GrB_NULL); - GrB_Index col_nvals = 0 ; - GrB_Vector_nvals (&col_nvals, w) ; - GrB_free (&w) ; - - if (col_nvals > 0) { - continue ; - } - } if (!final) { continue ; } - //printf("Found final paths!\n"); + if (ignore_visited) + { + bool seen_final = false ; + + GRB_TRY (final_state_was_visited (visited, QF, nqf, J [i], + &seen_final)) ; + if (seen_final) + { + continue ; + } + } + if ((*path_count) >= limit) { continue ; } - size_t result_need = (*path_count) + X[i].path_count ; + uint64_t remaining_limit = limit - (uint64_t) (*path_count) ; + size_t 
paths_to_reserve = X[i].path_count ; + if (remaining_limit < (uint64_t) paths_to_reserve) + { + paths_to_reserve = (size_t) remaining_limit ; + } + size_t result_need = (*path_count) + paths_to_reserve ; if (result_need > result_capacity) { LG_TRY (ensure_result_capacity (paths, &result_capacity, @@ -1445,9 +1488,8 @@ static int LAGraph_2Rpq if (ignore_visited) { - //GRB_TRY (GrB_assign (visited, visited, GrB_NULL, next_frontier, - // GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_SC)) ; - GrB_assign (visited, next_frontier, GrB_NULL, true, GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_S) ; + GRB_TRY (GrB_assign (visited, next_frontier, GrB_NULL, true, + GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_S)) ; } LAGraph_Free ((void **) &X, NULL) ; @@ -1456,7 +1498,6 @@ static int LAGraph_2Rpq if (!had_non_empty_path || (*path_count) == limit) { - //printf("breaking\n"); break; } @@ -1493,25 +1534,28 @@ static int LAGraph_2Rpq GRB_TRY (GrB_Matrix_nvals (&symbol_nvals, symbol_frontier)) ; if (symbol_nvals == 0) continue ; - GrB_Descriptor desc_forward = ignore_visited ? GrB_DESC_SC : GrB_NULL ; - GrB_Descriptor desc_backward = ignore_visited ? GrB_DESC_SCT1 : GrB_DESC_T1 ; + GrB_Matrix mask = ignore_visited ? visited : GrB_NULL ; + GrB_Descriptor desc_forward = ignore_visited ? GrB_DESC_SC : + GrB_NULL ; + GrB_Descriptor desc_backward = ignore_visited ? 
GrB_DESC_SCT1 : + GrB_DESC_T1 ; // Traverse the graph if (!inverse_labels[i]) { if (!inverse) { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_forward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, A[i], desc_forward)) ; } else if (AT[i]) { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; } else { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_backward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, A[i], desc_backward)) ; } } else { if (!inverse && AT[i]) { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, AT[i], desc_forward)) ; } else if (!inverse) { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_backward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, A[i], desc_backward)) ; } else { - GRB_TRY (GrB_mxm (next_frontier, visited, acc, sr1, symbol_frontier, A[i], desc_forward)) ; + GRB_TRY (GrB_mxm (next_frontier, mask, acc, sr1, symbol_frontier, A[i], desc_forward)) ; } } @@ -1519,8 +1563,13 @@ static int LAGraph_2Rpq LG_ASSERT_MSGF (!temp_alloc_failed (), GrB_OUT_OF_MEMORY, "out of memory in temporary RPQ allocator: used=%zu capacity=%zu", temp_arena_used (), temp_arena_capacity) ; - LG_ASSERT_MSG (!path_limit_failed (), GrB_OUT_OF_MEMORY, - "path limit per point exceeded") ; + if (path_limit_failed ()) + { + LG_ERROR_MSG ("LAGraph failure (file %s, line %d): %s", + __FILE__, __LINE__, "path limit per point exceeded") ; + LG_FREE_WORK ; + return GrB_OUT_OF_MEMORY ; + } } GRB_TRY (GrB_apply (next_frontier, GrB_NULL, GrB_NULL, op, next_frontier, false, GrB_NULL)) ; @@ -1529,8 +1578,13 @@ static int LAGraph_2Rpq LG_ASSERT_MSGF 
(!temp_alloc_failed (), GrB_OUT_OF_MEMORY, "out of memory in temporary RPQ allocator: used=%zu capacity=%zu", temp_arena_used (), temp_arena_capacity) ; - LG_ASSERT_MSG (!path_limit_failed (), GrB_OUT_OF_MEMORY, - "path limit per point exceeded") ; + if (path_limit_failed ()) + { + LG_ERROR_MSG ("LAGraph failure (file %s, line %d): %s", + __FILE__, __LINE__, "path limit per point exceeded") ; + LG_FREE_WORK ; + return GrB_OUT_OF_MEMORY ; + } } LG_FREE_WORK ; @@ -1540,8 +1594,7 @@ static int LAGraph_2Rpq int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular // expression. Simple paths are paths without - // loops or the ones with the same starting - // and final nodes. + // repeated vertices. ( // output: Path **paths, // simple paths from one of the starting @@ -1642,10 +1695,11 @@ int LAGraph_2Rpq_AllShortestPaths // All shortest paths satisfying regular const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices bool inverse, // inverse the whole query + uint64_t limit, // maximum path count char *msg // LAGraph output message ) { - return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, true, ULLONG_MAX, msg, extend_multiple_paths) ; + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, true, limit, msg, extend_multiple_paths) ; } // Required because returned Path objects may own heap-allocated PathExtra. @@ -1662,6 +1716,8 @@ int LAGraph_2Rpq_FreePaths return GrB_SUCCESS ; } +#undef LG_FREE_WORK +#undef LG_FREE_ALL #define LG_FREE_WORK \ { \ } diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 0838a65214..b69904c55b 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -885,8 +885,7 @@ int LAGraph_Rpq_initialize (char *msg); LAGRAPHX_PUBLIC int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular // expression. 
Simple paths are paths without - // loops or the ones with the same starting - // and final nodes. + // repeated vertices. ( // output: Path **paths, // simple paths from one of the starting @@ -980,6 +979,7 @@ int LAGraph_2Rpq_AllShortestPaths // All shortest paths satisfying regular expre const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices bool inverse, // inverse the whole query + uint64_t limit, // maximum path count char *msg // LAGraph output message ); //**************************************************************************** From eeb5624e9d6c907ce9693348cd931b1a24c9e0f0 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 19 May 2026 16:02:29 +0300 Subject: [PATCH 09/11] fix: all shortest usage in test --- experimental/test/test_2Rpq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/test/test_2Rpq.c b/experimental/test/test_2Rpq.c index 1d81ddb521..125536243b 100644 --- a/experimental/test/test_2Rpq.c +++ b/experimental/test/test_2Rpq.c @@ -230,7 +230,7 @@ void test_Rpq_Simple (void) res = LAGraph_2Rpq_AllShortestPaths (&paths, &path_count, R, inverse_labels, MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, - inverse, msg) ; + inverse, 100, msg) ; // Compare the results with expected values //TEST_CHECK (nvals == files[k].expected_count) ; From 09716fb1c63e7b7fda3d05458e510ae8bfb56b5f Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 19 May 2026 16:11:39 +0300 Subject: [PATCH 10/11] fix: 2 regular path query usage --- experimental/test/test_RegularPathQuery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/test/test_RegularPathQuery.c b/experimental/test/test_RegularPathQuery.c index f2e0981fd0..914890e7c3 100644 --- a/experimental/test/test_RegularPathQuery.c +++ b/experimental/test/test_RegularPathQuery.c @@ -179,8 +179,8 @@ void test_RegularPathQueryBasic (void) // Evaluate the algorithm GrB_Vector r = NULL ; - OK 
(LAGraph_RegularPathQuery (&r, R, MAX_LABELS, QS, nqs, - QF, nqf, G, S, ns, msg)) ; + OK (LAGraph_2RegularPathQuery (&r, R, false, MAX_LABELS, QS, nqs, + QF, nqf, G, S, ns, false, msg)) ; // Extract results from the output vector GrB_Index *reachable ; From 3d94ea98ef5f6231b5a73da21397bacd06436ced Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 19 May 2026 16:22:45 +0300 Subject: [PATCH 11/11] fix: conflict fixes --- experimental/test/test_RegularPathQuery.c | 47 ++++------------------- 1 file changed, 8 insertions(+), 39 deletions(-) diff --git a/experimental/test/test_RegularPathQuery.c b/experimental/test/test_RegularPathQuery.c index 914890e7c3..ee5d314866 100644 --- a/experimental/test/test_RegularPathQuery.c +++ b/experimental/test/test_RegularPathQuery.c @@ -178,8 +178,9 @@ void test_RegularPathQueryBasic (void) // Evaluate the algorithm GrB_Vector r = NULL ; + bool inverse_labels[MAX_LABELS] = {false} ; - OK (LAGraph_2RegularPathQuery (&r, R, false, MAX_LABELS, QS, nqs, + OK (LAGraph_2RegularPathQuery (&r, R, inverse_labels, MAX_LABELS, QS, nqs, QF, nqf, G, S, ns, false, msg)) ; // Extract results from the output vector @@ -190,7 +191,7 @@ void test_RegularPathQueryBasic (void) GrB_Vector_nvals (&nvals, r) ; OK (LAGraph_Malloc ((void **) &reachable, MAX_RESULTS, sizeof (GrB_Index), msg)) ; - OK (LAGraph_Malloc ((void **) &values, MAX_RESULTS, sizeof (GrB_Index), msg)) ; + OK (LAGraph_Malloc ((void **) &values, MAX_RESULTS, sizeof (bool), msg)) ; GrB_Vector_extractTuples (reachable, values, &nvals, r) ; @@ -211,43 +212,11 @@ void test_RegularPathQueryBasic (void) OK (LAGraph_Delete (&(G[i]), msg)) ; } - // TODO: Use 2RPQ HERE. 
- //OK (LAGraph_RegularPathQuery (&r, R, MAX_LABELS, QS, nqs, - // QF, nqf, G, S, ns, msg)) ; - - // Extract results from the output vector - GrB_Index *reachable ; - bool *values ; - - GrB_Index nvals ; - GrB_Vector_nvals (&nvals, r) ; - - OK (LAGraph_Malloc ((void **) &reachable, MAX_RESULTS, sizeof (GrB_Index), msg)) ; - OK (LAGraph_Malloc ((void **) &values, MAX_RESULTS, sizeof (GrB_Index), msg)) ; - - GrB_Vector_extractTuples (reachable, values, &nvals, r) ; - - // Compare the results with expected values - TEST_CHECK (nvals == files[k].expected_count) ; - for (uint64_t i = 0 ; i < nvals ; i++) - TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; - - // Cleanup - OK (LAGraph_Free ((void **) &values, NULL)) ; - OK (LAGraph_Free ((void **) &reachable, NULL)) ; - - OK (GrB_free (&r)) ; - - for (uint64_t i = 0 ; i < MAX_LABELS ; i++) - { - if (G[i] == NULL) continue ; - OK (LAGraph_Delete (&(G[i]), msg)) ; - } - - for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) - { - if (R[i] == NULL) continue ; - OK (LAGraph_Delete (&(R[i]), msg)) ; + for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) + { + if (R[i] == NULL) continue ; + OK (LAGraph_Delete (&(R[i]), msg)) ; + } } }