SparseLinearAlgebra · VanyaGlazunov · Mar 31, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,6 +21,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           submodules: recursive
+          lfs: true
 
       - name: Install system dependencies
         run: |

diff --git a/AGENTS.md b/AGENTS.md
@@ -26,9 +26,11 @@ pathrex/
 │   │   └── inmemory.rs         # InMemory marker, InMemoryBuilder, InMemoryGraph
 │   └── formats/
 │       ├── mod.rs              # FormatError enum, re-exports
-│       └── csv.rs              # Csv<R> — CSV → Edge iterator (CsvConfig, ColumnSpec)
+│       ├── csv.rs              # Csv<R> — CSV → Edge iterator (CsvConfig, ColumnSpec)
+│       └── mm.rs               # MatrixMarket directory loader (vertices.txt, edges.txt, *.txt)
 ├── tests/
-│   └── inmemory_tests.rs       # Integration tests for InMemoryBuilder / InMemoryGraph
+│   ├── inmemory_tests.rs       # Integration tests for InMemoryBuilder / InMemoryGraph
+│   └── mm_tests.rs             # Integration tests for MatrixMarket format
 ├── deps/
 │   └── LAGraph/                # Git submodule (SparseLinearAlgebra/LAGraph)
 └── .github/workflows/ci.yml   # CI: build GraphBLAS + LAGraph, cargo build & test
@@ -178,18 +180,26 @@ pub trait Backend {
 ### InMemoryBuilder / InMemoryGraph
 
 [`InMemoryBuilder`](src/graph/inmemory.rs:35) is the primary `GraphBuilder` implementation.
-It collects edges in RAM, then [`build()`](src/graph/inmemory.rs:110) calls
+It collects edges in RAM, then [`build()`](src/graph/inmemory.rs:131) calls
 GraphBLAS to create one `GrB_Matrix` per label via COO format, wraps each in an
-`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:153).
+`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:173).
 
 Multiple CSV sources can be chained with repeated `.load()` calls; all edges are merged
 into a single graph.
 
+**Node ID representation:** Internally, `InMemoryBuilder` uses `HashMap<usize, String>` for
+`id_to_node` (changed from `Vec<String>` to support sparse/pre-assigned IDs from MatrixMarket).
+The [`set_node_map()`](src/graph/inmemory.rs:67) method allows bulk-installing a node mapping,
+which is used by the MatrixMarket loader.
+
 ### Format parsers
 
-[`Csv<R>`](src/formats/csv.rs:52) is the only built-in parser. It yields
-`Iterator<Item = Result<Edge, FormatError>>` and is directly pluggable into
-`GraphBuilder::load()` via its `GraphSource<InMemoryBuilder>` impl.
+Two built-in parsers are available:
+
+#### CSV format
+
+[`Csv<R>`](src/formats/csv.rs:52) yields `Iterator<Item = Result<Edge, FormatError>>` and is
+directly pluggable into `GraphBuilder::load()` via its `GraphSource<InMemoryBuilder>` impl.
 
 Configuration is via [`CsvConfig`](src/formats/csv.rs:17):
 
@@ -204,6 +214,26 @@ Configuration is via [`CsvConfig`](src/formats/csv.rs:17):
 [`ColumnSpec`](src/formats/csv.rs:11) is either `Index(usize)` or `Name(String)`.
 Name-based lookup requires `has_header: true`.
 
+#### MatrixMarket directory format
+
+[`MatrixMarket`](src/formats/mm.rs:160) loads an edge-labeled graph from a directory with:
+
+- `vertices.txt` — one line per node: `<node_name> <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:199) returns the matching **0-based** matrix index
+- `edges.txt` — one line per label: `<label_name> <1-based-index>` (selects `n.txt`)
+- `<n>.txt` — MatrixMarket adjacency matrix for label with index `n`
+
+The loader uses [`LAGraph_MMRead`](src/lagraph_sys.rs) to parse each `<n>.txt` matrix file into a
+`GrB_Matrix`, then wraps it in an `LAGraph_Graph`. Vertex indices from `vertices.txt` are
+converted to 0-based and installed via [`InMemoryBuilder::set_node_map()`](src/graph/inmemory.rs:67).
+
+Helper functions:
+
+- [`load_mm_file(path)`](src/formats/mm.rs:64) — reads a single MatrixMarket file into a
+  `GrB_Matrix`.
+- [`parse_index_map(path)`](src/formats/mm.rs) — parses `<name> <index>` lines; indices must be **>= 1** and **unique** within the file.
+
+`MatrixMarket` implements `GraphSource<InMemoryBuilder>` in [`src/graph/inmemory.rs`](src/graph/inmemory.rs): `vertices.txt` maps are converted from 1-based file indices to 0-based matrix ids before [`set_node_map`](src/graph/inmemory.rs:67); `edges.txt` indices are unchanged for `n.txt` lookup.
+
 ### FFI layer
 
 [`lagraph_sys`](src/lagraph_sys.rs) exposes raw C bindings for GraphBLAS and

diff --git a/Cargo.toml b/Cargo.toml
@@ -13,5 +13,8 @@ thiserror = "1.0"
 [features]
 regenerate-bindings = ["bindgen"]
 
+[dev-dependencies]
+tempfile = "3"
+
 [build-dependencies]
 bindgen = { version = "0.71", optional = true }
diff --git a/src/formats/mm.rs b/src/formats/mm.rs
@@ -0,0 +1,268 @@
+//! MatrixMarket directory format loader.
+//!
+//! An edge-labeled graph is stored in a directory with the following layout:
+//!
+//! ```text
+//! <dir>/
+//!   vertices.txt   — one line per node:  `<node_name> <1-based-index>`
+//!   edges.txt      — one line per label: `<label_name> <1-based-index>`
+//!   1.txt          — MM adjacency matrix for the label with index 1
+//!   2.txt          — MM adjacency matrix for the label with index 2
+//!   …
+//! ```
+//!
+//! # Example
+//!
+//! ```no_run
+//! use pathrex::graph::{Graph, InMemory, GraphDecomposition};
+//! use pathrex::formats::mm::MatrixMarket;
+//!
+//! let graph = Graph::<InMemory>::try_from(
+//!     MatrixMarket::from_dir("path/to/graph/dir")
+//! ).unwrap();
+//! println!("Nodes: {}", graph.num_nodes());
+//! ```
+
+use std::collections::HashMap;
+use std::ffi::CString;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::os::fd::IntoRawFd;
+use std::path::{Path, PathBuf};
+
+use crate::formats::FormatError;
+use crate::graph::{GraphError, ensure_grb_init};
+use crate::la_ok;
+use crate::lagraph_sys::{FILE, GrB_Matrix, LAGraph_MMRead};
+
+/// Read a single MatrixMarket file and return the raw [`GrB_Matrix`].
+///
+/// The file is opened with the Rust standard library, its descriptor is handed to
+/// C stdio through `fdopen`, and the resulting stream is parsed by
+/// [`LAGraph_MMRead`]. The stream is closed before returning, whether or not
+/// parsing succeeded. Nothing in this function frees the returned matrix handle.
+///
+/// # Errors
+///
+/// - [`FormatError::Io`] if the file cannot be opened or `fdopen` fails.
+/// - [`FormatError::MatrixMarket`] if GraphBLAS initialization fails or
+///   `LAGraph_MMRead` reports an error.
+///
+/// # Panics
+///
+/// Panics if the read step fails with an error other than
+/// [`GraphError::LAGraph`], which is treated as an invariant violation.
+pub fn load_mm_file(path: impl AsRef<Path>) -> Result<GrB_Matrix, FormatError> {
+    let path = path.as_ref();
+
+    ensure_grb_init().map_err(|e| match e {
+        GraphError::LAGraph(info, msg) => FormatError::MatrixMarket {
+            code: info,
+            message: msg,
+        },
+        _ => FormatError::MatrixMarket {
+            code: crate::lagraph_sys::GrB_Info::GrB_PANIC,
+            message: "Failed to initialize GraphBLAS".to_string(),
+        },
+    })?;
+
+    let file = File::open(path)?;
+    // `into_raw_fd` gives up Rust's ownership of the descriptor: from here on it
+    // has to be closed explicitly, either with `close` or through `fclose`.
+    let fd = file.into_raw_fd();
+
+    let c_mode = CString::new("r").unwrap();
+    // SAFETY: `fd` is an open descriptor owned by this function and `c_mode` is a
+    // NUL-terminated string that outlives the call.
+    let f = unsafe { libc::fdopen(fd, c_mode.as_ptr()) };
+    if f.is_null() {
+        // Read errno *before* calling `close`, which may overwrite it and make us
+        // report the wrong failure.
+        let os_err = std::io::Error::last_os_error();
+        // SAFETY: `fdopen` failed, so `fd` was not adopted by a stream and is
+        // still ours to close.
+        unsafe { libc::close(fd) };
+        return Err(os_err.into());
+    }
+
+    let mut matrix: GrB_Matrix = std::ptr::null_mut();
+
+    let read_result = la_ok!(LAGraph_MMRead(&mut matrix, f as *mut FILE));
+    // SAFETY: `f` is the non-null stream returned by `fdopen` above and has not
+    // been closed yet; `fclose` also releases the underlying descriptor.
+    unsafe { libc::fclose(f) };
+
+    match read_result {
+        Ok(_) => Ok(matrix),
+        Err(GraphError::LAGraph(info, msg)) => Err(FormatError::MatrixMarket {
+            code: info,
+            message: msg,
+        }),
+        _ => unreachable!("should be either mm read error or ok"),
+    }
+}
+
+/// Parse a `<name> <index>` mapping file.
+///
+/// Throws error on non-positive or duplicate indicies
+pub(crate) fn parse_index_map(
+    path: &Path,
+) -> Result<(HashMap<usize, String>, HashMap<String, usize>), FormatError> {
+    let file_name = path
+        .file_name()
+        .map(|n| n.to_string_lossy().into_owned())
+        .unwrap_or_else(|| path.display().to_string());
+
+    let reader = BufReader::new(File::open(path)?);
+    let mut by_idx: HashMap<usize, String> = HashMap::new();
+    let mut by_name: HashMap<String, usize> = HashMap::new();
+
+    for (line_no, line) in reader.lines().enumerate() {
+        let line = line?;
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        let (name, idx_str) =
+            line.rsplit_once(char::is_whitespace)
+                .ok_or_else(|| FormatError::InvalidFormat {
+                    file: file_name.clone(),
+                    line: line_no + 1,
+                    reason: "expected '<name> <index>' but found no whitespace".into(),
+                })?;
+
+        let idx: usize = idx_str
+            .trim()
+            .parse()
+            .map_err(|_| FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "index '{}' is not a valid positive integer",
+                    idx_str.trim()
+                ),
+            })?;
-            })?;
+            })?;
+        if idx == 0 {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "index '{}' must be >= 1 (1-based indexing)",
+                    idx_str.trim()
+                ),
+            });
+        }
-            })?;
+            })?;
+        if idx == 0 {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "index '{}' must be >= 1 (1-based indexing)",
+                    idx_str.trim()
+                ),
+            });
+        }
+
+        if idx == 0 {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: "index must be a positive integer (>= 1)".into(),
+            });
+        }
+
+        let name = name.trim().to_owned();
-        let name = name.trim().to_owned();
+        let name = name.trim().to_owned();
+
+        if by_idx.contains_key(&idx) {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "duplicate index '{}' encountered; each index must be unique",
+                    idx
+                ),
+            });
+        }
+
+        if by_name.contains_key(&name) {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "duplicate name '{}' encountered; each name must be unique",
+                    name
+                ),
+            });
+        }
-        let name = name.trim().to_owned();
+        let name = name.trim().to_owned();
+
+        if by_idx.contains_key(&idx) {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "duplicate index '{}' encountered; each index must be unique",
+                    idx
+                ),
+            });
+        }
+
+        if by_name.contains_key(&name) {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!(
+                    "duplicate name '{}' encountered; each name must be unique",
+                    name
+                ),
+            });
+        }
+        if by_idx.insert(idx, name.clone()).is_some() {
+            return Err(FormatError::InvalidFormat {
+                file: file_name.clone(),
+                line: line_no + 1,
+                reason: format!("duplicate index {idx}"),
+            });
+        }
+        by_name.insert(name, idx);
+    }
+
+    Ok((by_idx, by_name))
+}
+
+/// A MatrixMarket directory data source.
+///
+/// Reads the graph from a directory that contains:
+/// - `vertices.txt` — `<node_name> <1-based-index>` mapping
+/// - `edges.txt`    — `<label_name> <1-based-index>` mapping
+/// - `<n>.txt`      — one MM adjacency matrix per label index
+///
+/// Constructing the value performs no I/O; it only records the directory path.
+///
+/// # Example
+///
+/// ```no_run
+/// use pathrex::graph::{Graph, InMemory, GraphDecomposition};
+/// use pathrex::formats::mm::MatrixMarket;
+///
+/// let graph = Graph::<InMemory>::try_from(
+///     MatrixMarket::from_dir("path/to/graph/dir")
+/// ).unwrap();
+/// println!("Nodes: {}", graph.num_nodes());
+/// ```
+#[derive(Debug, Clone)]
+pub struct MatrixMarket {
+    /// Directory holding `vertices.txt`, `edges.txt` and the `<n>.txt` matrices.
+    pub(crate) dir: PathBuf,
+}
+
+impl MatrixMarket {
+    /// Create a `MatrixMarket` source that will load from `dir`.
+    pub fn from_dir(dir: impl Into<PathBuf>) -> Self {
+        Self { dir: dir.into() }
+    }
+
+    /// Path of the matrix file for label index `idx`, i.e. `<dir>/<idx>.txt`.
+    ///
+    /// The path is only computed; the file is not checked for existence.
+    pub(crate) fn mm_path(&self, idx: usize) -> PathBuf {
+        self.dir.join(format!("{}.txt", idx))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    /// The pair of maps produced by a successful `parse_index_map` call.
+    type IndexMaps = (HashMap<usize, String>, HashMap<String, usize>);
+
+    /// Writes `content` to a file called `name` inside a fresh temporary
+    /// directory and runs `parse_index_map` on it.
+    fn parse_fixture(name: &str, content: &str) -> Result<IndexMaps, FormatError> {
+        let scratch = TempDir::new().unwrap();
+        let target = scratch.path().join(name);
+        std::fs::write(&target, content).unwrap();
+        parse_index_map(&target)
+    }
+
+    #[test]
+    fn test_parse_index_map_basic() {
+        let (by_idx, by_name) = parse_fixture(
+            "vertices.txt",
+            "<Article1> 1\n<Paul_Erdoes> 2\n<1940> 3\n",
+        )
+        .unwrap();
+
+        // Index → name direction.
+        assert_eq!(by_idx[&1], "<Article1>");
+        assert_eq!(by_idx[&2], "<Paul_Erdoes>");
+        assert_eq!(by_idx[&3], "<1940>");
+        // Name → index direction.
+        assert_eq!(by_name["<Article1>"], 1);
+        assert_eq!(by_name["<Paul_Erdoes>"], 2);
+    }
+
+    #[test]
+    fn test_parse_index_map_rejects_zero_index() {
+        let outcome = parse_fixture("v.txt", "<a> 0\n");
+        assert!(matches!(outcome, Err(FormatError::InvalidFormat { .. })));
+    }
+
+    #[test]
+    fn test_parse_index_map_rejects_duplicate_index() {
+        let outcome = parse_fixture("v.txt", "<a> 1\n<b> 1\n");
+        assert!(matches!(outcome, Err(FormatError::InvalidFormat { .. })));
+    }
+
+    #[test]
+    fn test_parse_index_map_empty_lines_ignored() {
+        let (by_idx, _) =
+            parse_fixture("edges.txt", "\n<journal> 1\n\n<creator> 2\n").unwrap();
+        assert_eq!(by_idx.len(), 2);
+        assert_eq!(by_idx[&1], "<journal>");
+        assert_eq!(by_idx[&2], "<creator>");
+    }
+
+    #[test]
+    fn test_parse_index_map_bad_index_returns_error() {
+        let err = parse_fixture("bad.txt", "<foo> notanumber\n").unwrap_err();
+        assert!(
+            matches!(err, FormatError::InvalidFormat { .. }),
+            "expected InvalidFormat, got {:?}",
+            err
+        );
+    }
+
+    #[test]
+    fn test_parse_index_map_missing_whitespace_returns_error() {
+        let outcome = parse_fixture("bad.txt", "nospacehere\n");
+        assert!(matches!(outcome, Err(FormatError::InvalidFormat { .. })));
+    }
+
+    #[test]
+    fn test_load_nonexistent_mm_file_returns_io_error() {
+        let result = load_mm_file("/nonexistent/path/to/file.txt");
+        assert!(
+            matches!(result, Err(FormatError::Io(_))),
+            "expected Io error for missing file, got: {:?}",
+            result
+        );
+    }
+
+    #[test]
+    fn test_from_dir_stores_path() {
+        let source = MatrixMarket::from_dir("/some/path");
+        assert_eq!(source.dir, PathBuf::from("/some/path"));
+    }
+
+    #[test]
+    fn test_mm_path() {
+        let source = MatrixMarket::from_dir("/graph");
+        assert_eq!(source.mm_path(3), PathBuf::from("/graph/3.txt"));
+    }
+}
diff --git a/src/formats/mod.rs b/src/formats/mod.rs
@@ -14,11 +14,15 @@
 //! ```
 
 pub mod csv;
+pub mod mm;
 
 pub use csv::Csv;
+pub use mm::MatrixMarket;
 
 use thiserror::Error;
 
+use crate::lagraph_sys::GrB_Info;
+
 /// Unified error type for all format parsing operations.
 #[derive(Error, Debug)]
 pub enum FormatError {
@@ -33,4 +37,16 @@ pub enum FormatError {
     /// An I/O error occurred while reading the data source.
     #[error("I/O error: {0}")]
     Io(#[from] std::io::Error),
+
+    /// [`LAGraph_MMRead`](crate::lagraph_sys::LAGraph_MMRead) returned a
+    /// non-zero info code while reading a MatrixMarket file.
+    #[error("MatrixMarket read error (code {code}): {message}")]
+    MatrixMarket { code: GrB_Info, message: String },
+
+    #[error("Invalid format in '{file}' at line {line}: {reason}")]
+    InvalidFormat {
+        file: String,
+        line: usize,
+        reason: String,
+    },
 }