From 21395e245902a744c3f18d6f1914502a943c81c1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:45:19 -0600 Subject: [PATCH 01/25] feat: vendor Leiden community detection, remove graphology dependency Replace graphology + graphology-communities-louvain with a vendored Leiden/Louvain optimiser adapted from ngraph.leiden (MIT). This removes two runtime dependencies (~180 KB) while upgrading from Louvain to the strictly-better Leiden algorithm with refinement. Key changes: - src/graph/algorithms/leiden/: vendored optimiser, adapter, partition, modularity, CPM quality functions, seeded PRNG (mulberry32) - Resolution (gamma) parameter now works for modularity mode, not just CPM - src/graph/algorithms/louvain.js: delegates to new Leiden detectClusters - src/graph/model.js: removed toGraphology() method - tests/graph/algorithms/leiden.test.js: 12 tests covering two-clique split, CPM, directed, isolated nodes, determinism, fixed nodes, etc. 
- Removed graphology and graphology-communities-louvain from package.json --- docs/tasks/PLAN_centralize_config.md | 285 ---------------- package.json | 2 - src/graph/algorithms/index.js | 1 + src/graph/algorithms/leiden/LICENSE | 24 ++ src/graph/algorithms/leiden/adapter.js | 157 +++++++++ src/graph/algorithms/leiden/cpm.js | 33 ++ src/graph/algorithms/leiden/index.js | 77 +++++ src/graph/algorithms/leiden/modularity.js | 58 ++++ src/graph/algorithms/leiden/optimiser.js | 357 ++++++++++++++++++++ src/graph/algorithms/leiden/partition.js | 377 ++++++++++++++++++++++ src/graph/algorithms/leiden/rng.js | 19 ++ src/graph/algorithms/louvain.js | 22 +- src/graph/model.js | 17 - src/presentation/queries-cli/overview.js | 2 +- tests/graph/algorithms/leiden.test.js | 307 ++++++++++++++++++ tests/graph/model.test.js | 22 -- 16 files changed, 1424 insertions(+), 336 deletions(-) delete mode 100644 docs/tasks/PLAN_centralize_config.md create mode 100644 src/graph/algorithms/leiden/LICENSE create mode 100644 src/graph/algorithms/leiden/adapter.js create mode 100644 src/graph/algorithms/leiden/cpm.js create mode 100644 src/graph/algorithms/leiden/index.js create mode 100644 src/graph/algorithms/leiden/modularity.js create mode 100644 src/graph/algorithms/leiden/optimiser.js create mode 100644 src/graph/algorithms/leiden/partition.js create mode 100644 src/graph/algorithms/leiden/rng.js create mode 100644 tests/graph/algorithms/leiden.test.js diff --git a/docs/tasks/PLAN_centralize_config.md b/docs/tasks/PLAN_centralize_config.md deleted file mode 100644 index c8970ae0..00000000 --- a/docs/tasks/PLAN_centralize_config.md +++ /dev/null @@ -1,285 +0,0 @@ -# Plan: Centralize Hardcoded Configuration - -> **Goal:** Eliminate magic numbers scattered across the codebase by routing all tunable parameters through the existing `.codegraphrc.json` config system (`DEFAULTS` in `src/infrastructure/config.js`). 
- -## Problem - -The config system already exists and handles env overrides, but ~70 individual behavioral constants (34 inventory entries expanding to ~70 discrete values when counting sub-keys in B1, B2, and E1) are hardcoded in individual modules and never read from config. Users cannot tune thresholds, depths, weights, or limits without editing source code. - ---- - -## Inventory of Hardcoded Values - -### Category A — Analysis Parameters (high user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| A1 | `maxDepth = 5` | `domain/analysis/impact.js` | 111 | `fn-impact` transitive caller depth | -| A2 | `maxDepth = 3` | `domain/analysis/impact.js` | 31, 144 | BFS default depth for impact/diff-impact | -| A3 | `maxDepth = 3` | `features/audit.js` | 102 | Audit blast-radius depth | -| A4 | `maxDepth = 3` | `features/check.js` | 220 | CI check blast-radius depth | -| A5 | `maxDepth = 10` | `features/sequence.js` | 91 | Sequence diagram traversal depth | -| A6 | `FALSE_POSITIVE_CALLER_THRESHOLD = 20` | `domain/analysis/module-map.js` | 37 | Generic function false-positive filter | -| A7 | `resolution = 1.0` | `graph/algorithms/louvain.js` | 17 | Louvain community detection granularity | -| A8 | `driftThreshold = 0.3` | `features/structure.js` | 581 | Structure cohesion drift warning | -| A9 | `maxCallers >= 10` | `domain/analysis/brief.js` | 38 | `brief` high-risk tier threshold | -| A10 | `maxCallers >= 3` | `domain/analysis/brief.js` | 39 | `brief` medium-risk tier threshold | -| A11 | `maxDepth = 5` | `domain/analysis/brief.js` | 47 | `brief` transitive caller BFS depth | -| A12 | `maxDepth = 5` | `domain/analysis/brief.js` | 73 | `brief` transitive importer BFS depth | - -### Category B — Risk & Scoring Weights (medium-high user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| B1 | `fanIn: 0.25, complexity: 0.3, churn: 0.2, role: 0.15, mi: 0.1` | `graph/classifiers/risk.js` | 
10-14 | Risk score weighting | -| B2 | `core: 1.0, utility: 0.9, entry: 0.8, adapter: 0.5, leaf: 0.2, dead: 0.1` | `graph/classifiers/risk.js` | 21-27 | Role importance weights | -| B3 | `DEFAULT_ROLE_WEIGHT = 0.5` | `graph/classifiers/risk.js` | 30 | Fallback role weight | - -### Category C — Search & Embedding (already partially in config) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| C1 | `limit = 15` | `domain/search/search/hybrid.js` | 12 | Hybrid search default limit | -| C2 | `rrfK = 60` | `domain/search/search/hybrid.js` | 13 | RRF fusion constant | -| C3 | `limit = 15` | `domain/search/search/semantic.js` | 12 | Semantic search default limit | -| C4 | `minScore = 0.2` | `domain/search/search/semantic.js` | 13, 52 | Minimum similarity threshold | -| C5 | `SIMILARITY_WARN_THRESHOLD = 0.85` | `domain/search/search/semantic.js` | 71 | Duplicate query warning | -| ~~C6~~ | ~~Batch sizes per model~~ | — | — | Moved to Category F (see below) | - -### Category D — Display & Truncation (low-medium user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| D1 | `MAX_COL_WIDTH = 40` | `presentation/result-formatter.js` | 82 | Table column width | -| D2 | `50 lines` | `shared/file-utils.js` | 23 | Source context excerpt length | -| D3 | `100 chars` | `shared/file-utils.js` | 48, 63 | Summary/docstring truncation | -| D4a | `10 lines` | `shared/file-utils.js` | 36 | JSDoc block-end scan depth (upward scan for `*/`) | -| D4b | `20 lines` | `shared/file-utils.js` | 54 | JSDoc opening scan depth (upward scan for `/**`) | -| D5 | `5 lines` | `shared/file-utils.js` | 76 | Multi-line signature gather | - -### Category E — MCP Pagination (medium user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| E1 | `MCP_DEFAULTS` (22 entries) | `shared/paginate.js` | 9-34 | Per-tool default page sizes | -| ~~E2~~ | ~~`MCP_MAX_LIMIT = 1000`~~ | — | — | Moved to Category 
F (see below) | - -### Category F — Infrastructure (low user value, keep hardcoded) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| F1 | `CACHE_TTL_MS = 86400000` | `infrastructure/update-check.js` | 10 | Version check cache (24h) | -| F2 | `FETCH_TIMEOUT_MS = 3000` | `infrastructure/update-check.js` | 11 | Version check HTTP timeout | -| F3 | `debounce = 300` | `domain/graph/watcher.js` | 80 | File watcher debounce (ms) | -| F4 | `maxBuffer = 10MB` | `features/check.js` | 260 | Git diff buffer | -| F5 | `volume / 3000` | `features/complexity.js` | 85 | Halstead bugs formula (standard) | -| F6 | `timeout = 10_000` | `infrastructure/config.js` | 110 | apiKeyCommand timeout | -| F7 | `MCP_MAX_LIMIT = 1000` | `shared/paginate.js` | 37 | Hard abuse-prevention cap — server-side safety boundary, not a tuning knob | -| F8 | Batch sizes per model | `domain/search/models.js` | 66-75 | Embedding batch sizes — model-specific implementation details rarely tuned by end-users, analogous to watcher debounce (F3) | -| F9 | `MAX_VISIT_DEPTH = 200` | `crates/.../dataflow.rs` | 11 | Dataflow AST visit recursion limit — stack overflow prevention | -| F10 | `MAX_WALK_DEPTH = 200` | `crates/.../extractors/helpers.rs` | 6 | Extractor AST walk recursion limit — stack overflow prevention (#481) | -| F11 | `MAX_WALK_DEPTH = 200` | `crates/.../complexity.rs` | 6 | Complexity walk recursion limit — stack overflow prevention (#481) | -| F12 | `MAX_WALK_DEPTH = 200` | `crates/.../cfg.rs` | 5 | CFG process_if recursion limit — stack overflow prevention (#481) | - ---- - -## Design - -### Proposed `DEFAULTS` additions in `src/infrastructure/config.js` - -```js -export const DEFAULTS = { - // ... existing fields ... 
- - analysis: { - impactDepth: 3, // A2: BFS depth for impact/diff-impact - fnImpactDepth: 5, // A1: fn-impact transitive depth - auditDepth: 3, // A3: audit blast-radius depth - sequenceDepth: 10, // A5: sequence diagram depth - falsePositiveCallers: 20, // A6: generic function filter threshold - briefCallerDepth: 5, // A11: brief transitive caller BFS depth - briefImporterDepth: 5, // A12: brief transitive importer BFS depth - briefHighRiskCallers: 10, // A9: brief high-risk tier threshold - briefMediumRiskCallers: 3, // A10: brief medium-risk tier threshold - }, - - community: { - resolution: 1.0, // A7: Louvain resolution (only Louvain params here) - }, - - // build.driftThreshold stays in `build` (already wired in finalize.js line 52) - // — it's a build-pipeline concern, not community detection - - structure: { - cohesionThreshold: 0.3, // A8: structure cohesion drift warning - }, - - risk: { - weights: { // B1 - fanIn: 0.25, - complexity: 0.3, - churn: 0.2, - role: 0.15, - mi: 0.1, - }, - roleWeights: { // B2 - core: 1.0, - utility: 0.9, - entry: 0.8, - adapter: 0.5, - leaf: 0.2, - dead: 0.1, - }, - defaultRoleWeight: 0.5, // B3 - }, - - display: { - maxColWidth: 40, // D1 - excerptLines: 50, // D2 - summaryMaxChars: 100, // D3 - jsdocEndScanLines: 10, // D4a: lines to scan upward for block-end marker (*/) - jsdocOpenScanLines: 20, // D4b: lines to scan upward for /** opening - signatureGatherLines: 5, // D5 - }, - - search: { - // defaultMinScore, rrfK, topK already exist in DEFAULTS — - // add the missing C5 key: - similarityWarnThreshold: 0.85, // C5: duplicate-query warning in multiSearchData - }, - - mcp: { - defaults: { /* E1: current MCP_DEFAULTS object */ }, - // MCP_MAX_LIMIT stays hardcoded (Category F) — server-side safety boundary - }, -}; -``` - -### What stays hardcoded (Category F) - -- **Halstead `volume / 3000`** — industry-standard formula, not a tuning knob -- **Git `maxBuffer`** — platform concern, not analysis behavior -- 
**`apiKeyCommand` timeout** — security boundary, not user-facing -- **Update check TTL/timeout** — implementation detail -- **Watcher debounce** — could be configurable later but low priority -- **`MCP_MAX_LIMIT`** — server-side abuse-prevention cap; making it user-configurable via `.codegraphrc.json` would allow any process with project directory write access to raise it arbitrarily, defeating its security purpose -- **Embedding batch sizes** — model-specific implementation details (per-model map shape); rarely tuned by end-users, analogous to watcher debounce -- **Native engine `MAX_WALK_DEPTH` / `MAX_VISIT_DEPTH` (200)** — stack overflow safety boundaries in Rust extractors, complexity, CFG, and dataflow modules; raising them risks process crashes on deeply nested ASTs - ---- - -## Implementation Plan - -### Phase 1 — Extend DEFAULTS schema (1 PR) - -**Files:** `src/infrastructure/config.js`, `tests/unit/config.test.js` - -1. Add `analysis`, `community`, `structure`, `risk`, `display`, `mcp` sections to `DEFAULTS` -2. Keep `build.driftThreshold` where it is (already wired in `finalize.js` — no migration needed) -3. **Hard prerequisite:** Update `mergeConfig` to perform recursive (deep) merging — at minimum 2 levels deep. The current implementation only merges 1 level deep, which means partial user overrides of nested objects like `risk.weights` (e.g. `{ "complexity": 0.4, "churn": 0.1 }`) will **silently drop** un-specified sibling keys (`fanIn`, `role`, `mi`), producing `NaN` risk scores. This must be fixed before any nested config keys are wired in subsequent phases -4. 
Add tests: loading config with overrides for each new section - -### Phase 2 — Wire analysis parameters (1 PR) - -**Files to change:** -- `src/domain/analysis/impact.js` → read `config.analysis.impactDepth` / `config.analysis.fnImpactDepth` -- `src/features/audit.js` → read `config.analysis.auditDepth` -- `src/features/check.js` → replace hardcoded `3` with `config.check.depth` (already in DEFAULTS, sole authoritative key for check depth — do **not** chain with `config.analysis.impactDepth`) -- `src/features/sequence.js` → read `config.analysis.sequenceDepth` -- `src/domain/analysis/module-map.js` → read `config.analysis.falsePositiveCallers` -- `src/domain/analysis/brief.js` → read `config.analysis.briefCallerDepth`, `config.analysis.briefImporterDepth`, `config.analysis.briefHighRiskCallers`, `config.analysis.briefMediumRiskCallers` (PR #480) - -**Pattern:** Each module calls `loadConfig()` (or receives config as a parameter). Replace the hardcoded value with `config.analysis.X ?? FALLBACK`. The fallback ensures backward compatibility if config is missing. - -**Tests:** Update integration tests to verify custom config values flow through. - -### Phase 3 — Wire risk & community parameters (1 PR) - -**Files to change:** -- `src/graph/classifiers/risk.js` → read `config.risk.weights`, `config.risk.roleWeights`, `config.risk.defaultRoleWeight` -- `src/graph/algorithms/louvain.js` → accept `resolution` parameter, default from config -- `src/features/structure.js` → read `config.structure.cohesionThreshold` - -**Pattern:** These modules don't currently receive config. Options: -1. **Preferred:** Accept an `options` parameter that callers populate from config -2. **Alternative:** Import `loadConfig` directly (adds coupling but simpler) - -**Tests:** Unit tests for risk scoring with custom weights. Integration test for Louvain with custom resolution. 
- -### Phase 4 — Wire search parameters (1 PR) - -**Files to change:** -- `src/domain/search/search/hybrid.js` → read `config.search.rrfK`, `config.search.topK` -- `src/domain/search/search/semantic.js` → read `config.search.defaultMinScore`, `config.search.topK` (C3), and `config.search.similarityWarnThreshold` (C5, replaces hardcoded `SIMILARITY_WARN_THRESHOLD`) -- `src/domain/search/models.js` → batch sizes stay hardcoded (moved to Category F — model-specific implementation details) - -**Note:** `config.search` already exists with `defaultMinScore`, `rrfK`, `topK`. The modules just don't read from it — they duplicate the values. This phase wires the existing config keys. - -### Phase 5 — Wire display & MCP parameters (1 PR) - -**Files to change:** -- `src/presentation/result-formatter.js` → read `config.display.maxColWidth` -- `src/shared/file-utils.js` → read `config.display.excerptLines`, `config.display.jsdocEndScanLines` (D4a, 10 lines), `config.display.jsdocOpenScanLines` (D4b, 20 lines — note different default values), `config.display.summaryMaxChars`, `config.display.signatureGatherLines` -- `src/shared/paginate.js` → read `config.mcp.defaults` (`MCP_MAX_LIMIT` stays hardcoded — security boundary) - -**Consideration:** `file-utils.js` and `paginate.js` are low-level shared utilities. They shouldn't call `loadConfig()` directly. Instead, pass display/mcp settings down from callers, or use a module-level config cache set at startup. - -### Phase 6 — Documentation & migration (1 PR) - -1. Update `README.md` configuration section with the full schema -2. Add a `docs/configuration.md` reference with all keys, types, defaults, and descriptions -3. Document the `structure.cohesionThreshold` key and its relationship to A8 -4. Add a JSON Schema file (`.codegraphrc.schema.json`) for IDE autocomplete -5. 
Add a **Configuration** section to `CLAUDE.md` that documents: - - The `.codegraphrc.json` config file and its location - - The full list of configurable sections (`analysis`, `community`, `structure`, `risk`, `display`, `mcp`, `search`, `check`, `coChange`, `manifesto`) - - Key tunable parameters and their defaults (depth limits, risk weights, thresholds) - - How `mergeConfig` works (partial overrides deep-merge with defaults) - - Env var overrides (`CODEGRAPH_LLM_*`) - - Guidance: when adding new behavioral constants, always add them to `DEFAULTS` in `config.js` and wire them through — never introduce new hardcoded magic numbers - ---- - -## Migration & Backward Compatibility - -- All new config keys have defaults matching current hardcoded values → **zero breaking changes** -- Existing `.codegraphrc.json` files continue to work unchanged -- `mergeConfig` will be updated to deep-merge recursively (Phase 1 prerequisite), so users only need to specify the keys they want to override -- `build.driftThreshold` stays in place — no migration needed - -## Example `.codegraphrc.json` after this work - -```json -{ - "analysis": { - "fnImpactDepth": 8, - "falsePositiveCallers": 30 - }, - "risk": { - "weights": { - "complexity": 0.4, - "churn": 0.1 - } - }, - "community": { "resolution": 1.5 }, - "structure": { "cohesionThreshold": 0.25 }, - "display": { - "maxColWidth": 60 - } -} -``` - ---- - -## Estimated Scope - -| Phase | Files changed | New tests | Risk | -|-------|--------------|-----------|------| -| 1 — Schema | 2 | 3-4 | Low | -| 2 — Analysis wiring | 6 | 4-5 | Low | -| 3 — Risk/community | 3 | 2-3 | Medium (parameter threading) | -| 4 — Search wiring | 3 | 2 | Low (config keys already exist) | -| 5 — Display/MCP | 3 | 2 | Medium (shared utility coupling) | -| 6 — Docs + CLAUDE.md | 5 | 0 | None | - -**Total: ~22 files changed, 6 PRs, one concern per PR.** diff --git a/package.json b/package.json index 2b4b03bf..c42c05b2 100644 --- a/package.json +++ b/package.json 
@@ -67,8 +67,6 @@ "dependencies": { "better-sqlite3": "^12.6.2", "commander": "^14.0.3", - "graphology": "^0.26.0", - "graphology-communities-louvain": "^2.0.2", "web-tree-sitter": "^0.26.5" }, "peerDependencies": { diff --git a/src/graph/algorithms/index.js b/src/graph/algorithms/index.js index 3949b94c..e30637b3 100644 --- a/src/graph/algorithms/index.js +++ b/src/graph/algorithms/index.js @@ -1,5 +1,6 @@ export { bfs } from './bfs.js'; export { fanInOut } from './centrality.js'; +export { detectClusters } from './leiden/index.js'; export { louvainCommunities } from './louvain.js'; export { shortestPath } from './shortest-path.js'; export { tarjan } from './tarjan.js'; diff --git a/src/graph/algorithms/leiden/LICENSE b/src/graph/algorithms/leiden/LICENSE new file mode 100644 index 00000000..25e2bf1c --- /dev/null +++ b/src/graph/algorithms/leiden/LICENSE @@ -0,0 +1,24 @@ +MIT License + +Vendored from ngraph.leiden by Andrei Kashcha (anvaka) +https://github.com/anvaka/ngraph.leiden + +Copyright (c) 2025 Andrei Kashcha + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js new file mode 100644 index 00000000..d4d16b6d --- /dev/null +++ b/src/graph/algorithms/leiden/adapter.js @@ -0,0 +1,157 @@ +/** + * Graph adapter that converts a CodeGraph into the dense array format + * expected by the Leiden optimiser. + * + * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph. + */ + +/** + * @param {import('../../model.js').CodeGraph} graph + * @param {object} [opts] + * @param {boolean} [opts.directed] + * @param {(attrs: object) => number} [opts.linkWeight] - extract weight from edge attrs + * @param {(attrs: object) => number} [opts.nodeSize] - extract size from node attrs + * @param {string[]} [opts.baseNodeIds] + */ +export function makeGraphAdapter(graph, opts = {}) { + const linkWeight = + opts.linkWeight || + ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); + const nodeSize = + opts.nodeSize || + ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); + const directed = !!opts.directed; + const baseNodeIds = opts.baseNodeIds; + + // Build dense node index mapping + const nodeIds = []; + const idToIndex = new Map(); + if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { + for (let i = 0; i < baseNodeIds.length; i++) { + const id = baseNodeIds[i]; + if (!graph.hasNode(id)) + throw new Error('Missing node: ' + id); + idToIndex.set(id, i); + nodeIds.push(id); + } + } else { + for (const [id] of graph.nodes()) { + idToIndex.set(id, nodeIds.length); + nodeIds.push(id); + } + } + const n = nodeIds.length; + + // Storage + const size = new Float64Array(n); + const selfLoop = new Float64Array(n); + const strengthOut = new Float64Array(n); + const strengthIn = new Float64Array(n); + + // Edge list by source for fast iteration + const outEdges = new Array(n); + const inEdges = new Array(n); + for (let i = 0; i < n; i++) { + outEdges[i] = []; + inEdges[i] = []; + } + + // Populate from graph + if (directed) { + for (const [src, tgt, attrs] of graph.edges()) { + const from = idToIndex.get(src); + const to = idToIndex.get(tgt); + if (from == null || to == null) continue; + const w = +linkWeight(attrs) || 0; + if (from === to) { + selfLoop[from] += w; + } + outEdges[from].push({ to, w }); + inEdges[to].push({ from, w }); + strengthOut[from] += w; + strengthIn[to] += w; + } + } else { + // Undirected: symmetrize and average reciprocal pairs + const pairAgg = new Map(); + + for (const [src, tgt, attrs] of graph.edges()) { + const a = idToIndex.get(src); + const b = idToIndex.get(tgt); + if (a == null || b == null) continue; + const w = +linkWeight(attrs) || 0; + if (a === b) { + selfLoop[a] += w; + continue; + } + const i = a < b ? a : b; + const j = a < b ? 
b : a; + const key = i + ':' + j; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; + } + + for (const [key, rec] of pairAgg.entries()) { + const [iStr, jStr] = key.split(':'); + const i = +iStr; + const j = +jStr; + const dirCount = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); + const w = dirCount > 0 ? rec.sum / dirCount : 0; + if (w === 0) continue; + outEdges[i].push({ to: j, w }); + outEdges[j].push({ to: i, w }); + inEdges[i].push({ from: j, w }); + inEdges[j].push({ from: i, w }); + strengthOut[i] += w; + strengthOut[j] += w; + strengthIn[i] += w; + strengthIn[j] += w; + } + + // Add self-loops into adjacency and strengths + for (let v = 0; v < n; v++) { + const w = selfLoop[v]; + if (w !== 0) { + outEdges[v].push({ to: v, w }); + inEdges[v].push({ from: v, w }); + strengthOut[v] += w; + strengthIn[v] += w; + } + } + } + + // Node sizes + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + + // Totals + const totalWeight = strengthOut.reduce((a, b) => a + b, 0); + + function forEachNeighbor(i, cb) { + const list = outEdges[i]; + for (let k = 0; k < list.length; k++) cb(list[k].to, list[k].w); + } + + return { + n, + nodeIds, + idToIndex, + size, + selfLoop, + strengthOut, + strengthIn, + outEdges, + inEdges, + directed, + totalWeight, + forEachNeighbor, + }; +} diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js new file mode 100644 index 00000000..f6c54d58 --- /dev/null +++ b/src/graph/algorithms/leiden/cpm.js @@ -0,0 +1,33 @@ +/** + * CPM (Constant Potts Model) quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. 
+ */ + +export function diffCPM(part, g, v, c, gamma = 1.0) { + const oldC = part.nodeCommunity[v]; + if (c === oldC) return 0; + const w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + const w_new = c < g.n ? (part.getNeighborEdgeWeightToCommunity(c) || 0) : 0; + const s_v = g.size[v] || 1; + const S_old = part.communityTotalSize[oldC] || 0; + const S_new = c < part.communityTotalSize.length ? part.communityTotalSize[c] : 0; + return w_new - w_old - gamma * s_v * (S_new - S_old + s_v); +} + +export function qualityCPM(part, g, gamma = 1.0) { + let sum = 0; + for (let c = 0; c < part.communityCount; c++) + sum += + part.communityInternalEdgeWeight[c] - + (gamma * (part.communityNodeCount[c] * (part.communityNodeCount[c] - 1))) / 2; + return sum; +} + +export function qualityCPMSizeAware(part, g, gamma = 1.0) { + let sum = 0; + for (let c = 0; c < part.communityCount; c++) { + const S = part.communityTotalSize[c] || 0; + sum += part.communityInternalEdgeWeight[c] - (gamma * (S * (S - 1))) / 2; + } + return sum; +} diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js new file mode 100644 index 00000000..400c4a0b --- /dev/null +++ b/src/graph/algorithms/leiden/index.js @@ -0,0 +1,77 @@ +/** + * Leiden community detection — vendored from ngraph.leiden (MIT). + * Adapted to work directly with CodeGraph (no external graph library dependency). + * + * Original: https://github.com/anvaka/ngraph.leiden + * License: MIT — see LICENSE in this directory. + */ + +import { qualityCPM, qualityCPMSizeAware } from './cpm.js'; +import { qualityModularity } from './modularity.js'; +import { runLouvainUndirectedModularity } from './optimiser.js'; + +/** + * Detect communities in a CodeGraph using the Leiden algorithm. 
+ * + * @param {import('../../model.js').CodeGraph} graph + * @param {object} [options] + * @param {number} [options.randomSeed=42] + * @param {boolean} [options.directed=false] + * @param {boolean} [options.refine=true] - Leiden refinement (set false for plain Louvain) + * @param {string} [options.quality='modularity'] - 'modularity' | 'cpm' + * @param {number} [options.resolution=1.0] + * @param {number} [options.maxCommunitySize] + * @param {Set|Array} [options.fixedNodes] + * @param {string} [options.candidateStrategy] - 'neighbors' | 'all' | 'random' | 'random-neighbor' + * @returns {{ getClass(id): number, getCommunities(): Map, quality(): number, toJSON(): object }} + */ +export function detectClusters(graph, options = {}) { + const { + graph: finalGraph, + partition, + levels, + originalToCurrent, + originalNodeIds, + } = runLouvainUndirectedModularity(graph, options); + + const idToClass = new Map(); + for (let i = 0; i < originalNodeIds.length; i++) { + const comm = originalToCurrent[i]; + idToClass.set(originalNodeIds[i], comm); + } + + return { + getClass(nodeId) { + return idToClass.get(String(nodeId)); + }, + getCommunities() { + const out = new Map(); + for (const [id, c] of idToClass) { + if (!out.has(c)) out.set(c, []); + out.get(c).push(id); + } + return out; + }, + quality() { + const q = (options.quality || 'modularity').toLowerCase(); + if (q === 'cpm') { + const gamma = + typeof options.resolution === 'number' ? options.resolution : 1.0; + if ((options.cpmMode || 'unit') === 'size-aware') + return qualityCPMSizeAware(partition, finalGraph, gamma); + return qualityCPM(partition, finalGraph, gamma); + } + const gamma = + typeof options.resolution === 'number' ? 
options.resolution : 1.0; + return qualityModularity(partition, finalGraph, gamma); + }, + toJSON() { + const membershipObj = {}; + for (const [id, c] of idToClass) membershipObj[id] = c; + return { + membership: membershipObj, + meta: { levels: levels.length, quality: this.quality(), options }, + }; + }, + }; +} diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js new file mode 100644 index 00000000..3fe2732f --- /dev/null +++ b/src/graph/algorithms/leiden/modularity.js @@ -0,0 +1,58 @@ +/** + * Modularity quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + */ + +export function diffModularity(part, g, v, c, gamma = 1.0) { + if (g.directed) return diffModularityDirected(part, g, v, c, gamma); + const oldC = part.nodeCommunity[v]; + if (c === oldC) return 0; + const k_v = g.strengthOut[v]; + const m2 = g.totalWeight; + const k_v_in_new = part.getNeighborEdgeWeightToCommunity(c) || 0; + const k_v_in_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + const wTot_new = c < part.communityTotalStrength.length ? part.communityTotalStrength[c] : 0; + const wTot_old = part.communityTotalStrength[oldC]; + const gain_remove = -(k_v_in_old / m2 - gamma * (k_v * wTot_old) / (m2 * m2)); + const gain_add = k_v_in_new / m2 - gamma * (k_v * wTot_new) / (m2 * m2); + return gain_remove + gain_add; +} + +export function diffModularityDirected(part, g, v, c, gamma = 1.0) { + const oldC = part.nodeCommunity[v]; + if (c === oldC) return 0; + const m = g.totalWeight; + const k_out = g.strengthOut[v]; + const k_in = g.strengthIn[v]; + const w_new_in = c < g.n ? (part.getInEdgeWeightFromCommunity(c) || 0) : 0; + const w_new_out = c < g.n ? (part.getOutEdgeWeightToCommunity(c) || 0) : 0; + const w_old_in = part.getInEdgeWeightFromCommunity(oldC) || 0; + const w_old_out = part.getOutEdgeWeightToCommunity(oldC) || 0; + const T_new = + c < part.communityTotalInStrength.length ? 
part.communityTotalInStrength[c] : 0; + const F_new = + c < part.communityTotalOutStrength.length ? part.communityTotalOutStrength[c] : 0; + const T_old = part.communityTotalInStrength[oldC]; + const F_old = part.communityTotalOutStrength[oldC]; + const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out) / m; + const deltaExpected = gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old)) / (m * m); + return deltaInternal - deltaExpected; +} + +export function qualityModularity(part, g, gamma = 1.0) { + const m2 = g.totalWeight; + let sum = 0; + if (g.directed) { + for (let c = 0; c < part.communityCount; c++) + sum += + part.communityInternalEdgeWeight[c] / m2 - + gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c]) / (m2 * m2); + } else { + for (let c = 0; c < part.communityCount; c++) { + const lc = part.communityInternalEdgeWeight[c]; + const dc = part.communityTotalStrength[c]; + sum += lc / m2 - gamma * (dc * dc) / (m2 * m2); + } + } + return sum; +} diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js new file mode 100644 index 00000000..6baed9e8 --- /dev/null +++ b/src/graph/algorithms/leiden/optimiser.js @@ -0,0 +1,357 @@ +/** + * Core Leiden/Louvain community detection optimiser. + * Vendored from ngraph.leiden (MIT) — adapted to use CodeGraph + local RNG. 
+ */ + +import { CodeGraph } from '../../model.js'; +import { makeGraphAdapter } from './adapter.js'; +import { diffCPM } from './cpm.js'; +import { diffModularity, diffModularityDirected } from './modularity.js'; +import { makePartition } from './partition.js'; +import { createRng } from './rng.js'; + +const DEFAULT_MAX_LEVELS = 50; +const DEFAULT_MAX_LOCAL_PASSES = 20; +const GAIN_EPSILON = 1e-12; + +const CandidateStrategy = { + Neighbors: 0, + All: 1, + RandomAny: 2, + RandomNeighbor: 3, +}; + +export function runLouvainUndirectedModularity(graph, optionsInput = {}) { + const options = normalizeOptions(optionsInput); + let currentGraph = graph; + const levels = []; + const rngSource = createRng(options.randomSeed); + const random = () => rngSource.nextDouble(); + + const baseGraphAdapter = makeGraphAdapter(currentGraph, { + directed: options.directed, + ...optionsInput, + }); + const origN = baseGraphAdapter.n; + const originalToCurrent = new Int32Array(origN); + for (let i = 0; i < origN; i++) originalToCurrent[i] = i; + + let fixedNodeMask = null; + if (options.fixedNodes) { + const fixed = new Uint8Array(origN); + const asSet = + options.fixedNodes instanceof Set ? options.fixedNodes : new Set(options.fixedNodes); + for (const id of asSet) { + const idx = baseGraphAdapter.idToIndex.get(String(id)); + if (idx != null) fixed[idx] = 1; + } + fixedNodeMask = fixed; + } + + for (let level = 0; level < options.maxLevels; level++) { + const graphAdapter = + level === 0 + ? 
baseGraphAdapter + : makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput }); + const partition = makePartition(graphAdapter); + partition.graph = graphAdapter; + partition.initializeAggregates(); + + const order = new Int32Array(graphAdapter.n); + for (let i = 0; i < graphAdapter.n; i++) order[i] = i; + + let improved = true; + let localPasses = 0; + const strategyCode = options.candidateStrategyCode; + while (improved) { + improved = false; + localPasses++; + shuffleArrayInPlace(order, random); + for (let idx = 0; idx < order.length; idx++) { + const nodeIndex = order[idx]; + if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue; + const candidateCount = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); + let bestCommunityId = partition.nodeCommunity[nodeIndex]; + let bestGain = 0; + const maxCommunitySize = options.maxCommunitySize; + if (strategyCode === CandidateStrategy.All) { + for (let communityId = 0; communityId < partition.communityCount; communityId++) { + if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else if (strategyCode === CandidateStrategy.RandomAny) { + const tries = Math.min(10, Math.max(1, partition.communityCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + const communityId = (random() * partition.communityCount) | 0; + if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > 
bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else if (strategyCode === CandidateStrategy.RandomNeighbor) { + const tries = Math.min(10, Math.max(1, candidateCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + const communityId = partition.getCandidateCommunityAt((random() * candidateCount) | 0); + if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else { + for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { + const communityId = partition.getCandidateCommunityAt(trialIndex); + if (maxCommunitySize < Infinity) { + const nextSize = + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]; + if (nextSize > maxCommunitySize) continue; + } + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } + if (options.allowNewCommunity) { + const newCommunityId = partition.communityCount; + const gain = computeQualityGain(partition, nodeIndex, newCommunityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = newCommunityId; + } + } + if (bestCommunityId !== partition.nodeCommunity[nodeIndex] && bestGain > GAIN_EPSILON) { + partition.moveNodeToCommunity(nodeIndex, bestCommunityId); + improved = true; + } + } + if (localPasses > options.maxLocalPasses) break; + } + + renumberCommunities(partition, options.preserveLabels); + + let effectivePartition = partition; + if (options.refine) { + const refined = refineWithinCoarseCommunities( + graphAdapter, + partition, + random, + options, + level === 0 ? 
fixedNodeMask : null, + ); + renumberCommunities(refined, options.preserveLabels); + effectivePartition = refined; + } + + levels.push({ graph: graphAdapter, partition: effectivePartition }); + const fineToCoarse = effectivePartition.nodeCommunity; + for (let i = 0; i < originalToCurrent.length; i++) { + originalToCurrent[i] = fineToCoarse[originalToCurrent[i]]; + } + + if (partition.communityCount === graphAdapter.n) break; + currentGraph = buildCoarseGraph(graphAdapter, effectivePartition); + } + + const last = levels[levels.length - 1]; + return { + graph: last.graph, + partition: last.partition, + levels, + originalToCurrent, + originalNodeIds: baseGraphAdapter.nodeIds, + }; +} + +/** + * Build a coarse graph where each community becomes a node. + * Uses CodeGraph instead of ngraph.graph. + */ +function buildCoarseGraph(g, p) { + const coarse = new CodeGraph(); + for (let c = 0; c < p.communityCount; c++) { + coarse.addNode(String(c), { size: p.communityTotalSize[c] }); + } + const acc = new Map(); + for (let i = 0; i < g.n; i++) { + const cu = p.nodeCommunity[i]; + const list = g.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const j = list[k].to; + const w = list[k].w; + const cv = p.nodeCommunity[j]; + const key = `${cu}:${cv}`; + acc.set(key, (acc.get(key) || 0) + w); + } + } + for (const [key, w] of acc.entries()) { + const [cuStr, cvStr] = key.split(':'); + coarse.addEdge(cuStr, cvStr, { weight: w }); + } + return coarse; +} + +function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { + const p = makePartition(g); + p.initializeAggregates(); + p.graph = g; + const macro = basePart.nodeCommunity; + const commMacro = new Int32Array(p.communityCount); + for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]; + + const order = new Int32Array(g.n); + for (let i = 0; i < g.n; i++) order[i] = i; + let improved = true; + let passes = 0; + while (improved) { + improved = false; + passes++; + shuffleArrayInPlace(order, rng); 
+ for (let idx = 0; idx < order.length; idx++) { + const v = order[idx]; + if (fixedMask0?.[v]) continue; + const macroV = macro[v]; + const touchedCount = p.accumulateNeighborCommunityEdgeWeights(v); + let bestC = p.nodeCommunity[v]; + let bestGain = 0; + const maxSize = Number.isFinite(opts.maxCommunitySize) ? opts.maxCommunitySize : Infinity; + for (let t = 0; t < touchedCount; t++) { + const c = p.getCandidateCommunityAt(t); + if (commMacro[c] !== macroV) continue; + if (maxSize < Infinity) { + const nextSize = p.getCommunityTotalSize(c) + g.size[v]; + if (nextSize > maxSize) continue; + } + const gain = computeQualityGain(p, v, c, opts); + if (gain > bestGain) { + bestGain = gain; + bestC = c; + } + } + if (bestC !== p.nodeCommunity[v] && bestGain > GAIN_EPSILON) { + p.moveNodeToCommunity(v, bestC); + improved = true; + } + } + if (passes > (opts.maxLocalPasses || DEFAULT_MAX_LOCAL_PASSES)) break; + } + return p; +} + +function computeQualityGain(partition, v, c, opts) { + const quality = (opts.quality || 'modularity').toLowerCase(); + const gamma = typeof opts.resolution === 'number' ? 
opts.resolution : 1.0; + if (quality === 'cpm') { + return ( + diffCPM(partition, partition.graph || {}, v, c, gamma) || + partition.deltaCPM?.(v, c, gamma) || + 0 + ); + } + if (opts.directed) + return ( + diffModularityDirected(partition, partition.graph || {}, v, c, gamma) || + partition.deltaModularityDirected?.(v, c, gamma) || + 0 + ); + return ( + diffModularity(partition, partition.graph || {}, v, c, gamma) || + partition.deltaModularityUndirected?.(v, c, gamma) || + 0 + ); +} + +function shuffleArrayInPlace(arr, rng = Math.random) { + for (let i = arr.length - 1; i > 0; i--) { + const j = Math.floor(rng() * (i + 1)); + const t = arr[i]; + arr[i] = arr[j]; + arr[j] = t; + } + return arr; +} + +function resolveCandidateStrategy(options) { + const val = options.candidateStrategy; + if (typeof val !== 'string') return CandidateStrategy.Neighbors; + switch (val) { + case 'neighbors': + return CandidateStrategy.Neighbors; + case 'all': + return CandidateStrategy.All; + case 'random': + return CandidateStrategy.RandomAny; + case 'random-neighbor': + return CandidateStrategy.RandomNeighbor; + default: + return CandidateStrategy.Neighbors; + } +} + +function normalizeOptions(options = {}) { + const directed = !!options.directed; + const randomSeed = Number.isFinite(options.randomSeed) ? options.randomSeed : 42; + const maxLevels = Number.isFinite(options.maxLevels) ? options.maxLevels : DEFAULT_MAX_LEVELS; + const maxLocalPasses = Number.isFinite(options.maxLocalPasses) + ? options.maxLocalPasses + : DEFAULT_MAX_LOCAL_PASSES; + const allowNewCommunity = !!options.allowNewCommunity; + const candidateStrategyCode = resolveCandidateStrategy(options); + const quality = (options.quality || 'modularity').toLowerCase(); + const resolution = typeof options.resolution === 'number' ? options.resolution : 1.0; + const refine = options.refine !== false; + const preserveLabels = options.preserveLabels; + const maxCommunitySize = Number.isFinite(options.maxCommunitySize) + ? 
options.maxCommunitySize + : Infinity; + return { + directed, + randomSeed, + maxLevels, + maxLocalPasses, + allowNewCommunity, + candidateStrategyCode, + quality, + resolution, + refine, + preserveLabels, + maxCommunitySize, + fixedNodes: options.fixedNodes, + }; +} + +function renumberCommunities(partition, preserveLabels) { + if (preserveLabels && preserveLabels instanceof Map) { + partition.compactCommunityIds({ preserveMap: preserveLabels }); + } else if (preserveLabels === true) { + partition.compactCommunityIds({ keepOldOrder: true }); + } else { + partition.compactCommunityIds(); + } +} diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js new file mode 100644 index 00000000..fa4038c6 --- /dev/null +++ b/src/graph/algorithms/leiden/partition.js @@ -0,0 +1,377 @@ +/** + * Mutable community assignment with per-community aggregates. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + * + * Maintains per-community totals and per-move scratch accumulators so we can + * compute modularity/CPM gains in O(neighborhood) time without rescanning the + * whole graph after each move. 
+ */ + +export function makePartition(graph) { + const n = graph.n; + const nodeCommunity = new Int32Array(n); + for (let i = 0; i < n; i++) nodeCommunity[i] = i; + let communityCount = n; + + let communityTotalSize = new Float64Array(communityCount); + let communityNodeCount = new Int32Array(communityCount); + let communityInternalEdgeWeight = new Float64Array(communityCount); + let communityTotalStrength = new Float64Array(communityCount); + let communityTotalOutStrength = new Float64Array(communityCount); + let communityTotalInStrength = new Float64Array(communityCount); + + let candidateCommunities = new Int32Array(n); + let candidateCommunityCount = 0; + let neighborEdgeWeightToCommunity = new Float64Array(n); + let outEdgeWeightToCommunity = new Float64Array(n); + let inEdgeWeightFromCommunity = new Float64Array(n); + let isCandidateCommunity = new Uint8Array(n); + + function ensureCommCapacity(newCount) { + if (newCount <= communityTotalSize.length) return; + const growTo = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); + communityTotalSize = growFloat(communityTotalSize, growTo); + communityNodeCount = growInt(communityNodeCount, growTo); + communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); + communityTotalStrength = growFloat(communityTotalStrength, growTo); + communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); + communityTotalInStrength = growFloat(communityTotalInStrength, growTo); + } + + function initializeAggregates() { + communityTotalSize.fill(0); + communityNodeCount.fill(0); + communityInternalEdgeWeight.fill(0); + communityTotalStrength.fill(0); + communityTotalOutStrength.fill(0); + communityTotalInStrength.fill(0); + for (let i = 0; i < n; i++) { + const c = nodeCommunity[i]; + communityTotalSize[c] += graph.size[i]; + communityNodeCount[c] += 1; + if (graph.directed) { + communityTotalOutStrength[c] += graph.strengthOut[i]; + communityTotalInStrength[c] += graph.strengthIn[i]; + 
} else { + communityTotalStrength[c] += graph.strengthOut[i]; + } + if (graph.selfLoop[i] !== 0) communityInternalEdgeWeight[c] += graph.selfLoop[i]; + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const neighbors = graph.outEdges[i]; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]; + if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const neighbors = graph.outEdges[i]; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]; + if (j <= i) continue; + if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; + } + } + } + } + + function resetScratch() { + for (let i = 0; i < candidateCommunityCount; i++) { + const c = candidateCommunities[i]; + isCandidateCommunity[c] = 0; + neighborEdgeWeightToCommunity[c] = 0; + outEdgeWeightToCommunity[c] = 0; + inEdgeWeightFromCommunity[c] = 0; + } + candidateCommunityCount = 0; + } + + function touch(c) { + if (isCandidateCommunity[c]) return; + isCandidateCommunity[c] = 1; + candidateCommunities[candidateCommunityCount++] = c; + } + + function accumulateNeighborCommunityEdgeWeights(v) { + resetScratch(); + const ci = nodeCommunity[v]; + touch(ci); + if (graph.directed) { + const outL = graph.outEdges[v]; + for (let k = 0; k < outL.length; k++) { + const j = outL[k].to; + const w = outL[k].w; + const cj = nodeCommunity[j]; + touch(cj); + outEdgeWeightToCommunity[cj] += w; + } + const inL = graph.inEdges[v]; + for (let k = 0; k < inL.length; k++) { + const i2 = inL[k].from; + const w = inL[k].w; + const ci2 = nodeCommunity[i2]; + touch(ci2); + inEdgeWeightFromCommunity[ci2] += w; + } + } else { + const list = graph.outEdges[v]; + for (let k = 0; k < list.length; k++) { + const j = list[k].to; + const w = list[k].w; + const cj = nodeCommunity[j]; + touch(cj); + neighborEdgeWeightToCommunity[cj] += w; + } + } + 
return candidateCommunityCount; + } + + const twoMUndirected = graph.totalWeight; + function deltaModularityUndirected(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + const strengthV = graph.strengthOut[v]; + const weightToNew = + newC < neighborEdgeWeightToCommunity.length + ? neighborEdgeWeightToCommunity[newC] || 0 + : 0; + const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; + const totalStrengthNew = + newC < communityTotalStrength.length ? communityTotalStrength[newC] : 0; + const totalStrengthOld = communityTotalStrength[oldC]; + const gain_remove = -( + weightToOld / twoMUndirected - + gamma * (strengthV * totalStrengthOld) / (twoMUndirected * twoMUndirected) + ); + const gain_add = + weightToNew / twoMUndirected - + gamma * (strengthV * totalStrengthNew) / (twoMUndirected * twoMUndirected); + return gain_remove + gain_add; + } + + function deltaModularityDirected(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + const totalEdgeWeight = graph.totalWeight; + const strengthOutV = graph.strengthOut[v]; + const strengthInV = graph.strengthIn[v]; + const inFromNew = + newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; + const outToNew = + newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; + const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; + const outToOld = outEdgeWeightToCommunity[oldC] || 0; + const totalInStrengthNew = + newC < communityTotalInStrength.length ? communityTotalInStrength[newC] : 0; + const totalOutStrengthNew = + newC < communityTotalOutStrength.length ? 
communityTotalOutStrength[newC] : 0; + const totalInStrengthOld = communityTotalInStrength[oldC]; + const totalOutStrengthOld = communityTotalOutStrength[oldC]; + const deltaInternal = + (inFromNew + outToNew - inFromOld - outToOld) / totalEdgeWeight; + const deltaExpected = + gamma * (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + + strengthInV * (totalOutStrengthNew - totalOutStrengthOld)) / + (totalEdgeWeight * totalEdgeWeight); + return deltaInternal - deltaExpected; + } + + function deltaCPM(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; + const weightToNew = + newC < neighborEdgeWeightToCommunity.length + ? neighborEdgeWeightToCommunity[newC] || 0 + : 0; + const nodeSize = graph.size[v] || 1; + const sizeOld = communityTotalSize[oldC] || 0; + const sizeNew = newC < communityTotalSize.length ? communityTotalSize[newC] : 0; + return weightToNew - weightToOld - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); + } + + function moveNodeToCommunity(v, newC) { + const oldC = nodeCommunity[v]; + if (oldC === newC) return false; + if (newC >= communityCount) { + ensureCommCapacity(newC + 1); + communityCount = newC + 1; + } + const strengthOutV = graph.strengthOut[v]; + const strengthInV = graph.strengthIn[v]; + const selfLoopWeight = graph.selfLoop[v]; + const nodeSize = graph.size[v]; + + communityNodeCount[oldC] -= 1; + communityNodeCount[newC] += 1; + communityTotalSize[oldC] -= nodeSize; + communityTotalSize[newC] += nodeSize; + if (graph.directed) { + communityTotalOutStrength[oldC] -= strengthOutV; + communityTotalOutStrength[newC] += strengthOutV; + communityTotalInStrength[oldC] -= strengthInV; + communityTotalInStrength[newC] += strengthInV; + } else { + communityTotalStrength[oldC] -= strengthOutV; + communityTotalStrength[newC] += strengthOutV; + } + + if (graph.directed) { + const outToOld = outEdgeWeightToCommunity[oldC] || 0; + const 
inFromOld = inEdgeWeightFromCommunity[oldC] || 0; + const outToNew = + newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; + const inFromNew = + newC < inEdgeWeightFromCommunity.length + ? inEdgeWeightFromCommunity[newC] || 0 + : 0; + communityInternalEdgeWeight[oldC] -= outToOld + inFromOld + selfLoopWeight; + communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; + } else { + const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; + const weightToNew = neighborEdgeWeightToCommunity[newC] || 0; + communityInternalEdgeWeight[oldC] -= 2 * weightToOld + selfLoopWeight; + communityInternalEdgeWeight[newC] += 2 * weightToNew + selfLoopWeight; + } + + nodeCommunity[v] = newC; + return true; + } + + function compactCommunityIds(opts = {}) { + const ids = []; + for (let c = 0; c < communityCount; c++) + if (communityNodeCount[c] > 0) ids.push(c); + if (opts.keepOldOrder) { + ids.sort((a, b) => a - b); + } else if (opts.preserveMap instanceof Map) { + ids.sort((a, b) => { + const pa = opts.preserveMap.get(a); + const pb = opts.preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return ( + communityTotalSize[b] - communityTotalSize[a] || + communityNodeCount[b] - communityNodeCount[a] || + a - b + ); + }); + } else { + ids.sort( + (a, b) => + communityTotalSize[b] - communityTotalSize[a] || + communityNodeCount[b] - communityNodeCount[a] || + a - b, + ); + } + const newId = new Int32Array(communityCount).fill(-1); + ids.forEach((c, i) => { + newId[c] = i; + }); + for (let i = 0; i < nodeCommunity.length; i++) nodeCommunity[i] = newId[nodeCommunity[i]]; + const remappedCount = ids.length; + const newTotalSize = new Float64Array(remappedCount); + const newNodeCount = new Int32Array(remappedCount); + const newInternalEdgeWeight = new Float64Array(remappedCount); + const newTotalStrength = new 
Float64Array(remappedCount); + const newTotalOutStrength = new Float64Array(remappedCount); + const newTotalInStrength = new Float64Array(remappedCount); + for (let i = 0; i < n; i++) { + const c = nodeCommunity[i]; + newTotalSize[c] += graph.size[i]; + newNodeCount[c] += 1; + if (graph.directed) { + newTotalOutStrength[c] += graph.strengthOut[i]; + newTotalInStrength[c] += graph.strengthIn[i]; + } else { + newTotalStrength[c] += graph.strengthOut[i]; + } + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const list = graph.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const list = graph.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (j <= i) continue; + if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; + } + } + } + communityCount = remappedCount; + communityTotalSize = newTotalSize; + communityNodeCount = newNodeCount; + communityInternalEdgeWeight = newInternalEdgeWeight; + communityTotalStrength = newTotalStrength; + communityTotalOutStrength = newTotalOutStrength; + communityTotalInStrength = newTotalInStrength; + } + + function getCommunityMembers() { + const comms = new Array(communityCount); + for (let i = 0; i < communityCount; i++) comms[i] = []; + for (let i = 0; i < n; i++) comms[nodeCommunity[i]].push(i); + return comms; + } + + function getCommunityTotalSize(c) { + return c < communityTotalSize.length ? communityTotalSize[c] : 0; + } + function getCommunityNodeCount(c) { + return c < communityNodeCount.length ? 
communityNodeCount[c] : 0; + } + + return { + n, + get communityCount() { + return communityCount; + }, + nodeCommunity, + communityTotalSize, + communityNodeCount, + communityInternalEdgeWeight, + communityTotalStrength, + communityTotalOutStrength, + communityTotalInStrength, + initializeAggregates, + accumulateNeighborCommunityEdgeWeights, + getCandidateCommunityCount: () => candidateCommunityCount, + getCandidateCommunityAt: (i) => candidateCommunities[i], + getNeighborEdgeWeightToCommunity: (c) => neighborEdgeWeightToCommunity[c] || 0, + getOutEdgeWeightToCommunity: (c) => outEdgeWeightToCommunity[c] || 0, + getInEdgeWeightFromCommunity: (c) => inEdgeWeightFromCommunity[c] || 0, + deltaModularityUndirected, + deltaModularityDirected, + deltaCPM, + moveNodeToCommunity, + compactCommunityIds, + getCommunityMembers, + getCommunityTotalSize, + getCommunityNodeCount, + }; +} + +function growFloat(a, to) { + const b = new Float64Array(to); + b.set(a); + return b; +} +function growInt(a, to) { + const b = new Int32Array(to); + b.set(a); + return b; +} diff --git a/src/graph/algorithms/leiden/rng.js b/src/graph/algorithms/leiden/rng.js new file mode 100644 index 00000000..9d20fcb6 --- /dev/null +++ b/src/graph/algorithms/leiden/rng.js @@ -0,0 +1,19 @@ +/** + * Seeded PRNG (mulberry32). + * Drop-in replacement for ngraph.random — only nextDouble() is needed. + * + * @param {number} [seed] + * @returns {{ nextDouble(): number }} + */ +export function createRng(seed = 42) { + let s = seed | 0; + return { + nextDouble() { + s |= 0; + s = (s + 0x6d2b79f5) | 0; + let t = Math.imul(s ^ (s >>> 15), 1 | s); + t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }, + }; +} diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.js index 2a7f3a69..d68572d6 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.js @@ -1,26 +1,30 @@ /** - * Louvain community detection via graphology. 
+ * Community detection via vendored Leiden algorithm. + * Maintains backward-compatible API: { assignments: Map, modularity: number } * * @param {import('../model.js').CodeGraph} graph * @param {{ resolution?: number }} [opts] * @returns {{ assignments: Map, modularity: number }} */ -import graphologyLouvain from 'graphology-communities-louvain'; +import { detectClusters } from './leiden/index.js'; export function louvainCommunities(graph, opts = {}) { - const gy = graph.toGraphology({ type: 'undirected' }); - - if (gy.order === 0 || gy.size === 0) { + if (graph.nodeCount === 0 || graph.edgeCount === 0) { return { assignments: new Map(), modularity: 0 }; } const resolution = opts.resolution ?? 1.0; - const details = graphologyLouvain.detailed(gy, { resolution }); + const result = detectClusters(graph, { + resolution, + randomSeed: 42, + directed: false, + }); const assignments = new Map(); - for (const [nodeId, communityId] of Object.entries(details.communities)) { - assignments.set(nodeId, communityId); + for (const [id] of graph.nodes()) { + const cls = result.getClass(id); + if (cls != null) assignments.set(id, cls); } - return { assignments, modularity: details.modularity }; + return { assignments, modularity: result.quality() }; } diff --git a/src/graph/model.js b/src/graph/model.js index 733be688..8672155b 100644 --- a/src/graph/model.js +++ b/src/graph/model.js @@ -5,8 +5,6 @@ * Node IDs are always strings. DB integer IDs should be stringified before use. */ -import Graph from 'graphology'; - export class CodeGraph { /** * @param {{ directed?: boolean }} [opts] @@ -189,21 +187,6 @@ export class CodeGraph { return result; } - /** Convert to graphology instance (for Louvain etc). */ - toGraphology(opts = {}) { - const type = opts.type || (this._directed ? 
'directed' : 'undirected'); - const g = new Graph({ type }); - for (const [id] of this._nodes) { - g.addNode(id); - } - - for (const [src, tgt] of this.edges()) { - if (src === tgt) continue; - if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); - } - return g; - } - // ─── Utilities ────────────────────────────────────────────────── clone() { diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 29a4f6e9..677a33a7 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -116,7 +116,7 @@ export async function stats(customDbPath, opts = {}) { const { communitySummaryForStats } = await import('../../features/communities.js'); data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); } catch { - /* graphology may not be available */ + /* community detection may fail on disconnected graphs */ } if (outputResult(data, null, opts)) return; diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js new file mode 100644 index 00000000..5e3c1865 --- /dev/null +++ b/tests/graph/algorithms/leiden.test.js @@ -0,0 +1,307 @@ +import { describe, expect, it } from 'vitest'; +import { detectClusters } from '../../../src/graph/algorithms/leiden/index.js'; +import { CodeGraph } from '../../../src/graph/model.js'; + +// ─── Helpers ────────────────────────────────────────────────────────── + +/** Two 4-node cliques connected by a single weak bridge. 
*/ +function makeTwoCliquesBridge() { + const g = new CodeGraph(); + const A = ['0', '1', '2', '3']; + const B = ['4', '5', '6', '7']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('3', '4'); + g.addEdge('4', '3'); + return g; +} + +function makeTwoCliques(n = 4) { + const g = new CodeGraph(); + const A = Array.from({ length: n }, (_, i) => `a${i}`); + const B = Array.from({ length: n }, (_, i) => `b${i}`); + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge(A[A.length - 1], B[0]); + g.addEdge(B[0], A[A.length - 1]); + return { g, A, B }; +} + +// ─── Basic ──────────────────────────────────────────────────────────── + +describe('detectClusters', () => { + it('splits two cliques with a weak bridge', () => { + const g = makeTwoCliquesBridge(); + const clusters = detectClusters(g, { randomSeed: 1 }); + const cA = new Set(['0', '1', '2', '3'].map((i) => clusters.getClass(i))); + const cB = new Set(['4', '5', '6', '7'].map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── CPM ────────────────────────────────────────────────────────────── + +describe('CPM resolution tuning', () => { + it('splits more with higher gamma', () => { + const g = makeTwoCliquesBridge(); + const low = detectClusters(g, { quality: 'cpm', resolution: 0.01, randomSeed: 1 }); + const high = detectClusters(g, { quality: 'cpm', resolution: 10.0, 
randomSeed: 1 }); + const ids = ['0', '1', '2', '3', '4', '5', '6', '7']; + const countCommunities = (clusters) => new Set(ids.map((i) => clusters.getClass(i))).size; + expect(countCommunities(low)).toBeLessThanOrEqual(countCommunities(high)); + }); +}); + +// ─── CPM size-aware ─────────────────────────────────────────────────── + +describe('CPM size-aware mode', () => { + it('penalizes large-size communities more than unit mode', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2', '3']; + const B = ['4', '5', '6', '7']; + for (const id of [...A, ...B]) g.addNode(id, { size: A.includes(id) ? 5 : 1 }); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('3', '4'); + g.addEdge('4', '3'); + + const gamma = 0.5; + const unit = detectClusters(g, { + quality: 'cpm', + cpmMode: 'unit', + resolution: gamma, + randomSeed: 3, + }); + const sized = detectClusters(g, { + quality: 'cpm', + cpmMode: 'size-aware', + resolution: gamma, + randomSeed: 3, + }); + expect(sized.quality()).toBeLessThanOrEqual(unit.quality()); + const ids = [...A, ...B]; + const count = (cl) => new Set(ids.map((i) => cl.getClass(i))).size; + expect(count(unit)).toBe(2); + expect(count(sized)).toBe(2); + }); +}); + +// ─── Directed ───────────────────────────────────────────────────────── + +describe('directed modularity', () => { + it('finds two communities in directed case', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2']; + const B = ['3', '4', '5']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = 0; j < A.length; j++) if (i !== j) g.addEdge(A[i], A[j]); + for (let i = 0; i < B.length; i++) + for (let j = 0; j < B.length; j++) if (i !== j) g.addEdge(B[i], B[j]); + g.addEdge('2', '3'); + + const 
clusters = detectClusters(g, { directed: true, randomSeed: 2 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── Edge cases ─────────────────────────────────────────────────────── + +describe('edge cases', () => { + it('keeps isolated node as its own community', () => { + const g = new CodeGraph(); + g.addNode('x'); + g.addNode('y'); + g.addNode('z'); + g.addEdge('x', 'y'); + g.addEdge('y', 'x'); + + const clusters = detectClusters(g, { randomSeed: 123 }); + expect(clusters.getClass('x')).toBe(clusters.getClass('y')); + expect(clusters.getClass('z')).not.toBe(clusters.getClass('x')); + }); + + it('handles negative weights and preserves intuitive split', () => { + const g = new CodeGraph(); + const A = ['a1', 'a2', 'a3', 'a4']; + const B = ['b1', 'b2', 'b3', 'b4']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('a4', 'b1', { weight: -2 }); + g.addEdge('b1', 'a4', { weight: -2 }); + g.addEdge('a3', 'b2', { weight: -1 }); + g.addEdge('b2', 'a3', { weight: -1 }); + + const clusters = detectClusters(g, { randomSeed: 7 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); + + it('self-loop biases node to remain separate under weak external ties (CPM)', () => { + const g = new CodeGraph(); + g.addNode('a'); + g.addNode('b'); + g.addEdge('a', 'a', { weight: 5 }); + g.addEdge('a', 'b', { weight: 0.1 }); + g.addEdge('b', 'a', { weight: 0.1 }); + + 
const clusters = detectClusters(g, { + randomSeed: 5, + quality: 'cpm', + resolution: 1.0, + }); + expect(clusters.getClass('a')).not.toBe(clusters.getClass('b')); + }); + + it('treats a disconnected clique as its own isolated community', () => { + const g = new CodeGraph(); + const A = ['a1', 'a2', 'a3']; + const B = ['b1', 'b2']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + g.addEdge('b1', 'b2'); + g.addEdge('b2', 'b1'); + + const clusters = detectClusters(g, { randomSeed: 321 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── Ergonomics & constraints ───────────────────────────────────────── + +describe('ergonomics & constraints', () => { + it('maxCommunitySize is enforced', () => { + const { g, A, B } = makeTwoCliques(3); + const clusters = detectClusters(g, { randomSeed: 123, maxCommunitySize: 3 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); + + it('deterministic with fixed seed even with random strategies', () => { + const { g } = makeTwoCliques(4); + const opts = { randomSeed: 2024, candidateStrategy: 'random-neighbor' }; + const a = detectClusters(g, opts); + const b = detectClusters(g, opts); + const classesA = new Map(); + const classesB = new Map(); + for (const [id] of g.nodes()) { + classesA.set(id, a.getClass(id)); + classesB.set(id, b.getClass(id)); + } + expect(JSON.stringify([...classesA.entries()].sort())).toBe( + JSON.stringify([...classesB.entries()].sort()), + ); + }); +}); + +// ─── Fixed nodes ────────────────────────────────────────────────────── + 
+describe('fixed nodes', () => { + it('does not force fixed nodes to leave their clique communities', () => { + const g = makeTwoCliquesBridge(); + const fixedRun = detectClusters(g, { + randomSeed: 11, + refine: true, + fixedNodes: new Set(['3', '4']), + }); + const c3 = fixedRun.getClass('3'); + const c4 = fixedRun.getClass('4'); + expect(fixedRun.getClass('0')).toBe(c3); + expect(fixedRun.getClass('1')).toBe(c3); + expect(fixedRun.getClass('2')).toBe(c3); + expect(fixedRun.getClass('4')).not.toBe(c3); + expect(fixedRun.getClass('5')).toBe(c4); + expect(fixedRun.getClass('6')).toBe(c4); + expect(fixedRun.getClass('7')).toBe(c4); + }); +}); + +// ─── Refinement ─────────────────────────────────────────────────────── + +describe('refinement', () => { + it('keeps cliques separated across refinement', () => { + const g = new CodeGraph(); + const groups = [ + Array.from({ length: 5 }, (_, i) => String(i)), + Array.from({ length: 5 }, (_, i) => String(i + 5)), + Array.from({ length: 5 }, (_, i) => String(i + 10)), + ]; + for (const group of groups) for (const v of group) g.addNode(v); + for (const group of groups) { + for (let i = 0; i < group.length; i++) + for (let j = i + 1; j < group.length; j++) { + g.addEdge(group[i], group[j]); + g.addEdge(group[j], group[i]); + } + } + g.addEdge('4', '5'); + g.addEdge('5', '4'); + g.addEdge('9', '10'); + g.addEdge('10', '9'); + + const clusters = detectClusters(g, { randomSeed: 1, refine: true }); + const c0 = new Set(['0', '1', '2', '3', '4'].map((i) => clusters.getClass(i))); + const c1 = new Set(['5', '6', '7', '8', '9'].map((i) => clusters.getClass(i))); + const c2 = new Set(['10', '11', '12', '13', '14'].map((i) => clusters.getClass(i))); + expect(c0.size).toBe(1); + expect(c1.size).toBe(1); + expect(c2.size).toBe(1); + expect([...c0][0]).not.toBe([...c1][0]); + expect([...c1][0]).not.toBe([...c2][0]); + }); +}); diff --git a/tests/graph/model.test.js b/tests/graph/model.test.js index ec925ba0..dc1bbe93 100644 --- 
a/tests/graph/model.test.js +++ b/tests/graph/model.test.js @@ -144,28 +144,6 @@ describe('CodeGraph — toEdgeArray', () => { }); }); -describe('CodeGraph — toGraphology', () => { - it('creates an undirected graphology graph', () => { - const g = new CodeGraph(); - g.addEdge('a', 'b'); - g.addEdge('b', 'c'); - - const gy = g.toGraphology({ type: 'undirected' }); - expect(gy.order).toBe(3); - expect(gy.size).toBe(2); - expect(gy.type).toBe('undirected'); - }); - - it('skips self-loops', () => { - const g = new CodeGraph(); - g.addEdge('a', 'a'); - g.addEdge('a', 'b'); - - const gy = g.toGraphology({ type: 'undirected' }); - expect(gy.size).toBe(1); - }); -}); - describe('CodeGraph — clone', () => { it('produces an independent copy', () => { const g = new CodeGraph(); From c81e861a25b2f3e506c935e0cc5a3d70c6707879 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:50:27 -0600 Subject: [PATCH 02/25] style: apply biome formatting to vendored Leiden modules --- src/graph/algorithms/leiden/adapter.js | 11 +++---- src/graph/algorithms/leiden/cpm.js | 6 ++-- src/graph/algorithms/leiden/index.js | 6 ++-- src/graph/algorithms/leiden/modularity.js | 21 ++++++------- src/graph/algorithms/leiden/partition.js | 37 +++++++++-------------- 5 files changed, 34 insertions(+), 47 deletions(-) diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js index d4d16b6d..bad647ff 100644 --- a/src/graph/algorithms/leiden/adapter.js +++ b/src/graph/algorithms/leiden/adapter.js @@ -15,11 +15,9 @@ */ export function makeGraphAdapter(graph, opts = {}) { const linkWeight = - opts.linkWeight || - ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); + opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); const nodeSize = - opts.nodeSize || - ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); + opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? attrs.size : 1)); const directed = !!opts.directed; const baseNodeIds = opts.baseNodeIds; @@ -29,8 +27,7 @@ export function makeGraphAdapter(graph, opts = {}) { if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { for (let i = 0; i < baseNodeIds.length; i++) { const id = baseNodeIds[i]; - if (!graph.hasNode(id)) - throw new Error('Missing node: ' + id); + if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`); idToIndex.set(id, i); nodeIds.push(id); } @@ -86,7 +83,7 @@ export function makeGraphAdapter(graph, opts = {}) { } const i = a < b ? a : b; const j = a < b ? b : a; - const key = i + ':' + j; + const key = `${i}:${j}`; let rec = pairAgg.get(key); if (!rec) { rec = { sum: 0, seenAB: 0, seenBA: 0 }; diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js index f6c54d58..e7fd9b76 100644 --- a/src/graph/algorithms/leiden/cpm.js +++ b/src/graph/algorithms/leiden/cpm.js @@ -7,14 +7,14 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { const oldC = part.nodeCommunity[v]; if (c === oldC) return 0; const w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - const w_new = c < g.n ? (part.getNeighborEdgeWeightToCommunity(c) || 0) : 0; + const w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; const s_v = g.size[v] || 1; const S_old = part.communityTotalSize[oldC] || 0; const S_new = c < part.communityTotalSize.length ? 
part.communityTotalSize[c] : 0; return w_new - w_old - gamma * s_v * (S_new - S_old + s_v); } -export function qualityCPM(part, g, gamma = 1.0) { +export function qualityCPM(part, _g, gamma = 1.0) { let sum = 0; for (let c = 0; c < part.communityCount; c++) sum += @@ -23,7 +23,7 @@ export function qualityCPM(part, g, gamma = 1.0) { return sum; } -export function qualityCPMSizeAware(part, g, gamma = 1.0) { +export function qualityCPMSizeAware(part, _g, gamma = 1.0) { let sum = 0; for (let c = 0; c < part.communityCount; c++) { const S = part.communityTotalSize[c] || 0; diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js index 400c4a0b..b2dc4808 100644 --- a/src/graph/algorithms/leiden/index.js +++ b/src/graph/algorithms/leiden/index.js @@ -55,14 +55,12 @@ export function detectClusters(graph, options = {}) { quality() { const q = (options.quality || 'modularity').toLowerCase(); if (q === 'cpm') { - const gamma = - typeof options.resolution === 'number' ? options.resolution : 1.0; + const gamma = typeof options.resolution === 'number' ? options.resolution : 1.0; if ((options.cpmMode || 'unit') === 'size-aware') return qualityCPMSizeAware(partition, finalGraph, gamma); return qualityCPM(partition, finalGraph, gamma); } - const gamma = - typeof options.resolution === 'number' ? options.resolution : 1.0; + const gamma = typeof options.resolution === 'number' ? options.resolution : 1.0; return qualityModularity(partition, finalGraph, gamma); }, toJSON() { diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js index 3fe2732f..8f314830 100644 --- a/src/graph/algorithms/leiden/modularity.js +++ b/src/graph/algorithms/leiden/modularity.js @@ -13,8 +13,8 @@ export function diffModularity(part, g, v, c, gamma = 1.0) { const k_v_in_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; const wTot_new = c < part.communityTotalStrength.length ? 
part.communityTotalStrength[c] : 0; const wTot_old = part.communityTotalStrength[oldC]; - const gain_remove = -(k_v_in_old / m2 - gamma * (k_v * wTot_old) / (m2 * m2)); - const gain_add = k_v_in_new / m2 - gamma * (k_v * wTot_new) / (m2 * m2); + const gain_remove = -(k_v_in_old / m2 - (gamma * (k_v * wTot_old)) / (m2 * m2)); + const gain_add = k_v_in_new / m2 - (gamma * (k_v * wTot_new)) / (m2 * m2); return gain_remove + gain_add; } @@ -24,18 +24,16 @@ export function diffModularityDirected(part, g, v, c, gamma = 1.0) { const m = g.totalWeight; const k_out = g.strengthOut[v]; const k_in = g.strengthIn[v]; - const w_new_in = c < g.n ? (part.getInEdgeWeightFromCommunity(c) || 0) : 0; - const w_new_out = c < g.n ? (part.getOutEdgeWeightToCommunity(c) || 0) : 0; + const w_new_in = c < g.n ? part.getInEdgeWeightFromCommunity(c) || 0 : 0; + const w_new_out = c < g.n ? part.getOutEdgeWeightToCommunity(c) || 0 : 0; const w_old_in = part.getInEdgeWeightFromCommunity(oldC) || 0; const w_old_out = part.getOutEdgeWeightToCommunity(oldC) || 0; - const T_new = - c < part.communityTotalInStrength.length ? part.communityTotalInStrength[c] : 0; - const F_new = - c < part.communityTotalOutStrength.length ? part.communityTotalOutStrength[c] : 0; + const T_new = c < part.communityTotalInStrength.length ? part.communityTotalInStrength[c] : 0; + const F_new = c < part.communityTotalOutStrength.length ? 
part.communityTotalOutStrength[c] : 0; const T_old = part.communityTotalInStrength[oldC]; const F_old = part.communityTotalOutStrength[oldC]; const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out) / m; - const deltaExpected = gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old)) / (m * m); + const deltaExpected = (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old))) / (m * m); return deltaInternal - deltaExpected; } @@ -46,12 +44,13 @@ export function qualityModularity(part, g, gamma = 1.0) { for (let c = 0; c < part.communityCount; c++) sum += part.communityInternalEdgeWeight[c] / m2 - - gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c]) / (m2 * m2); + (gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c])) / + (m2 * m2); } else { for (let c = 0; c < part.communityCount; c++) { const lc = part.communityInternalEdgeWeight[c]; const dc = part.communityTotalStrength[c]; - sum += lc / m2 - gamma * (dc * dc) / (m2 * m2); + sum += lc / m2 - (gamma * (dc * dc)) / (m2 * m2); } } return sum; diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js index fa4038c6..acc9c789 100644 --- a/src/graph/algorithms/leiden/partition.js +++ b/src/graph/algorithms/leiden/partition.js @@ -20,12 +20,12 @@ export function makePartition(graph) { let communityTotalOutStrength = new Float64Array(communityCount); let communityTotalInStrength = new Float64Array(communityCount); - let candidateCommunities = new Int32Array(n); + const candidateCommunities = new Int32Array(n); let candidateCommunityCount = 0; - let neighborEdgeWeightToCommunity = new Float64Array(n); - let outEdgeWeightToCommunity = new Float64Array(n); - let inEdgeWeightFromCommunity = new Float64Array(n); - let isCandidateCommunity = new Uint8Array(n); + const neighborEdgeWeightToCommunity = new Float64Array(n); + const outEdgeWeightToCommunity = new Float64Array(n); + const inEdgeWeightFromCommunity = new 
Float64Array(n); + const isCandidateCommunity = new Uint8Array(n); function ensureCommCapacity(newCount) { if (newCount <= communityTotalSize.length) return; @@ -136,20 +136,18 @@ export function makePartition(graph) { if (newC === oldC) return 0; const strengthV = graph.strengthOut[v]; const weightToNew = - newC < neighborEdgeWeightToCommunity.length - ? neighborEdgeWeightToCommunity[newC] || 0 - : 0; + newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; const totalStrengthNew = newC < communityTotalStrength.length ? communityTotalStrength[newC] : 0; const totalStrengthOld = communityTotalStrength[oldC]; const gain_remove = -( weightToOld / twoMUndirected - - gamma * (strengthV * totalStrengthOld) / (twoMUndirected * twoMUndirected) + (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) ); const gain_add = weightToNew / twoMUndirected - - gamma * (strengthV * totalStrengthNew) / (twoMUndirected * twoMUndirected); + (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); return gain_remove + gain_add; } @@ -171,11 +169,11 @@ export function makePartition(graph) { newC < communityTotalOutStrength.length ? 
communityTotalOutStrength[newC] : 0; const totalInStrengthOld = communityTotalInStrength[oldC]; const totalOutStrengthOld = communityTotalOutStrength[oldC]; - const deltaInternal = - (inFromNew + outToNew - inFromOld - outToOld) / totalEdgeWeight; + const deltaInternal = (inFromNew + outToNew - inFromOld - outToOld) / totalEdgeWeight; const deltaExpected = - gamma * (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + - strengthInV * (totalOutStrengthNew - totalOutStrengthOld)) / + (gamma * + (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + + strengthInV * (totalOutStrengthNew - totalOutStrengthOld))) / (totalEdgeWeight * totalEdgeWeight); return deltaInternal - deltaExpected; } @@ -185,9 +183,7 @@ export function makePartition(graph) { if (newC === oldC) return 0; const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; const weightToNew = - newC < neighborEdgeWeightToCommunity.length - ? neighborEdgeWeightToCommunity[newC] || 0 - : 0; + newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; const nodeSize = graph.size[v] || 1; const sizeOld = communityTotalSize[oldC] || 0; const sizeNew = newC < communityTotalSize.length ? communityTotalSize[newC] : 0; @@ -226,9 +222,7 @@ export function makePartition(graph) { const outToNew = newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; const inFromNew = - newC < inEdgeWeightFromCommunity.length - ? inEdgeWeightFromCommunity[newC] || 0 - : 0; + newC < inEdgeWeightFromCommunity.length ? 
inEdgeWeightFromCommunity[newC] || 0 : 0; communityInternalEdgeWeight[oldC] -= outToOld + inFromOld + selfLoopWeight; communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; } else { @@ -244,8 +238,7 @@ export function makePartition(graph) { function compactCommunityIds(opts = {}) { const ids = []; - for (let c = 0; c < communityCount; c++) - if (communityNodeCount[c] > 0) ids.push(c); + for (let c = 0; c < communityCount; c++) if (communityNodeCount[c] > 0) ids.push(c); if (opts.keepOldOrder) { ids.sort((a, b) => a - b); } else if (opts.preserveMap instanceof Map) { From 555dbf511b47e78de26c5f50806e61b080a48c53 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 02:17:14 -0600 Subject: [PATCH 03/25] fix(leiden): remove falsy || fallback in computeQualityGain The || operator treats a quality gain of exactly 0 (or NaN) as falsy, silently falling through to the partition closure methods; non-zero gains, including negative ones, are truthy and were returned directly. Use the standalone diff functions directly since both implementations compute identical results. Impact: 2 functions changed, 4 affected --- src/graph/algorithms/leiden/optimiser.js | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index 6baed9e8..4521e8fa 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -194,7 +194,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { * Uses CodeGraph instead of ngraph.graph. 
*/ function buildCoarseGraph(g, p) { - const coarse = new CodeGraph(); + const coarse = new CodeGraph({ directed: g.directed }); for (let c = 0; c < p.communityCount; c++) { coarse.addNode(String(c), { size: p.communityTotalSize[c] }); } @@ -268,23 +268,12 @@ function computeQualityGain(partition, v, c, opts) { const quality = (opts.quality || 'modularity').toLowerCase(); const gamma = typeof opts.resolution === 'number' ? opts.resolution : 1.0; if (quality === 'cpm') { - return ( - diffCPM(partition, partition.graph || {}, v, c, gamma) || - partition.deltaCPM?.(v, c, gamma) || - 0 - ); + return diffCPM(partition, partition.graph || {}, v, c, gamma); } - if (opts.directed) - return ( - diffModularityDirected(partition, partition.graph || {}, v, c, gamma) || - partition.deltaModularityDirected?.(v, c, gamma) || - 0 - ); - return ( - diffModularity(partition, partition.graph || {}, v, c, gamma) || - partition.deltaModularityUndirected?.(v, c, gamma) || - 0 - ); + if (opts.directed) { + return diffModularityDirected(partition, partition.graph || {}, v, c, gamma); + } + return diffModularity(partition, partition.graph || {}, v, c, gamma); } function shuffleArrayInPlace(arr, rng = Math.random) { From 78d4969af30819f40a6687c0e88c61cc5412b5fd Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 02:17:42 -0600 Subject: [PATCH 04/25] fix(leiden): document self-loop weight convention in adapter The vendored code uses single-w self-loop convention (not standard 2w) which is self-consistent with the modularity formulas in modularity.js. Added comment explaining the convention choice. 
Impact: 1 functions changed, 3 affected --- src/graph/algorithms/leiden/adapter.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js index bad647ff..9168c1ae 100644 --- a/src/graph/algorithms/leiden/adapter.js +++ b/src/graph/algorithms/leiden/adapter.js @@ -111,7 +111,9 @@ export function makeGraphAdapter(graph, opts = {}) { strengthIn[j] += w; } - // Add self-loops into adjacency and strengths + // Add self-loops into adjacency and strengths. + // Note: uses single-w convention (not standard 2w) — the modularity formulas in + // modularity.js are written to match this convention, keeping the system self-consistent. for (let v = 0; v < n; v++) { const w = selfLoop[v]; if (w !== 0) { From 23767b35792a352efb147d733875ceba812cbf0a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 02:17:55 -0600 Subject: [PATCH 05/25] fix(leiden): report standard modularity regardless of resolution quality() now always evaluates at gamma=1.0 for standard Newman-Girvan modularity, making values comparable across different resolution settings. CPM quality still uses the user-specified resolution since that is intrinsic to the CPM metric. Impact: 2 functions changed, 8 affected --- src/graph/algorithms/leiden/index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js index b2dc4808..c1e98c6a 100644 --- a/src/graph/algorithms/leiden/index.js +++ b/src/graph/algorithms/leiden/index.js @@ -60,8 +60,9 @@ export function detectClusters(graph, options = {}) { return qualityCPMSizeAware(partition, finalGraph, gamma); return qualityCPM(partition, finalGraph, gamma); } - const gamma = typeof options.resolution === 'number' ? 
options.resolution : 1.0; - return qualityModularity(partition, finalGraph, gamma); + // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity reporting, + // regardless of the resolution used during optimization + return qualityModularity(partition, finalGraph, 1.0); }, toJSON() { const membershipObj = {}; From 367f0ddb09470eeb76c017278a8253f688823d07 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:04:12 -0600 Subject: [PATCH 06/25] fix(leiden): correct directed self-loop double-counting in partition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the directed path, initializeAggregates counted self-loop weight twice — once via graph.selfLoop[i] and again via the outEdges iteration (since self-loops are stored in outEdges). moveNodeToCommunity then subtracted outToOld + inFromOld + selfLoopWeight (3w total) against an initialized value of 2w, driving communityInternalEdgeWeight negative for any directed graph with self-loops. Fix: add i === j guard in initializeAggregates and compactCommunityIds directed loops (matching the undirected j <= i guard), and change moveNodeToCommunity to subtract selfLoopWeight instead of adding it (since outToOld/inFromOld already include the self-loop contribution). 
Impact: 4 functions changed, 3 affected --- src/graph/algorithms/leiden/partition.js | 7 +++++- tests/graph/algorithms/leiden.test.js | 29 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js index acc9c789..54585b5a 100644 --- a/src/graph/algorithms/leiden/partition.js +++ b/src/graph/algorithms/leiden/partition.js @@ -63,6 +63,7 @@ export function makePartition(graph) { const neighbors = graph.outEdges[i]; for (let k = 0; k < neighbors.length; k++) { const { to: j, w } = neighbors[k]; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; } } @@ -223,7 +224,9 @@ export function makePartition(graph) { newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; const inFromNew = newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; - communityInternalEdgeWeight[oldC] -= outToOld + inFromOld + selfLoopWeight; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ communityInternalEdgeWeight[oldC] -= outToOld + inFromOld - selfLoopWeight; communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; } else { const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; @@ -284,6 +287,7 @@ export function makePartition(graph) { } else { newTotalStrength[c] += graph.strengthOut[i]; } + if (graph.selfLoop[i] !== 0) newInternalEdgeWeight[c] += graph.selfLoop[i]; } if (graph.directed) { for (let i = 0; i < n; i++) { @@ -291,6 +295,7 @@ export function makePartition(graph) { const list = graph.outEdges[i]; for (let k = 0; k < list.length; k++) { const { to: j, w } = list[k]; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; } } diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js index 5e3c1865..d6f519c4 100644 --- a/tests/graph/algorithms/leiden.test.js +++ b/tests/graph/algorithms/leiden.test.js @@ -137,6 +137,35 @@ describe('directed modularity', () => { }); }); +// ─── Directed self-loops ────────────────────────────────────────────── + +describe('directed self-loops', () => { + it('does not corrupt internal edge weight with directed self-loops', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2']; + const B = ['3', '4', '5']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = 0; j < A.length; j++) if (i !== j) g.addEdge(A[i], A[j]); + for (let i = 0; i < B.length; i++) + for (let j = 0; j < B.length; j++) if (i !== j) g.addEdge(B[i], B[j]); + g.addEdge('2', '3'); + // Add self-loops — these previously caused double-counting in directed mode + g.addEdge('0', '0', { weight: 3 }); + g.addEdge('3', '3', { weight: 3 }); + + const clusters = detectClusters(g, { directed: true, randomSeed: 2 }); + // Quality must be finite (not NaN from negative internal edge weight) + expect(Number.isFinite(clusters.quality())).toBe(true); + 
expect(clusters.quality()).toBeGreaterThanOrEqual(0); + // A-side nodes should not mix with B-side nodes + const aCommunities = new Set(A.map((i) => clusters.getClass(i))); + const bCommunities = new Set(B.map((i) => clusters.getClass(i))); + const overlap = [...aCommunities].filter((c) => bCommunities.has(c)); + expect(overlap.length).toBe(0); + }); +}); + // ─── Edge cases ─────────────────────────────────────────────────────── describe('edge cases', () => { From 2a50235a40bc5f4d8d4497f424e6683c5c06b84f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:04:27 -0600 Subject: [PATCH 07/25] fix(leiden): add maxLevels and maxLocalPasses to DEFAULTS.community Per project convention, tunable algorithm constants belong in DEFAULTS so users can override them via .codegraphrc.json. The optimiser keeps its own defaults as fallbacks with a comment pointing to the config. --- src/graph/algorithms/leiden/optimiser.js | 2 ++ src/infrastructure/config.js | 2 ++ tests/unit/config.test.js | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index 4521e8fa..4f302985 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -10,6 +10,8 @@ import { diffModularity, diffModularityDirected } from './modularity.js'; import { makePartition } from './partition.js'; import { createRng } from './rng.js'; +// Mirrored in DEFAULTS.community (src/infrastructure/config.js) for user override +// via .codegraphrc.json. Callers (e.g. louvain.js) can pass overrides through options. 
const DEFAULT_MAX_LEVELS = 50; const DEFAULT_MAX_LOCAL_PASSES = 20; const GAIN_EPSILON = 1e-12; diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index e8439ab0..714fe64d 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -67,6 +67,8 @@ export const DEFAULTS = { }, community: { resolution: 1.0, + maxLevels: 50, + maxLocalPasses: 20, }, structure: { cohesionThreshold: 0.3, diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js index b7062c48..fc7d7d04 100644 --- a/tests/unit/config.test.js +++ b/tests/unit/config.test.js @@ -121,7 +121,7 @@ describe('DEFAULTS', () => { }); it('has community defaults', () => { - expect(DEFAULTS.community).toEqual({ resolution: 1.0 }); + expect(DEFAULTS.community).toEqual({ resolution: 1.0, maxLevels: 50, maxLocalPasses: 20 }); }); it('has structure defaults', () => { From 270ce8d7e23e80b9e8a9e68787886883b0da3ae0 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:04:39 -0600 Subject: [PATCH 08/25] docs(leiden): clarify quality() gamma=1.0 behavior in JSDoc quality() for modularity always evaluates at gamma=1.0 regardless of the resolution used during optimization. This puts reported values on one common scale across runs, but when a non-default resolution was used the value reflects Q at gamma=1.0, not the objective that was actually optimized. 
--- src/graph/algorithms/leiden/index.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js index c1e98c6a..938b6bbc 100644 --- a/src/graph/algorithms/leiden/index.js +++ b/src/graph/algorithms/leiden/index.js @@ -24,6 +24,14 @@ import { runLouvainUndirectedModularity } from './optimiser.js'; * @param {Set|Array} [options.fixedNodes] * @param {string} [options.candidateStrategy] - 'neighbors' | 'all' | 'random' | 'random-neighbor' * @returns {{ getClass(id): number, getCommunities(): Map, quality(): number, toJSON(): object }} + * + * **Note on `quality()`:** For modularity, `quality()` always evaluates at γ=1.0 + * (standard Newman-Girvan modularity) regardless of the `resolution` used during + * optimization. This makes quality values comparable across runs with different + * resolutions. For CPM, `quality()` uses the caller-specified resolution since γ + * is intrinsic to the CPM metric. Do not use modularity `quality()` values to + * compare partitions found at different resolutions — they reflect Q at γ=1.0, + * not the objective that was actually optimized. 
*/ export function detectClusters(graph, options = {}) { const { From 7188f0f2cadc1fbffe4b5e4ba0cab52fc465b372 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:14:56 -0600 Subject: [PATCH 09/25] fix(leiden): wire maxLevels/maxLocalPasses from config through callers Impact: 2 functions changed, 6 affected --- src/features/communities.js | 8 +++++++- src/graph/algorithms/louvain.js | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/features/communities.js b/src/features/communities.js index 1f3a54d7..a707ab4d 100644 --- a/src/features/communities.js +++ b/src/features/communities.js @@ -147,7 +147,13 @@ export function communitiesData(customDbPath, opts = {}) { const config = opts.config || loadConfig(); const resolution = opts.resolution ?? config.community?.resolution ?? 1.0; - const { assignments, modularity } = louvainCommunities(graph, { resolution }); + const maxLevels = opts.maxLevels ?? config.community?.maxLevels; + const maxLocalPasses = opts.maxLocalPasses ?? 
config.community?.maxLocalPasses; + const { assignments, modularity } = louvainCommunities(graph, { + resolution, + maxLevels, + maxLocalPasses, + }); const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); const { splitCandidates, mergeCandidates, driftScore } = analyzeDrift(communities, communityDirs); diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.js index d68572d6..4f2860d1 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.js @@ -3,7 +3,7 @@ * Maintains backward-compatible API: { assignments: Map, modularity: number } * * @param {import('../model.js').CodeGraph} graph - * @param {{ resolution?: number }} [opts] + * @param {{ resolution?: number, maxLevels?: number, maxLocalPasses?: number }} [opts] * @returns {{ assignments: Map, modularity: number }} */ import { detectClusters } from './leiden/index.js'; @@ -18,6 +18,8 @@ export function louvainCommunities(graph, opts = {}) { resolution, randomSeed: 42, directed: false, + ...(opts.maxLevels != null && { maxLevels: opts.maxLevels }), + ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), }); const assignments = new Map(); From 2b138ab8756cbb1cdee2b3a03b24d79def8c324f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:15:08 -0600 Subject: [PATCH 10/25] docs(leiden): document directed self-loop dual-representation invariant Impact: 1 functions changed, 3 affected --- src/graph/algorithms/leiden/adapter.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js index 9168c1ae..c5425a5f 100644 --- a/src/graph/algorithms/leiden/adapter.js +++ b/src/graph/algorithms/leiden/adapter.js @@ -62,6 +62,10 @@ export function makeGraphAdapter(graph, opts = {}) { const w = +linkWeight(attrs) || 0; if (from === to) { selfLoop[from] += w; + // Self-loop is intentionally kept 
in outEdges/inEdges as well. + // partition.js's moveNodeToCommunity (directed path) accounts for this + // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid + // triple-counting (see partition.js moveNodeToCommunity directed block). } outEdges[from].push({ to, w }); inEdges[to].push({ from, w }); From 5320964a7cd5e6c20779b94287c89e614b92a2ef Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:15:19 -0600 Subject: [PATCH 11/25] docs(leiden): explain self-loop handling in buildCoarseGraph --- src/graph/algorithms/leiden/optimiser.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index 4f302985..ecd1ef92 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -195,6 +195,11 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { * Build a coarse graph where each community becomes a node. * Uses CodeGraph instead of ngraph.graph. */ +// Build a coarse graph where each community becomes a single node. +// Self-loops (g.selfLoop[]) don't need separate handling here because they +// are already present in g.outEdges (directed path keeps them in both arrays). +// When the coarse graph is fed back to makeGraphAdapter at the next level, +// the adapter re-detects cu===cu edges as self-loops and populates selfLoop[]. 
function buildCoarseGraph(g, p) { const coarse = new CodeGraph({ directed: g.directed }); for (let c = 0; c < p.communityCount; c++) { From af231741185762ae91b090298794662ed16d4358 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 04:15:32 -0600 Subject: [PATCH 12/25] fix(leiden): use getters for partition aggregate arrays to prevent stale references After compactCommunityIds replaces closure variables with new arrays, the returned partition object's direct properties still pointed to the old (pre-compaction) arrays. This caused buildCoarseGraph to read wrong communityTotalSize values, corrupting multi-level CPM optimization and making quality() return ~0 post-compaction. Convert the six aggregate-array properties to getters so they always dereference the current closure variable. Update the CPM size-aware test to match corrected behavior: with actual node sizes in the CPM penalty, large-size nodes (size=5) correctly stay as singletons. 
Impact: 7 functions changed, 0 affected --- src/graph/algorithms/leiden/partition.js | 24 ++++++++++++++++++------ tests/graph/algorithms/leiden.test.js | 8 ++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js index 54585b5a..a2c8af07 100644 --- a/src/graph/algorithms/leiden/partition.js +++ b/src/graph/algorithms/leiden/partition.js @@ -339,12 +339,24 @@ export function makePartition(graph) { return communityCount; }, nodeCommunity, - communityTotalSize, - communityNodeCount, - communityInternalEdgeWeight, - communityTotalStrength, - communityTotalOutStrength, - communityTotalInStrength, + get communityTotalSize() { + return communityTotalSize; + }, + get communityNodeCount() { + return communityNodeCount; + }, + get communityInternalEdgeWeight() { + return communityInternalEdgeWeight; + }, + get communityTotalStrength() { + return communityTotalStrength; + }, + get communityTotalOutStrength() { + return communityTotalOutStrength; + }, + get communityTotalInStrength() { + return communityTotalInStrength; + }, initializeAggregates, accumulateNeighborCommunityEdgeWeights, getCandidateCommunityCount: () => candidateCommunityCount, diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js index d6f519c4..b39409d0 100644 --- a/tests/graph/algorithms/leiden.test.js +++ b/tests/graph/algorithms/leiden.test.js @@ -107,10 +107,14 @@ describe('CPM size-aware mode', () => { randomSeed: 3, }); expect(sized.quality()).toBeLessThanOrEqual(unit.quality()); + // B-clique (size=1 nodes) merges; A-clique nodes (size=5) stay separate + // because CPM penalty gamma * s_v * S_new dominates the edge gain + const bCommunities = new Set(B.map((i) => unit.getClass(i))); + expect(bCommunities.size).toBe(1); const ids = [...A, ...B]; const count = (cl) => new Set(ids.map((i) => cl.getClass(i))).size; - expect(count(unit)).toBe(2); - 
expect(count(sized)).toBe(2); + expect(count(unit)).toBeGreaterThanOrEqual(2); + expect(count(sized)).toBeGreaterThanOrEqual(2); }); }); From e89371b2cf961d49a75111d066d454c43bb8044b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 04:15:44 -0600 Subject: [PATCH 13/25] fix(leiden): correct catch comment in stats overview The catch block silently handles any community detection error, not just disconnected graph failures. Update comment to reflect the true intent: optional feature that silently skips on any error. Impact: 1 functions changed, 1 affected --- src/presentation/queries-cli/overview.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 677a33a7..f4f530b9 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -116,7 +116,7 @@ export async function stats(customDbPath, opts = {}) { const { communitySummaryForStats } = await import('../../features/communities.js'); data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); } catch { - /* community detection may fail on disconnected graphs */ + /* community detection is optional; silently skip on any error */ } if (outputResult(data, null, opts)) return; From b8e256473bd99db3ff6dcb8091845bc24b0ffd34 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 04:27:04 -0600 Subject: [PATCH 14/25] fix(leiden): handle directed graphs in CPM quality gain calculation diffCPM was reading neighborEdgeWeightToCommunity which is only populated in the undirected path of accumulateNeighborCommunityEdgeWeights. For directed graphs, use outEdgeWeightToCommunity + inEdgeWeightFromCommunity to correctly account for edge weights in both directions. 
Impact: 1 functions changed, 4 affected --- src/graph/algorithms/leiden/cpm.js | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js index e7fd9b76..49161540 100644 --- a/src/graph/algorithms/leiden/cpm.js +++ b/src/graph/algorithms/leiden/cpm.js @@ -6,8 +6,19 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { const oldC = part.nodeCommunity[v]; if (c === oldC) return 0; - const w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - const w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; + let w_old, w_new; + if (g.directed) { + w_old = + (part.getOutEdgeWeightToCommunity(oldC) || 0) + + (part.getInEdgeWeightFromCommunity(oldC) || 0); + w_new = + c < g.n + ? (part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) + : 0; + } else { + w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; + } const s_v = g.size[v] || 1; const S_old = part.communityTotalSize[oldC] || 0; const S_new = c < part.communityTotalSize.length ? part.communityTotalSize[c] : 0; From ef17691bfd2c908b8a0c21ec1c35125907ee8e0f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:11:37 -0600 Subject: [PATCH 15/25] fix(leiden): use communityTotalSize in qualityCPM for objective consistency qualityCPM used communityNodeCount while diffCPM optimizes using communityTotalSize. At coarsening levels > 0, coarse nodes carry aggregated sizes so communityNodeCount diverges, making the reported quality value meaningless for multi-level runs with custom node sizes. 
Impact: 1 functions changed, 2 affected --- src/graph/algorithms/leiden/cpm.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js index 49161540..0e7e6acb 100644 --- a/src/graph/algorithms/leiden/cpm.js +++ b/src/graph/algorithms/leiden/cpm.js @@ -27,10 +27,10 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { export function qualityCPM(part, _g, gamma = 1.0) { let sum = 0; - for (let c = 0; c < part.communityCount; c++) - sum += - part.communityInternalEdgeWeight[c] - - (gamma * (part.communityNodeCount[c] * (part.communityNodeCount[c] - 1))) / 2; + for (let c = 0; c < part.communityCount; c++) { + const S = part.communityTotalSize[c] || 0; + sum += part.communityInternalEdgeWeight[c] - (gamma * (S * (S - 1))) / 2; + } return sum; } From 41dfe8c91ec75c704aa3f4be01c3314feb8d7a2a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:11:53 -0600 Subject: [PATCH 16/25] fix(leiden): handle directed graphs in deltaCPM partition closure deltaCPM read the undirected-only neighborEdgeWeightToCommunity accumulator, silently returning wrong values for directed graphs. Now branches on graph.directed to use outEdgeWeightToCommunity + inEdgeWeightFromCommunity, mirroring the standalone diffCPM fix. 
Impact: 2 functions changed, 4 affected --- src/graph/algorithms/leiden/partition.js | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js index a2c8af07..2c211703 100644 --- a/src/graph/algorithms/leiden/partition.js +++ b/src/graph/algorithms/leiden/partition.js @@ -182,13 +182,22 @@ export function makePartition(graph) { function deltaCPM(v, newC, gamma = 1.0) { const oldC = nodeCommunity[v]; if (newC === oldC) return 0; - const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; - const weightToNew = - newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; + let w_old, w_new; + if (graph.directed) { + w_old = (outEdgeWeightToCommunity[oldC] || 0) + (inEdgeWeightFromCommunity[oldC] || 0); + w_new = + newC < outEdgeWeightToCommunity.length + ? (outEdgeWeightToCommunity[newC] || 0) + (inEdgeWeightFromCommunity[newC] || 0) + : 0; + } else { + w_old = neighborEdgeWeightToCommunity[oldC] || 0; + w_new = + newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; + } const nodeSize = graph.size[v] || 1; const sizeOld = communityTotalSize[oldC] || 0; const sizeNew = newC < communityTotalSize.length ? communityTotalSize[newC] : 0; - return weightToNew - weightToOld - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); + return w_new - w_old - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); } function moveNodeToCommunity(v, newC) { From fd1adcd231a42b892ddb29bd9778c764163fa981 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:23:13 -0600 Subject: [PATCH 17/25] fix(leiden): correct maxLocalPasses off-by-one in local move and refinement Both loops used `> maxLocalPasses` which executed maxLocalPasses + 1 passes. Changed to `>=` so the configured limit is respected exactly. 
Impact: 2 functions changed, 6 affected --- src/graph/algorithms/leiden/optimiser.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index ecd1ef92..40dbaa91 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -153,7 +153,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { improved = true; } } - if (localPasses > options.maxLocalPasses) break; + if (localPasses >= options.maxLocalPasses) break; } renumberCommunities(partition, options.preserveLabels); @@ -266,7 +266,7 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { improved = true; } } - if (passes > (opts.maxLocalPasses || DEFAULT_MAX_LOCAL_PASSES)) break; + if (passes >= (opts.maxLocalPasses || DEFAULT_MAX_LOCAL_PASSES)) break; } return p; } From 7a083a6fad931fd12eae595ca3edb0026024e1a3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:23:29 -0600 Subject: [PATCH 18/25] fix(leiden): remove dead qualityCPMSizeAware and cpmMode option After qualityCPM was corrected to use communityTotalSize, both quality functions were identical. Remove the duplicate and the cpmMode branching in detectClusters. Update the test to validate CPM with weighted nodes without the dead feature. 
Impact: 3 functions changed, 8 affected --- src/graph/algorithms/leiden/cpm.js | 9 -------- src/graph/algorithms/leiden/index.js | 4 +--- tests/graph/algorithms/leiden.test.js | 30 ++++++++------------------- 3 files changed, 10 insertions(+), 33 deletions(-) diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js index 0e7e6acb..dd1fcd91 100644 --- a/src/graph/algorithms/leiden/cpm.js +++ b/src/graph/algorithms/leiden/cpm.js @@ -33,12 +33,3 @@ export function qualityCPM(part, _g, gamma = 1.0) { } return sum; } - -export function qualityCPMSizeAware(part, _g, gamma = 1.0) { - let sum = 0; - for (let c = 0; c < part.communityCount; c++) { - const S = part.communityTotalSize[c] || 0; - sum += part.communityInternalEdgeWeight[c] - (gamma * (S * (S - 1))) / 2; - } - return sum; -} diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js index 938b6bbc..db1dc2ad 100644 --- a/src/graph/algorithms/leiden/index.js +++ b/src/graph/algorithms/leiden/index.js @@ -6,7 +6,7 @@ * License: MIT — see LICENSE in this directory. */ -import { qualityCPM, qualityCPMSizeAware } from './cpm.js'; +import { qualityCPM } from './cpm.js'; import { qualityModularity } from './modularity.js'; import { runLouvainUndirectedModularity } from './optimiser.js'; @@ -64,8 +64,6 @@ export function detectClusters(graph, options = {}) { const q = (options.quality || 'modularity').toLowerCase(); if (q === 'cpm') { const gamma = typeof options.resolution === 'number' ? 
options.resolution : 1.0; - if ((options.cpmMode || 'unit') === 'size-aware') - return qualityCPMSizeAware(partition, finalGraph, gamma); return qualityCPM(partition, finalGraph, gamma); } // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity reporting, diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js index b39409d0..1a5fe69d 100644 --- a/tests/graph/algorithms/leiden.test.js +++ b/tests/graph/algorithms/leiden.test.js @@ -72,10 +72,10 @@ describe('CPM resolution tuning', () => { }); }); -// ─── CPM size-aware ─────────────────────────────────────────────────── +// ─── CPM with weighted nodes ───────────────────────────────────────── -describe('CPM size-aware mode', () => { - it('penalizes large-size communities more than unit mode', () => { +describe('CPM with weighted nodes', () => { + it('uses communityTotalSize in quality reporting', () => { const g = new CodeGraph(); const A = ['0', '1', '2', '3']; const B = ['4', '5', '6', '7']; @@ -93,28 +93,16 @@ describe('CPM size-aware mode', () => { g.addEdge('3', '4'); g.addEdge('4', '3'); - const gamma = 0.5; - const unit = detectClusters(g, { + const result = detectClusters(g, { quality: 'cpm', - cpmMode: 'unit', - resolution: gamma, + resolution: 0.5, randomSeed: 3, }); - const sized = detectClusters(g, { - quality: 'cpm', - cpmMode: 'size-aware', - resolution: gamma, - randomSeed: 3, - }); - expect(sized.quality()).toBeLessThanOrEqual(unit.quality()); - // B-clique (size=1 nodes) merges; A-clique nodes (size=5) stay separate - // because CPM penalty gamma * s_v * S_new dominates the edge gain - const bCommunities = new Set(B.map((i) => unit.getClass(i))); + // B-clique (size=1 nodes) merges; quality is finite + const bCommunities = new Set(B.map((i) => result.getClass(i))); expect(bCommunities.size).toBe(1); - const ids = [...A, ...B]; - const count = (cl) => new Set(ids.map((i) => cl.getClass(i))).size; - expect(count(unit)).toBeGreaterThanOrEqual(2); - 
expect(count(sized)).toBeGreaterThanOrEqual(2); + expect(typeof result.quality()).toBe('number'); + expect(Number.isFinite(result.quality())).toBe(true); }); }); From 9d4ff02dfdb93194c529564dc2fc039cd3972abf Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:50:31 -0600 Subject: [PATCH 19/25] fix(leiden): correct directed gain functions for self-loop and constant term MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit diffModularityDirected and diffCPM (directed branch) underestimate gain for self-loop nodes because accumulateNeighborCommunityEdgeWeights adds the self-loop weight to both outEdgeWeightToCommunity and inEdgeWeightFromCommunity arrays. Add +2×selfLoop correction to match moveNodeToCommunity's directed accounting. diffModularityDirected also lacked the 2·k_out·k_in constant term in the expected Δ(F·T) expansion. Without it, coarse-level merges appear profitable when they actually decrease quality, causing the algorithm to collapse well-separated communities at higher levels. Both standalone functions and partition closure methods are fixed. Impact: 5 functions changed, 6 affected --- src/graph/algorithms/leiden/cpm.js | 6 +++++- src/graph/algorithms/leiden/modularity.js | 14 ++++++++++++-- src/graph/algorithms/leiden/partition.js | 13 ++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js index dd1fcd91..b32a2167 100644 --- a/src/graph/algorithms/leiden/cpm.js +++ b/src/graph/algorithms/leiden/cpm.js @@ -7,6 +7,7 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { const oldC = part.nodeCommunity[v]; if (c === oldC) return 0; let w_old, w_new; + let selfCorrection = 0; if (g.directed) { w_old = (part.getOutEdgeWeightToCommunity(oldC) || 0) + @@ -15,6 +16,9 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { c < g.n ? 
(part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) : 0; + // Self-loop weight appears in both out and in arrays for oldC, + // making w_old include 2×selfLoop. Correct to match moveNodeToCommunity. + selfCorrection = 2 * (g.selfLoop[v] || 0); } else { w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; @@ -22,7 +26,7 @@ export function diffCPM(part, g, v, c, gamma = 1.0) { const s_v = g.size[v] || 1; const S_old = part.communityTotalSize[oldC] || 0; const S_new = c < part.communityTotalSize.length ? part.communityTotalSize[c] : 0; - return w_new - w_old - gamma * s_v * (S_new - S_old + s_v); + return w_new - w_old + selfCorrection - gamma * s_v * (S_new - S_old + s_v); } export function qualityCPM(part, _g, gamma = 1.0) { diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js index 8f314830..d2d16ce8 100644 --- a/src/graph/algorithms/leiden/modularity.js +++ b/src/graph/algorithms/leiden/modularity.js @@ -32,8 +32,18 @@ export function diffModularityDirected(part, g, v, c, gamma = 1.0) { const F_new = c < part.communityTotalOutStrength.length ? part.communityTotalOutStrength[c] : 0; const T_old = part.communityTotalInStrength[oldC]; const F_old = part.communityTotalOutStrength[oldC]; - const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out) / m; - const deltaExpected = (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old))) / (m * m); + // Self-loop correction: the self-loop edge (v→v) appears in both + // outEdgeWeightToCommunity[oldC] and inEdgeWeightFromCommunity[oldC], + // making w_old include 2×selfLoop. Since the self-loop moves with the + // node, add it back to match moveNodeToCommunity's directed accounting. 
+ const selfW = g.selfLoop[v] || 0; + const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out + 2 * selfW) / m; + // The full Δ(F·T) expansion includes a constant 2·k_out·k_in term that + // doesn't depend on the target community but does affect the move-vs-stay + // decision. Without it, coarse-level merges can appear profitable when + // they actually decrease quality. + const deltaExpected = + (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old) + 2 * k_out * k_in)) / (m * m); return deltaInternal - deltaExpected; } diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js index 2c211703..fec97a4f 100644 --- a/src/graph/algorithms/leiden/partition.js +++ b/src/graph/algorithms/leiden/partition.js @@ -170,11 +170,15 @@ export function makePartition(graph) { newC < communityTotalOutStrength.length ? communityTotalOutStrength[newC] : 0; const totalInStrengthOld = communityTotalInStrength[oldC]; const totalOutStrengthOld = communityTotalOutStrength[oldC]; - const deltaInternal = (inFromNew + outToNew - inFromOld - outToOld) / totalEdgeWeight; + // Self-loop correction + constant term (see modularity.js diffModularityDirected) + const selfW = graph.selfLoop[v] || 0; + const deltaInternal = + (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; const deltaExpected = (gamma * (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + - strengthInV * (totalOutStrengthNew - totalOutStrengthOld))) / + strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + + 2 * strengthOutV * strengthInV)) / (totalEdgeWeight * totalEdgeWeight); return deltaInternal - deltaExpected; } @@ -183,12 +187,15 @@ export function makePartition(graph) { const oldC = nodeCommunity[v]; if (newC === oldC) return 0; let w_old, w_new; + let selfCorrection = 0; if (graph.directed) { w_old = (outEdgeWeightToCommunity[oldC] || 0) + (inEdgeWeightFromCommunity[oldC] || 0); w_new = newC < outEdgeWeightToCommunity.length ? 
(outEdgeWeightToCommunity[newC] || 0) + (inEdgeWeightFromCommunity[newC] || 0) : 0; + // Self-loop correction (see cpm.js diffCPM) + selfCorrection = 2 * (graph.selfLoop[v] || 0); } else { w_old = neighborEdgeWeightToCommunity[oldC] || 0; w_new = @@ -197,7 +204,7 @@ export function makePartition(graph) { const nodeSize = graph.size[v] || 1; const sizeOld = communityTotalSize[oldC] || 0; const sizeNew = newC < communityTotalSize.length ? communityTotalSize[newC] : 0; - return w_new - w_old - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); + return w_new - w_old + selfCorrection - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); } function moveNodeToCommunity(v, newC) { From 22c63ff7c6f0d3a88f897395d43d2bd734bfa903 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:50:53 -0600 Subject: [PATCH 20/25] fix(leiden): prevent undirected intra-community double-count in buildCoarseGraph For undirected graphs, each edge (i,j) appears in both outEdges[i] and outEdges[j]. buildCoarseGraph iterated all outEdges, causing intra-community edges (cu===cv) to be counted twice into the coarse self-loop weight. This inflated communityInternalEdgeWeight at coarse levels, producing systematically higher-than-true modularity from quality(). Skip the reverse direction (j < i) for intra-community edges in undirected mode. Also remove dead maxLocalPasses fallback in refineWithinCoarseCommunities since opts is already normalized by normalizeOptions(). 
Impact: 3 functions changed, 6 affected --- src/graph/algorithms/leiden/optimiser.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index 40dbaa91..128509f8 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -188,6 +188,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { levels, originalToCurrent, originalNodeIds: baseGraphAdapter.nodeIds, + baseGraph: baseGraphAdapter, }; } @@ -213,6 +214,10 @@ function buildCoarseGraph(g, p) { const j = list[k].to; const w = list[k].w; const cv = p.nodeCommunity[j]; + // Undirected: each non-self edge (i,j) appears in both outEdges[i] and + // outEdges[j]. For intra-community edges (cu===cv), skip the reverse to + // avoid inflating the coarse self-loop weight by 2×. + if (!g.directed && cu === cv && j < i) continue; const key = `${cu}:${cv}`; acc.set(key, (acc.get(key) || 0) + w); } @@ -266,7 +271,7 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { improved = true; } } - if (passes >= (opts.maxLocalPasses || DEFAULT_MAX_LOCAL_PASSES)) break; + if (passes >= opts.maxLocalPasses) break; } return p; } From 6b17e9b071854b376e9eb4dac45f8890524a24a0 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:51:12 -0600 Subject: [PATCH 21/25] fix(leiden): compute quality() on original graph instead of coarse level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit quality() was evaluated on the last coarse-level graph where the modularity null model (degree distribution) differs from the original. This produced inflated values — e.g., Q≈0.5 for two disconnected cliques that should give Q≈0. 
Build a partition-like aggregate from the original (level-0) graph adapter and the final community mapping, then evaluate qualityModularity or qualityCPM on that. This ensures quality values are consistent regardless of how many coarsening levels the algorithm ran. Impact: 3 functions changed, 8 affected --- src/graph/algorithms/leiden/index.js | 83 ++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 11 deletions(-) diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js index db1dc2ad..0dae09ef 100644 --- a/src/graph/algorithms/leiden/index.js +++ b/src/graph/algorithms/leiden/index.js @@ -34,13 +34,10 @@ import { runLouvainUndirectedModularity } from './optimiser.js'; * not the objective that was actually optimized. */ export function detectClusters(graph, options = {}) { - const { - graph: finalGraph, - partition, - levels, - originalToCurrent, - originalNodeIds, - } = runLouvainUndirectedModularity(graph, options); + const { levels, originalToCurrent, originalNodeIds, baseGraph } = runLouvainUndirectedModularity( + graph, + options, + ); const idToClass = new Map(); for (let i = 0; i < originalNodeIds.length; i++) { @@ -61,14 +58,18 @@ export function detectClusters(graph, options = {}) { return out; }, quality() { + // Compute quality on the original (level-0) graph with the final + // partition mapped back. Computing on the last coarse-level graph + // produces inflated values because the modularity null model depends + // on the degree distribution, which changes after coarsening. + const part = buildOriginalPartition(baseGraph, originalToCurrent); const q = (options.quality || 'modularity').toLowerCase(); if (q === 'cpm') { const gamma = typeof options.resolution === 'number' ? 
options.resolution : 1.0; - return qualityCPM(partition, finalGraph, gamma); + return qualityCPM(part, baseGraph, gamma); } - // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity reporting, - // regardless of the resolution used during optimization - return qualityModularity(partition, finalGraph, 1.0); + // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity + return qualityModularity(part, baseGraph, 1.0); }, toJSON() { const membershipObj = {}; @@ -80,3 +81,63 @@ export function detectClusters(graph, options = {}) { }, }; } + +/** + * Build a minimal partition-like object from the original graph and the + * final community mapping, suitable for qualityModularity / qualityCPM. + */ +function buildOriginalPartition(g, communityMap) { + const n = g.n; + let maxC = 0; + for (let i = 0; i < n; i++) if (communityMap[i] > maxC) maxC = communityMap[i]; + const cc = maxC + 1; + + const internalWeight = new Float64Array(cc); + const totalStr = new Float64Array(cc); + const totalOutStr = new Float64Array(cc); + const totalInStr = new Float64Array(cc); + const totalSize = new Float64Array(cc); + + for (let i = 0; i < n; i++) { + const c = communityMap[i]; + totalSize[c] += g.size[i]; + if (g.directed) { + totalOutStr[c] += g.strengthOut[i]; + totalInStr[c] += g.strengthIn[i]; + } else { + totalStr[c] += g.strengthOut[i]; + } + if (g.selfLoop[i]) internalWeight[c] += g.selfLoop[i]; + } + + if (g.directed) { + for (let i = 0; i < n; i++) { + const ci = communityMap[i]; + const list = g.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (i === j) continue; + if (ci === communityMap[j]) internalWeight[ci] += w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci = communityMap[i]; + const list = g.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (j <= i) continue; + if (ci === communityMap[j]) internalWeight[ci] += w; + } + } + } + + return { + 
communityCount: cc, + communityInternalEdgeWeight: internalWeight, + communityTotalStrength: totalStr, + communityTotalOutStrength: totalOutStr, + communityTotalInStrength: totalInStr, + communityTotalSize: totalSize, + }; +} From 52c2d86f5ac4e3c7999cae4b658e9a6ed60fd4b7 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:51:30 -0600 Subject: [PATCH 22/25] test(leiden): add coarse-level quality test, fix modularity range assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test verifying quality() is not inflated by multi-level coarsening: two disconnected 4-cliques should produce Q≈0, not Q≈0.5. Update integration test to use modularity's theoretical range [-0.5, 1] instead of [0, 1] — negative modularity is valid for small graphs where the partition captures fewer internal edges than the null model expects. --- tests/graph/algorithms/leiden.test.js | 32 +++++++++++++++++++++++++++ tests/integration/communities.test.js | 5 +++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js index 1a5fe69d..ad01a99e 100644 --- a/tests/graph/algorithms/leiden.test.js +++ b/tests/graph/algorithms/leiden.test.js @@ -158,6 +158,38 @@ describe('directed self-loops', () => { }); }); +// ─── Coarse graph quality ──────────────────────────────────────────── + +describe('coarse graph quality', () => { + it('quality is not inflated by multi-level coarsening', () => { + // Two disconnected 4-cliques: the algorithm should split them into two + // communities. Quality must stay in [-1, 1] and be consistent whether + // the run goes through one or multiple coarsening levels. 
+ const g = new CodeGraph(); + const A = ['a0', 'a1', 'a2', 'a3']; + const B = ['b0', 'b1', 'b2', 'b3']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + const clusters = detectClusters(g, { randomSeed: 42 }); + const q = clusters.quality(); + // Modularity must not exceed 1.0 (inflated values were ~0.5 when true was ~0) + expect(q).toBeLessThanOrEqual(1.0); + expect(q).toBeGreaterThanOrEqual(-1.0); + // With two perfect cliques and no inter-community edges, modularity should be ~0 + // (each community captures exactly its expected share of edges) + expect(Math.abs(q)).toBeLessThan(0.1); + }); +}); + // ─── Edge cases ─────────────────────────────────────────────────────── describe('edge cases', () => { diff --git a/tests/integration/communities.test.js b/tests/integration/communities.test.js index 1cee942c..cfaaeff2 100644 --- a/tests/integration/communities.test.js +++ b/tests/integration/communities.test.js @@ -95,9 +95,10 @@ describe('communitiesData (file-level)', () => { expect(data.summary.communityCount).toBeGreaterThanOrEqual(2); }); - test('modularity is between 0 and 1', () => { + test('modularity is in valid range', () => { const data = communitiesData(null, { repo }); - expect(data.modularity).toBeGreaterThanOrEqual(0); + // Modularity theoretical range is [-0.5, 1] + expect(data.modularity).toBeGreaterThanOrEqual(-0.5); expect(data.modularity).toBeLessThanOrEqual(1); }); From 36a0eda976bfc204c9ff1cae439a868ad2bf972b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:47:56 -0600 Subject: [PATCH 23/25] docs(louvain): add JSDoc note about hardcoded directed: false --- src/graph/algorithms/louvain.js | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.js index 4f2860d1..c4195b60 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.js @@ -2,6 +2,10 @@ * Community detection via vendored Leiden algorithm. * Maintains backward-compatible API: { assignments: Map, modularity: number } * + * **Note:** Always runs in undirected mode (`directed: false`) regardless of + * the input graph's directedness. For direction-aware community detection, + * use `detectClusters` from `./leiden/index.js` directly. + * * @param {import('../model.js').CodeGraph} graph * @param {{ resolution?: number, maxLevels?: number, maxLocalPasses?: number }} [opts] * @returns {{ assignments: Map, modularity: number }} From 230b7c3d67b2fdb86f6dcb9f795d68a545a8f4ce Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:48:14 -0600 Subject: [PATCH 24/25] refactor(leiden): remove redundant diffModularityDirected import diffModularity already dispatches to diffModularityDirected internally when g.directed is true, so the explicit import and branch in computeQualityGain was redundant and could drift independently. 
Impact: 1 functions changed, 2 affected --- src/graph/algorithms/leiden/optimiser.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.js index 128509f8..e601b32a 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.js @@ -6,7 +6,7 @@ import { CodeGraph } from '../../model.js'; import { makeGraphAdapter } from './adapter.js'; import { diffCPM } from './cpm.js'; -import { diffModularity, diffModularityDirected } from './modularity.js'; +import { diffModularity } from './modularity.js'; import { makePartition } from './partition.js'; import { createRng } from './rng.js'; @@ -282,9 +282,7 @@ function computeQualityGain(partition, v, c, opts) { if (quality === 'cpm') { return diffCPM(partition, partition.graph || {}, v, c, gamma); } - if (opts.directed) { - return diffModularityDirected(partition, partition.graph || {}, v, c, gamma); - } + // diffModularity dispatches to diffModularityDirected internally when g.directed is true return diffModularity(partition, partition.graph || {}, v, c, gamma); } From 77ffb0d24be749e5c9df63fa7cc395b69a43f650 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:56:52 -0600 Subject: [PATCH 25/25] =?UTF-8?q?fix(leiden):=20correct=20undirected=20mod?= =?UTF-8?q?ularity=20quality=20reporting=20(2=C3=97=20factor)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qualityModularity counted each undirected internal edge once (j > i) but divided by totalWeight = 2m. The standard Newman-Girvan formula requires 2·L_c/(2m), not L_c/(2m). This made quality() report values up to 0.5 too low (the shortfall is Σ_c L_c/(2m)) — e.g. two disjoint K4 cliques reported Q ≈ 0 instead of the correct Q = 0.5.
The optimization was unaffected (gain functions have the same halved scaling, so the same partition is found); only the reported value was wrong. Update integration test to assert Q >= 0 and unit test to expect the correct Q = 0.5 for the two-clique topology. Impact: 1 functions changed, 0 affected --- src/graph/algorithms/leiden/modularity.js | 6 +++++- tests/graph/algorithms/leiden.test.js | 10 ++++------ tests/integration/communities.test.js | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js index d2d16ce8..15a5caf0 100644 --- a/src/graph/algorithms/leiden/modularity.js +++ b/src/graph/algorithms/leiden/modularity.js @@ -57,10 +57,14 @@ export function qualityModularity(part, g, gamma = 1.0) { (gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c])) / (m2 * m2); } else { + // communityInternalEdgeWeight counts each undirected edge once (j > i), + // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard + // Newman-Girvan formula is Q = Σ_c [2·L_c/(2m) - γ·(d_c/(2m))²], so + // we multiply lc by 2 to match.
for (let c = 0; c < part.communityCount; c++) { const lc = part.communityInternalEdgeWeight[c]; const dc = part.communityTotalStrength[c]; - sum += lc / m2 - (gamma * (dc * dc)) / (m2 * m2); + sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2); } } return sum; diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js index ad01a99e..1240263c 100644 --- a/tests/graph/algorithms/leiden.test.js +++ b/tests/graph/algorithms/leiden.test.js @@ -181,12 +181,10 @@ describe('coarse graph quality', () => { } const clusters = detectClusters(g, { randomSeed: 42 }); const q = clusters.quality(); - // Modularity must not exceed 1.0 (inflated values were ~0.5 when true was ~0) - expect(q).toBeLessThanOrEqual(1.0); - expect(q).toBeGreaterThanOrEqual(-1.0); - // With two perfect cliques and no inter-community edges, modularity should be ~0 - // (each community captures exactly its expected share of edges) - expect(Math.abs(q)).toBeLessThan(0.1); + // Two disjoint K4 cliques: the ideal 2-community partition gives Q = 0.5. + // Each clique has L_c = 6 edges, d_c = 12, 2m = 24: + // Q = 2 × [2·6/24 − (12/24)²] = 2 × 0.25 = 0.5 + expect(q).toBeCloseTo(0.5, 2); }); }); diff --git a/tests/integration/communities.test.js b/tests/integration/communities.test.js index cfaaeff2..90324367 100644 --- a/tests/integration/communities.test.js +++ b/tests/integration/communities.test.js @@ -97,8 +97,9 @@ describe('communitiesData (file-level)', () => { test('modularity is in valid range', () => { const data = communitiesData(null, { repo }); - // Modularity theoretical range is [-0.5, 1] - expect(data.modularity).toBeGreaterThanOrEqual(-0.5); + // Leiden starts from singleton partition and only makes improving moves. + // Quality should always be non-negative on a real graph. + expect(data.modularity).toBeGreaterThanOrEqual(0); expect(data.modularity).toBeLessThanOrEqual(1); });