diff --git a/docs/tasks/PLAN_centralize_config.md b/docs/tasks/PLAN_centralize_config.md deleted file mode 100644 index c8970ae0..00000000 --- a/docs/tasks/PLAN_centralize_config.md +++ /dev/null @@ -1,285 +0,0 @@ -# Plan: Centralize Hardcoded Configuration - -> **Goal:** Eliminate magic numbers scattered across the codebase by routing all tunable parameters through the existing `.codegraphrc.json` config system (`DEFAULTS` in `src/infrastructure/config.js`). - -## Problem - -The config system already exists and handles env overrides, but ~70 individual behavioral constants (34 inventory entries expanding to ~70 discrete values when counting sub-keys in B1, B2, and E1) are hardcoded in individual modules and never read from config. Users cannot tune thresholds, depths, weights, or limits without editing source code. - ---- - -## Inventory of Hardcoded Values - -### Category A — Analysis Parameters (high user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| A1 | `maxDepth = 5` | `domain/analysis/impact.js` | 111 | `fn-impact` transitive caller depth | -| A2 | `maxDepth = 3` | `domain/analysis/impact.js` | 31, 144 | BFS default depth for impact/diff-impact | -| A3 | `maxDepth = 3` | `features/audit.js` | 102 | Audit blast-radius depth | -| A4 | `maxDepth = 3` | `features/check.js` | 220 | CI check blast-radius depth | -| A5 | `maxDepth = 10` | `features/sequence.js` | 91 | Sequence diagram traversal depth | -| A6 | `FALSE_POSITIVE_CALLER_THRESHOLD = 20` | `domain/analysis/module-map.js` | 37 | Generic function false-positive filter | -| A7 | `resolution = 1.0` | `graph/algorithms/louvain.js` | 17 | Louvain community detection granularity | -| A8 | `driftThreshold = 0.3` | `features/structure.js` | 581 | Structure cohesion drift warning | -| A9 | `maxCallers >= 10` | `domain/analysis/brief.js` | 38 | `brief` high-risk tier threshold | -| A10 | `maxCallers >= 3` | `domain/analysis/brief.js` | 39 | `brief` medium-risk tier 
threshold | -| A11 | `maxDepth = 5` | `domain/analysis/brief.js` | 47 | `brief` transitive caller BFS depth | -| A12 | `maxDepth = 5` | `domain/analysis/brief.js` | 73 | `brief` transitive importer BFS depth | - -### Category B — Risk & Scoring Weights (medium-high user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| B1 | `fanIn: 0.25, complexity: 0.3, churn: 0.2, role: 0.15, mi: 0.1` | `graph/classifiers/risk.js` | 10-14 | Risk score weighting | -| B2 | `core: 1.0, utility: 0.9, entry: 0.8, adapter: 0.5, leaf: 0.2, dead: 0.1` | `graph/classifiers/risk.js` | 21-27 | Role importance weights | -| B3 | `DEFAULT_ROLE_WEIGHT = 0.5` | `graph/classifiers/risk.js` | 30 | Fallback role weight | - -### Category C — Search & Embedding (already partially in config) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| C1 | `limit = 15` | `domain/search/search/hybrid.js` | 12 | Hybrid search default limit | -| C2 | `rrfK = 60` | `domain/search/search/hybrid.js` | 13 | RRF fusion constant | -| C3 | `limit = 15` | `domain/search/search/semantic.js` | 12 | Semantic search default limit | -| C4 | `minScore = 0.2` | `domain/search/search/semantic.js` | 13, 52 | Minimum similarity threshold | -| C5 | `SIMILARITY_WARN_THRESHOLD = 0.85` | `domain/search/search/semantic.js` | 71 | Duplicate query warning | -| ~~C6~~ | ~~Batch sizes per model~~ | — | — | Moved to Category F (see below) | - -### Category D — Display & Truncation (low-medium user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| D1 | `MAX_COL_WIDTH = 40` | `presentation/result-formatter.js` | 82 | Table column width | -| D2 | `50 lines` | `shared/file-utils.js` | 23 | Source context excerpt length | -| D3 | `100 chars` | `shared/file-utils.js` | 48, 63 | Summary/docstring truncation | -| D4a | `10 lines` | `shared/file-utils.js` | 36 | JSDoc block-end scan depth (upward scan for `*/`) | -| D4b | `20 lines` | 
`shared/file-utils.js` | 54 | JSDoc opening scan depth (upward scan for `/**`) | -| D5 | `5 lines` | `shared/file-utils.js` | 76 | Multi-line signature gather | - -### Category E — MCP Pagination (medium user value) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| E1 | `MCP_DEFAULTS` (22 entries) | `shared/paginate.js` | 9-34 | Per-tool default page sizes | -| ~~E2~~ | ~~`MCP_MAX_LIMIT = 1000`~~ | — | — | Moved to Category F (see below) | - -### Category F — Infrastructure (low user value, keep hardcoded) - -| # | Value | File | Line | Controls | -|---|-------|------|------|----------| -| F1 | `CACHE_TTL_MS = 86400000` | `infrastructure/update-check.js` | 10 | Version check cache (24h) | -| F2 | `FETCH_TIMEOUT_MS = 3000` | `infrastructure/update-check.js` | 11 | Version check HTTP timeout | -| F3 | `debounce = 300` | `domain/graph/watcher.js` | 80 | File watcher debounce (ms) | -| F4 | `maxBuffer = 10MB` | `features/check.js` | 260 | Git diff buffer | -| F5 | `volume / 3000` | `features/complexity.js` | 85 | Halstead bugs formula (standard) | -| F6 | `timeout = 10_000` | `infrastructure/config.js` | 110 | apiKeyCommand timeout | -| F7 | `MCP_MAX_LIMIT = 1000` | `shared/paginate.js` | 37 | Hard abuse-prevention cap — server-side safety boundary, not a tuning knob | -| F8 | Batch sizes per model | `domain/search/models.js` | 66-75 | Embedding batch sizes — model-specific implementation details rarely tuned by end-users, analogous to watcher debounce (F3) | -| F9 | `MAX_VISIT_DEPTH = 200` | `crates/.../dataflow.rs` | 11 | Dataflow AST visit recursion limit — stack overflow prevention | -| F10 | `MAX_WALK_DEPTH = 200` | `crates/.../extractors/helpers.rs` | 6 | Extractor AST walk recursion limit — stack overflow prevention (#481) | -| F11 | `MAX_WALK_DEPTH = 200` | `crates/.../complexity.rs` | 6 | Complexity walk recursion limit — stack overflow prevention (#481) | -| F12 | `MAX_WALK_DEPTH = 200` | `crates/.../cfg.rs` | 5 | CFG 
process_if recursion limit — stack overflow prevention (#481) | - ---- - -## Design - -### Proposed `DEFAULTS` additions in `src/infrastructure/config.js` - -```js -export const DEFAULTS = { - // ... existing fields ... - - analysis: { - impactDepth: 3, // A2: BFS depth for impact/diff-impact - fnImpactDepth: 5, // A1: fn-impact transitive depth - auditDepth: 3, // A3: audit blast-radius depth - sequenceDepth: 10, // A5: sequence diagram depth - falsePositiveCallers: 20, // A6: generic function filter threshold - briefCallerDepth: 5, // A11: brief transitive caller BFS depth - briefImporterDepth: 5, // A12: brief transitive importer BFS depth - briefHighRiskCallers: 10, // A9: brief high-risk tier threshold - briefMediumRiskCallers: 3, // A10: brief medium-risk tier threshold - }, - - community: { - resolution: 1.0, // A7: Louvain resolution (only Louvain params here) - }, - - // build.driftThreshold stays in `build` (already wired in finalize.js line 52) - // — it's a build-pipeline concern, not community detection - - structure: { - cohesionThreshold: 0.3, // A8: structure cohesion drift warning - }, - - risk: { - weights: { // B1 - fanIn: 0.25, - complexity: 0.3, - churn: 0.2, - role: 0.15, - mi: 0.1, - }, - roleWeights: { // B2 - core: 1.0, - utility: 0.9, - entry: 0.8, - adapter: 0.5, - leaf: 0.2, - dead: 0.1, - }, - defaultRoleWeight: 0.5, // B3 - }, - - display: { - maxColWidth: 40, // D1 - excerptLines: 50, // D2 - summaryMaxChars: 100, // D3 - jsdocEndScanLines: 10, // D4a: lines to scan upward for block-end marker (*/) - jsdocOpenScanLines: 20, // D4b: lines to scan upward for /** opening - signatureGatherLines: 5, // D5 - }, - - search: { - // defaultMinScore, rrfK, topK already exist in DEFAULTS — - // add the missing C5 key: - similarityWarnThreshold: 0.85, // C5: duplicate-query warning in multiSearchData - }, - - mcp: { - defaults: { /* E1: current MCP_DEFAULTS object */ }, - // MCP_MAX_LIMIT stays hardcoded (Category F) — server-side safety boundary 
- }, -}; -``` - -### What stays hardcoded (Category F) - -- **Halstead `volume / 3000`** — industry-standard formula, not a tuning knob -- **Git `maxBuffer`** — platform concern, not analysis behavior -- **`apiKeyCommand` timeout** — security boundary, not user-facing -- **Update check TTL/timeout** — implementation detail -- **Watcher debounce** — could be configurable later but low priority -- **`MCP_MAX_LIMIT`** — server-side abuse-prevention cap; making it user-configurable via `.codegraphrc.json` would allow any process with project directory write access to raise it arbitrarily, defeating its security purpose -- **Embedding batch sizes** — model-specific implementation details (per-model map shape); rarely tuned by end-users, analogous to watcher debounce -- **Native engine `MAX_WALK_DEPTH` / `MAX_VISIT_DEPTH` (200)** — stack overflow safety boundaries in Rust extractors, complexity, CFG, and dataflow modules; raising them risks process crashes on deeply nested ASTs - ---- - -## Implementation Plan - -### Phase 1 — Extend DEFAULTS schema (1 PR) - -**Files:** `src/infrastructure/config.js`, `tests/unit/config.test.js` - -1. Add `analysis`, `community`, `structure`, `risk`, `display`, `mcp` sections to `DEFAULTS` -2. Keep `build.driftThreshold` where it is (already wired in `finalize.js` — no migration needed) -3. **Hard prerequisite:** Update `mergeConfig` to perform recursive (deep) merging — at minimum 2 levels deep. The current implementation only merges 1 level deep, which means partial user overrides of nested objects like `risk.weights` (e.g. `{ "complexity": 0.4, "churn": 0.1 }`) will **silently drop** un-specified sibling keys (`fanIn`, `role`, `mi`), producing `NaN` risk scores. This must be fixed before any nested config keys are wired in subsequent phases -4. 
Add tests: loading config with overrides for each new section - -### Phase 2 — Wire analysis parameters (1 PR) - -**Files to change:** -- `src/domain/analysis/impact.js` → read `config.analysis.impactDepth` / `config.analysis.fnImpactDepth` -- `src/features/audit.js` → read `config.analysis.auditDepth` -- `src/features/check.js` → replace hardcoded `3` with `config.check.depth` (already in DEFAULTS, sole authoritative key for check depth — do **not** chain with `config.analysis.impactDepth`) -- `src/features/sequence.js` → read `config.analysis.sequenceDepth` -- `src/domain/analysis/module-map.js` → read `config.analysis.falsePositiveCallers` -- `src/domain/analysis/brief.js` → read `config.analysis.briefCallerDepth`, `config.analysis.briefImporterDepth`, `config.analysis.briefHighRiskCallers`, `config.analysis.briefMediumRiskCallers` (PR #480) - -**Pattern:** Each module calls `loadConfig()` (or receives config as a parameter). Replace the hardcoded value with `config.analysis.X ?? FALLBACK`. The fallback ensures backward compatibility if config is missing. - -**Tests:** Update integration tests to verify custom config values flow through. - -### Phase 3 — Wire risk & community parameters (1 PR) - -**Files to change:** -- `src/graph/classifiers/risk.js` → read `config.risk.weights`, `config.risk.roleWeights`, `config.risk.defaultRoleWeight` -- `src/graph/algorithms/louvain.js` → accept `resolution` parameter, default from config -- `src/features/structure.js` → read `config.structure.cohesionThreshold` - -**Pattern:** These modules don't currently receive config. Options: -1. **Preferred:** Accept an `options` parameter that callers populate from config -2. **Alternative:** Import `loadConfig` directly (adds coupling but simpler) - -**Tests:** Unit tests for risk scoring with custom weights. Integration test for Louvain with custom resolution. 
- -### Phase 4 — Wire search parameters (1 PR) - -**Files to change:** -- `src/domain/search/search/hybrid.js` → read `config.search.rrfK`, `config.search.topK` -- `src/domain/search/search/semantic.js` → read `config.search.defaultMinScore`, `config.search.topK` (C3), and `config.search.similarityWarnThreshold` (C5, replaces hardcoded `SIMILARITY_WARN_THRESHOLD`) -- `src/domain/search/models.js` → batch sizes stay hardcoded (moved to Category F — model-specific implementation details) - -**Note:** `config.search` already exists with `defaultMinScore`, `rrfK`, `topK`. The modules just don't read from it — they duplicate the values. This phase wires the existing config keys. - -### Phase 5 — Wire display & MCP parameters (1 PR) - -**Files to change:** -- `src/presentation/result-formatter.js` → read `config.display.maxColWidth` -- `src/shared/file-utils.js` → read `config.display.excerptLines`, `config.display.jsdocEndScanLines` (D4a, 10 lines), `config.display.jsdocOpenScanLines` (D4b, 20 lines — note different default values), `config.display.summaryMaxChars`, `config.display.signatureGatherLines` -- `src/shared/paginate.js` → read `config.mcp.defaults` (`MCP_MAX_LIMIT` stays hardcoded — security boundary) - -**Consideration:** `file-utils.js` and `paginate.js` are low-level shared utilities. They shouldn't call `loadConfig()` directly. Instead, pass display/mcp settings down from callers, or use a module-level config cache set at startup. - -### Phase 6 — Documentation & migration (1 PR) - -1. Update `README.md` configuration section with the full schema -2. Add a `docs/configuration.md` reference with all keys, types, defaults, and descriptions -3. Document the `structure.cohesionThreshold` key and its relationship to A8 -4. Add a JSON Schema file (`.codegraphrc.schema.json`) for IDE autocomplete -5. 
Add a **Configuration** section to `CLAUDE.md` that documents: - - The `.codegraphrc.json` config file and its location - - The full list of configurable sections (`analysis`, `community`, `structure`, `risk`, `display`, `mcp`, `search`, `check`, `coChange`, `manifesto`) - - Key tunable parameters and their defaults (depth limits, risk weights, thresholds) - - How `mergeConfig` works (partial overrides deep-merge with defaults) - - Env var overrides (`CODEGRAPH_LLM_*`) - - Guidance: when adding new behavioral constants, always add them to `DEFAULTS` in `config.js` and wire them through — never introduce new hardcoded magic numbers - ---- - -## Migration & Backward Compatibility - -- All new config keys have defaults matching current hardcoded values → **zero breaking changes** -- Existing `.codegraphrc.json` files continue to work unchanged -- `mergeConfig` will be updated to deep-merge recursively (Phase 1 prerequisite), so users only need to specify the keys they want to override -- `build.driftThreshold` stays in place — no migration needed - -## Example `.codegraphrc.json` after this work - -```json -{ - "analysis": { - "fnImpactDepth": 8, - "falsePositiveCallers": 30 - }, - "risk": { - "weights": { - "complexity": 0.4, - "churn": 0.1 - } - }, - "community": { "resolution": 1.5 }, - "structure": { "cohesionThreshold": 0.25 }, - "display": { - "maxColWidth": 60 - } -} -``` - ---- - -## Estimated Scope - -| Phase | Files changed | New tests | Risk | -|-------|--------------|-----------|------| -| 1 — Schema | 2 | 3-4 | Low | -| 2 — Analysis wiring | 6 | 4-5 | Low | -| 3 — Risk/community | 3 | 2-3 | Medium (parameter threading) | -| 4 — Search wiring | 3 | 2 | Low (config keys already exist) | -| 5 — Display/MCP | 3 | 2 | Medium (shared utility coupling) | -| 6 — Docs + CLAUDE.md | 5 | 0 | None | - -**Total: ~22 files changed, 6 PRs, one concern per PR.** diff --git a/package.json b/package.json index bf0c7aa0..2c029854 100644 --- a/package.json +++ b/package.json 
@@ -68,8 +68,6 @@ "dependencies": { "better-sqlite3": "^12.6.2", "commander": "^14.0.3", - "graphology": "^0.26.0", - "graphology-communities-louvain": "^2.0.2", "web-tree-sitter": "^0.26.5" }, "peerDependencies": { diff --git a/src/features/communities.js b/src/features/communities.js index 1f3a54d7..a707ab4d 100644 --- a/src/features/communities.js +++ b/src/features/communities.js @@ -147,7 +147,13 @@ export function communitiesData(customDbPath, opts = {}) { const config = opts.config || loadConfig(); const resolution = opts.resolution ?? config.community?.resolution ?? 1.0; - const { assignments, modularity } = louvainCommunities(graph, { resolution }); + const maxLevels = opts.maxLevels ?? config.community?.maxLevels; + const maxLocalPasses = opts.maxLocalPasses ?? config.community?.maxLocalPasses; + const { assignments, modularity } = louvainCommunities(graph, { + resolution, + maxLevels, + maxLocalPasses, + }); const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); const { splitCandidates, mergeCandidates, driftScore } = analyzeDrift(communities, communityDirs); diff --git a/src/graph/algorithms/index.js b/src/graph/algorithms/index.js index 3949b94c..e30637b3 100644 --- a/src/graph/algorithms/index.js +++ b/src/graph/algorithms/index.js @@ -1,5 +1,6 @@ export { bfs } from './bfs.js'; export { fanInOut } from './centrality.js'; +export { detectClusters } from './leiden/index.js'; export { louvainCommunities } from './louvain.js'; export { shortestPath } from './shortest-path.js'; export { tarjan } from './tarjan.js'; diff --git a/src/graph/algorithms/leiden/LICENSE b/src/graph/algorithms/leiden/LICENSE new file mode 100644 index 00000000..25e2bf1c --- /dev/null +++ b/src/graph/algorithms/leiden/LICENSE @@ -0,0 +1,24 @@ +MIT License + +Vendored from ngraph.leiden by Andrei Kashcha (anvaka) +https://github.com/anvaka/ngraph.leiden + +Copyright (c) 2025 Andrei Kashcha + +Permission is hereby granted, free of charge, to any 
/**
 * Flatten a CodeGraph into the dense, index-addressed structure consumed by
 * the Leiden optimiser.
 *
 * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph.
 *
 * Every node receives a consecutive integer index: the position in
 * `opts.baseNodeIds` when that array is non-empty, otherwise the order in
 * which `graph.nodes()` yields it. Edges that touch a node without an index
 * are ignored. Weights that coerce to NaN (or are otherwise falsy) count as 0.
 *
 * In undirected mode (the default) the edges between two distinct nodes are
 * collapsed into one symmetric link whose weight is the summed weight divided
 * by the number of directions (1 or 2) in which edges were seen; links whose
 * resulting weight is 0 are dropped.
 *
 * @param {import('../../model.js').CodeGraph} graph
 * @param {object} [opts]
 * @param {boolean} [opts.directed] - keep edge direction instead of symmetrizing
 * @param {(attrs: object) => number} [opts.linkWeight] - extract weight from edge attrs
 *   (default: numeric `attrs.weight`, else 1)
 * @param {(attrs: object) => number} [opts.nodeSize] - extract size from node attrs
 *   (default: numeric `attrs.size`, else 1)
 * @param {string[]} [opts.baseNodeIds] - explicit node ordering; every id must exist in `graph`
 * @returns {{
 *   n: number, nodeIds: string[], idToIndex: Map<string, number>,
 *   size: Float64Array, selfLoop: Float64Array,
 *   strengthOut: Float64Array, strengthIn: Float64Array,
 *   outEdges: Array<Array<{to: number, w: number}>>,
 *   inEdges: Array<Array<{from: number, w: number}>>,
 *   directed: boolean, totalWeight: number,
 *   forEachNeighbor: (i: number, cb: (to: number, w: number) => void) => void
 * }}
 * @throws {Error} `Missing node: <id>` when an entry of `opts.baseNodeIds` is not in the graph
 */
export function makeGraphAdapter(graph, opts = {}) {
  const weightOf =
    opts.linkWeight || ((attrs) => (typeof attrs?.weight === 'number' ? attrs.weight : 1));
  const sizeOf =
    opts.nodeSize || ((attrs) => (typeof attrs?.size === 'number' ? attrs.size : 1));
  const directed = Boolean(opts.directed);
  const seedIds = opts.baseNodeIds;

  // Dense id <-> index mapping
  const nodeIds = [];
  const idToIndex = new Map();
  if (Array.isArray(seedIds) && seedIds.length > 0) {
    for (const [position, id] of seedIds.entries()) {
      if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`);
      idToIndex.set(id, position);
      nodeIds.push(id);
    }
  } else {
    for (const [id] of graph.nodes()) {
      idToIndex.set(id, nodeIds.length);
      nodeIds.push(id);
    }
  }
  const n = nodeIds.length;

  // Per-node storage
  const size = new Float64Array(n);
  const selfLoop = new Float64Array(n);
  const strengthOut = new Float64Array(n);
  const strengthIn = new Float64Array(n);
  const outEdges = Array.from({ length: n }, () => []);
  const inEdges = Array.from({ length: n }, () => []);

  // Record one arc from -> to in both adjacency lists and both strength arrays.
  const addArc = (from, to, w) => {
    outEdges[from].push({ to, w });
    inEdges[to].push({ from, w });
    strengthOut[from] += w;
    strengthIn[to] += w;
  };

  if (directed) {
    for (const [src, tgt, attrs] of graph.edges()) {
      const from = idToIndex.get(src);
      const to = idToIndex.get(tgt);
      if (from == null || to == null) continue;
      const w = +weightOf(attrs) || 0;
      // A self-loop is tracked in selfLoop AND intentionally kept in the
      // adjacency lists; per the upstream note, partition.js's directed
      // moveNodeToCommunity compensates for the resulting double appearance.
      if (from === to) selfLoop[from] += w;
      addArc(from, to, w);
    }
  } else {
    // Undirected: symmetrize and average reciprocal pairs.
    const pairs = new Map();
    for (const [src, tgt, attrs] of graph.edges()) {
      const a = idToIndex.get(src);
      const b = idToIndex.get(tgt);
      if (a == null || b == null) continue;
      const w = +weightOf(attrs) || 0;
      if (a === b) {
        selfLoop[a] += w;
        continue;
      }
      const lo = Math.min(a, b);
      const hi = Math.max(a, b);
      const key = `${lo}:${hi}`;
      let entry = pairs.get(key);
      if (entry === undefined) {
        entry = { lo, hi, sum: 0, forward: false, backward: false };
        pairs.set(key, entry);
      }
      entry.sum += w;
      if (a < b) entry.forward = true;
      else entry.backward = true;
    }

    for (const { lo, hi, sum, forward, backward } of pairs.values()) {
      const directions = Number(forward) + Number(backward);
      const w = directions > 0 ? sum / directions : 0;
      if (w === 0) continue;
      addArc(lo, hi, w);
      addArc(hi, lo, w);
    }

    // Self-loops enter adjacency and strengths once each (single-w convention,
    // not the textbook 2w); the upstream note says the modularity formulas in
    // modularity.js are written against this convention.
    for (let v = 0; v < n; v++) {
      const w = selfLoop[v];
      if (w === 0) continue;
      addArc(v, v, w);
    }
  }

  // Node sizes
  for (const [id, attrs] of graph.nodes()) {
    const index = idToIndex.get(id);
    if (index != null) size[index] = +sizeOf(attrs) || 0;
  }

  // Sum of all out-strengths
  let totalWeight = 0;
  for (const strength of strengthOut) totalWeight += strength;

  function forEachNeighbor(i, cb) {
    for (const { to, w } of outEdges[i]) cb(to, w);
  }

  return {
    n,
    nodeIds,
    idToIndex,
    size,
    selfLoop,
    strengthOut,
    strengthIn,
    outEdges,
    inEdges,
    directed,
    totalWeight,
    forEachNeighbor,
  };
}
/**
 * Total CPM (Constant Potts Model) quality of a partition.
 *
 * For each community index below `part.communityCount`, adds the community's
 * internal edge weight minus `gamma * S * (S - 1) / 2`, where `S` is the
 * community's total size (a missing or falsy size counts as 0).
 *
 * @param {{communityCount: number, communityTotalSize: ArrayLike<number>, communityInternalEdgeWeight: ArrayLike<number>}} part
 * @param {object} _g - graph adapter; not read by this function
 * @param {number} [gamma=1.0] - CPM resolution parameter
 * @returns {number} sum of the per-community contributions (0 when there are no communities)
 */
export function qualityCPM(part, _g, gamma = 1.0) {
  let total = 0;
  let community = 0;
  while (community < part.communityCount) {
    const memberSize = part.communityTotalSize[community] || 0;
    const sizePenalty = (gamma * (memberSize * (memberSize - 1))) / 2;
    total += part.communityInternalEdgeWeight[community] - sizePenalty;
    community++;
  }
  return total;
}
/**
 * Detect communities in a CodeGraph using the Leiden algorithm.
 *
 * Runs the optimiser once, then exposes the resulting node -> community
 * mapping through a small read-only result object.
 *
 * @param {import('../../model.js').CodeGraph} graph
 * @param {object} [options] - forwarded unchanged to the optimiser
 * @param {number} [options.randomSeed=42]
 * @param {boolean} [options.directed=false]
 * @param {boolean} [options.refine=true] - Leiden refinement (set false for plain Louvain)
 * @param {string} [options.quality='modularity'] - 'modularity' | 'cpm'
 * @param {number} [options.resolution=1.0]
 * @param {number} [options.maxCommunitySize]
 * @param {Set|Array} [options.fixedNodes]
 * @param {string} [options.candidateStrategy] - 'neighbors' | 'all' | 'random' | 'random-neighbor'
 * @param {number} [options.maxLevels] - NOTE(review): presumably caps coarsening levels
 *   (optimiser.js declares a default of 50) — confirm against normalizeOptions
 * @param {number} [options.maxLocalPasses] - NOTE(review): presumably caps local-move passes
 *   per level (optimiser.js declares a default of 20) — confirm against normalizeOptions
 * @returns {{ getClass(id): number, getCommunities(): Map, quality(): number, toJSON(): object }}
 *   - `getClass(id)`: community of a node (the id is stringified before lookup),
 *     `undefined` for unknown ids
 *   - `getCommunities()`: Map from community id to an array of member node ids
 *   - `quality()`: quality of the final partition measured on the original graph
 *   - `toJSON()`: `{ membership, meta: { levels, quality, options } }`
 *
 * **Note on `quality()`:** for modularity it always evaluates at gamma = 1.0
 * (standard Newman-Girvan modularity) no matter which `resolution` drove the
 * optimisation, so values stay comparable across runs — but they are Q at
 * gamma = 1.0, not the objective that was actually optimised. For CPM the
 * caller-specified `resolution` is used (1.0 when it is not a number), because
 * gamma is intrinsic to the CPM metric.
 */
export function detectClusters(graph, options = {}) {
  const run = runLouvainUndirectedModularity(graph, options);
  const { levels, originalToCurrent, originalNodeIds, baseGraph } = run;

  // originalToCurrent[i] is the final community of the i-th original node.
  const idToClass = new Map();
  for (let position = 0; position < originalNodeIds.length; position++) {
    idToClass.set(originalNodeIds[position], originalToCurrent[position]);
  }

  return {
    getClass(nodeId) {
      return idToClass.get(String(nodeId));
    },
    getCommunities() {
      const groups = new Map();
      for (const [id, community] of idToClass) {
        const members = groups.get(community);
        if (members) members.push(id);
        else groups.set(community, [id]);
      }
      return groups;
    },
    quality() {
      // Score against the original (level-0) graph with the final partition
      // mapped back: on a coarsened graph the modularity null model sees a
      // different degree distribution and reports inflated values.
      const partition = buildOriginalPartition(baseGraph, originalToCurrent);
      const metric = (options.quality || 'modularity').toLowerCase();
      if (metric !== 'cpm') {
        // Always gamma = 1.0: standard Newman-Girvan modularity.
        return qualityModularity(partition, baseGraph, 1.0);
      }
      const gamma = typeof options.resolution === 'number' ? options.resolution : 1.0;
      return qualityCPM(partition, baseGraph, gamma);
    },
    toJSON() {
      const membership = {};
      for (const [id, community] of idToClass) membership[id] = community;
      const meta = { levels: levels.length, quality: this.quality(), options };
      return { membership, meta };
    },
  };
}
/**
 * Modularity change for moving node `v` out of its current community and
 * into community `c`.
 *
 * Directed graphs are delegated to `diffModularityDirected`. For undirected
 * graphs the result is (gain of joining `c`) minus (contribution lost by
 * leaving the current community), where each term is
 * `edgeWeight / W - gamma * k_v * communityStrength / W²` with
 * `W = g.totalWeight` and `k_v = g.strengthOut[v]`.
 *
 * NOTE(review): `part.getNeighborEdgeWeightToCommunity` is presumably backed
 * by per-node weights accumulated by the caller just before this call —
 * confirm in partition.js.
 *
 * @param {object} part - partition state (`nodeCommunity`, `communityTotalStrength`,
 *   `getNeighborEdgeWeightToCommunity`)
 * @param {object} g - graph adapter (`directed`, `strengthOut`, `totalWeight`)
 * @param {number} v - node index
 * @param {number} c - target community id; ids beyond `communityTotalStrength`
 *   are treated as an empty community
 * @param {number} [gamma=1.0] - resolution parameter
 * @returns {number} the gain; exactly 0 when `c` is already the node's community
 */
export function diffModularity(part, g, v, c, gamma = 1.0) {
  if (g.directed) return diffModularityDirected(part, g, v, c, gamma);

  const currentCommunity = part.nodeCommunity[v];
  if (currentCommunity === c) return 0;

  const degree = g.strengthOut[v];
  const totalWeight = g.totalWeight;
  const weightToTarget = part.getNeighborEdgeWeightToCommunity(c) || 0;
  const weightToCurrent = part.getNeighborEdgeWeightToCommunity(currentCommunity) || 0;
  const strengths = part.communityTotalStrength;
  const targetStrength = c < strengths.length ? strengths[c] : 0;
  const currentStrength = strengths[currentCommunity];

  // Contribution of v to a community: observed link weight minus the
  // resolution-scaled expectation under the null model.
  const contribution = (edgeWeight, communityStrength) =>
    edgeWeight / totalWeight - (gamma * (degree * communityStrength)) / (totalWeight * totalWeight);

  return (
    -contribution(weightToCurrent, currentStrength) + contribution(weightToTarget, targetStrength)
  );
}
/**
 * Modularity of a partition, with resolution parameter `gamma`.
 *
 * Directed graphs sum, per community,
 * `internal / W - gamma * (outStrength * inStrength) / W²`.
 * Undirected graphs sum `2 * internal / W - gamma * (strength / W)²`.
 * In both cases `W = g.totalWeight`.
 *
 * @param {object} part - partition aggregates: `communityCount`,
 *   `communityInternalEdgeWeight`, plus `communityTotalStrength` (undirected) or
 *   `communityTotalOutStrength` / `communityTotalInStrength` (directed)
 * @param {{directed: boolean, totalWeight: number}} g - graph adapter
 * @param {number} [gamma=1.0] - resolution parameter
 * @returns {number} modularity; 0 when the graph has no edge weight at all
 */
export function qualityModularity(part, g, gamma = 1.0) {
  const m2 = g.totalWeight;
  // With zero total weight every term below would be 0/0. Report a neutral 0
  // rather than letting NaN leak into quality() / toJSON() output.
  if (m2 === 0) return 0;

  let sum = 0;
  if (g.directed) {
    for (let c = 0; c < part.communityCount; c++) {
      sum +=
        part.communityInternalEdgeWeight[c] / m2 -
        (gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c])) /
          (m2 * m2);
    }
  } else {
    // communityInternalEdgeWeight counts each undirected edge once (j > i),
    // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard
    // Newman-Girvan formula is Q = Σ_c [2·L_c/(2m) - γ·(d_c/(2m))²], so
    // we multiply lc by 2 to match.
    for (let c = 0; c < part.communityCount; c++) {
      const lc = part.communityInternalEdgeWeight[c];
      const dc = part.communityTotalStrength[c];
      sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2);
    }
  }
  return sum;
}
options.fixedNodes : new Set(options.fixedNodes); + for (const id of asSet) { + const idx = baseGraphAdapter.idToIndex.get(String(id)); + if (idx != null) fixed[idx] = 1; + } + fixedNodeMask = fixed; + } + + for (let level = 0; level < options.maxLevels; level++) { + const graphAdapter = + level === 0 + ? baseGraphAdapter + : makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput }); + const partition = makePartition(graphAdapter); + partition.graph = graphAdapter; + partition.initializeAggregates(); + + const order = new Int32Array(graphAdapter.n); + for (let i = 0; i < graphAdapter.n; i++) order[i] = i; + + let improved = true; + let localPasses = 0; + const strategyCode = options.candidateStrategyCode; + while (improved) { + improved = false; + localPasses++; + shuffleArrayInPlace(order, random); + for (let idx = 0; idx < order.length; idx++) { + const nodeIndex = order[idx]; + if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue; + const candidateCount = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); + let bestCommunityId = partition.nodeCommunity[nodeIndex]; + let bestGain = 0; + const maxCommunitySize = options.maxCommunitySize; + if (strategyCode === CandidateStrategy.All) { + for (let communityId = 0; communityId < partition.communityCount; communityId++) { + if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else if (strategyCode === CandidateStrategy.RandomAny) { + const tries = Math.min(10, Math.max(1, partition.communityCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + const communityId = (random() * partition.communityCount) | 0; + if 
(communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else if (strategyCode === CandidateStrategy.RandomNeighbor) { + const tries = Math.min(10, Math.max(1, candidateCount)); + for (let trialIndex = 0; trialIndex < tries; trialIndex++) { + const communityId = partition.getCandidateCommunityAt((random() * candidateCount) | 0); + if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if ( + maxCommunitySize < Infinity && + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + maxCommunitySize + ) + continue; + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } else { + for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { + const communityId = partition.getCandidateCommunityAt(trialIndex); + if (maxCommunitySize < Infinity) { + const nextSize = + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]; + if (nextSize > maxCommunitySize) continue; + } + const gain = computeQualityGain(partition, nodeIndex, communityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = communityId; + } + } + } + if (options.allowNewCommunity) { + const newCommunityId = partition.communityCount; + const gain = computeQualityGain(partition, nodeIndex, newCommunityId, options); + if (gain > bestGain) { + bestGain = gain; + bestCommunityId = newCommunityId; + } + } + if (bestCommunityId !== partition.nodeCommunity[nodeIndex] && bestGain > GAIN_EPSILON) { + partition.moveNodeToCommunity(nodeIndex, bestCommunityId); + improved = true; + } + } + if 
(localPasses >= options.maxLocalPasses) break; + } + + renumberCommunities(partition, options.preserveLabels); + + let effectivePartition = partition; + if (options.refine) { + const refined = refineWithinCoarseCommunities( + graphAdapter, + partition, + random, + options, + level === 0 ? fixedNodeMask : null, + ); + renumberCommunities(refined, options.preserveLabels); + effectivePartition = refined; + } + + levels.push({ graph: graphAdapter, partition: effectivePartition }); + const fineToCoarse = effectivePartition.nodeCommunity; + for (let i = 0; i < originalToCurrent.length; i++) { + originalToCurrent[i] = fineToCoarse[originalToCurrent[i]]; + } + + if (partition.communityCount === graphAdapter.n) break; + currentGraph = buildCoarseGraph(graphAdapter, effectivePartition); + } + + const last = levels[levels.length - 1]; + return { + graph: last.graph, + partition: last.partition, + levels, + originalToCurrent, + originalNodeIds: baseGraphAdapter.nodeIds, + baseGraph: baseGraphAdapter, + }; +} + +/** + * Build a coarse graph where each community becomes a node. + * Uses CodeGraph instead of ngraph.graph. + */ +// Build a coarse graph where each community becomes a single node. +// Self-loops (g.selfLoop[]) don't need separate handling here because they +// are already present in g.outEdges (directed path keeps them in both arrays). +// When the coarse graph is fed back to makeGraphAdapter at the next level, +// the adapter re-detects cu===cv edges as self-loops and populates selfLoop[]. 
+function buildCoarseGraph(g, p) { + const coarse = new CodeGraph({ directed: g.directed }); + for (let c = 0; c < p.communityCount; c++) { + coarse.addNode(String(c), { size: p.communityTotalSize[c] }); + } + const acc = new Map(); + for (let i = 0; i < g.n; i++) { + const cu = p.nodeCommunity[i]; + const list = g.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const j = list[k].to; + const w = list[k].w; + const cv = p.nodeCommunity[j]; + // Undirected: each non-self edge (i,j) appears in both outEdges[i] and + // outEdges[j]. For intra-community edges (cu===cv), skip the reverse to + // avoid inflating the coarse self-loop weight by 2×. + if (!g.directed && cu === cv && j < i) continue; + const key = `${cu}:${cv}`; + acc.set(key, (acc.get(key) || 0) + w); + } + } + for (const [key, w] of acc.entries()) { + const [cuStr, cvStr] = key.split(':'); + coarse.addEdge(cuStr, cvStr, { weight: w }); + } + return coarse; +} + +function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { + const p = makePartition(g); + p.initializeAggregates(); + p.graph = g; + const macro = basePart.nodeCommunity; + const commMacro = new Int32Array(p.communityCount); + for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]; + + const order = new Int32Array(g.n); + for (let i = 0; i < g.n; i++) order[i] = i; + let improved = true; + let passes = 0; + while (improved) { + improved = false; + passes++; + shuffleArrayInPlace(order, rng); + for (let idx = 0; idx < order.length; idx++) { + const v = order[idx]; + if (fixedMask0?.[v]) continue; + const macroV = macro[v]; + const touchedCount = p.accumulateNeighborCommunityEdgeWeights(v); + let bestC = p.nodeCommunity[v]; + let bestGain = 0; + const maxSize = Number.isFinite(opts.maxCommunitySize) ? 
opts.maxCommunitySize : Infinity; + for (let t = 0; t < touchedCount; t++) { + const c = p.getCandidateCommunityAt(t); + if (commMacro[c] !== macroV) continue; + if (maxSize < Infinity) { + const nextSize = p.getCommunityTotalSize(c) + g.size[v]; + if (nextSize > maxSize) continue; + } + const gain = computeQualityGain(p, v, c, opts); + if (gain > bestGain) { + bestGain = gain; + bestC = c; + } + } + if (bestC !== p.nodeCommunity[v] && bestGain > GAIN_EPSILON) { + p.moveNodeToCommunity(v, bestC); + improved = true; + } + } + if (passes >= opts.maxLocalPasses) break; + } + return p; +} + +function computeQualityGain(partition, v, c, opts) { + const quality = (opts.quality || 'modularity').toLowerCase(); + const gamma = typeof opts.resolution === 'number' ? opts.resolution : 1.0; + if (quality === 'cpm') { + return diffCPM(partition, partition.graph || {}, v, c, gamma); + } + // diffModularity dispatches to diffModularityDirected internally when g.directed is true + return diffModularity(partition, partition.graph || {}, v, c, gamma); +} + +function shuffleArrayInPlace(arr, rng = Math.random) { + for (let i = arr.length - 1; i > 0; i--) { + const j = Math.floor(rng() * (i + 1)); + const t = arr[i]; + arr[i] = arr[j]; + arr[j] = t; + } + return arr; +} + +function resolveCandidateStrategy(options) { + const val = options.candidateStrategy; + if (typeof val !== 'string') return CandidateStrategy.Neighbors; + switch (val) { + case 'neighbors': + return CandidateStrategy.Neighbors; + case 'all': + return CandidateStrategy.All; + case 'random': + return CandidateStrategy.RandomAny; + case 'random-neighbor': + return CandidateStrategy.RandomNeighbor; + default: + return CandidateStrategy.Neighbors; + } +} + +function normalizeOptions(options = {}) { + const directed = !!options.directed; + const randomSeed = Number.isFinite(options.randomSeed) ? options.randomSeed : 42; + const maxLevels = Number.isFinite(options.maxLevels) ? 
options.maxLevels : DEFAULT_MAX_LEVELS; + const maxLocalPasses = Number.isFinite(options.maxLocalPasses) + ? options.maxLocalPasses + : DEFAULT_MAX_LOCAL_PASSES; + const allowNewCommunity = !!options.allowNewCommunity; + const candidateStrategyCode = resolveCandidateStrategy(options); + const quality = (options.quality || 'modularity').toLowerCase(); + const resolution = typeof options.resolution === 'number' ? options.resolution : 1.0; + const refine = options.refine !== false; + const preserveLabels = options.preserveLabels; + const maxCommunitySize = Number.isFinite(options.maxCommunitySize) + ? options.maxCommunitySize + : Infinity; + return { + directed, + randomSeed, + maxLevels, + maxLocalPasses, + allowNewCommunity, + candidateStrategyCode, + quality, + resolution, + refine, + preserveLabels, + maxCommunitySize, + fixedNodes: options.fixedNodes, + }; +} + +function renumberCommunities(partition, preserveLabels) { + if (preserveLabels && preserveLabels instanceof Map) { + partition.compactCommunityIds({ preserveMap: preserveLabels }); + } else if (preserveLabels === true) { + partition.compactCommunityIds({ keepOldOrder: true }); + } else { + partition.compactCommunityIds(); + } +} diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js new file mode 100644 index 00000000..fec97a4f --- /dev/null +++ b/src/graph/algorithms/leiden/partition.js @@ -0,0 +1,403 @@ +/** + * Mutable community assignment with per-community aggregates. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + * + * Maintains per-community totals and per-move scratch accumulators so we can + * compute modularity/CPM gains in O(neighborhood) time without rescanning the + * whole graph after each move. 
+ */ + +export function makePartition(graph) { + const n = graph.n; + const nodeCommunity = new Int32Array(n); + for (let i = 0; i < n; i++) nodeCommunity[i] = i; + let communityCount = n; + + let communityTotalSize = new Float64Array(communityCount); + let communityNodeCount = new Int32Array(communityCount); + let communityInternalEdgeWeight = new Float64Array(communityCount); + let communityTotalStrength = new Float64Array(communityCount); + let communityTotalOutStrength = new Float64Array(communityCount); + let communityTotalInStrength = new Float64Array(communityCount); + + const candidateCommunities = new Int32Array(n); + let candidateCommunityCount = 0; + const neighborEdgeWeightToCommunity = new Float64Array(n); + const outEdgeWeightToCommunity = new Float64Array(n); + const inEdgeWeightFromCommunity = new Float64Array(n); + const isCandidateCommunity = new Uint8Array(n); + + function ensureCommCapacity(newCount) { + if (newCount <= communityTotalSize.length) return; + const growTo = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); + communityTotalSize = growFloat(communityTotalSize, growTo); + communityNodeCount = growInt(communityNodeCount, growTo); + communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); + communityTotalStrength = growFloat(communityTotalStrength, growTo); + communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); + communityTotalInStrength = growFloat(communityTotalInStrength, growTo); + } + + function initializeAggregates() { + communityTotalSize.fill(0); + communityNodeCount.fill(0); + communityInternalEdgeWeight.fill(0); + communityTotalStrength.fill(0); + communityTotalOutStrength.fill(0); + communityTotalInStrength.fill(0); + for (let i = 0; i < n; i++) { + const c = nodeCommunity[i]; + communityTotalSize[c] += graph.size[i]; + communityNodeCount[c] += 1; + if (graph.directed) { + communityTotalOutStrength[c] += graph.strengthOut[i]; + communityTotalInStrength[c] += 
graph.strengthIn[i]; + } else { + communityTotalStrength[c] += graph.strengthOut[i]; + } + if (graph.selfLoop[i] !== 0) communityInternalEdgeWeight[c] += graph.selfLoop[i]; + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const neighbors = graph.outEdges[i]; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const neighbors = graph.outEdges[i]; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]; + if (j <= i) continue; + if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; + } + } + } + } + + function resetScratch() { + for (let i = 0; i < candidateCommunityCount; i++) { + const c = candidateCommunities[i]; + isCandidateCommunity[c] = 0; + neighborEdgeWeightToCommunity[c] = 0; + outEdgeWeightToCommunity[c] = 0; + inEdgeWeightFromCommunity[c] = 0; + } + candidateCommunityCount = 0; + } + + function touch(c) { + if (isCandidateCommunity[c]) return; + isCandidateCommunity[c] = 1; + candidateCommunities[candidateCommunityCount++] = c; + } + + function accumulateNeighborCommunityEdgeWeights(v) { + resetScratch(); + const ci = nodeCommunity[v]; + touch(ci); + if (graph.directed) { + const outL = graph.outEdges[v]; + for (let k = 0; k < outL.length; k++) { + const j = outL[k].to; + const w = outL[k].w; + const cj = nodeCommunity[j]; + touch(cj); + outEdgeWeightToCommunity[cj] += w; + } + const inL = graph.inEdges[v]; + for (let k = 0; k < inL.length; k++) { + const i2 = inL[k].from; + const w = inL[k].w; + const ci2 = nodeCommunity[i2]; + touch(ci2); + inEdgeWeightFromCommunity[ci2] += w; + } + } else { + const list = graph.outEdges[v]; + for (let k = 0; k < list.length; k++) { + const j = list[k].to; + const w = list[k].w; + 
const cj = nodeCommunity[j]; + touch(cj); + neighborEdgeWeightToCommunity[cj] += w; + } + } + return candidateCommunityCount; + } + + const twoMUndirected = graph.totalWeight; + function deltaModularityUndirected(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + const strengthV = graph.strengthOut[v]; + const weightToNew = + newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; + const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; + const totalStrengthNew = + newC < communityTotalStrength.length ? communityTotalStrength[newC] : 0; + const totalStrengthOld = communityTotalStrength[oldC]; + const gain_remove = -( + weightToOld / twoMUndirected - + (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) + ); + const gain_add = + weightToNew / twoMUndirected - + (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); + return gain_remove + gain_add; + } + + function deltaModularityDirected(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + const totalEdgeWeight = graph.totalWeight; + const strengthOutV = graph.strengthOut[v]; + const strengthInV = graph.strengthIn[v]; + const inFromNew = + newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; + const outToNew = + newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; + const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; + const outToOld = outEdgeWeightToCommunity[oldC] || 0; + const totalInStrengthNew = + newC < communityTotalInStrength.length ? communityTotalInStrength[newC] : 0; + const totalOutStrengthNew = + newC < communityTotalOutStrength.length ? 
communityTotalOutStrength[newC] : 0; + const totalInStrengthOld = communityTotalInStrength[oldC]; + const totalOutStrengthOld = communityTotalOutStrength[oldC]; + // Self-loop correction + constant term (see modularity.js diffModularityDirected) + const selfW = graph.selfLoop[v] || 0; + const deltaInternal = + (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; + const deltaExpected = + (gamma * + (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + + strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + + 2 * strengthOutV * strengthInV)) / + (totalEdgeWeight * totalEdgeWeight); + return deltaInternal - deltaExpected; + } + + function deltaCPM(v, newC, gamma = 1.0) { + const oldC = nodeCommunity[v]; + if (newC === oldC) return 0; + let w_old, w_new; + let selfCorrection = 0; + if (graph.directed) { + w_old = (outEdgeWeightToCommunity[oldC] || 0) + (inEdgeWeightFromCommunity[oldC] || 0); + w_new = + newC < outEdgeWeightToCommunity.length + ? (outEdgeWeightToCommunity[newC] || 0) + (inEdgeWeightFromCommunity[newC] || 0) + : 0; + // Self-loop correction (see cpm.js diffCPM) + selfCorrection = 2 * (graph.selfLoop[v] || 0); + } else { + w_old = neighborEdgeWeightToCommunity[oldC] || 0; + w_new = + newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; + } + const nodeSize = graph.size[v] || 1; + const sizeOld = communityTotalSize[oldC] || 0; + const sizeNew = newC < communityTotalSize.length ? 
communityTotalSize[newC] : 0; + return w_new - w_old + selfCorrection - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); + } + + function moveNodeToCommunity(v, newC) { + const oldC = nodeCommunity[v]; + if (oldC === newC) return false; + if (newC >= communityCount) { + ensureCommCapacity(newC + 1); + communityCount = newC + 1; + } + const strengthOutV = graph.strengthOut[v]; + const strengthInV = graph.strengthIn[v]; + const selfLoopWeight = graph.selfLoop[v]; + const nodeSize = graph.size[v]; + + communityNodeCount[oldC] -= 1; + communityNodeCount[newC] += 1; + communityTotalSize[oldC] -= nodeSize; + communityTotalSize[newC] += nodeSize; + if (graph.directed) { + communityTotalOutStrength[oldC] -= strengthOutV; + communityTotalOutStrength[newC] += strengthOutV; + communityTotalInStrength[oldC] -= strengthInV; + communityTotalInStrength[newC] += strengthInV; + } else { + communityTotalStrength[oldC] -= strengthOutV; + communityTotalStrength[newC] += strengthOutV; + } + + if (graph.directed) { + const outToOld = outEdgeWeightToCommunity[oldC] || 0; + const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; + const outToNew = + newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; + const inFromNew = + newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ communityInternalEdgeWeight[oldC] -= outToOld + inFromOld - selfLoopWeight; + communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; + } else { + const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; + const weightToNew = neighborEdgeWeightToCommunity[newC] || 0; + communityInternalEdgeWeight[oldC] -= 2 * weightToOld + selfLoopWeight; + communityInternalEdgeWeight[newC] += 2 * weightToNew + selfLoopWeight; + } + + nodeCommunity[v] = newC; + return true; + } + + function compactCommunityIds(opts = {}) { + const ids = []; + for (let c = 0; c < communityCount; c++) if (communityNodeCount[c] > 0) ids.push(c); + if (opts.keepOldOrder) { + ids.sort((a, b) => a - b); + } else if (opts.preserveMap instanceof Map) { + ids.sort((a, b) => { + const pa = opts.preserveMap.get(a); + const pb = opts.preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return ( + communityTotalSize[b] - communityTotalSize[a] || + communityNodeCount[b] - communityNodeCount[a] || + a - b + ); + }); + } else { + ids.sort( + (a, b) => + communityTotalSize[b] - communityTotalSize[a] || + communityNodeCount[b] - communityNodeCount[a] || + a - b, + ); + } + const newId = new Int32Array(communityCount).fill(-1); + ids.forEach((c, i) => { + newId[c] = i; + }); + for (let i = 0; i < nodeCommunity.length; i++) nodeCommunity[i] = newId[nodeCommunity[i]]; + const remappedCount = ids.length; + const newTotalSize = new Float64Array(remappedCount); + const newNodeCount = new Int32Array(remappedCount); + const newInternalEdgeWeight = new Float64Array(remappedCount); + const newTotalStrength = new Float64Array(remappedCount); + const newTotalOutStrength = new Float64Array(remappedCount); + const newTotalInStrength = new Float64Array(remappedCount); + for (let i = 0; i < n; i++) { + const c = nodeCommunity[i]; + newTotalSize[c] += graph.size[i]; + newNodeCount[c] += 1; + 
if (graph.directed) { + newTotalOutStrength[c] += graph.strengthOut[i]; + newTotalInStrength[c] += graph.strengthIn[i]; + } else { + newTotalStrength[c] += graph.strengthOut[i]; + } + if (graph.selfLoop[i] !== 0) newInternalEdgeWeight[c] += graph.selfLoop[i]; + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const list = graph.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci = nodeCommunity[i]; + const list = graph.outEdges[i]; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]; + if (j <= i) continue; + if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; + } + } + } + communityCount = remappedCount; + communityTotalSize = newTotalSize; + communityNodeCount = newNodeCount; + communityInternalEdgeWeight = newInternalEdgeWeight; + communityTotalStrength = newTotalStrength; + communityTotalOutStrength = newTotalOutStrength; + communityTotalInStrength = newTotalInStrength; + } + + function getCommunityMembers() { + const comms = new Array(communityCount); + for (let i = 0; i < communityCount; i++) comms[i] = []; + for (let i = 0; i < n; i++) comms[nodeCommunity[i]].push(i); + return comms; + } + + function getCommunityTotalSize(c) { + return c < communityTotalSize.length ? communityTotalSize[c] : 0; + } + function getCommunityNodeCount(c) { + return c < communityNodeCount.length ? 
communityNodeCount[c] : 0; + } + + return { + n, + get communityCount() { + return communityCount; + }, + nodeCommunity, + get communityTotalSize() { + return communityTotalSize; + }, + get communityNodeCount() { + return communityNodeCount; + }, + get communityInternalEdgeWeight() { + return communityInternalEdgeWeight; + }, + get communityTotalStrength() { + return communityTotalStrength; + }, + get communityTotalOutStrength() { + return communityTotalOutStrength; + }, + get communityTotalInStrength() { + return communityTotalInStrength; + }, + initializeAggregates, + accumulateNeighborCommunityEdgeWeights, + getCandidateCommunityCount: () => candidateCommunityCount, + getCandidateCommunityAt: (i) => candidateCommunities[i], + getNeighborEdgeWeightToCommunity: (c) => neighborEdgeWeightToCommunity[c] || 0, + getOutEdgeWeightToCommunity: (c) => outEdgeWeightToCommunity[c] || 0, + getInEdgeWeightFromCommunity: (c) => inEdgeWeightFromCommunity[c] || 0, + deltaModularityUndirected, + deltaModularityDirected, + deltaCPM, + moveNodeToCommunity, + compactCommunityIds, + getCommunityMembers, + getCommunityTotalSize, + getCommunityNodeCount, + }; +} + +function growFloat(a, to) { + const b = new Float64Array(to); + b.set(a); + return b; +} +function growInt(a, to) { + const b = new Int32Array(to); + b.set(a); + return b; +} diff --git a/src/graph/algorithms/leiden/rng.js b/src/graph/algorithms/leiden/rng.js new file mode 100644 index 00000000..9d20fcb6 --- /dev/null +++ b/src/graph/algorithms/leiden/rng.js @@ -0,0 +1,19 @@ +/** + * Seeded PRNG (mulberry32). + * Drop-in replacement for ngraph.random — only nextDouble() is needed. 
+ * + * @param {number} [seed] + * @returns {{ nextDouble(): number }} + */ +export function createRng(seed = 42) { + let s = seed | 0; + return { + nextDouble() { + s |= 0; + s = (s + 0x6d2b79f5) | 0; + let t = Math.imul(s ^ (s >>> 15), 1 | s); + t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }, + }; +} diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.js index 2a7f3a69..c4195b60 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.js @@ -1,26 +1,36 @@ /** - * Louvain community detection via graphology. + * Community detection via vendored Leiden algorithm. + * Maintains backward-compatible API: { assignments: Map, modularity: number } + * + * **Note:** Always runs in undirected mode (`directed: false`) regardless of + * the input graph's directedness. For direction-aware community detection, + * use `detectClusters` from `./leiden/index.js` directly. * * @param {import('../model.js').CodeGraph} graph - * @param {{ resolution?: number }} [opts] + * @param {{ resolution?: number, maxLevels?: number, maxLocalPasses?: number }} [opts] * @returns {{ assignments: Map, modularity: number }} */ -import graphologyLouvain from 'graphology-communities-louvain'; +import { detectClusters } from './leiden/index.js'; export function louvainCommunities(graph, opts = {}) { - const gy = graph.toGraphology({ type: 'undirected' }); - - if (gy.order === 0 || gy.size === 0) { + if (graph.nodeCount === 0 || graph.edgeCount === 0) { return { assignments: new Map(), modularity: 0 }; } const resolution = opts.resolution ?? 
1.0; - const details = graphologyLouvain.detailed(gy, { resolution }); + const result = detectClusters(graph, { + resolution, + randomSeed: 42, + directed: false, + ...(opts.maxLevels != null && { maxLevels: opts.maxLevels }), + ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), + }); const assignments = new Map(); - for (const [nodeId, communityId] of Object.entries(details.communities)) { - assignments.set(nodeId, communityId); + for (const [id] of graph.nodes()) { + const cls = result.getClass(id); + if (cls != null) assignments.set(id, cls); } - return { assignments, modularity: details.modularity }; + return { assignments, modularity: result.quality() }; } diff --git a/src/graph/model.js b/src/graph/model.js index 733be688..8672155b 100644 --- a/src/graph/model.js +++ b/src/graph/model.js @@ -5,8 +5,6 @@ * Node IDs are always strings. DB integer IDs should be stringified before use. */ -import Graph from 'graphology'; - export class CodeGraph { /** * @param {{ directed?: boolean }} [opts] @@ -189,21 +187,6 @@ export class CodeGraph { return result; } - /** Convert to graphology instance (for Louvain etc). */ - toGraphology(opts = {}) { - const type = opts.type || (this._directed ? 
'directed' : 'undirected'); - const g = new Graph({ type }); - for (const [id] of this._nodes) { - g.addNode(id); - } - - for (const [src, tgt] of this.edges()) { - if (src === tgt) continue; - if (!g.hasEdge(src, tgt)) g.addEdge(src, tgt); - } - return g; - } - // ─── Utilities ────────────────────────────────────────────────── clone() { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index 7f62083b..3d32a393 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -67,6 +67,8 @@ export const DEFAULTS = { }, community: { resolution: 1.0, + maxLevels: 50, + maxLocalPasses: 20, }, structure: { cohesionThreshold: 0.3, diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 29a4f6e9..f4f530b9 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -116,7 +116,7 @@ export async function stats(customDbPath, opts = {}) { const { communitySummaryForStats } = await import('../../features/communities.js'); data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); } catch { - /* graphology may not be available */ + /* community detection is optional; silently skip on any error */ } if (outputResult(data, null, opts)) return; diff --git a/tests/graph/algorithms/leiden.test.js b/tests/graph/algorithms/leiden.test.js new file mode 100644 index 00000000..1240263c --- /dev/null +++ b/tests/graph/algorithms/leiden.test.js @@ -0,0 +1,358 @@ +import { describe, expect, it } from 'vitest'; +import { detectClusters } from '../../../src/graph/algorithms/leiden/index.js'; +import { CodeGraph } from '../../../src/graph/model.js'; + +// ─── Helpers ────────────────────────────────────────────────────────── + +/** Two 4-node cliques connected by a single weak bridge. 
*/ +function makeTwoCliquesBridge() { + const g = new CodeGraph(); + const A = ['0', '1', '2', '3']; + const B = ['4', '5', '6', '7']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('3', '4'); + g.addEdge('4', '3'); + return g; +} + +function makeTwoCliques(n = 4) { + const g = new CodeGraph(); + const A = Array.from({ length: n }, (_, i) => `a${i}`); + const B = Array.from({ length: n }, (_, i) => `b${i}`); + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge(A[A.length - 1], B[0]); + g.addEdge(B[0], A[A.length - 1]); + return { g, A, B }; +} + +// ─── Basic ──────────────────────────────────────────────────────────── + +describe('detectClusters', () => { + it('splits two cliques with a weak bridge', () => { + const g = makeTwoCliquesBridge(); + const clusters = detectClusters(g, { randomSeed: 1 }); + const cA = new Set(['0', '1', '2', '3'].map((i) => clusters.getClass(i))); + const cB = new Set(['4', '5', '6', '7'].map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── CPM ────────────────────────────────────────────────────────────── + +describe('CPM resolution tuning', () => { + it('splits more with higher gamma', () => { + const g = makeTwoCliquesBridge(); + const low = detectClusters(g, { quality: 'cpm', resolution: 0.01, randomSeed: 1 }); + const high = detectClusters(g, { quality: 'cpm', resolution: 10.0, 
randomSeed: 1 }); + const ids = ['0', '1', '2', '3', '4', '5', '6', '7']; + const countCommunities = (clusters) => new Set(ids.map((i) => clusters.getClass(i))).size; + expect(countCommunities(low)).toBeLessThanOrEqual(countCommunities(high)); + }); +}); + +// ─── CPM with weighted nodes ───────────────────────────────────────── + +describe('CPM with weighted nodes', () => { + it('uses communityTotalSize in quality reporting', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2', '3']; + const B = ['4', '5', '6', '7']; + for (const id of [...A, ...B]) g.addNode(id, { size: A.includes(id) ? 5 : 1 }); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('3', '4'); + g.addEdge('4', '3'); + + const result = detectClusters(g, { + quality: 'cpm', + resolution: 0.5, + randomSeed: 3, + }); + // B-clique (size=1 nodes) merges; quality is finite + const bCommunities = new Set(B.map((i) => result.getClass(i))); + expect(bCommunities.size).toBe(1); + expect(typeof result.quality()).toBe('number'); + expect(Number.isFinite(result.quality())).toBe(true); + }); +}); + +// ─── Directed ───────────────────────────────────────────────────────── + +describe('directed modularity', () => { + it('finds two communities in directed case', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2']; + const B = ['3', '4', '5']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = 0; j < A.length; j++) if (i !== j) g.addEdge(A[i], A[j]); + for (let i = 0; i < B.length; i++) + for (let j = 0; j < B.length; j++) if (i !== j) g.addEdge(B[i], B[j]); + g.addEdge('2', '3'); + + const clusters = detectClusters(g, { directed: true, randomSeed: 2 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const 
cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── Directed self-loops ────────────────────────────────────────────── + +describe('directed self-loops', () => { + it('does not corrupt internal edge weight with directed self-loops', () => { + const g = new CodeGraph(); + const A = ['0', '1', '2']; + const B = ['3', '4', '5']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = 0; j < A.length; j++) if (i !== j) g.addEdge(A[i], A[j]); + for (let i = 0; i < B.length; i++) + for (let j = 0; j < B.length; j++) if (i !== j) g.addEdge(B[i], B[j]); + g.addEdge('2', '3'); + // Add self-loops — these previously caused double-counting in directed mode + g.addEdge('0', '0', { weight: 3 }); + g.addEdge('3', '3', { weight: 3 }); + + const clusters = detectClusters(g, { directed: true, randomSeed: 2 }); + // Quality must be finite (not NaN from negative internal edge weight) + expect(Number.isFinite(clusters.quality())).toBe(true); + expect(clusters.quality()).toBeGreaterThanOrEqual(0); + // A-side nodes should not mix with B-side nodes + const aCommunities = new Set(A.map((i) => clusters.getClass(i))); + const bCommunities = new Set(B.map((i) => clusters.getClass(i))); + const overlap = [...aCommunities].filter((c) => bCommunities.has(c)); + expect(overlap.length).toBe(0); + }); +}); + +// ─── Coarse graph quality ──────────────────────────────────────────── + +describe('coarse graph quality', () => { + it('quality is not inflated by multi-level coarsening', () => { + // Two disconnected 4-cliques: the algorithm should split them into two + // communities. Quality must stay in [-1, 1] and be consistent whether + // the run goes through one or multiple coarsening levels. 
+ const g = new CodeGraph(); + const A = ['a0', 'a1', 'a2', 'a3']; + const B = ['b0', 'b1', 'b2', 'b3']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + const clusters = detectClusters(g, { randomSeed: 42 }); + const q = clusters.quality(); + // Two disjoint K4 cliques: the ideal 2-community partition gives Q = 0.5. + // Each clique has L_c = 6 edges, d_c = 12, 2m = 24: + // Q = 2 × [2·6/24 − (12/24)²] = 2 × 0.25 = 0.5 + expect(q).toBeCloseTo(0.5, 2); + }); +}); + +// ─── Edge cases ─────────────────────────────────────────────────────── + +describe('edge cases', () => { + it('keeps isolated node as its own community', () => { + const g = new CodeGraph(); + g.addNode('x'); + g.addNode('y'); + g.addNode('z'); + g.addEdge('x', 'y'); + g.addEdge('y', 'x'); + + const clusters = detectClusters(g, { randomSeed: 123 }); + expect(clusters.getClass('x')).toBe(clusters.getClass('y')); + expect(clusters.getClass('z')).not.toBe(clusters.getClass('x')); + }); + + it('handles negative weights and preserves intuitive split', () => { + const g = new CodeGraph(); + const A = ['a1', 'a2', 'a3', 'a4']; + const B = ['b1', 'b2', 'b3', 'b4']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + for (let i = 0; i < B.length; i++) + for (let j = i + 1; j < B.length; j++) { + g.addEdge(B[i], B[j]); + g.addEdge(B[j], B[i]); + } + g.addEdge('a4', 'b1', { weight: -2 }); + g.addEdge('b1', 'a4', { weight: -2 }); + g.addEdge('a3', 'b2', { weight: -1 }); + g.addEdge('b2', 'a3', { weight: -1 }); + + const clusters = detectClusters(g, { randomSeed: 7 }); + const cA = new Set(A.map((i) => 
clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); + + it('self-loop biases node to remain separate under weak external ties (CPM)', () => { + const g = new CodeGraph(); + g.addNode('a'); + g.addNode('b'); + g.addEdge('a', 'a', { weight: 5 }); + g.addEdge('a', 'b', { weight: 0.1 }); + g.addEdge('b', 'a', { weight: 0.1 }); + + const clusters = detectClusters(g, { + randomSeed: 5, + quality: 'cpm', + resolution: 1.0, + }); + expect(clusters.getClass('a')).not.toBe(clusters.getClass('b')); + }); + + it('treats a disconnected clique as its own isolated community', () => { + const g = new CodeGraph(); + const A = ['a1', 'a2', 'a3']; + const B = ['b1', 'b2']; + for (const id of [...A, ...B]) g.addNode(id); + for (let i = 0; i < A.length; i++) + for (let j = i + 1; j < A.length; j++) { + g.addEdge(A[i], A[j]); + g.addEdge(A[j], A[i]); + } + g.addEdge('b1', 'b2'); + g.addEdge('b2', 'b1'); + + const clusters = detectClusters(g, { randomSeed: 321 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); +}); + +// ─── Ergonomics & constraints ───────────────────────────────────────── + +describe('ergonomics & constraints', () => { + it('maxCommunitySize is enforced', () => { + const { g, A, B } = makeTwoCliques(3); + const clusters = detectClusters(g, { randomSeed: 123, maxCommunitySize: 3 }); + const cA = new Set(A.map((i) => clusters.getClass(i))); + const cB = new Set(B.map((i) => clusters.getClass(i))); + expect(cA.size).toBe(1); + expect(cB.size).toBe(1); + expect([...cA][0]).not.toBe([...cB][0]); + }); + + it('deterministic with fixed seed even with random strategies', () => { + const { g } = makeTwoCliques(4); + const opts = { randomSeed: 2024, candidateStrategy: 
'random-neighbor' }; + const a = detectClusters(g, opts); + const b = detectClusters(g, opts); + const classesA = new Map(); + const classesB = new Map(); + for (const [id] of g.nodes()) { + classesA.set(id, a.getClass(id)); + classesB.set(id, b.getClass(id)); + } + expect(JSON.stringify([...classesA.entries()].sort())).toBe( + JSON.stringify([...classesB.entries()].sort()), + ); + }); +}); + +// ─── Fixed nodes ────────────────────────────────────────────────────── + +describe('fixed nodes', () => { + it('does not force fixed nodes to leave their clique communities', () => { + const g = makeTwoCliquesBridge(); + const fixedRun = detectClusters(g, { + randomSeed: 11, + refine: true, + fixedNodes: new Set(['3', '4']), + }); + const c3 = fixedRun.getClass('3'); + const c4 = fixedRun.getClass('4'); + expect(fixedRun.getClass('0')).toBe(c3); + expect(fixedRun.getClass('1')).toBe(c3); + expect(fixedRun.getClass('2')).toBe(c3); + expect(fixedRun.getClass('4')).not.toBe(c3); + expect(fixedRun.getClass('5')).toBe(c4); + expect(fixedRun.getClass('6')).toBe(c4); + expect(fixedRun.getClass('7')).toBe(c4); + }); +}); + +// ─── Refinement ─────────────────────────────────────────────────────── + +describe('refinement', () => { + it('keeps cliques separated across refinement', () => { + const g = new CodeGraph(); + const groups = [ + Array.from({ length: 5 }, (_, i) => String(i)), + Array.from({ length: 5 }, (_, i) => String(i + 5)), + Array.from({ length: 5 }, (_, i) => String(i + 10)), + ]; + for (const group of groups) for (const v of group) g.addNode(v); + for (const group of groups) { + for (let i = 0; i < group.length; i++) + for (let j = i + 1; j < group.length; j++) { + g.addEdge(group[i], group[j]); + g.addEdge(group[j], group[i]); + } + } + g.addEdge('4', '5'); + g.addEdge('5', '4'); + g.addEdge('9', '10'); + g.addEdge('10', '9'); + + const clusters = detectClusters(g, { randomSeed: 1, refine: true }); + const c0 = new Set(['0', '1', '2', '3', '4'].map((i) => 
clusters.getClass(i))); + const c1 = new Set(['5', '6', '7', '8', '9'].map((i) => clusters.getClass(i))); + const c2 = new Set(['10', '11', '12', '13', '14'].map((i) => clusters.getClass(i))); + expect(c0.size).toBe(1); + expect(c1.size).toBe(1); + expect(c2.size).toBe(1); + expect([...c0][0]).not.toBe([...c1][0]); + expect([...c1][0]).not.toBe([...c2][0]); + }); +}); diff --git a/tests/graph/model.test.js b/tests/graph/model.test.js index ec925ba0..dc1bbe93 100644 --- a/tests/graph/model.test.js +++ b/tests/graph/model.test.js @@ -144,28 +144,6 @@ describe('CodeGraph — toEdgeArray', () => { }); }); -describe('CodeGraph — toGraphology', () => { - it('creates an undirected graphology graph', () => { - const g = new CodeGraph(); - g.addEdge('a', 'b'); - g.addEdge('b', 'c'); - - const gy = g.toGraphology({ type: 'undirected' }); - expect(gy.order).toBe(3); - expect(gy.size).toBe(2); - expect(gy.type).toBe('undirected'); - }); - - it('skips self-loops', () => { - const g = new CodeGraph(); - g.addEdge('a', 'a'); - g.addEdge('a', 'b'); - - const gy = g.toGraphology({ type: 'undirected' }); - expect(gy.size).toBe(1); - }); -}); - describe('CodeGraph — clone', () => { it('produces an independent copy', () => { const g = new CodeGraph(); diff --git a/tests/integration/communities.test.js b/tests/integration/communities.test.js index 1cee942c..90324367 100644 --- a/tests/integration/communities.test.js +++ b/tests/integration/communities.test.js @@ -95,8 +95,10 @@ describe('communitiesData (file-level)', () => { expect(data.summary.communityCount).toBeGreaterThanOrEqual(2); }); - test('modularity is between 0 and 1', () => { + test('modularity is in valid range', () => { const data = communitiesData(null, { repo }); + // Leiden starts from singleton partition and only makes improving moves. + // Quality should always be non-negative on a real graph. 
expect(data.modularity).toBeGreaterThanOrEqual(0); expect(data.modularity).toBeLessThanOrEqual(1); }); diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js index b7062c48..fc7d7d04 100644 --- a/tests/unit/config.test.js +++ b/tests/unit/config.test.js @@ -121,7 +121,7 @@ describe('DEFAULTS', () => { }); it('has community defaults', () => { - expect(DEFAULTS.community).toEqual({ resolution: 1.0 }); + expect(DEFAULTS.community).toEqual({ resolution: 1.0, maxLevels: 50, maxLocalPasses: 20 }); }); it('has structure defaults', () => {