From f5ee3dee4d116fde881a072d98d111ded6f1d4fd Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:05:31 -0600 Subject: [PATCH 01/37] chore: remove dead exports and un-export internal constant - Remove dead `truncate` function from ast-analysis/shared.js (0 consumers) - Remove dead `truncStart` function from presentation/table.js (0 consumers) - Un-export `BATCH_CHUNK` in builder/helpers.js (only used internally) Skipped sync.json targets that were false positives: - BUILTIN_RECEIVERS: used by incremental.js + build-edges.js - TRANSIENT_CODES/RETRY_DELAY_MS: internal to readFileSafe - MAX_COL_WIDTH: internal to printAutoTable - findFunctionNode: re-exported from index.js, used in tests Impact: 1 functions changed, 32 affected --- src/ast-analysis/shared.js | 12 ------------ src/domain/graph/builder/helpers.js | 2 +- src/presentation/table.js | 8 -------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/ast-analysis/shared.js b/src/ast-analysis/shared.js index 964f9a06..e3f40bd0 100644 --- a/src/ast-analysis/shared.js +++ b/src/ast-analysis/shared.js @@ -176,18 +176,6 @@ export function findFunctionNode(rootNode, startLine, _endLine, rules) { return best; } -/** - * Truncate a string to a maximum length, appending an ellipsis if truncated. - * - * @param {string} str - Input string - * @param {number} [max=200] - Maximum length - * @returns {string} - */ -export function truncate(str, max = 200) { - if (!str) return ''; - return str.length > max ? 
`${str.slice(0, max)}…` : str; -} - // ─── Extension / Language Mapping ───────────────────────────────────────── /** diff --git a/src/domain/graph/builder/helpers.js b/src/domain/graph/builder/helpers.js index 038de4c2..b7916c84 100644 --- a/src/domain/graph/builder/helpers.js +++ b/src/domain/graph/builder/helpers.js @@ -179,7 +179,7 @@ export function purgeFilesFromGraph(db, files, options = {}) { } /** Batch INSERT chunk size for multi-value INSERTs. */ -export const BATCH_CHUNK = 200; +const BATCH_CHUNK = 200; /** * Batch-insert node rows via multi-value INSERT statements. diff --git a/src/presentation/table.js b/src/presentation/table.js index d5ef1903..4fdba379 100644 --- a/src/presentation/table.js +++ b/src/presentation/table.js @@ -37,11 +37,3 @@ export function truncEnd(str, maxLen) { if (str.length <= maxLen) return str; return `${str.slice(0, maxLen - 1)}\u2026`; } - -/** - * Truncate a string from the start, prepending '\u2026' if truncated. - */ -export function truncStart(str, maxLen) { - if (str.length <= maxLen) return str; - return `\u2026${str.slice(-(maxLen - 1))}`; -} From 17cdcb00984f582485f8582734a40e3df4211d10 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:14:13 -0600 Subject: [PATCH 02/37] refactor: extract shared findNodes utility from cfg and dataflow features Impact: 5 functions changed, 7 affected --- src/features/cfg.js | 31 +++++------------ src/features/dataflow.js | 55 +++++++++++++++---------------- src/features/shared/find-nodes.js | 32 ++++++++++++++++++ 3 files changed, 66 insertions(+), 52 deletions(-) create mode 100644 src/features/shared/find-nodes.js diff --git a/src/features/cfg.js b/src/features/cfg.js index e8728cab..eff08652 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -24,8 +24,8 @@ import { openReadonlyOrFail, } from '../db/index.js'; import { info } from '../infrastructure/logger.js'; -import { isTestFile } from 
'../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +import { findNodes } from './shared/find-nodes.js'; // Re-export for backward compatibility export { _makeCfgRules as makeCfgRules, CFG_RULES }; @@ -273,27 +273,7 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { // ─── Query-Time Functions ─────────────────────────────────────────────── -function findNodes(db, name, opts = {}) { - const kinds = opts.kind ? [opts.kind] : ['function', 'method']; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND n.file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line - FROM nodes n - WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition}`, - ) - .all(...params); - - return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; -} +const CFG_DEFAULT_KINDS = ['function', 'method']; /** * Load CFG data for a function from the database. 
@@ -317,7 +297,12 @@ export function cfgData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + CFG_DEFAULT_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 9d0c8bcc..0f500b8f 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -24,6 +24,7 @@ import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; import { info } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +import { findNodes } from './shared/find-nodes.js'; // Re-export for backward compatibility export { _makeDataflowRules as makeDataflowRules, DATAFLOW_RULES }; @@ -234,31 +235,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) // ── Query functions ───────────────────────────────────────────────────────── -/** - * Look up node(s) by name with optional file/kind/noTests filtering. - * Similar to findMatchingNodes in queries.js but operates on the dataflow table. - */ -function findNodes(db, name, opts = {}) { - const kinds = opts.kind ? [opts.kind] : ALL_SYMBOL_KINDS; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare( - `SELECT * FROM nodes - WHERE name LIKE ? AND kind IN (${placeholders})${fileCondition} - ORDER BY file, line`, - ) - .all(...params); - - return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; -} +// findNodes imported from ./shared/find-nodes.js /** * Return all dataflow edges for a symbol. 
@@ -282,7 +259,12 @@ export function dataflowData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } @@ -426,12 +408,22 @@ export function dataflowPathData(from, to, customDbPath, opts = {}) { }; } - const fromNodes = findNodes(db, from, { noTests, file: opts.fromFile, kind: opts.kind }); + const fromNodes = findNodes( + db, + from, + { noTests, file: opts.fromFile, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (fromNodes.length === 0) { return { from, to, found: false, error: `No symbol matching "${from}"` }; } - const toNodes = findNodes(db, to, { noTests, file: opts.toFile, kind: opts.kind }); + const toNodes = findNodes( + db, + to, + { noTests, file: opts.toFile, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (toNodes.length === 0) { return { from, to, found: false, error: `No symbol matching "${to}"` }; } @@ -554,7 +546,12 @@ export function dataflowImpactData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } diff --git a/src/features/shared/find-nodes.js b/src/features/shared/find-nodes.js new file mode 100644 index 00000000..cc886d80 --- /dev/null +++ b/src/features/shared/find-nodes.js @@ -0,0 +1,32 @@ +import { isTestFile } from '../../infrastructure/test-filter.js'; + +/** + * Look up node(s) by name with optional file/kind/noTests filtering. 
+ * * @param {object} db - open SQLite database handle + * @param {string} name - symbol name (partial LIKE match) + * @param {object} [opts] - { kind, file, noTests } + * @param {string[]} defaultKinds - fallback kinds when opts.kind is not set + * @returns {object[]} matching node rows + */ +export function findNodes(db, name, opts = {}, defaultKinds) { + const kinds = opts.kind ? [opts.kind] : defaultKinds; + const placeholders = kinds.map(() => '?').join(', '); + const params = [`%${name}%`, ...kinds]; + + let fileCondition = ''; + if (opts.file) { + fileCondition = ' AND file LIKE ?'; + params.push(`%${opts.file}%`); + } + + const rows = db + .prepare( + `SELECT * FROM nodes + WHERE name LIKE ? AND kind IN (${placeholders})${fileCondition} + ORDER BY file, line`, + ) + .all(...params); + + return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; +} From a09740d9184ea58b3cdcecfeebb964f8743594e8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:18:59 -0600 Subject: [PATCH 03/37] fix: replace empty catch blocks in db connection and migrations connection.js: add debug() logging to all 9 catch blocks (6 with a fallback value, 3 best-effort advisory-lock handlers) so failures are observable without changing behavior. migrations.js: replace 14 try/catch blocks in initSchema with hasColumn() and hasTable() guards. CREATE INDEX calls use IF NOT EXISTS directly. getBuildMeta uses hasTable() check instead of try/catch. Impact: 10 functions changed, 19 affected --- src/db/connection.js | 30 +++++++++------- src/db/migrations.js | 86 +++++++++++++++----------------------------- 2 files changed, 46 insertions(+), 70 deletions(-) diff --git a/src/db/connection.js b/src/db/connection.js index 75ee4a6d..59114bbd 100644 --- a/src/db/connection.js +++ b/src/db/connection.js @@ -37,10 +37,12 @@ export function findRepoRoot(fromDir) { // matches the realpathSync'd dir in findDbPath. 
try { root = fs.realpathSync(raw); - } catch { + } catch (e) { + debug(`realpathSync failed for git root "${raw}", using resolve: ${e.message}`); root = path.resolve(raw); } - } catch { + } catch (e) { + debug(`git rev-parse failed for "${dir}": ${e.message}`); root = null; } if (!fromDir) { @@ -60,7 +62,8 @@ function isProcessAlive(pid) { try { process.kill(pid, 0); return true; - } catch { + } catch (e) { + debug(`PID ${pid} not alive: ${e.code || e.message}`); return false; } } @@ -75,13 +78,13 @@ function acquireAdvisoryLock(dbPath) { warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); } } - } catch { - /* ignore read errors */ + } catch (e) { + debug(`Advisory lock read failed: ${e.message}`); } try { fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); - } catch { - /* best-effort */ + } catch (e) { + debug(`Advisory lock write failed: ${e.message}`); } } @@ -91,8 +94,8 @@ function releaseAdvisoryLock(lockPath) { if (Number(content) === process.pid) { fs.unlinkSync(lockPath); } - } catch { - /* ignore */ + } catch (e) { + debug(`Advisory lock release failed for ${lockPath}: ${e.message}`); } } @@ -107,7 +110,8 @@ function isSameDirectory(a, b) { const sa = fs.statSync(a); const sb = fs.statSync(b); return sa.dev === sb.dev && sa.ino === sb.ino; - } catch { + } catch (e) { + debug(`isSameDirectory stat failed: ${e.message}`); return false; } } @@ -139,7 +143,8 @@ export function findDbPath(customPath) { if (rawCeiling) { try { ceiling = fs.realpathSync(rawCeiling); - } catch { + } catch (e) { + debug(`realpathSync failed for ceiling "${rawCeiling}": ${e.message}`); ceiling = rawCeiling; } } else { @@ -149,7 +154,8 @@ export function findDbPath(customPath) { let dir; try { dir = fs.realpathSync(process.cwd()); - } catch { + } catch (e) { + debug(`realpathSync failed for cwd: ${e.message}`); dir = process.cwd(); } while (true) { diff --git a/src/db/migrations.js b/src/db/migrations.js index 3b38feff..8a12bda2 100644 
--- a/src/db/migrations.js +++ b/src/db/migrations.js @@ -242,13 +242,20 @@ export const MIGRATIONS = [ }, ]; +function hasColumn(db, table, column) { + const cols = db.pragma(`table_info(${table})`); + return cols.some((c) => c.name === column); +} + +function hasTable(db, table) { + const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?").get(table); + return !!row; +} + export function getBuildMeta(db, key) { - try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); - return row ? row.value : null; - } catch { - return null; - } + if (!hasTable(db, 'build_meta')) return null; + const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + return row ? row.value : null; } export function setBuildMeta(db, entries) { @@ -280,74 +287,37 @@ export function initSchema(db) { } } - try { + // Legacy column compat — add columns that may be missing from pre-migration DBs + if (!hasColumn(db, 'nodes', 'end_line')) { db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'edges', 'confidence')) { db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'edges', 'dynamic')) { db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'role')) { db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); - } catch { - /* already exists */ } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); - } catch { - /* already exists */ - } - try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + if (!hasColumn(db, 'nodes', 'parent_id')) { db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); - } 
catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); - } catch { - /* already exists */ } - try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + if (!hasColumn(db, 'nodes', 'qualified_name')) { db.exec('ALTER TABLE nodes ADD COLUMN qualified_name TEXT'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'scope')) { db.exec('ALTER TABLE nodes ADD COLUMN scope TEXT'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'visibility')) { db.exec('ALTER TABLE nodes ADD COLUMN visibility TEXT'); - } catch { - /* already exists */ } - try { + if (hasTable(db, 'nodes')) { db.exec('UPDATE nodes SET qualified_name = name WHERE qualified_name IS NULL'); - } catch { - /* nodes table may not exist yet */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); - } catch { - /* already exists */ } + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); } From b691fcc90b9cc9757997cfc076af00b5dd756473 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:21:22 -0600 Subject: [PATCH 04/37] fix: replace empty catch blocks in domain analysis layer Add debug() logging to 10 empty catch blocks across context.js, symbol-lookup.js, exports.js, impact.js, and module-map.js. All catches retain their fallback behavior but failures are now observable via debug logging. 
Impact: 6 functions changed, 18 affected --- src/domain/analysis/context.js | 13 +++++++------ src/domain/analysis/exports.js | 5 +++-- src/domain/analysis/impact.js | 13 +++++++------ src/domain/analysis/module-map.js | 9 +++++---- src/domain/analysis/symbol-lookup.js | 4 +++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/domain/analysis/context.js b/src/domain/analysis/context.js index e3409208..a97e5419 100644 --- a/src/domain/analysis/context.js +++ b/src/domain/analysis/context.js @@ -13,6 +13,7 @@ import { getComplexityForNode, openReadonlyOrFail, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { createFileLinesReader, @@ -142,8 +143,8 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { halsteadVolume: cRow.halstead_volume || 0, }; } - } catch { - /* table may not exist */ + } catch (e) { + debug(`complexity lookup failed for node ${node.id}: ${e.message}`); } return { @@ -311,8 +312,8 @@ export function contextData(name, customDbPath, opts = {}) { halsteadVolume: cRow.halstead_volume || 0, }; } - } catch { - /* table may not exist */ + } catch (e) { + debug(`complexity lookup failed for node ${node.id}: ${e.message}`); } // Children (parameters, properties, constants) @@ -324,8 +325,8 @@ export function contextData(name, customDbPath, opts = {}) { line: c.line, endLine: c.end_line || null, })); - } catch { - /* parent_id column may not exist */ + } catch (e) { + debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); } return { diff --git a/src/domain/analysis/exports.js b/src/domain/analysis/exports.js index 9af6b807..7bebac40 100644 --- a/src/domain/analysis/exports.js +++ b/src/domain/analysis/exports.js @@ -6,6 +6,7 @@ import { findNodesByFile, openReadonlyOrFail, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from 
'../../infrastructure/test-filter.js'; import { createFileLinesReader, @@ -60,8 +61,8 @@ function exportsFileImpl(db, target, noTests, getFileLines, unused) { try { db.prepare('SELECT exported FROM nodes LIMIT 0').raw(); hasExportedCol = true; - } catch { - /* old DB without exported column */ + } catch (e) { + debug(`exported column not available, using fallback: ${e.message}`); } return fileNodes.map((fn) => { diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index 736d76e0..bd3bbe1d 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -13,6 +13,7 @@ import { evaluateBoundaries } from '../../features/boundaries.js'; import { coChangeForFiles } from '../../features/cochange.js'; import { ownersForFiles } from '../../features/owners.js'; import { loadConfig } from '../../infrastructure/config.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; @@ -289,8 +290,8 @@ export function diffImpactData(customDbPath, opts = {}) { }); // Exclude files already found via static analysis historicallyCoupled = coResults.filter((r) => !affectedFiles.has(r.file)); - } catch { - /* co_changes table doesn't exist — skip silently */ + } catch (e) { + debug(`co_changes lookup skipped: ${e.message}`); } // Look up CODEOWNERS for changed + affected files @@ -305,8 +306,8 @@ export function diffImpactData(customDbPath, opts = {}) { suggestedReviewers: ownerResult.suggestedReviewers, }; } - } catch { - /* CODEOWNERS missing or unreadable — skip silently */ + } catch (e) { + debug(`CODEOWNERS lookup skipped: ${e.message}`); } // Check boundary violations scoped to changed files @@ -323,8 +324,8 @@ export function diffImpactData(customDbPath, opts = {}) { boundaryViolations = result.violations; boundaryViolationCount = result.violationCount; } - 
} catch { - /* boundary check failed — skip silently */ + } catch (e) { + debug(`boundary check skipped: ${e.message}`); } const base = { diff --git a/src/domain/analysis/module-map.js b/src/domain/analysis/module-map.js index e6aa0936..d2bc613b 100644 --- a/src/domain/analysis/module-map.js +++ b/src/domain/analysis/module-map.js @@ -1,5 +1,6 @@ import path from 'node:path'; import { openReadonlyOrFail, testFilterSQL } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { findCycles } from '../graph/cycles.js'; import { LANGUAGE_REGISTRY } from '../parser.js'; @@ -193,8 +194,8 @@ export function statsData(customDbPath, opts = {}) { builtAt: meta.built_at || null, }; } - } catch { - /* embeddings table may not exist */ + } catch (e) { + debug(`embeddings lookup skipped: ${e.message}`); } // Graph quality metrics @@ -301,8 +302,8 @@ export function statsData(customDbPath, opts = {}) { minMI: +Math.min(...miValues).toFixed(1), }; } - } catch { - /* table may not exist in older DBs */ + } catch (e) { + debug(`complexity summary skipped: ${e.message}`); } return { diff --git a/src/domain/analysis/symbol-lookup.js b/src/domain/analysis/symbol-lookup.js index b272004a..312581cc 100644 --- a/src/domain/analysis/symbol-lookup.js +++ b/src/domain/analysis/symbol-lookup.js @@ -14,6 +14,7 @@ import { openReadonlyOrFail, Repository, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { ALL_SYMBOL_KINDS } from '../../shared/kinds.js'; import { getFileHash, normalizeSymbol } from '../../shared/normalize.js'; @@ -206,7 +207,8 @@ export function childrenData(name, customDbPath, opts = {}) { let children; try { children = findNodeChildren(db, node.id); - } catch { + } catch (e) { + debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); children = []; } if 
(noTests) children = children.filter((c) => !isTestFile(c.file || node.file)); From dadb383a8dea5b7be7ab7ea7ac7e705633de9314 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:22:28 -0600 Subject: [PATCH 05/37] fix: replace empty catch blocks in parser.js Add debug() logging to 6 empty catch blocks: 3 in disposeParsers() for WASM resource cleanup, 2 in ensureWasmTrees() for file read and parse failures, and 1 in getActiveEngine() for version lookup. Impact: 3 functions changed, 0 affected --- src/domain/parser.js | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/domain/parser.js b/src/domain/parser.js index fb41d473..476e6184 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -2,7 +2,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { Language, Parser, Query } from 'web-tree-sitter'; -import { warn } from '../infrastructure/logger.js'; +import { debug, warn } from '../infrastructure/logger.js'; import { getNative, getNativePackageVersion, loadNative } from '../infrastructure/native.js'; // Re-export all extractors for backward compatibility @@ -116,29 +116,35 @@ export async function createParsers() { */ export function disposeParsers() { if (_cachedParsers) { - for (const [, parser] of _cachedParsers) { + for (const [id, parser] of _cachedParsers) { if (parser && typeof parser.delete === 'function') { try { parser.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose parser ${id}: ${e.message}`); + } } } _cachedParsers = null; } - for (const [, query] of _queryCache) { + for (const [id, query] of _queryCache) { if (query && typeof query.delete === 'function') { try { query.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose query ${id}: ${e.message}`); + } } } _queryCache.clear(); if (_cachedLanguages) { - for (const [, lang] of _cachedLanguages) { + 
for (const [id, lang] of _cachedLanguages) { if (lang && typeof lang.delete === 'function') { try { lang.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose language ${id}: ${e.message}`); + } } } _cachedLanguages = null; @@ -189,14 +195,15 @@ export async function ensureWasmTrees(fileSymbols, rootDir) { let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`ensureWasmTrees: cannot read ${relPath}: ${e.message}`); continue; } try { symbols._tree = parser.parse(code); symbols._langId = entry.id; - } catch { - // skip files that fail to parse + } catch (e) { + debug(`ensureWasmTrees: parse failed for ${relPath}: ${e.message}`); } } } @@ -483,7 +490,9 @@ export function getActiveEngine(opts = {}) { if (native) { try { version = getNativePackageVersion() ?? version; - } catch {} + } catch (e) { + debug(`getNativePackageVersion failed: ${e.message}`); + } } return { name, version }; } From 22d94f4f70437a5c319431e0ee5a1e313ffdeef3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:29:16 -0600 Subject: [PATCH 06/37] fix: replace empty catch blocks in features layer Add debug() logging to 9 empty catch blocks across complexity.js (5), cfg.js (2), and dataflow.js (2). All catches for file read and parse failures now log the error message before continuing. 
Impact: 4 functions changed, 2 affected --- src/features/cfg.js | 8 +++++--- src/features/complexity.js | 23 +++++++++++++---------- src/features/dataflow.js | 8 +++++--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/features/cfg.js b/src/features/cfg.js index eff08652..ae1b8564 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -23,7 +23,7 @@ import { hasCfgTables, openReadonlyOrFail, } from '../db/index.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } from '../infrastructure/logger.js'; import { paginateResult } from '../shared/paginate.js'; import { findNodes } from './shared/find-nodes.js'; @@ -149,7 +149,8 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`cfg: cannot read ${relPath}: ${e.message}`); continue; } @@ -158,7 +159,8 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`cfg: parse failed for ${relPath}: ${e.message}`); continue; } } diff --git a/src/features/complexity.js b/src/features/complexity.js index c5cdf62e..12f5acf1 100644 --- a/src/features/complexity.js +++ b/src/features/complexity.js @@ -14,7 +14,7 @@ import { walkWithVisitors } from '../ast-analysis/visitor.js'; import { createComplexityVisitor } from '../ast-analysis/visitors/complexity-visitor.js'; import { getFunctionNodeId, openReadonlyOrFail } from '../db/index.js'; import { loadConfig } from '../infrastructure/config.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; @@ -401,7 +401,8 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp let code; try { code = fs.readFileSync(absPath, 
'utf-8'); - } catch { + } catch (e) { + debug(`complexity: cannot read ${relPath}: ${e.message}`); continue; } @@ -410,7 +411,8 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`complexity: parse failed for ${relPath}: ${e.message}`); continue; } } @@ -606,13 +608,14 @@ export function complexityData(customDbPath, opts = {}) { ORDER BY ${orderBy}`, ) .all(...params); - } catch { + } catch (e) { + debug(`complexity query failed (table may not exist): ${e.message}`); // Check if graph has nodes even though complexity table is missing/empty let hasGraph = false; try { hasGraph = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c > 0; - } catch { - /* ignore */ + } catch (e2) { + debug(`nodes table check failed: ${e2.message}`); } return { functions: [], summary: null, thresholds, hasGraph }; } @@ -701,8 +704,8 @@ export function complexityData(customDbPath, opts = {}) { ).length, }; } - } catch { - /* ignore */ + } catch (e) { + debug(`complexity summary query failed: ${e.message}`); } // When summary is null (no complexity rows), check if graph has nodes @@ -710,8 +713,8 @@ export function complexityData(customDbPath, opts = {}) { if (summary === null) { try { hasGraph = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c > 0; - } catch { - /* ignore */ + } catch (e) { + debug(`nodes table check failed: ${e.message}`); } } diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 0f500b8f..695afa95 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -21,7 +21,7 @@ import { walkWithVisitors } from '../ast-analysis/visitor.js'; import { createDataflowVisitor } from '../ast-analysis/visitors/dataflow-visitor.js'; import { hasDataflowTable, openReadonlyOrFail } from '../db/index.js'; import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } 
from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; import { findNodes } from './shared/find-nodes.js'; @@ -141,7 +141,8 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`dataflow: cannot read ${relPath}: ${e.message}`); continue; } @@ -150,7 +151,8 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`dataflow: parse failed for ${relPath}: ${e.message}`); continue; } } From 3b365347a3e9e3ce39b652d674b69db8e7458278 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:39:16 -0600 Subject: [PATCH 07/37] refactor: decompose extractSymbolsWalk into per-category handlers Split the monolithic walkJavaScriptNode switch (13 cases, cognitive 228) into 11 focused handler functions. The dispatcher is now a thin switch that delegates to handleFunctionDecl, handleClassDecl, handleMethodDef, handleInterfaceDecl, handleTypeAliasDecl, handleVariableDecl, handleEnumDecl, handleCallExpr, handleImportStmt, handleExportStmt, and handleExpressionStmt. The expression_statement case now reuses the existing handleCommonJSAssignment helper, eliminating ~50 lines of duplication. Worst handler complexity: handleVariableDecl (cognitive 20), down from the original monolithic function (cognitive 279). 
Impact: 13 functions changed, 3 affected --- src/extractors/javascript.js | 578 +++++++++++++++++------------------ 1 file changed, 274 insertions(+), 304 deletions(-) diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index a2d9e7b1..997c8ea6 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -320,333 +320,303 @@ function handleCommonJSAssignment(left, right, node, imports) { // ── Manual tree walk (fallback when Query not available) ──────────────────── function extractSymbolsWalk(tree) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function walkJavaScriptNode(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const fnChildren = extractParameters(node); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fnChildren.length > 0 ? fnChildren : undefined, - }); - } - break; - } + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; + + walkJavaScriptNode(tree.rootNode, ctx); + return ctx; +} - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const className = nameNode.text; - const startLine = node.startPosition.row + 1; - const clsChildren = extractClassProperties(node); - definitions.push({ - name: className, - kind: 'class', - line: startLine, - endLine: nodeEndLine(node), - children: clsChildren.length > 0 ? 
clsChildren : undefined, - }); - const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); - if (heritage) { - const superName = extractSuperclass(heritage); - if (superName) { - classes.push({ name: className, extends: superName, line: startLine }); - } - const implementsList = extractImplements(heritage); - for (const iface of implementsList) { - classes.push({ name: className, implements: iface, line: startLine }); - } - } - } - break; - } +function walkJavaScriptNode(node, ctx) { + switch (node.type) { + case 'function_declaration': + handleFunctionDecl(node, ctx); + break; + case 'class_declaration': + handleClassDecl(node, ctx); + break; + case 'method_definition': + handleMethodDef(node, ctx); + break; + case 'interface_declaration': + handleInterfaceDecl(node, ctx); + break; + case 'type_alias_declaration': + handleTypeAliasDecl(node, ctx); + break; + case 'lexical_declaration': + case 'variable_declaration': + handleVariableDecl(node, ctx); + break; + case 'enum_declaration': + handleEnumDecl(node, ctx); + break; + case 'call_expression': + handleCallExpr(node, ctx); + break; + case 'import_statement': + handleImportStmt(node, ctx); + break; + case 'export_statement': + handleExportStmt(node, ctx); + break; + case 'expression_statement': + handleExpressionStmt(node, ctx); + break; + } - case 'method_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const methChildren = extractParameters(node); - const methVis = extractVisibility(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: methChildren.length > 0 ? 
methChildren : undefined, - visibility: methVis, - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) { + walkJavaScriptNode(node.child(i), ctx); + } +} - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = - node.childForFieldName('body') || - findChild(node, 'interface_body') || - findChild(node, 'object_type'); - if (body) { - extractInterfaceMethods(body, nameNode.text, definitions); - } - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'type_alias_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } +function handleFunctionDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const fnChildren = extractParameters(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fnChildren.length > 0 ? 
fnChildren : undefined, + }); + } +} - case 'lexical_declaration': - case 'variable_declaration': { - const isConst = node.text.startsWith('const '); - for (let i = 0; i < node.childCount; i++) { - const declarator = node.child(i); - if (declarator && declarator.type === 'variable_declarator') { - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - if (nameN && valueN) { - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' - ) { - const varFnChildren = extractParameters(valueN); - definitions.push({ - name: nameN.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(valueN), - children: varFnChildren.length > 0 ? varFnChildren : undefined, - }); - } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { - definitions.push({ - name: nameN.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } else if (isConst && nameN && nameN.type === 'identifier' && !valueN) { - // const with no value (shouldn't happen but be safe) - } - } - } - break; - } +function handleClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const className = nameNode.text; + const startLine = node.startPosition.row + 1; + const clsChildren = extractClassProperties(node); + ctx.definitions.push({ + name: className, + kind: 'class', + line: startLine, + endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? 
clsChildren : undefined, + }); + const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); + if (heritage) { + const superName = extractSuperclass(heritage); + if (superName) { + ctx.classes.push({ name: className, extends: superName, line: startLine }); + } + const implementsList = extractImplements(heritage); + for (const iface of implementsList) { + ctx.classes.push({ name: className, implements: iface, line: startLine }); + } + } +} - case 'enum_declaration': { - // TypeScript enum - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = []; - const body = node.childForFieldName('body') || findChild(node, 'enum_body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member) continue; - if (member.type === 'enum_assignment' || member.type === 'property_identifier') { - const mName = member.childForFieldName('name') || member.child(0); - if (mName) { - enumChildren.push({ - name: mName.text, - kind: 'constant', - line: member.startPosition.row + 1, - }); - } - } - } - } - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } +function handleMethodDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const methChildren = extractParameters(node); + const methVis = extractVisibility(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: methChildren.length > 0 ? 
methChildren : undefined, + visibility: methVis, + }); + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - // Dynamic import(): import('./foo.js') → extract as an import entry - if (fn.type === 'import') { - const args = node.childForFieldName('arguments') || findChild(node, 'arguments'); - if (args) { - const strArg = findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - // Extract destructured names from parent context: - // const { a, b } = await import('./foo.js') - // (standalone import('./foo.js').then(...) calls produce an edge with empty names) - const names = extractDynamicImportNames(node); - imports.push({ - source: modPath, - names, - line: node.startPosition.row + 1, - dynamicImport: true, - }); - } else { - debug( - `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`, - ); - } - } - } else { - const callInfo = extractCallInfo(fn, node); - if (callInfo) calls.push(callInfo); - if (fn.type === 'member_expression') { - const cbDef = extractCallbackDefinition(node, fn); - if (cbDef) definitions.push(cbDef); - } - } - } - break; - } +function handleInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = + node.childForFieldName('body') || + findChild(node, 'interface_body') || + findChild(node, 'object_type'); + if (body) { + extractInterfaceMethods(body, nameNode.text, ctx.definitions); + } +} - case 'import_statement': { - const isTypeOnly = node.text.startsWith('import type'); - const source = node.childForFieldName('source') || findChild(node, 'string'); - if (source) { - const modPath = source.text.replace(/['"]/g, ''); - const names = extractImportNames(node); - imports.push({ - source: modPath, - names, 
+function handleTypeAliasDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } +} + +function handleVariableDecl(node, ctx) { + const isConst = node.text.startsWith('const '); + for (let i = 0; i < node.childCount; i++) { + const declarator = node.child(i); + if (declarator && declarator.type === 'variable_declarator') { + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (nameN && valueN) { + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' + ) { + const varFnChildren = extractParameters(valueN); + ctx.definitions.push({ + name: nameN.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, + }); + } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', line: node.startPosition.row + 1, - typeOnly: isTypeOnly, + endLine: nodeEndLine(node), }); } - break; } + } + } +} - case 'export_statement': { - const exportLine = node.startPosition.row + 1; - const decl = node.childForFieldName('declaration'); - if (decl) { - const declType = decl.type; - const kindMap = { - function_declaration: 'function', - class_declaration: 'class', - interface_declaration: 'interface', - type_alias_declaration: 'type', - }; - const kind = kindMap[declType]; - if (kind) { - const n = decl.childForFieldName('name'); - if (n) exports.push({ name: n.text, kind, line: exportLine }); - } - } - const source = node.childForFieldName('source') || findChild(node, 'string'); - if (source && !decl) { - const modPath = source.text.replace(/['"]/g, ''); - const reexportNames = 
extractImportNames(node); - const nodeText = node.text; - const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); - imports.push({ - source: modPath, - names: reexportNames, - line: exportLine, - reexport: true, - wildcardReexport: isWildcard && reexportNames.length === 0, +function handleEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = []; + const body = node.childForFieldName('body') || findChild(node, 'enum_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 'enum_assignment' || member.type === 'property_identifier') { + const mName = member.childForFieldName('name') || member.child(0); + if (mName) { + enumChildren.push({ + name: mName.text, + kind: 'constant', + line: member.startPosition.row + 1, }); } - break; } + } + } + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'expression_statement': { - const expr = node.child(0); - if (expr && expr.type === 'assignment_expression') { - const left = expr.childForFieldName('left'); - const right = expr.childForFieldName('right'); - if (left && right) { - const leftText = left.text; - if (leftText.startsWith('module.exports') || leftText === 'exports') { - if (right.type === 'call_expression') { - const fn = right.childForFieldName('function'); - const args = right.childForFieldName('arguments') || findChild(right, 'arguments'); - if (fn && fn.text === 'require' && args) { - const strArg = findChild(args, 'string'); - if (strArg) { - imports.push({ - source: strArg.text.replace(/['"]/g, ''), - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - if (right.type === 'object') { - for (let ci = 0; ci < right.childCount; ci++) { - const child = right.child(ci); - if (child && child.type === 'spread_element') { - const spreadExpr = child.child(1) || child.childForFieldName('value'); - if (spreadExpr && spreadExpr.type === 'call_expression') { - const fn2 = spreadExpr.childForFieldName('function'); - const args2 = - spreadExpr.childForFieldName('arguments') || - findChild(spreadExpr, 'arguments'); - if (fn2 && fn2.text === 'require' && args2) { - const strArg2 = findChild(args2, 'string'); - if (strArg2) { - imports.push({ - source: strArg2.text.replace(/['"]/g, ''), - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - } - } - } - } - } - } - break; +function handleCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'import') { + const args = node.childForFieldName('arguments') || findChild(node, 'arguments'); + if (args) { + const strArg = findChild(args, 'string'); + if (strArg) { + const modPath = strArg.text.replace(/['"]/g, ''); + const names = extractDynamicImportNames(node); 
+ ctx.imports.push({ + source: modPath, + names, + line: node.startPosition.row + 1, + dynamicImport: true, + }); + } else { + debug( + `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`, + ); } } + } else { + const callInfo = extractCallInfo(fn, node); + if (callInfo) ctx.calls.push(callInfo); + if (fn.type === 'member_expression') { + const cbDef = extractCallbackDefinition(node, fn); + if (cbDef) ctx.definitions.push(cbDef); + } + } +} - for (let i = 0; i < node.childCount; i++) { - walkJavaScriptNode(node.child(i)); +function handleImportStmt(node, ctx) { + const isTypeOnly = node.text.startsWith('import type'); + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source) { + const modPath = source.text.replace(/['"]/g, ''); + const names = extractImportNames(node); + ctx.imports.push({ + source: modPath, + names, + line: node.startPosition.row + 1, + typeOnly: isTypeOnly, + }); + } +} + +function handleExportStmt(node, ctx) { + const exportLine = node.startPosition.row + 1; + const decl = node.childForFieldName('declaration'); + if (decl) { + const declType = decl.type; + const kindMap = { + function_declaration: 'function', + class_declaration: 'class', + interface_declaration: 'interface', + type_alias_declaration: 'type', + }; + const kind = kindMap[declType]; + if (kind) { + const n = decl.childForFieldName('name'); + if (n) ctx.exports.push({ name: n.text, kind, line: exportLine }); } } + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source && !decl) { + const modPath = source.text.replace(/['"]/g, ''); + const reexportNames = extractImportNames(node); + const nodeText = node.text; + const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); + ctx.imports.push({ + source: modPath, + names: reexportNames, + line: exportLine, + reexport: true, + wildcardReexport: isWildcard && reexportNames.length === 0, + 
}); + } +} - walkJavaScriptNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleExpressionStmt(node, ctx) { + const expr = node.child(0); + if (expr && expr.type === 'assignment_expression') { + const left = expr.childForFieldName('left'); + const right = expr.childForFieldName('right'); + handleCommonJSAssignment(left, right, node, ctx.imports); + } } // ── Child extraction helpers ──────────────────────────────────────────────── From e1d7ee03846d70178fa75db4145482375687d12b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:41:20 -0600 Subject: [PATCH 08/37] refactor: decompose extractPythonSymbols into per-category handlers Split walkPythonNode switch into 7 focused handlers: handlePyFunctionDef, handlePyClassDef, handlePyCall, handlePyImport, handlePyExpressionStmt, handlePyImportFrom, plus the decorated_definition inline dispatch. Moved extractPythonParameters, extractPythonClassProperties, walkInitBody, and findPythonParentClass from closures to module-scope functions. Impact: 12 functions changed, 5 affected --- src/extractors/python.js | 502 ++++++++++++++++++++------------------- 1 file changed, 252 insertions(+), 250 deletions(-) diff --git a/src/extractors/python.js b/src/extractors/python.js index 968dbacb..053a07ca 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -4,292 +4,294 @@ import { findChild, nodeEndLine, pythonVisibility } from './helpers.js'; * Extract symbols from Python files. 
*/ export function extractPythonSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function walkPythonNode(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const decorators = []; - if (node.previousSibling && node.previousSibling.type === 'decorator') { - decorators.push(node.previousSibling.text); - } - const parentClass = findPythonParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const kind = parentClass ? 'method' : 'function'; - const fnChildren = extractPythonParameters(node); - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - decorators, - children: fnChildren.length > 0 ? fnChildren : undefined, - visibility: pythonVisibility(nameNode.text), - }); - } - break; - } + walkPythonNode(tree.rootNode, ctx); + return ctx; +} - case 'class_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const clsChildren = extractPythonClassProperties(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: clsChildren.length > 0 ? 
clsChildren : undefined, - }); - const superclasses = - node.childForFieldName('superclasses') || findChild(node, 'argument_list'); - if (superclasses) { - for (let i = 0; i < superclasses.childCount; i++) { - const child = superclasses.child(i); - if (child && child.type === 'identifier') { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - break; - } +function walkPythonNode(node, ctx) { + switch (node.type) { + case 'function_definition': + handlePyFunctionDef(node, ctx); + break; + case 'class_definition': + handlePyClassDef(node, ctx); + break; + case 'decorated_definition': + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i), ctx); + return; + case 'call': + handlePyCall(node, ctx); + break; + case 'import_statement': + handlePyImport(node, ctx); + break; + case 'expression_statement': + handlePyExpressionStmt(node, ctx); + break; + case 'import_from_statement': + handlePyImportFrom(node, ctx); + break; + } - case 'decorated_definition': { - for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); - return; - } + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i), ctx); +} - case 'call': { - const fn = node.childForFieldName('function'); - if (fn) { - let callName = null; - let receiver; - if (fn.type === 'identifier') callName = fn.text; - else if (fn.type === 'attribute') { - const attr = fn.childForFieldName('attribute'); - if (attr) callName = attr.text; - const obj = fn.childForFieldName('object'); - if (obj) receiver = obj.text; - } - if (callName) { - const call = { name: callName, line: node.startPosition.row + 1 }; - if (receiver) call.receiver = receiver; - calls.push(call); - } - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'import_statement': { - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && 
(child.type === 'dotted_name' || child.type === 'aliased_import')) { - const name = - child.type === 'aliased_import' - ? (child.childForFieldName('alias') || child.childForFieldName('name'))?.text - : child.text; - if (name) names.push(name); - } - } - if (names.length > 0) - imports.push({ - source: names[0], - names, - line: node.startPosition.row + 1, - pythonImport: true, - }); - break; - } +function handlePyFunctionDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const decorators = []; + if (node.previousSibling && node.previousSibling.type === 'decorator') { + decorators.push(node.previousSibling.text); + } + const parentClass = findPythonParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const kind = parentClass ? 'method' : 'function'; + const fnChildren = extractPythonParameters(node); + ctx.definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators, + children: fnChildren.length > 0 ? fnChildren : undefined, + visibility: pythonVisibility(nameNode.text), + }); +} - case 'expression_statement': { - // Module-level UPPER_CASE assignments → constants - if (node.parent && node.parent.type === 'module') { - const assignment = findChild(node, 'assignment'); - if (assignment) { - const left = assignment.childForFieldName('left'); - if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { - definitions.push({ - name: left.text, - kind: 'constant', - line: node.startPosition.row + 1, - }); - } - } - } - break; +function handlePyClassDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const clsChildren = extractPythonClassProperties(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? 
clsChildren : undefined, + }); + const superclasses = node.childForFieldName('superclasses') || findChild(node, 'argument_list'); + if (superclasses) { + for (let i = 0; i < superclasses.childCount; i++) { + const child = superclasses.child(i); + if (child && child.type === 'identifier') { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); } + } + } +} - case 'import_from_statement': { - let source = ''; - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'dotted_name' || child.type === 'relative_import') { - if (!source) source = child.text; - else names.push(child.text); - } - if (child.type === 'aliased_import') { - const n = child.childForFieldName('name') || child.child(0); - if (n) names.push(n.text); - } - if (child.type === 'wildcard_import') names.push('*'); - } - if (source) - imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); - break; +function handlePyCall(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + let callName = null; + let receiver; + if (fn.type === 'identifier') callName = fn.text; + else if (fn.type === 'attribute') { + const attr = fn.childForFieldName('attribute'); + if (attr) callName = attr.text; + const obj = fn.childForFieldName('object'); + if (obj) receiver = obj.text; + } + if (callName) { + const call = { name: callName, line: node.startPosition.row + 1 }; + if (receiver) call.receiver = receiver; + ctx.calls.push(call); + } +} + +function handlePyImport(node, ctx) { + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'dotted_name' || child.type === 'aliased_import')) { + const name = + child.type === 'aliased_import' + ? 
(child.childForFieldName('alias') || child.childForFieldName('name'))?.text + : child.text; + if (name) names.push(name); + } + } + if (names.length > 0) + ctx.imports.push({ + source: names[0], + names, + line: node.startPosition.row + 1, + pythonImport: true, + }); +} + +function handlePyExpressionStmt(node, ctx) { + if (node.parent && node.parent.type === 'module') { + const assignment = findChild(node, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { + ctx.definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + }); } } + } +} - for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); +function handlePyImportFrom(node, ctx) { + let source = ''; + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'dotted_name' || child.type === 'relative_import') { + if (!source) source = child.text; + else names.push(child.text); + } + if (child.type === 'aliased_import') { + const n = child.childForFieldName('name') || child.child(0); + if (n) names.push(n.text); + } + if (child.type === 'wildcard_import') names.push('*'); } + if (source) + ctx.imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); +} - function extractPythonParameters(fnNode) { - const params = []; - const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); - if (!paramsNode) return params; - for (let i = 0; i < paramsNode.childCount; i++) { - const child = paramsNode.child(i); - if (!child) continue; - const t = child.type; - if (t === 'identifier') { - params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); - } else if ( - t === 'typed_parameter' || - t === 'default_parameter' || - t === 'typed_default_parameter' - ) { - const nameNode = 
child.childForFieldName('name') || child.child(0); - if (nameNode && nameNode.type === 'identifier') { - params.push({ - name: nameNode.text, - kind: 'parameter', - line: child.startPosition.row + 1, - }); - } - } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { - // *args, **kwargs - for (let j = 0; j < child.childCount; j++) { - const inner = child.child(j); - if (inner && inner.type === 'identifier') { - params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); - break; - } +// ── Python-specific helpers ───────────────────────────────────────────────── + +function extractPythonParameters(fnNode) { + const params = []; + const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'typed_parameter' || + t === 'default_parameter' || + t === 'typed_default_parameter' + ) { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 'identifier') { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'identifier') { + params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); + break; } } } - return params; } + return params; +} - function extractPythonClassProperties(classNode) { - const props = []; - const seen = new Set(); - const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); - if (!body) return props; +function 
extractPythonClassProperties(classNode) { + const props = []; + const seen = new Set(); + const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); + if (!body) return props; - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (!child) continue; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; - // Direct class attribute assignments: x = 5 - if (child.type === 'expression_statement') { - const assignment = findChild(child, 'assignment'); - if (assignment) { - const left = assignment.childForFieldName('left'); - if (left && left.type === 'identifier' && !seen.has(left.text)) { - seen.add(left.text); - props.push({ - name: left.text, - kind: 'property', - line: child.startPosition.row + 1, - visibility: pythonVisibility(left.text), - }); - } + if (child.type === 'expression_statement') { + const assignment = findChild(child, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && !seen.has(left.text)) { + seen.add(left.text); + props.push({ + name: left.text, + kind: 'property', + line: child.startPosition.row + 1, + visibility: pythonVisibility(left.text), + }); } } + } - // __init__ method: self.x = ... 
assignments - if (child.type === 'function_definition') { - const fnName = child.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = child.childForFieldName('body') || findChild(child, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = child.childForFieldName('body') || findChild(child, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); } } + } - // decorated __init__ - if (child.type === 'decorated_definition') { - for (let j = 0; j < child.childCount; j++) { - const inner = child.child(j); - if (inner && inner.type === 'function_definition') { - const fnName = inner.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } + if (child.type === 'decorated_definition') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'function_definition') { + const fnName = inner.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); } } } } } - return props; } + return props; +} - function walkInitBody(bodyNode, seen, props) { - for (let i = 0; i < bodyNode.childCount; i++) { - const stmt = bodyNode.child(i); - if (!stmt || stmt.type !== 'expression_statement') continue; - const assignment = findChild(stmt, 'assignment'); - if (!assignment) continue; - const left = assignment.childForFieldName('left'); - if (!left || left.type !== 'attribute') continue; - const obj = left.childForFieldName('object'); - const attr = left.childForFieldName('attribute'); - if ( - obj && - obj.text 
=== 'self' && - attr && - attr.type === 'identifier' && - !seen.has(attr.text) - ) { - seen.add(attr.text); - props.push({ - name: attr.text, - kind: 'property', - line: stmt.startPosition.row + 1, - visibility: pythonVisibility(attr.text), - }); - } +function walkInitBody(bodyNode, seen, props) { + for (let i = 0; i < bodyNode.childCount; i++) { + const stmt = bodyNode.child(i); + if (!stmt || stmt.type !== 'expression_statement') continue; + const assignment = findChild(stmt, 'assignment'); + if (!assignment) continue; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'attribute') continue; + const obj = left.childForFieldName('object'); + const attr = left.childForFieldName('attribute'); + if (obj && obj.text === 'self' && attr && attr.type === 'identifier' && !seen.has(attr.text)) { + seen.add(attr.text); + props.push({ + name: attr.text, + kind: 'property', + line: stmt.startPosition.row + 1, + visibility: pythonVisibility(attr.text), + }); } } +} - function findPythonParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class_definition') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; +function findPythonParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class_definition') { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; } - return null; + current = current.parent; } - - walkPythonNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; + return null; } From 3a656bb089cca1e595d3a28331fb5303d5c11a16 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:42:33 -0600 Subject: [PATCH 09/37] refactor: decompose extractJavaSymbols into per-category handlers Split walkJavaNode switch into 8 focused handlers plus an extractJavaInterfaces helper. Moved findJavaParentClass to module scope. The class_declaration case (deepest nesting in the file) is now split between handleJavaClassDecl and extractJavaInterfaces. Impact: 12 functions changed, 5 affected --- src/extractors/java.js | 418 +++++++++++++++++++++-------------------- 1 file changed, 211 insertions(+), 207 deletions(-) diff --git a/src/extractors/java.js b/src/extractors/java.js index 2bf0bb28..9da313c1 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -4,239 +4,243 @@ import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js' * Extract symbols from Java files. */ export function extractJavaSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findJavaParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'enum_declaration' || - current.type === 'interface_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + walkJavaNode(tree.rootNode, ctx); + return ctx; +} + +function walkJavaNode(node, ctx) { + switch (node.type) { + case 'class_declaration': + handleJavaClassDecl(node, ctx); + break; + case 'interface_declaration': + handleJavaInterfaceDecl(node, ctx); + break; + case 'enum_declaration': + handleJavaEnumDecl(node, ctx); + break; + case 'method_declaration': + handleJavaMethodDecl(node, ctx); + break; + case 'constructor_declaration': + handleJavaConstructorDecl(node, ctx); + break; + case 'import_declaration': + handleJavaImportDecl(node, ctx); + break; + case 'method_invocation': + handleJavaMethodInvocation(node, ctx); + break; + case 'object_creation_expression': + handleJavaObjectCreation(node, ctx); + break; } - function walkJavaNode(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? classChildren : undefined, - }); + for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i), ctx); +} - const superclass = node.childForFieldName('superclass'); - if (superclass) { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'generic_type') - ) { - const superName = child.type === 'generic_type' ? 
child.child(0)?.text : child.text; - if (superName) - classes.push({ - name: nameNode.text, - extends: superName, - line: node.startPosition.row + 1, - }); - break; - } - } - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - const interfaces = node.childForFieldName('interfaces'); - if (interfaces) { - for (let i = 0; i < interfaces.childCount; i++) { - const child = interfaces.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'type_list' || - child.type === 'generic_type') - ) { - if (child.type === 'type_list') { - for (let j = 0; j < child.childCount; j++) { - const t = child.child(j); - if ( - t && - (t.type === 'type_identifier' || - t.type === 'identifier' || - t.type === 'generic_type') - ) { - const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } else { - const ifaceName = - child.type === 'generic_type' ? child.child(0)?.text : child.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleJavaClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ + const superclass = node.childForFieldName('superclass'); + if (superclass) { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'generic_type') + ) { + const superName = child.type === 'generic_type' ? child.child(0)?.text : child.text; + if (superName) + ctx.classes.push({ name: nameNode.text, - kind: 'interface', + extends: superName, line: node.startPosition.row + 1, - endLine: nodeEndLine(node), }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } break; } + } + } - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractEnumConstants(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } + const interfaces = node.childForFieldName('interfaces'); + if (interfaces) { + extractJavaInterfaces(interfaces, nameNode.text, node.startPosition.row + 1, ctx); + } +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); +function extractJavaInterfaces(interfaces, className, line, ctx) { + for (let i = 0; i < interfaces.childCount; i++) { + const child = interfaces.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'type_list' || + child.type === 'generic_type') + ) { + if (child.type === 'type_list') { + for (let j = 0; j < child.childCount; j++) { + const t = child.child(j); + if ( + t && + (t.type === 'type_identifier' || t.type === 'identifier' || t.type === 'generic_type') + ) { + const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; + if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); + } } - break; + } else { + const ifaceName = child.type === 'generic_type' ? child.child(0)?.text : child.text; + if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); } + } + } +} - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, +function handleJavaInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { - const fullPath = child.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - javaImport: true, - }); - } - if (child && child.type === 'asterisk') { - const lastImport = imports[imports.length - 1]; - if (lastImport) lastImport.names = ['*']; - } - } - break; - } +function handleJavaEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractEnumConstants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'method_invocation': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const obj = node.childForFieldName('object'); - const call = { name: nameNode.text, line: node.startPosition.row + 1 }; - if (obj) call.receiver = obj.text; - calls.push(call); - } - break; - } +function handleJavaMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } +function handleJavaConstructorDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i)); +function handleJavaImportDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { + const fullPath = child.text; + const lastName = fullPath.split('.').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + javaImport: true, + }); + } + if (child && child.type === 'asterisk') { + const lastImport = ctx.imports[ctx.imports.length - 1]; + if (lastImport) lastImport.names = ['*']; + } } +} + +function handleJavaMethodInvocation(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const obj = node.childForFieldName('object'); + const call = { name: nameNode.text, line: node.startPosition.row + 1 }; + if (obj) call.receiver = obj.text; + ctx.calls.push(call); +} - walkJavaNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleJavaObjectCreation(node, ctx) { + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; + if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); +} + +function findJavaParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'enum_declaration' || + current.type === 'interface_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── From bf5b986f3407dcbf9c6734e47e28aec39229e406 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:47:27 -0600 Subject: [PATCH 10/37] refactor: decompose remaining language extractors Apply the same per-category handler decomposition to all remaining language extractors: Go (6 handlers), Ruby (8 handlers), PHP (11 handlers), C# (11 handlers), Rust (9 handlers), HCL (4 handlers). Each extractor now follows the template established by the JS extractor: - Thin entry function creates ctx, delegates to walkXNode - walkXNode is a thin dispatcher switch - Each case is a named handler function at module scope - Helper functions (findParentClass, etc.) moved to module scope Impact: 66 functions changed, 23 affected --- src/extractors/csharp.js | 429 ++++++++++++++++++------------------ src/extractors/go.js | 349 +++++++++++++++--------------- src/extractors/hcl.js | 172 ++++++++------- src/extractors/php.js | 453 ++++++++++++++++++++------------------- src/extractors/ruby.js | 377 ++++++++++++++++---------------- src/extractors/rust.js | 347 +++++++++++++++--------------- 6 files changed, 1097 insertions(+), 1030 deletions(-) diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index 9dafa451..d52aa893 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -4,233 +4,248 @@ import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js' * Extract symbols from C# files. 
*/ export function extractCSharpSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findCSharpParentType(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'struct_declaration' || - current.type === 'interface_declaration' || - current.type === 'enum_declaration' || - current.type === 'record_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + walkCSharpNode(tree.rootNode, ctx); + return ctx; +} + +function walkCSharpNode(node, ctx) { + switch (node.type) { + case 'class_declaration': + handleCsClassDecl(node, ctx); + break; + case 'struct_declaration': + handleCsStructDecl(node, ctx); + break; + case 'record_declaration': + handleCsRecordDecl(node, ctx); + break; + case 'interface_declaration': + handleCsInterfaceDecl(node, ctx); + break; + case 'enum_declaration': + handleCsEnumDecl(node, ctx); + break; + case 'method_declaration': + handleCsMethodDecl(node, ctx); + break; + case 'constructor_declaration': + handleCsConstructorDecl(node, ctx); + break; + case 'property_declaration': + handleCsPropertyDecl(node, ctx); + break; + case 'using_directive': + handleCsUsingDirective(node, ctx); + break; + case 'invocation_expression': + handleCsInvocationExpr(node, ctx); + break; + case 'object_creation_expression': + handleCsObjectCreation(node, ctx); + break; } - function walkCSharpNode(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractCSharpClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: 
nodeEndLine(node), - children: classChildren.length > 0 ? classChildren : undefined, - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkCSharpNode(node.child(i), ctx); +} - case 'struct_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const structChildren = extractCSharpClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: structChildren.length > 0 ? structChildren : undefined, - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'record_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'record', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } +function handleCsClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractCSharpClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleCsStructDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const structChildren = extractCSharpClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: structChildren.length > 0 ? structChildren : undefined, + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractCSharpEnumMembers(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? 
enumChildren : undefined, - }); - } - break; - } +function handleCsRecordDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'record', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - const params = extractCSharpParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, +function handleCsInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? 
`${parentType}.${nameNode.text}` : nameNode.text; - const params = extractCSharpParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handleCsEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractCSharpEnumMembers(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, + }); +} - case 'property_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'property', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handleCsMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'using_directive': { - // using System.Collections.Generic; - const nameNode = - node.childForFieldName('name') || - findChild(node, 'qualified_name') || - findChild(node, 'identifier'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - csharpUsing: true, - }); - } - break; - } +function handleCsConstructorDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'invocation_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'member_access_expression') { - const name = fn.childForFieldName('name'); - if (name) { - const expr = fn.childForFieldName('expression'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (expr) call.receiver = expr.text; - calls.push(call); - } - } else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') { - const name = fn.childForFieldName('name') || fn.child(0); - if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - } - break; - } +function handleCsPropertyDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = 
findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + ctx.definitions.push({ + name: fullName, + kind: 'property', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: extractModifierVisibility(node), + }); +} - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_name' - ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text - : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } +function handleCsUsingDirective(node, ctx) { + const nameNode = + node.childForFieldName('name') || + findChild(node, 'qualified_name') || + findChild(node, 'identifier'); + if (!nameNode) return; + const fullPath = nameNode.text; + const lastName = fullPath.split('.').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + csharpUsing: true, + }); +} - for (let i = 0; i < node.childCount; i++) walkCSharpNode(node.child(i)); +function handleCsInvocationExpr(node, ctx) { + const fn = node.childForFieldName('function') || node.child(0); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'member_access_expression') { + const name = fn.childForFieldName('name'); + if (name) { + const expr = fn.childForFieldName('expression'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (expr) call.receiver = expr.text; + ctx.calls.push(call); + } + } else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') { + const name = fn.childForFieldName('name') || fn.child(0); + if (name) ctx.calls.push({ name: name.text, line: node.startPosition.row + 1 }); } +} + +function handleCsObjectCreation(node, ctx) { + const typeNode = 
node.childForFieldName('type'); + if (!typeNode) return; + const typeName = + typeNode.type === 'generic_name' + ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text + : typeNode.text; + if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); +} - walkCSharpNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findCSharpParentType(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'struct_declaration' || + current.type === 'interface_declaration' || + current.type === 'enum_declaration' || + current.type === 'record_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/go.js b/src/extractors/go.js index 50460c8d..57d3b2a8 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -4,196 +4,201 @@ import { findChild, goVisibility, nodeEndLine } from './helpers.js'; * Extract symbols from Go files. */ export function extractGoSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function walkGoNode(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const params = extractGoParameters(node.childForFieldName('parameters')); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? 
params : undefined, - visibility: goVisibility(nameNode.text), - }); - } - break; - } + walkGoNode(tree.rootNode, ctx); + return ctx; +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - const receiver = node.childForFieldName('receiver'); - if (nameNode) { - let receiverType = null; - if (receiver) { - // receiver is a parameter_list like (r *Foo) or (r Foo) - for (let i = 0; i < receiver.childCount; i++) { - const param = receiver.child(i); - if (!param) continue; - const typeNode = param.childForFieldName('type'); - if (typeNode) { - receiverType = - typeNode.type === 'pointer_type' - ? typeNode.text.replace(/^\*/, '') - : typeNode.text; - break; - } - } - } - const fullName = receiverType ? `${receiverType}.${nameNode.text}` : nameNode.text; - const params = extractGoParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? 
params : undefined, - visibility: goVisibility(nameNode.text), - }); - } - break; - } +function walkGoNode(node, ctx) { + switch (node.type) { + case 'function_declaration': + handleGoFuncDecl(node, ctx); + break; + case 'method_declaration': + handleGoMethodDecl(node, ctx); + break; + case 'type_declaration': + handleGoTypeDecl(node, ctx); + break; + case 'import_declaration': + handleGoImportDecl(node, ctx); + break; + case 'const_declaration': + handleGoConstDecl(node, ctx); + break; + case 'call_expression': + handleGoCallExpr(node, ctx); + break; + } - case 'type_declaration': { - for (let i = 0; i < node.childCount; i++) { - const spec = node.child(i); - if (!spec || spec.type !== 'type_spec') continue; - const nameNode = spec.childForFieldName('name'); - const typeNode = spec.childForFieldName('type'); - if (nameNode && typeNode) { - if (typeNode.type === 'struct_type') { - const fields = extractStructFields(typeNode); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fields.length > 0 ? 
fields : undefined, - }); - } else if (typeNode.type === 'interface_type') { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - for (let j = 0; j < typeNode.childCount; j++) { - const member = typeNode.child(j); - if (member && member.type === 'method_elem') { - const methName = member.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - }); - } - } - } - } else { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } - } + for (let i = 0; i < node.childCount; i++) walkGoNode(node.child(i), ctx); +} + +// ── Walk-path per-node-type handlers ──────────────────────────────────────── + +function handleGoFuncDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const params = extractGoParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: goVisibility(nameNode.text), + }); + } +} + +function handleGoMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + const receiver = node.childForFieldName('receiver'); + if (!nameNode) return; + let receiverType = null; + if (receiver) { + for (let i = 0; i < receiver.childCount; i++) { + const param = receiver.child(i); + if (!param) continue; + const typeNode = param.childForFieldName('type'); + if (typeNode) { + receiverType = + typeNode.type === 'pointer_type' ? typeNode.text.replace(/^\*/, '') : typeNode.text; break; } + } + } + const fullName = receiverType ? 
`${receiverType}.${nameNode.text}` : nameNode.text; + const params = extractGoParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: goVisibility(nameNode.text), + }); +} - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'import_spec') { - const pathNode = child.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = child.childForFieldName('name'); - const alias = nameNode ? nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: child.startPosition.row + 1, - goImport: true, +function handleGoTypeDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'type_spec') continue; + const nameNode = spec.childForFieldName('name'); + const typeNode = spec.childForFieldName('type'); + if (nameNode && typeNode) { + if (typeNode.type === 'struct_type') { + const fields = extractStructFields(typeNode); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, + }); + } else if (typeNode.type === 'interface_type') { + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + for (let j = 0; j < typeNode.childCount; j++) { + const member = typeNode.child(j); + if (member && member.type === 'method_elem') { + const methName = member.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, }); } } - if (child.type === 'import_spec_list') { - for (let j = 0; j < child.childCount; j++) { - const spec = child.child(j); - if (spec && spec.type === 'import_spec') { - const pathNode = spec.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: spec.startPosition.row + 1, - goImport: true, - }); - } - } - } - } - } - break; - } - - case 'const_declaration': { - for (let i = 0; i < node.childCount; i++) { - const spec = node.child(i); - if (!spec || spec.type !== 'const_spec') continue; - const constName = spec.childForFieldName('name'); - if (constName) { - definitions.push({ - name: constName.text, - kind: 'constant', - line: spec.startPosition.row + 1, - endLine: spec.endPosition.row + 1, - }); - } } - break; + } else { + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); } + } + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'selector_expression') { - const field = fn.childForFieldName('field'); - 
if (field) { - const operand = fn.childForFieldName('operand'); - const call = { name: field.text, line: node.startPosition.row + 1 }; - if (operand) call.receiver = operand.text; - calls.push(call); - } - } +function handleGoImportDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'import_spec') { + extractGoImportSpec(child, ctx); + } + if (child.type === 'import_spec_list') { + for (let j = 0; j < child.childCount; j++) { + const spec = child.child(j); + if (spec && spec.type === 'import_spec') { + extractGoImportSpec(spec, ctx); } - break; } } + } +} + +function extractGoImportSpec(spec, ctx) { + const pathNode = spec.childForFieldName('path'); + if (pathNode) { + const importPath = pathNode.text.replace(/"/g, ''); + const nameNode = spec.childForFieldName('name'); + const alias = nameNode ? nameNode.text : importPath.split('/').pop(); + ctx.imports.push({ + source: importPath, + names: [alias], + line: spec.startPosition.row + 1, + goImport: true, + }); + } +} - for (let i = 0; i < node.childCount; i++) walkGoNode(node.child(i)); +function handleGoConstDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'const_spec') continue; + const constName = spec.childForFieldName('name'); + if (constName) { + ctx.definitions.push({ + name: constName.text, + kind: 'constant', + line: spec.startPosition.row + 1, + endLine: spec.endPosition.row + 1, + }); + } } +} - walkGoNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleGoCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'selector_expression') { + const field = fn.childForFieldName('field'); + if (field) { + const operand = 
fn.childForFieldName('operand'); + const call = { name: field.text, line: node.startPosition.row + 1 }; + if (operand) call.receiver = operand.text; + ctx.calls.push(call); + } + } } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js index aba022a5..8b13651f 100644 --- a/src/extractors/hcl.js +++ b/src/extractors/hcl.js @@ -4,92 +4,108 @@ import { nodeEndLine } from './helpers.js'; * Extract symbols from HCL (Terraform) files. */ export function extractHCLSymbols(tree, _filePath) { - const definitions = []; - const imports = []; + const ctx = { definitions: [], imports: [] }; - function walkHclNode(node) { - if (node.type === 'block') { - const children = []; - for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); + walkHclNode(tree.rootNode, ctx); + return { + definitions: ctx.definitions, + calls: [], + imports: ctx.imports, + classes: [], + exports: [], + }; +} - const identifiers = children.filter((c) => c.type === 'identifier'); - const strings = children.filter((c) => c.type === 'string_lit'); +function walkHclNode(node, ctx) { + if (node.type === 'block') { + handleHclBlock(node, ctx); + } - if (identifiers.length > 0) { - const blockType = identifiers[0].text; - let name = ''; + for (let i = 0; i < node.childCount; i++) walkHclNode(node.child(i), ctx); +} - if (blockType === 'resource' && strings.length >= 2) { - name = `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if (blockType === 'data' && strings.length >= 2) { - name = `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if ( - (blockType === 'variable' || blockType === 'output' || blockType === 'module') && - strings.length >= 1 - ) { - name = `${blockType}.${strings[0].text.replace(/"/g, '')}`; - } else if (blockType === 'locals') { - name = 'locals'; - } else if (blockType === 'terraform' || blockType === 'provider') { 
- name = blockType; - if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; - } +function handleHclBlock(node, ctx) { + const children = []; + for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); - if (name) { - // Extract attributes as property children for variable/output blocks - let blockChildren; - if (blockType === 'variable' || blockType === 'output') { - blockChildren = []; - const body = children.find((c) => c.type === 'body'); - if (body) { - for (let j = 0; j < body.childCount; j++) { - const attr = body.child(j); - if (attr && attr.type === 'attribute') { - const key = attr.childForFieldName('key') || attr.child(0); - if (key) { - blockChildren.push({ - name: key.text, - kind: 'property', - line: attr.startPosition.row + 1, - }); - } - } - } - } - } - definitions.push({ - name, - kind: blockType, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: blockChildren?.length > 0 ? blockChildren : undefined, - }); - } + const identifiers = children.filter((c) => c.type === 'identifier'); + const strings = children.filter((c) => c.type === 'string_lit'); - if (blockType === 'module') { - const body = children.find((c) => c.type === 'body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const attr = body.child(i); - if (attr && attr.type === 'attribute') { - const key = attr.childForFieldName('key') || attr.child(0); - const val = attr.childForFieldName('val') || attr.child(2); - if (key && key.text === 'source' && val) { - const src = val.text.replace(/"/g, ''); - if (src.startsWith('./') || src.startsWith('../')) { - imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); - } - } - } - } - } - } - } + if (identifiers.length === 0) return; + const blockType = identifiers[0].text; + const name = resolveHclBlockName(blockType, strings); + + if (name) { + let blockChildren; + if (blockType === 'variable' || blockType === 'output') { + blockChildren = 
extractHclAttributes(children); } + ctx.definitions.push({ + name, + kind: blockType, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: blockChildren?.length > 0 ? blockChildren : undefined, + }); + } - for (let i = 0; i < node.childCount; i++) walkHclNode(node.child(i)); + if (blockType === 'module') { + extractHclModuleSource(children, node, ctx); } +} - walkHclNode(tree.rootNode); - return { definitions, calls: [], imports, classes: [], exports: [] }; +function resolveHclBlockName(blockType, strings) { + if (blockType === 'resource' && strings.length >= 2) { + return `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } + if (blockType === 'data' && strings.length >= 2) { + return `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } + if ( + (blockType === 'variable' || blockType === 'output' || blockType === 'module') && + strings.length >= 1 + ) { + return `${blockType}.${strings[0].text.replace(/"/g, '')}`; + } + if (blockType === 'locals') return 'locals'; + if (blockType === 'terraform' || blockType === 'provider') { + let name = blockType; + if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; + return name; + } + return ''; +} + +function extractHclAttributes(children) { + const attrs = []; + const body = children.find((c) => c.type === 'body'); + if (!body) return attrs; + for (let j = 0; j < body.childCount; j++) { + const attr = body.child(j); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + if (key) { + attrs.push({ name: key.text, kind: 'property', line: attr.startPosition.row + 1 }); + } + } + } + return attrs; +} + +function extractHclModuleSource(children, _node, ctx) { + const body = children.find((c) => c.type === 'body'); + if (!body) return; + for (let i = 0; i < body.childCount; i++) { + const attr = body.child(i); + if (attr && attr.type === 'attribute') { + const key = 
attr.childForFieldName('key') || attr.child(0); + const val = attr.childForFieldName('val') || attr.child(2); + if (key && key.text === 'source' && val) { + const src = val.text.replace(/"/g, ''); + if (src.startsWith('./') || src.startsWith('../')) { + ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); + } + } + } + } } diff --git a/src/extractors/php.js b/src/extractors/php.js index fd008168..03f9c6d7 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -76,249 +76,260 @@ function extractPhpEnumCases(enumNode) { * Extract symbols from PHP files. */ export function extractPHPSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findPHPParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'trait_declaration' || - current.type === 'enum_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + walkPhpNode(tree.rootNode, ctx); + return ctx; +} + +function walkPhpNode(node, ctx) { + switch (node.type) { + case 'function_definition': + handlePhpFuncDef(node, ctx); + break; + case 'class_declaration': + handlePhpClassDecl(node, ctx); + break; + case 'interface_declaration': + handlePhpInterfaceDecl(node, ctx); + break; + case 'trait_declaration': + handlePhpTraitDecl(node, ctx); + break; + case 'enum_declaration': + handlePhpEnumDecl(node, ctx); + break; + case 'method_declaration': + handlePhpMethodDecl(node, ctx); + break; + case 'namespace_use_declaration': + handlePhpNamespaceUse(node, ctx); + break; + case 'function_call_expression': + handlePhpFuncCall(node, ctx); + break; + case 'member_call_expression': + handlePhpMemberCall(node, ctx); + break; + case 'scoped_call_expression': + handlePhpScopedCall(node, ctx); + break; + case 'object_creation_expression': + handlePhpObjectCreation(node, ctx); + break; } - function walkPhpNode(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const params = extractPhpParameters(node); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkPhpNode(node.child(i), ctx); +} - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractPhpClassChildren(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? 
classChildren : undefined, - }); +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - // Check base clause (extends) - const baseClause = - node.childForFieldName('base_clause') || findChild(node, 'base_clause'); - if (baseClause) { - for (let i = 0; i < baseClause.childCount; i++) { - const child = baseClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } +function handlePhpFuncDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const params = extractPhpParameters(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); +} - // Check class interface clause (implements) - const interfaceClause = findChild(node, 'class_interface_clause'); - if (interfaceClause) { - for (let i = 0; i < interfaceClause.childCount; i++) { - const child = interfaceClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - implements: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } +function handlePhpClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractPhpClassChildren(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + const baseClause = node.childForFieldName('base_clause') || findChild(node, 'base_clause'); + if (baseClause) { + for (let i = 0; i < baseClause.childCount; i++) { + const child = baseClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); break; } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; + } + } + const interfaceClause = findChild(node, 'class_interface_clause'); + if (interfaceClause) { + for (let i = 0; i < interfaceClause.childCount; i++) { + const child = interfaceClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + ctx.classes.push({ + name: nameNode.text, + implements: child.text, + line: node.startPosition.row + 1, + }); } + } + } +} - case 'trait_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), +function handlePhpInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: 
nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractPhpEnumCases(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } +function handlePhpTraitDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findPHPParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractPhpParameters(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handlePhpEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractPhpEnumCases(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'namespace_use_declaration': { - // use App\Models\User; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'namespace_use_clause') { - const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('\\').pop(); - const alias = child.childForFieldName('alias'); - imports.push({ - source: fullPath, - names: [alias ? alias.text : lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - // Single use clause without wrapper - if (child && (child.type === 'qualified_name' || child.type === 'name')) { - const fullPath = child.text; - const lastName = fullPath.split('\\').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - break; - } +function handlePhpMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findPHPParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractPhpParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'function_call_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'name' || fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'qualified_name') { - const parts = fn.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - } - break; +function handlePhpNamespaceUse(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'namespace_use_clause') { + const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); + if (nameNode) { + const fullPath = nameNode.text; + const lastName = fullPath.split('\\').pop(); + const alias = child.childForFieldName('alias'); + ctx.imports.push({ + source: fullPath, + names: [alias ? alias.text : lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); } + } + if (child && (child.type === 'qualified_name' || child.type === 'name')) { + const fullPath = child.text; + const lastName = fullPath.split('\\').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); + } + } +} - case 'member_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - const obj = node.childForFieldName('object'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (obj) call.receiver = obj.text; - calls.push(call); - } - break; - } +function handlePhpFuncCall(node, ctx) { + const fn = node.childForFieldName('function') || node.child(0); + if (!fn) return; + if (fn.type === 'name' || fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'qualified_name') { + const parts = fn.text.split('\\'); + ctx.calls.push({ name: 
parts[parts.length - 1], line: node.startPosition.row + 1 }); + } +} - case 'scoped_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - const scope = node.childForFieldName('scope'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (scope) call.receiver = scope.text; - calls.push(call); - } - break; - } +function handlePhpMemberCall(node, ctx) { + const name = node.childForFieldName('name'); + if (!name) return; + const obj = node.childForFieldName('object'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (obj) call.receiver = obj.text; + ctx.calls.push(call); +} - case 'object_creation_expression': { - const classNode = node.child(1); // skip 'new' keyword - if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { - const parts = classNode.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - break; - } - } +function handlePhpScopedCall(node, ctx) { + const name = node.childForFieldName('name'); + if (!name) return; + const scope = node.childForFieldName('scope'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (scope) call.receiver = scope.text; + ctx.calls.push(call); +} - for (let i = 0; i < node.childCount; i++) walkPhpNode(node.child(i)); +function handlePhpObjectCreation(node, ctx) { + const classNode = node.child(1); + if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { + const parts = classNode.text.split('\\'); + ctx.calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); } +} - walkPhpNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findPHPParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'trait_declaration' || + current.type === 'enum_declaration' + ) { + const nameNode = 
current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js index 400d410d..cc0da5fd 100644 --- a/src/extractors/ruby.js +++ b/src/extractors/ruby.js @@ -4,211 +4,218 @@ import { findChild, nodeEndLine } from './helpers.js'; * Extract symbols from Ruby files. */ export function extractRubySymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findRubyParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - if (current.type === 'module') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + walkRubyNode(tree.rootNode, ctx); + return ctx; +} + +function walkRubyNode(node, ctx) { + switch (node.type) { + case 'class': + handleRubyClass(node, ctx); + break; + case 'module': + handleRubyModule(node, ctx); + break; + case 'method': + handleRubyMethod(node, ctx); + break; + case 'singleton_method': + handleRubySingletonMethod(node, ctx); + break; + case 'assignment': + handleRubyAssignment(node, ctx); + break; + case 'call': + handleRubyCall(node, ctx); + break; } - function walkRubyNode(node) { - switch (node.type) { - case 'class': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractRubyClassChildren(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? 
classChildren : undefined, - }); - const superclass = node.childForFieldName('superclass'); - if (superclass) { - // superclass wraps the < token and class name - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - // Direct superclass node may be a constant - if (superclass.type === 'superclass') { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } - } - } + for (let i = 0; i < node.childCount; i++) walkRubyNode(node.child(i), ctx); +} + +// ── Walk-path per-node-type handlers ──────────────────────────────────────── + +function handleRubyClass(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractRubyClassChildren(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + const superclass = node.childForFieldName('superclass'); + if (superclass) { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); break; } - - case 'module': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const moduleChildren = extractRubyBodyConstants(node); - definitions.push({ + } + if (superclass.type === 'superclass') { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + ctx.classes.push({ name: nameNode.text, - kind: 'module', + extends: child.text, line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: moduleChildren.length > 0 ? moduleChildren : undefined, }); + break; } - break; } + } + } +} - case 'method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractRubyParameters(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } +function handleRubyModule(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const moduleChildren = extractRubyBodyConstants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: moduleChildren.length > 0 ? 
moduleChildren : undefined, + }); +} - case 'singleton_method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractRubyParameters(node); - definitions.push({ - name: fullName, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } +function handleRubyMethod(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); +} - case 'assignment': { - // Top-level constant assignments (parent is program) - if (node.parent && node.parent.type === 'program') { - const left = node.childForFieldName('left'); - if (left && left.type === 'constant') { - definitions.push({ - name: left.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } - break; - } +function handleRubySingletonMethod(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + }); +} - case 'call': { - const methodNode = node.childForFieldName('method'); - if (methodNode) { - // Check for require/require_relative - if (methodNode.text === 'require' || methodNode.text === 'require_relative') { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = arg.text.replace(/^['"]|['"]$/g, ''); - imports.push({ - source: strContent, - names: [strContent.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - // Look inside string for string_content - if (arg && arg.type === 'string') { - const content = findChild(arg, 'string_content'); - if (content) { - imports.push({ - source: content.text, - names: [content.text.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - } - } - } - } else if ( - methodNode.text === 'include' || - methodNode.text === 'extend' || - methodNode.text === 'prepend' - ) { - // Module inclusion — treated like implements - const parentClass = findRubyParentClass(node); - if (parentClass) { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { - classes.push({ - name: parentClass, - implements: arg.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - } else { - const recv = node.childForFieldName('receiver'); - const call = { name: methodNode.text, line: node.startPosition.row + 1 }; - if (recv) call.receiver = recv.text; - calls.push(call); - } - } +function handleRubyAssignment(node, ctx) { + if (node.parent && node.parent.type === 'program') { + const left = node.childForFieldName('left'); + if (left && left.type === 'constant') { + ctx.definitions.push({ + name: 
left.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } +} + +function handleRubyCall(node, ctx) { + const methodNode = node.childForFieldName('method'); + if (!methodNode) return; + if (methodNode.text === 'require' || methodNode.text === 'require_relative') { + handleRubyRequire(node, ctx); + } else if ( + methodNode.text === 'include' || + methodNode.text === 'extend' || + methodNode.text === 'prepend' + ) { + handleRubyModuleInclusion(node, methodNode, ctx); + } else { + const recv = node.childForFieldName('receiver'); + const call = { name: methodNode.text, line: node.startPosition.row + 1 }; + if (recv) call.receiver = recv.text; + ctx.calls.push(call); + } +} + +function handleRubyRequire(node, ctx) { + const args = node.childForFieldName('arguments'); + if (!args) return; + for (let i = 0; i < args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'string' || arg.type === 'string_content')) { + const strContent = arg.text.replace(/^['"]|['"]$/g, ''); + ctx.imports.push({ + source: strContent, + names: [strContent.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); + break; + } + if (arg && arg.type === 'string') { + const content = findChild(arg, 'string_content'); + if (content) { + ctx.imports.push({ + source: content.text, + names: [content.text.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); break; } } + } +} - for (let i = 0; i < node.childCount; i++) walkRubyNode(node.child(i)); +function handleRubyModuleInclusion(node, _methodNode, ctx) { + const parentClass = findRubyParentClass(node); + if (!parentClass) return; + const args = node.childForFieldName('arguments'); + if (!args) return; + for (let i = 0; i < args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { + ctx.classes.push({ + name: parentClass, + implements: arg.text, + line: 
node.startPosition.row + 1, + }); + } } +} - walkRubyNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findRubyParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class' || current.type === 'module') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 705f9bd0..389bec00 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -4,191 +4,204 @@ import { findChild, nodeEndLine, rustVisibility } from './helpers.js'; * Extract symbols from Rust files. */ export function extractRustSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findCurrentImpl(node) { - let current = node.parent; - while (current) { - if (current.type === 'impl_item') { - const typeNode = current.childForFieldName('type'); - return typeNode ? 
typeNode.text : null; - } - current = current.parent; - } - return null; + walkRustNode(tree.rootNode, ctx); + return ctx; +} + +function walkRustNode(node, ctx) { + switch (node.type) { + case 'function_item': + handleRustFuncItem(node, ctx); + break; + case 'struct_item': + handleRustStructItem(node, ctx); + break; + case 'enum_item': + handleRustEnumItem(node, ctx); + break; + case 'const_item': + handleRustConstItem(node, ctx); + break; + case 'trait_item': + handleRustTraitItem(node, ctx); + break; + case 'impl_item': + handleRustImplItem(node, ctx); + break; + case 'use_declaration': + handleRustUseDecl(node, ctx); + break; + case 'call_expression': + handleRustCallExpr(node, ctx); + break; + case 'macro_invocation': + handleRustMacroInvocation(node, ctx); + break; } - function walkRustNode(node) { - switch (node.type) { - case 'function_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const implType = findCurrentImpl(node); - const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; - const kind = implType ? 'method' : 'function'; - const params = extractRustParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: rustVisibility(node), - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkRustNode(node.child(i), ctx); +} - case 'struct_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const fields = extractStructFields(node); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fields.length > 0 ? 
fields : undefined, - visibility: rustVisibility(node), - }); - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'enum_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const variants = extractEnumVariants(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: variants.length > 0 ? variants : undefined, - }); - } - break; - } +function handleRustFuncItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const implType = findCurrentImpl(node); + const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; + const kind = implType ? 'method' : 'function'; + const params = extractRustParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: rustVisibility(node), + }); +} - case 'const_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } +function handleRustStructItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const fields = extractStructFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, + visibility: rustVisibility(node), + }); +} - case 'trait_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if ( - child && - (child.type === 'function_signature_item' || child.type === 'function_item') - ) { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleRustEnumItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const variants = extractEnumVariants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: variants.length > 0 ? 
variants : undefined, + }); +} + +function handleRustConstItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} - case 'impl_item': { - const typeNode = node.childForFieldName('type'); - const traitNode = node.childForFieldName('trait'); - if (typeNode && traitNode) { - classes.push({ - name: typeNode.text, - implements: traitNode.text, - line: node.startPosition.row + 1, +function handleRustTraitItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && (child.type === 'function_signature_item' || child.type === 'function_item')) { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'use_declaration': { - const argNode = node.child(1); - if (argNode) { - const usePaths = extractRustUsePath(argNode); - for (const imp of usePaths) { - imports.push({ - source: imp.source, - names: imp.names, - line: node.startPosition.row + 1, - rustUse: true, - }); - } - } - break; - } +function handleRustImplItem(node, ctx) { + const typeNode = node.childForFieldName('type'); + const traitNode = node.childForFieldName('trait'); + if (typeNode && traitNode) { + ctx.classes.push({ + name: typeNode.text, + implements: traitNode.text, + line: node.startPosition.row + 1, + }); + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if 
(fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'field_expression') { - const field = fn.childForFieldName('field'); - if (field) { - const value = fn.childForFieldName('value'); - const call = { name: field.text, line: node.startPosition.row + 1 }; - if (value) call.receiver = value.text; - calls.push(call); - } - } else if (fn.type === 'scoped_identifier') { - const name = fn.childForFieldName('name'); - if (name) { - const path = fn.childForFieldName('path'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (path) call.receiver = path.text; - calls.push(call); - } - } - } - break; - } +function handleRustUseDecl(node, ctx) { + const argNode = node.child(1); + if (!argNode) return; + const usePaths = extractRustUsePath(argNode); + for (const imp of usePaths) { + ctx.imports.push({ + source: imp.source, + names: imp.names, + line: node.startPosition.row + 1, + rustUse: true, + }); + } +} - case 'macro_invocation': { - const macroNode = node.child(0); - if (macroNode) { - calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); - } - break; - } +function handleRustCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'field_expression') { + const field = fn.childForFieldName('field'); + if (field) { + const value = fn.childForFieldName('value'); + const call = { name: field.text, line: node.startPosition.row + 1 }; + if (value) call.receiver = value.text; + ctx.calls.push(call); + } + } else if (fn.type === 'scoped_identifier') { + const name = fn.childForFieldName('name'); + if (name) { + const path = fn.childForFieldName('path'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (path) call.receiver = path.text; + ctx.calls.push(call); } + } +} - for (let i 
= 0; i < node.childCount; i++) walkRustNode(node.child(i)); +function handleRustMacroInvocation(node, ctx) { + const macroNode = node.child(0); + if (macroNode) { + ctx.calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); } +} - walkRustNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findCurrentImpl(node) { + let current = node.parent; + while (current) { + if (current.type === 'impl_item') { + const typeNode = current.childForFieldName('type'); + return typeNode ? typeNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── From eafdf193777b9723e0c0ded0834b0c5bd9688684 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:19:21 -0600 Subject: [PATCH 11/37] refactor: decompose AST analysis visitors and engine into focused helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move nested handler functions to module level in cfg-visitor.js, dataflow-visitor.js, and complexity-visitor.js — reducing cognitive complexity of each factory function from 100-337 down to thin coordinators. Extract WASM pre-parse, visitor setup, result storage, and build delegation from runAnalyses into focused helper functions. 
Impact: 66 functions changed, 43 affected --- src/ast-analysis/engine.js | 510 ++++--- src/ast-analysis/visitors/cfg-visitor.js | 1284 ++++++++--------- .../visitors/complexity-visitor.js | 274 ++-- src/ast-analysis/visitors/dataflow-visitor.js | 454 +++--- 4 files changed, 1252 insertions(+), 1270 deletions(-) diff --git a/src/ast-analysis/engine.js b/src/ast-analysis/engine.js index 981ec514..76ba8cd2 100644 --- a/src/ast-analysis/engine.js +++ b/src/ast-analysis/engine.js @@ -50,294 +50,227 @@ async function getParserModule() { return _parserModule; } -// ─── Public API ────────────────────────────────────────────────────────── +// ─── WASM pre-parse ───────────────────────────────────────────────────── -/** - * Run all enabled AST analyses in a coordinated pass. - * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) - * @param {object} [engineOpts] - engine options - * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} - */ -export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { - const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; - - const doAst = opts.ast !== false; +async function ensureWasmTreesIfNeeded(fileSymbols, opts) { const doComplexity = opts.complexity !== false; const doCfg = opts.cfg !== false; const doDataflow = opts.dataflow !== false; - if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; - - const extToLang = buildExtToLangMap(); - - // ── WASM pre-parse for files that need it ─────────────────────────── - // The native engine only handles parsing (symbols, calls, imports). - // Complexity, CFG, and dataflow all require a WASM tree-sitter tree - // for their visitor walks. 
Without this, incremental rebuilds on the - // native engine silently lose these analyses for changed files (#468). - if (doComplexity || doCfg || doDataflow) { - let needsWasmTrees = false; - for (const [relPath, symbols] of fileSymbols) { - if (symbols._tree) continue; - const ext = path.extname(relPath).toLowerCase(); - const defs = symbols.definitions || []; - - const needsComplexity = - doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); - const needsCfg = - doCfg && - CFG_EXTENSIONS.has(ext) && - defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !Array.isArray(d.cfg?.blocks), - ); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); - - if (needsComplexity || needsCfg || needsDataflow) { - needsWasmTrees = true; - break; - } - } - - if (needsWasmTrees) { - try { - const { ensureWasmTrees } = await getParserModule(); - await ensureWasmTrees(fileSymbols, rootDir); - } catch (err) { - debug(`ensureWasmTrees failed: ${err.message}`); - } - } - } - - // ── Phase 7 Optimization: Unified pre-walk ───────────────────────── - // For files with WASM trees, run all applicable visitors in a SINGLE - // walkWithVisitors call. Store results in the format that buildXxx - // functions already expect as pre-computed data (same fields as native - // engine output). This eliminates ~3 redundant tree traversals per file. 
- const t0walk = performance.now(); + if (!doComplexity && !doCfg && !doDataflow) return; + let needsWasmTrees = false; for (const [relPath, symbols] of fileSymbols) { - if (!symbols._tree) continue; // No WASM tree — native path handles it - + if (symbols._tree) continue; const ext = path.extname(relPath).toLowerCase(); - const langId = symbols._langId || extToLang.get(ext); - if (!langId) continue; - const defs = symbols.definitions || []; - const visitors = []; - const walkerOpts = { - functionNodeTypes: new Set(), - nestingNodeTypes: new Set(), - getFunctionName: (_node) => null, - }; - - // ─ AST-store visitor ─ - const astTypeMap = AST_TYPE_MAPS.get(langId); - let astVisitor = null; - if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { - const nodeIdMap = new Map(); - for (const row of bulkNodeIdsByFile(db, relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); - visitors.push(astVisitor); - } - // ─ Complexity visitor (file-level mode) ─ - const cRules = COMPLEXITY_RULES.get(langId); - const hRules = HALSTEAD_RULES.get(langId); - let complexityVisitor = null; - if (doComplexity && cRules) { - // Only use visitor if some functions lack pre-computed complexity - const needsWasmComplexity = defs.some( - (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, - ); - if (needsWasmComplexity) { - complexityVisitor = createComplexityVisitor(cRules, hRules, { - fileLevelWalk: true, - langId, - }); - visitors.push(complexityVisitor); - - // Merge nesting nodes for complexity tracking - // NOTE: do NOT add functionNodes here — funcDepth in the complexity - // visitor already tracks function-level nesting. Adding them to - // nestingNodeTypes would inflate context.nestingLevel by +1 inside - // every function body, double-counting in cognitive += 1 + nestingLevel. 
- for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); - - // Provide getFunctionName for complexity visitor - const dfRules = DATAFLOW_RULES.get(langId); - walkerOpts.getFunctionName = (node) => { - // Try complexity rules' function name field first - const nameNode = node.childForFieldName('name'); - if (nameNode) return nameNode.text; - // Fall back to dataflow rules' richer name extraction - if (dfRules) return getFuncName(node, dfRules); - return null; - }; - } - } - - // ─ CFG visitor ─ - const cfgRulesForLang = CFG_RULES.get(langId); - let cfgVisitor = null; - if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { - // Only use visitor if some functions lack pre-computed CFG - const needsWasmCfg = defs.some( + const needsComplexity = + doComplexity && + COMPLEXITY_EXTENSIONS.has(ext) && + defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); + const needsCfg = + doCfg && + CFG_EXTENSIONS.has(ext) && + defs.some( (d) => (d.kind === 'function' || d.kind === 'method') && d.line && d.cfg !== null && !Array.isArray(d.cfg?.blocks), ); - if (needsWasmCfg) { - cfgVisitor = createCfgVisitor(cfgRulesForLang); - visitors.push(cfgVisitor); - } + const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + + if (needsComplexity || needsCfg || needsDataflow) { + needsWasmTrees = true; + break; } + } - // ─ Dataflow visitor ─ - const dfRules = DATAFLOW_RULES.get(langId); - let dataflowVisitor = null; - if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { - dataflowVisitor = createDataflowVisitor(dfRules); - visitors.push(dataflowVisitor); + if (needsWasmTrees) { + try { + const { ensureWasmTrees } = await getParserModule(); + await ensureWasmTrees(fileSymbols); + } catch (err) { + debug(`ensureWasmTrees failed: ${err.message}`); } + } +} - // ─ Run unified walk if we have visitors ─ - if (visitors.length === 0) continue; +// ─── Per-file visitor setup 
───────────────────────────────────────────── - const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); +function setupVisitors(db, relPath, symbols, langId, opts) { + const ext = path.extname(relPath).toLowerCase(); + const defs = symbols.definitions || []; + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; - // ─ Store AST results (buildAstNodes will find symbols.astNodes and skip its walk) ─ - if (astVisitor) { - const astRows = results['ast-store'] || []; - if (astRows.length > 0) { - // Store in the format buildAstNodes expects for the native path - symbols.astNodes = astRows; - } + const visitors = []; + const walkerOpts = { + functionNodeTypes: new Set(), + nestingNodeTypes: new Set(), + getFunctionName: (_node) => null, + }; + + // AST-store visitor + let astVisitor = null; + const astTypeMap = AST_TYPE_MAPS.get(langId); + if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } + astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); + visitors.push(astVisitor); + } - // ─ Store complexity results on definitions (buildComplexityMetrics will find def.complexity) ─ - if (complexityVisitor) { - const complexityResults = results.complexity || []; - // Match results back to definitions by function start line - // Store the full result (metrics + funcNode) for O(1) lookup - const resultByLine = new Map(); - for (const r of complexityResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!resultByLine.has(line)) resultByLine.set(line, []); - resultByLine.get(line).push(r); - } - } - for (const def of defs) { - if ((def.kind === 'function' || def.kind === 'method') && def.line && 
!def.complexity) { - const candidates = resultByLine.get(def.line); - const funcResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (funcResult) { - const { metrics } = funcResult; - const loc = computeLOCMetrics(funcResult.funcNode, langId); - const volume = metrics.halstead ? metrics.halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex( - volume, - metrics.cyclomatic, - loc.sloc, - commentRatio, - ); - - def.complexity = { - cognitive: metrics.cognitive, - cyclomatic: metrics.cyclomatic, - maxNesting: metrics.maxNesting, - halstead: metrics.halstead, - loc, - maintainabilityIndex: mi, - }; - } - } - } + // Complexity visitor (file-level mode) + let complexityVisitor = null; + const cRules = COMPLEXITY_RULES.get(langId); + const hRules = HALSTEAD_RULES.get(langId); + if (doComplexity && cRules) { + const needsWasmComplexity = defs.some( + (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, + ); + if (needsWasmComplexity) { + complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); + visitors.push(complexityVisitor); + + for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); + + const dfRules = DATAFLOW_RULES.get(langId); + walkerOpts.getFunctionName = (node) => { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode.text; + if (dfRules) return getFuncName(node, dfRules); + return null; + }; } + } - // ─ Store CFG results on definitions (buildCFGData will find def.cfg and skip its walk) ─ - if (cfgVisitor) { - const cfgResults = results.cfg || []; - const cfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!cfgByLine.has(line)) 
cfgByLine.set(line, []); - cfgByLine.get(line).push(r); - } - } - for (const def of defs) { - if ( - (def.kind === 'function' || def.kind === 'method') && - def.line && - !def.cfg?.blocks?.length - ) { - const candidates = cfgByLine.get(def.line); - const cfgResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (cfgResult) { - def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; - - // Override complexity's cyclomatic with CFG-derived value (single source of truth) - // and recompute maintainability index to stay consistent - if (def.complexity && cfgResult.cyclomatic != null) { - def.complexity.cyclomatic = cfgResult.cyclomatic; - const { loc, halstead } = def.complexity; - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; - def.complexity.maintainabilityIndex = computeMaintainabilityIndex( - volume, - cfgResult.cyclomatic, - loc?.sloc ?? 
0, - commentRatio, - ); - } - } - } - } + // CFG visitor + let cfgVisitor = null; + const cfgRulesForLang = CFG_RULES.get(langId); + if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { + const needsWasmCfg = defs.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !Array.isArray(d.cfg?.blocks), + ); + if (needsWasmCfg) { + cfgVisitor = createCfgVisitor(cfgRulesForLang); + visitors.push(cfgVisitor); } + } + + // Dataflow visitor + let dataflowVisitor = null; + const dfRules = DATAFLOW_RULES.get(langId); + if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + dataflowVisitor = createDataflowVisitor(dfRules); + visitors.push(dataflowVisitor); + } + + return { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor }; +} - // ─ Store dataflow results (buildDataflowEdges will find symbols.dataflow and skip its walk) ─ - if (dataflowVisitor) { - symbols.dataflow = results.dataflow; +// ─── Result storage helpers ───────────────────────────────────────────── + +function storeComplexityResults(results, defs, langId) { + const complexityResults = results.complexity || []; + const resultByLine = new Map(); + for (const r of complexityResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!resultByLine.has(line)) resultByLine.set(line, []); + resultByLine.get(line).push(r); } } + for (const def of defs) { + if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) { + const candidates = resultByLine.get(def.line); + const funcResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (funcResult) { + const { metrics } = funcResult; + const loc = computeLOCMetrics(funcResult.funcNode, langId); + const volume = metrics.halstead ? 
metrics.halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, metrics.cyclomatic, loc.sloc, commentRatio); + + def.complexity = { + cognitive: metrics.cognitive, + cyclomatic: metrics.cyclomatic, + maxNesting: metrics.maxNesting, + halstead: metrics.halstead, + loc, + maintainabilityIndex: mi, + }; + } + } + } +} - timing._unifiedWalkMs = performance.now() - t0walk; +function storeCfgResults(results, defs) { + const cfgResults = results.cfg || []; + const cfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!cfgByLine.has(line)) cfgByLine.set(line, []); + cfgByLine.get(line).push(r); + } + } + for (const def of defs) { + if ( + (def.kind === 'function' || def.kind === 'method') && + def.line && + !def.cfg?.blocks?.length + ) { + const candidates = cfgByLine.get(def.line); + const cfgResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (cfgResult) { + def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; + + // Override complexity's cyclomatic with CFG-derived value (single source of truth) + if (def.complexity && cfgResult.cyclomatic != null) { + def.complexity.cyclomatic = cfgResult.cyclomatic; + const { loc, halstead } = def.complexity; + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; + def.complexity.maintainabilityIndex = computeMaintainabilityIndex( + volume, + cfgResult.cyclomatic, + loc?.sloc ?? 
0, + commentRatio, + ); + } + } + } + } +} - // ── Delegate to buildXxx functions ───────────────────────────────── - // Each function finds pre-computed data from the unified walk above - // (or from the native engine) and only does DB writes + native fallback. +// ─── Build delegation ─────────────────────────────────────────────────── - if (doAst) { +async function delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing) { + if (opts.ast !== false) { const t0 = performance.now(); try { const { buildAstNodes } = await import('../features/ast.js'); @@ -348,7 +281,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.astMs = performance.now() - t0; } - if (doComplexity) { + if (opts.complexity !== false) { const t0 = performance.now(); try { const { buildComplexityMetrics } = await import('../features/complexity.js'); @@ -359,7 +292,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.complexityMs = performance.now() - t0; } - if (doCfg) { + if (opts.cfg !== false) { const t0 = performance.now(); try { const { buildCFGData } = await import('../features/cfg.js'); @@ -370,7 +303,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.cfgMs = performance.now() - t0; } - if (doDataflow) { + if (opts.dataflow !== false) { const t0 = performance.now(); try { const { buildDataflowEdges } = await import('../features/dataflow.js'); @@ -380,6 +313,67 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { } timing.dataflowMs = performance.now() - t0; } +} + +// ─── Public API ────────────────────────────────────────────────────────── + +/** + * Run all enabled AST analyses in a coordinated pass. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) + * @param {object} [engineOpts] - engine options + * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} + */ +export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { + const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; + + if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; + + const extToLang = buildExtToLangMap(); + + // WASM pre-parse for files that need it + await ensureWasmTreesIfNeeded(fileSymbols, opts); + + // Unified pre-walk: run all applicable visitors in a single DFS per file + const t0walk = performance.now(); + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) continue; + + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || extToLang.get(ext); + if (!langId) continue; + + const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } = + setupVisitors(db, relPath, symbols, langId, opts); + + if (visitors.length === 0) continue; + + const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); + const defs = symbols.definitions || []; + + if (astVisitor) { + const astRows = results['ast-store'] || []; + if (astRows.length > 0) symbols.astNodes = astRows; + } + + if (complexityVisitor) storeComplexityResults(results, defs, langId); + if (cfgVisitor) storeCfgResults(results, defs); + if (dataflowVisitor) symbols.dataflow = results.dataflow; + } + + timing._unifiedWalkMs = performance.now() - t0walk; + + // Delegate to buildXxx 
functions for DB writes + native fallback + await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing); return timing; } diff --git a/src/ast-analysis/visitors/cfg-visitor.js b/src/ast-analysis/visitors/cfg-visitor.js index 1fb1de50..97bb344f 100644 --- a/src/ast-analysis/visitors/cfg-visitor.js +++ b/src/ast-analysis/visitors/cfg-visitor.js @@ -10,756 +10,746 @@ * hooks, using a control-flow frame stack to track branch/loop/switch context. */ -/** - * Create a CFG visitor for use with walkWithVisitors. - * - * @param {object} cfgRules - CFG_RULES for the language - * @returns {Visitor} - */ -export function createCfgVisitor(cfgRules) { - // ── Per-function state ────────────────────────────────────────────── - // Pushed/popped on enterFunction/exitFunction for nested function support. - - /** @type {Array} Stack of per-function CFG state */ - const funcStateStack = []; - - /** @type {object|null} Active per-function state */ - let S = null; - - // Collected results (one per top-level function) - const results = []; - - function makeFuncState() { - const blocks = []; - const edges = []; - let nextIndex = 0; - - function makeBlock(type, startLine = null, endLine = null, label = null) { - const block = { index: nextIndex++, type, startLine, endLine, label }; - blocks.push(block); - return block; - } - - function addEdge(source, target, kind) { - edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); - } +// ── Node-type predicates ──────────────────────────────────────────────── - const entry = makeBlock('entry'); - const exit = makeBlock('exit'); - const firstBody = makeBlock('body'); - addEdge(entry, firstBody, 'fallthrough'); - - return { - blocks, - edges, - makeBlock, - addEdge, - entryBlock: entry, - exitBlock: exit, - currentBlock: firstBody, - loopStack: [], - labelMap: new Map(), - /** Control-flow frame stack for nested if/switch/try/loop/labeled */ - cfgStack: [], - funcNode: null, - }; - } - - // ── Helpers 
───────────────────────────────────────────────────────── +function isIfNode(type, cfgRules) { + return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); +} - function isIfNode(type) { - return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); - } +function isForNode(type, cfgRules) { + return cfgRules.forNodes.has(type); +} - function isForNode(type) { - return cfgRules.forNodes.has(type); - } +function isWhileNode(type, cfgRules) { + return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); +} - function isWhileNode(type) { - return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); - } +function isSwitchNode(type, cfgRules) { + return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); +} - function isSwitchNode(type) { - return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); - } +function isCaseNode(type, cfgRules) { + return ( + type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) + ); +} - function isCaseNode(type) { - return ( - type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) - ); - } +function isBlockNode(type, cfgRules) { + return type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type); +} - function isBlockNode(type) { - return ( - type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type) - ); - } +/** Check if a node is a control-flow statement that we handle specially */ +function isControlFlow(type, cfgRules) { + return ( + isIfNode(type, cfgRules) || + (cfgRules.unlessNode && type === cfgRules.unlessNode) || + isForNode(type, cfgRules) || + isWhileNode(type, cfgRules) || + (cfgRules.untilNode && type === cfgRules.untilNode) || + (cfgRules.doNode && type === cfgRules.doNode) || + (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || + isSwitchNode(type, cfgRules) || + (cfgRules.tryNode && type === cfgRules.tryNode) || + 
type === cfgRules.returnNode || + type === cfgRules.throwNode || + type === cfgRules.breakNode || + type === cfgRules.continueNode || + type === cfgRules.labeledNode + ); +} - /** Check if a node is a control-flow statement that we handle specially */ - function isControlFlow(type) { - return ( - isIfNode(type) || - (cfgRules.unlessNode && type === cfgRules.unlessNode) || - isForNode(type) || - isWhileNode(type) || - (cfgRules.untilNode && type === cfgRules.untilNode) || - (cfgRules.doNode && type === cfgRules.doNode) || - (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || - isSwitchNode(type) || - (cfgRules.tryNode && type === cfgRules.tryNode) || - type === cfgRules.returnNode || - type === cfgRules.throwNode || - type === cfgRules.breakNode || - type === cfgRules.continueNode || - type === cfgRules.labeledNode - ); - } +// ── Utility functions ─────────────────────────────────────────────────── - /** - * Get the actual control-flow node (unwrapping expression_statement if needed). - */ - function effectiveNode(node) { - if (node.type === 'expression_statement' && node.namedChildCount === 1) { - const inner = node.namedChild(0); - if (isControlFlow(inner.type)) return inner; - } - return node; +/** + * Get the actual control-flow node (unwrapping expression_statement if needed). + */ +function effectiveNode(node, cfgRules) { + if (node.type === 'expression_statement' && node.namedChildCount === 1) { + const inner = node.namedChild(0); + if (isControlFlow(inner.type, cfgRules)) return inner; } + return node; +} - /** - * Register a loop/switch in label map for labeled break/continue. - */ - function registerLabelCtx(headerBlock, exitBlock) { - for (const [, ctx] of S.labelMap) { - if (!ctx.headerBlock) { - ctx.headerBlock = headerBlock; - ctx.exitBlock = exitBlock; - } +/** + * Register a loop/switch in label map for labeled break/continue. 
+ */ +function registerLabelCtx(S, headerBlock, exitBlock) { + for (const [, ctx] of S.labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = exitBlock; } } +} - /** - * Get statements from a body node (block or single statement). - * Returns effective (unwrapped) nodes. - */ - function getBodyStatements(bodyNode) { - if (!bodyNode) return []; - if (isBlockNode(bodyNode.type)) { - const stmts = []; - for (let i = 0; i < bodyNode.namedChildCount; i++) { - const child = bodyNode.namedChild(i); - if (child.type === 'statement_list') { - for (let j = 0; j < child.namedChildCount; j++) { - stmts.push(child.namedChild(j)); - } - } else { - stmts.push(child); +/** + * Get statements from a body node (block or single statement). + * Returns effective (unwrapped) nodes. + */ +function getBodyStatements(bodyNode, cfgRules) { + if (!bodyNode) return []; + if (isBlockNode(bodyNode.type, cfgRules)) { + const stmts = []; + for (let i = 0; i < bodyNode.namedChildCount; i++) { + const child = bodyNode.namedChild(i); + if (child.type === 'statement_list') { + for (let j = 0; j < child.namedChildCount; j++) { + stmts.push(child.namedChild(j)); } + } else { + stmts.push(child); } - return stmts; } - return [bodyNode]; + return stmts; } + return [bodyNode]; +} - // ── Statement-level processing (replicates buildFunctionCFG logic) ── - // The visitor delegates to these for each control-flow construct, - // processing the body statements sequentially just like the original. 
+function makeFuncState() { + const blocks = []; + const edges = []; + let nextIndex = 0; - function processStatements(stmts, currentBlock) { - let cur = currentBlock; - for (const stmt of stmts) { - if (!cur) break; - cur = processStatement(stmt, cur); - } - return cur; + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { index: nextIndex++, type, startLine, endLine, label }; + blocks.push(block); + return block; } - function processStatement(stmt, currentBlock) { - if (!stmt || !currentBlock) return currentBlock; + function addEdge(source, target, kind) { + edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); + } - // Unwrap expression_statement for Rust-style control flow expressions - const effNode = effectiveNode(stmt); - const type = effNode.type; + const entry = makeBlock('entry'); + const exit = makeBlock('exit'); + const firstBody = makeBlock('body'); + addEdge(entry, firstBody, 'fallthrough'); - // Labeled statement - if (type === cfgRules.labeledNode) { - return processLabeled(effNode, currentBlock); - } + return { + blocks, + edges, + makeBlock, + addEdge, + entryBlock: entry, + exitBlock: exit, + currentBlock: firstBody, + loopStack: [], + labelMap: new Map(), + cfgStack: [], + funcNode: null, + }; +} - // If / unless - if (isIfNode(type) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { - return processIf(effNode, currentBlock); - } +// ── Statement processors ──────────────────────────────────────────────── - // For loops - if (isForNode(type)) { - return processForLoop(effNode, currentBlock); - } +function processStatements(stmts, currentBlock, S, cfgRules) { + let cur = currentBlock; + for (const stmt of stmts) { + if (!cur) break; + cur = processStatement(stmt, cur, S, cfgRules); + } + return cur; +} - // While / until - if (isWhileNode(type) || (cfgRules.untilNode && type === cfgRules.untilNode)) { - return processWhileLoop(effNode, currentBlock); - } +function 
processStatement(stmt, currentBlock, S, cfgRules) { + if (!stmt || !currentBlock) return currentBlock; - // Do-while - if (cfgRules.doNode && type === cfgRules.doNode) { - return processDoWhileLoop(effNode, currentBlock); - } + const effNode = effectiveNode(stmt, cfgRules); + const type = effNode.type; - // Infinite loop (Rust) - if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { - return processInfiniteLoop(effNode, currentBlock); - } + if (type === cfgRules.labeledNode) { + return processLabeled(effNode, currentBlock, S, cfgRules); + } + if (isIfNode(type, cfgRules) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { + return processIf(effNode, currentBlock, S, cfgRules); + } + if (isForNode(type, cfgRules)) { + return processForLoop(effNode, currentBlock, S, cfgRules); + } + if (isWhileNode(type, cfgRules) || (cfgRules.untilNode && type === cfgRules.untilNode)) { + return processWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.doNode && type === cfgRules.doNode) { + return processDoWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { + return processInfiniteLoop(effNode, currentBlock, S, cfgRules); + } + if (isSwitchNode(type, cfgRules)) { + return processSwitch(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.tryNode && type === cfgRules.tryNode) { + return processTryCatch(effNode, currentBlock, S, cfgRules); + } + if (type === cfgRules.returnNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'return'); + return null; + } + if (type === cfgRules.throwNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'exception'); + return null; + } + if (type === cfgRules.breakNode) { + return processBreak(effNode, currentBlock, S); + } + if (type === cfgRules.continueNode) { + return processContinue(effNode, currentBlock, S); + } - // Switch / match - 
if (isSwitchNode(type)) { - return processSwitch(effNode, currentBlock); - } + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; +} - // Try/catch/finally - if (cfgRules.tryNode && type === cfgRules.tryNode) { - return processTryCatch(effNode, currentBlock); - } +// ── Labeled / break / continue ────────────────────────────────────────── + +function processLabeled(node, currentBlock, S, cfgRules) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = node.childForFieldName('body'); + if (body && labelName) { + const labelCtx = { headerBlock: null, exitBlock: null }; + S.labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock, S, cfgRules); + S.labelMap.delete(labelName); + return result; + } + return currentBlock; +} - // Return - if (type === cfgRules.returnNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'return'); - return null; - } +function processBreak(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? 
labelNode.text : null; - // Throw - if (type === cfgRules.throwNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'exception'); - return null; - } + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).exitBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].exitBlock; + } - // Break - if (type === cfgRules.breakNode) { - return processBreak(effNode, currentBlock); - } + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'break'); + return null; + } + return currentBlock; +} - // Continue - if (type === cfgRules.continueNode) { - return processContinue(effNode, currentBlock); - } +function processContinue(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; - // Regular statement — extend current block - if (!currentBlock.startLine) { - currentBlock.startLine = stmt.startPosition.row + 1; - } - currentBlock.endLine = stmt.endPosition.row + 1; - return currentBlock; - } - - function processLabeled(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? labelNode.text : null; - const body = node.childForFieldName('body'); - if (body && labelName) { - const labelCtx = { headerBlock: null, exitBlock: null }; - S.labelMap.set(labelName, labelCtx); - const result = processStatement(body, currentBlock); - S.labelMap.delete(labelName); - return result; - } - return currentBlock; + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).headerBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].headerBlock; } - function processBreak(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'continue'); + return null; + } + return currentBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).exitBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].exitBlock; - } +// ── If / else-if / else ───────────────────────────────────────────────── + +function processIf(ifStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = S.makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + S.addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = S.makeBlock('body'); + + // True branch + const consequentField = cfgRules.ifConsequentField || 'consequence'; + const consequent = ifStmt.childForFieldName(consequentField); + const trueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getBodyStatements(consequent, cfgRules); + const trueEnd = processStatements(trueStmts, trueBlock, S, cfgRules); + if (trueEnd) { + S.addEdge(trueEnd, joinBlock, 'fallthrough'); + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'break'); - return null; - } - return currentBlock; + // False branch + if (cfgRules.elifNode) { + processElifSiblings(ifStmt, condBlock, joinBlock, S, cfgRules); + } else { + processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules); } - function processContinue(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + return joinBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).headerBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].headerBlock; - } +function processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules) { + const alternative = ifStmt.childForFieldName('alternative'); + if (!alternative) { + S.addEdge(condBlock, joinBlock, 'branch_false'); + return; + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'continue'); - return null; + if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { + // Pattern C: direct alternative (Go, Java, C#) + if (isIfNode(alternative.type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(alternative, falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseStmts = getBodyStatements(alternative, cfgRules); + const falseEnd = processStatements(falseStmts, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } else if (alternative.type === cfgRules.elseClause) { + // Pattern A: else_clause wrapper (JS/TS, Rust) + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && isIfNode(elseChildren[0].type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + 
const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); } - return currentBlock; } +} - // ── If/else-if/else ───────────────────────────────────────────────── - - function processIf(ifStmt, currentBlock) { - currentBlock.endLine = ifStmt.startPosition.row + 1; +function processElifSiblings(ifStmt, firstCondBlock, joinBlock, S, cfgRules) { + let lastCondBlock = firstCondBlock; + let foundElse = false; - const condBlock = S.makeBlock( - 'condition', - ifStmt.startPosition.row + 1, - ifStmt.startPosition.row + 1, - 'if', - ); - S.addEdge(currentBlock, condBlock, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - - // True branch - const consequentField = cfgRules.ifConsequentField || 'consequence'; - const consequent = ifStmt.childForFieldName(consequentField); - const trueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(condBlock, trueBlock, 'branch_true'); - const trueStmts = getBodyStatements(consequent); - const trueEnd = processStatements(trueStmts, trueBlock); - if (trueEnd) { - S.addEdge(trueEnd, joinBlock, 'fallthrough'); - } + for (let i = 0; i < ifStmt.namedChildCount; i++) { + const child = ifStmt.namedChild(i); - // False branch - if (cfgRules.elifNode) { - processElifSiblings(ifStmt, condBlock, joinBlock); - } else { - const alternative = ifStmt.childForFieldName('alternative'); - if (alternative) { - if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { - // Pattern C: direct alternative (Go, Java, C#) - if (isIfNode(alternative.type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(alternative, falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const 
falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseStmts = getBodyStatements(alternative); - const falseEnd = processStatements(falseStmts, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } else if (alternative.type === cfgRules.elseClause) { - // Pattern A: else_clause wrapper (JS/TS, Rust) - const elseChildren = []; - for (let i = 0; i < alternative.namedChildCount; i++) { - elseChildren.push(alternative.namedChild(i)); - } - if (elseChildren.length === 1 && isIfNode(elseChildren[0].type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(elseChildren[0], falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseEnd = processStatements(elseChildren, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } + if (child.type === cfgRules.elifNode) { + const elifCondBlock = S.makeBlock( + 'condition', + child.startPosition.row + 1, + child.startPosition.row + 1, + 'else-if', + ); + S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); + + const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; + const elifConsequent = child.childForFieldName(elifConsequentField); + const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); + const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); + const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); + if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); + + lastCondBlock = elifCondBlock; + } else if (child.type === cfgRules.elseClause) { + const elseBlock = S.makeBlock('branch_false', null, 
null, 'else'); + S.addEdge(lastCondBlock, elseBlock, 'branch_false'); + + const elseBody = child.childForFieldName('body'); + let elseStmts; + if (elseBody) { + elseStmts = getBodyStatements(elseBody, cfgRules); } else { - // No else - S.addEdge(condBlock, joinBlock, 'branch_false'); - } - } - - return joinBlock; - } - - function processElifSiblings(ifStmt, firstCondBlock, joinBlock) { - let lastCondBlock = firstCondBlock; - let foundElse = false; - - for (let i = 0; i < ifStmt.namedChildCount; i++) { - const child = ifStmt.namedChild(i); - - if (child.type === cfgRules.elifNode) { - const elifCondBlock = S.makeBlock( - 'condition', - child.startPosition.row + 1, - child.startPosition.row + 1, - 'else-if', - ); - S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); - - const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; - const elifConsequent = child.childForFieldName(elifConsequentField); - const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); - const elifTrueStmts = getBodyStatements(elifConsequent); - const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock); - if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); - - lastCondBlock = elifCondBlock; - } else if (child.type === cfgRules.elseClause) { - const elseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(lastCondBlock, elseBlock, 'branch_false'); - - const elseBody = child.childForFieldName('body'); - let elseStmts; - if (elseBody) { - elseStmts = getBodyStatements(elseBody); - } else { - elseStmts = []; - for (let j = 0; j < child.namedChildCount; j++) { - elseStmts.push(child.namedChild(j)); - } + elseStmts = []; + for (let j = 0; j < child.namedChildCount; j++) { + elseStmts.push(child.namedChild(j)); } - const elseEnd = processStatements(elseStmts, elseBlock); - if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - - foundElse = true; } - } + const elseEnd = 
processStatements(elseStmts, elseBlock, S, cfgRules); + if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - if (!foundElse) { - S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + foundElse = true; } } - // ── Loops ─────────────────────────────────────────────────────────── - - function processForLoop(forStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - forStmt.startPosition.row + 1, - forStmt.startPosition.row + 1, - 'for', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); - - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + if (!foundElse) { + S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + } +} - const body = forStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); +// ── Loops ─────────────────────────────────────────────────────────────── + +function processForLoop(forStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = forStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, 
bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); +function processWhileLoop(whileStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } +function processDoWhileLoop(doStmt, currentBlock, S, cfgRules) { + const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + S.addEdge(currentBlock, bodyBlock, 'fallthrough'); - function processWhileLoop(whileStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - whileStmt.startPosition.row + 1, - whileStmt.startPosition.row + 1, - 'while', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = S.makeBlock('body'); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, condBlock, loopExitBlock); - const body = 
whileStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); + const body = doStmt.childForFieldName('body'); + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + S.addEdge(condBlock, bodyBlock, 'loop_back'); + S.addEdge(condBlock, loopExitBlock, 'loop_exit'); - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } + S.loopStack.pop(); + return loopExitBlock; +} - function processDoWhileLoop(doStmt, currentBlock) { - const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); - S.addEdge(currentBlock, bodyBlock, 'fallthrough'); +function processInfiniteLoop(loopStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + loopStmt.startPosition.row + 1, + loopStmt.startPosition.row + 1, + 'loop', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = loopStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + // No loop_exit from header — only via break + S.loopStack.pop(); + return loopExitBlock; +} - const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); - const loopExitBlock = S.makeBlock('body'); +// ── Switch 
/ match ────────────────────────────────────────────────────── - const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(condBlock, loopExitBlock); +function processSwitch(switchStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = switchStmt.startPosition.row + 1; - const body = doStmt.childForFieldName('body'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); + const switchHeader = S.makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + S.addEdge(currentBlock, switchHeader, 'fallthrough'); - S.addEdge(condBlock, bodyBlock, 'loop_back'); - S.addEdge(condBlock, loopExitBlock, 'loop_exit'); + const joinBlock = S.makeBlock('body'); + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + S.loopStack.push(switchCtx); - S.loopStack.pop(); - return loopExitBlock; - } + const switchBody = switchStmt.childForFieldName('body'); + const container = switchBody || switchStmt; - function processInfiniteLoop(loopStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - loopStmt.startPosition.row + 1, - loopStmt.startPosition.row + 1, - 'loop', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + let hasDefault = false; + for (let i = 0; i < container.namedChildCount; i++) { + const caseClause = container.namedChild(i); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const isDefault = caseClause.type === cfgRules.defaultNode; + const isCase = isDefault || isCaseNode(caseClause.type, cfgRules); + if (!isCase) continue; - const body = loopStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 
'branch_true'); + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); + S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + const caseStmts = extractCaseBody(caseClause, cfgRules); + const caseEnd = processStatements(caseStmts, caseBlock, S, cfgRules); + if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); + } - // No loop_exit from header — only via break - S.loopStack.pop(); - return loopExitBlock; + if (!hasDefault) { + S.addEdge(switchHeader, joinBlock, 'branch_false'); } - // ── Switch / match ────────────────────────────────────────────────── + S.loopStack.pop(); + return joinBlock; +} - function processSwitch(switchStmt, currentBlock) { - currentBlock.endLine = switchStmt.startPosition.row + 1; +function extractCaseBody(caseClause, cfgRules) { + const caseBodyNode = + caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); + if (caseBodyNode) { + return getBodyStatements(caseBodyNode, cfgRules); + } - const switchHeader = S.makeBlock( - 'condition', - switchStmt.startPosition.row + 1, - switchStmt.startPosition.row + 1, - 'switch', - ); - S.addEdge(currentBlock, switchHeader, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; - S.loopStack.push(switchCtx); - - const switchBody = switchStmt.childForFieldName('body'); - const container = switchBody || switchStmt; - - let hasDefault = false; - for (let i = 0; i < container.namedChildCount; i++) { - const caseClause = container.namedChild(i); - - const isDefault = caseClause.type === cfgRules.defaultNode; - const isCase = isDefault || isCaseNode(caseClause.type); - if (!isCase) 
continue; - - const caseLabel = isDefault ? 'default' : 'case'; - const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); - S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); - if (isDefault) hasDefault = true; - - // Extract case body - const caseBodyNode = - caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); - let caseStmts; - if (caseBodyNode) { - caseStmts = getBodyStatements(caseBodyNode); - } else { - caseStmts = []; - const valueNode = caseClause.childForFieldName('value'); - const patternNode = caseClause.childForFieldName('pattern'); - for (let j = 0; j < caseClause.namedChildCount; j++) { - const child = caseClause.namedChild(j); - if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { - if (child.type === 'statement_list') { - for (let k = 0; k < child.namedChildCount; k++) { - caseStmts.push(child.namedChild(k)); - } - } else { - caseStmts.push(child); - } - } + const stmts = []; + const valueNode = caseClause.childForFieldName('value'); + const patternNode = caseClause.childForFieldName('pattern'); + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { + if (child.type === 'statement_list') { + for (let k = 0; k < child.namedChildCount; k++) { + stmts.push(child.namedChild(k)); } + } else { + stmts.push(child); } - - const caseEnd = processStatements(caseStmts, caseBlock); - if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); } + } + return stmts; +} - if (!hasDefault) { - S.addEdge(switchHeader, joinBlock, 'branch_false'); - } +// ── Try / catch / finally ─────────────────────────────────────────────── + +function processTryCatch(tryStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = tryStmt.startPosition.row + 1; - S.loopStack.pop(); - return joinBlock; + const joinBlock = 
S.makeBlock('body'); + + // Try body + const tryBody = tryStmt.childForFieldName('body'); + let tryBodyStart; + let tryStmts; + if (tryBody) { + tryBodyStart = tryBody.startPosition.row + 1; + tryStmts = getBodyStatements(tryBody, cfgRules); + } else { + tryBodyStart = tryStmt.startPosition.row + 1; + tryStmts = []; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; + tryStmts.push(child); + } } - // ── Try/catch/finally ─────────────────────────────────────────────── + const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); + S.addEdge(currentBlock, tryBlock, 'fallthrough'); + const tryEnd = processStatements(tryStmts, tryBlock, S, cfgRules); - function processTryCatch(tryStmt, currentBlock) { - currentBlock.endLine = tryStmt.startPosition.row + 1; + // Find catch and finally handlers + const { catchHandler, finallyHandler } = findTryHandlers(tryStmt, cfgRules); - const joinBlock = S.makeBlock('body'); + if (catchHandler) { + processCatchHandler(catchHandler, tryBlock, tryEnd, finallyHandler, joinBlock, S, cfgRules); + } else if (finallyHandler) { + processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + } - // Try body - const tryBody = tryStmt.childForFieldName('body'); - let tryBodyStart; - let tryStmts; - if (tryBody) { - tryBodyStart = tryBody.startPosition.row + 1; - tryStmts = getBodyStatements(tryBody); - } else { - tryBodyStart = tryStmt.startPosition.row + 1; - tryStmts = []; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; - tryStmts.push(child); - } - } + return joinBlock; +} - 
const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); - S.addEdge(currentBlock, tryBlock, 'fallthrough'); - const tryEnd = processStatements(tryStmts, tryBlock); +function findTryHandlers(tryStmt, cfgRules) { + let catchHandler = null; + let finallyHandler = null; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; + } + return { catchHandler, finallyHandler }; +} - // Find catch and finally handlers - let catchHandler = null; - let finallyHandler = null; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; +function processCatchHandler( + catchHandler, + tryBlock, + tryEnd, + finallyHandler, + joinBlock, + S, + cfgRules, +) { + const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); + S.addEdge(tryBlock, catchBlock, 'exception'); + + const catchBodyNode = catchHandler.childForFieldName('body'); + let catchStmts; + if (catchBodyNode) { + catchStmts = getBodyStatements(catchBodyNode, cfgRules); + } else { + catchStmts = []; + for (let i = 0; i < catchHandler.namedChildCount; i++) { + catchStmts.push(catchHandler.namedChild(i)); } + } + const catchEnd = processStatements(catchStmts, catchBlock, S, cfgRules); + + if (finallyHandler) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? 
getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); + } +} - if (catchHandler) { - const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); - S.addEdge(tryBlock, catchBlock, 'exception'); +function processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); +} - const catchBodyNode = catchHandler.childForFieldName('body'); - let catchStmts; - if (catchBodyNode) { - catchStmts = getBodyStatements(catchBodyNode); - } else { - catchStmts = []; - for (let i = 0; i < catchHandler.namedChildCount; i++) { - catchStmts.push(catchHandler.namedChild(i)); - } - } - const catchEnd = processStatements(catchStmts, catchBlock); - - if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? 
getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); - } - } else if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - } +// ── Enter-function body processing ────────────────────────────────────── + +function processFunctionBody(funcNode, S, cfgRules) { + const body = funcNode.childForFieldName('body'); + if (!body) { + // No body — entry → exit + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } - return joinBlock; + if (!isBlockNode(body.type, cfgRules)) { + // Expression body (e.g., arrow function `(x) => x + 1`) + const bodyBlock = S.blocks[2]; + bodyBlock.startLine = body.startPosition.row + 1; + bodyBlock.endLine = body.endPosition.row + 1; + S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; } - // ── Visitor interface ─────────────────────────────────────────────── + // Block body — process statements + const stmts = getBodyStatements(body, cfgRules); + if (stmts.length === 0) { + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } + + const 
firstBody = S.blocks[2]; + const lastBlock = processStatements(stmts, firstBody, S, cfgRules); + if (lastBlock) { + S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); + } + S.currentBlock = null; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a CFG visitor for use with walkWithVisitors. + * + * @param {object} cfgRules - CFG_RULES for the language + * @returns {Visitor} + */ +export function createCfgVisitor(cfgRules) { + const funcStateStack = []; + let S = null; + const results = []; return { name: 'cfg', functionNodeTypes: cfgRules.functionNodes, enterFunction(funcNode, _funcName, _context) { - if (S) { - // Nested function — push current state - funcStateStack.push(S); - } + if (S) funcStateStack.push(S); S = makeFuncState(); S.funcNode = funcNode; - - // Check for expression body (arrow functions): no block body - const body = funcNode.childForFieldName('body'); - if (!body) { - // No body at all — entry → exit - // Remove the firstBody block and its edge - S.blocks.length = 2; // keep entry + exit - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - if (!isBlockNode(body.type)) { - // Expression body (e.g., arrow function `(x) => x + 1`) - // entry → body → exit (body is the expression) - const bodyBlock = S.blocks[2]; // the firstBody we already created - bodyBlock.startLine = body.startPosition.row + 1; - bodyBlock.endLine = body.endPosition.row + 1; - S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; // no further processing needed - return; - } - - // Block body — process statements - const stmts = getBodyStatements(body); - if (stmts.length === 0) { - // Empty function - S.blocks.length = 2; - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - // Process all body statements using the statement-level processor - const firstBody = S.blocks[2]; // 
the firstBody block - const lastBlock = processStatements(stmts, firstBody); - if (lastBlock) { - S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); - } - S.currentBlock = null; // done processing + processFunctionBody(funcNode, S, cfgRules); }, exitFunction(funcNode, _funcName, _context) { if (S && S.funcNode === funcNode) { - // Derive cyclomatic complexity from CFG: E - N + 2 const cyclomatic = S.edges.length - S.blocks.length + 2; results.push({ funcNode: S.funcNode, @@ -768,21 +758,17 @@ export function createCfgVisitor(cfgRules) { cyclomatic: Math.max(cyclomatic, 1), }); } - - // Pop to parent function state (if nested) S = funcStateStack.length > 0 ? funcStateStack.pop() : null; }, enterNode(_node, _context) { - // No-op — all CFG construction is done in enterFunction via - // processStatements. We intentionally do NOT return skipChildren here - // so that the walker still recurses into children, allowing nested - // function definitions to trigger enterFunction/exitFunction and get - // their own CFG computed via the funcStateStack. + // No-op — all CFG construction is done in enterFunction via processStatements. + // We intentionally do NOT return skipChildren so the walker recurses into + // children, allowing nested functions to trigger enterFunction/exitFunction. 
}, exitNode(_node, _context) { - // No-op — all work done in enterFunction/exitFunction + // No-op }, finish() { diff --git a/src/ast-analysis/visitors/complexity-visitor.js b/src/ast-analysis/visitors/complexity-visitor.js index df386afc..ca19c0c5 100644 --- a/src/ast-analysis/visitors/complexity-visitor.js +++ b/src/ast-analysis/visitors/complexity-visitor.js @@ -12,6 +12,122 @@ import { computeMaintainabilityIndex, } from '../metrics.js'; +// ── Halstead classification ───────────────────────────────────────────── + +function classifyHalstead(node, hRules, acc) { + const type = node.type; + if (hRules.skipTypes.has(type)) acc.halsteadSkipDepth++; + if (acc.halsteadSkipDepth > 0) return; + + if (hRules.compoundOperators.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } + if (node.childCount === 0) { + if (hRules.operatorLeafTypes.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } else if (hRules.operandLeafTypes.has(type)) { + const text = node.text; + acc.operands.set(text, (acc.operands.get(text) || 0) + 1); + } + } +} + +// ── Branch complexity classification ──────────────────────────────────── + +function classifyBranchNode(node, type, nestingLevel, cRules, acc) { + // Pattern A: else clause wraps if (JS/C#/Rust) + if (cRules.elseNodeType && type === cRules.elseNodeType) { + const firstChild = node.namedChild(0); + if (firstChild && firstChild.type === cRules.ifNodeType) { + // else-if: the if_statement child handles its own increment + return; + } + acc.cognitive++; + return; + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if (cRules.elifNodeType && type === cRules.elifNodeType) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Detect else-if via Pattern A or C + let isElseIf = false; + if (type === cRules.ifNodeType) { + if (cRules.elseViaAlternative) { + isElseIf = + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id; 
+ } else if (cRules.elseNodeType) { + isElseIf = node.parent?.type === cRules.elseNodeType; + } + } + + if (isElseIf) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Regular branch node + acc.cognitive += 1 + nestingLevel; + acc.cyclomatic++; + + if (cRules.switchLikeNodes?.has(type)) { + acc.cyclomatic--; + } +} + +// ── Plain-else detection (Pattern C: Go/Java) ────────────────────────── + +function classifyPlainElse(node, type, cRules, acc) { + if ( + cRules.elseViaAlternative && + type !== cRules.ifNodeType && + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id + ) { + acc.cognitive++; + } +} + +// ── Result collection ─────────────────────────────────────────────────── + +function collectResult(funcNode, acc, hRules, langId) { + const halstead = + hRules && acc.operators && acc.operands + ? computeHalsteadDerived(acc.operators, acc.operands) + : null; + const loc = computeLOCMetrics(funcNode, langId); + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, acc.cyclomatic, loc.sloc, commentRatio); + + return { + cognitive: acc.cognitive, + cyclomatic: acc.cyclomatic, + maxNesting: acc.maxNesting, + halstead, + loc, + mi, + }; +} + +function resetAccumulators(hRules) { + return { + cognitive: 0, + cyclomatic: 1, + maxNesting: 0, + operators: hRules ? new Map() : null, + operands: hRules ? new Map() : null, + halsteadSkipDepth: 0, + }; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + /** * Create a complexity visitor for use with walkWithVisitors. * @@ -28,43 +144,12 @@ import { export function createComplexityVisitor(cRules, hRules, options = {}) { const { fileLevelWalk = false, langId = null } = options; - // Per-function accumulators - let cognitive = 0; - let cyclomatic = 1; - let maxNesting = 0; - let operators = hRules ? 
new Map() : null; - let operands = hRules ? new Map() : null; - let halsteadSkipDepth = 0; - - // In file-level mode, we only count when inside a function + let acc = resetAccumulators(hRules); let activeFuncNode = null; let activeFuncName = null; - // Nesting depth relative to the active function (for nested functions) let funcDepth = 0; - - // Collected results (one per function) const results = []; - function reset() { - cognitive = 0; - cyclomatic = 1; - maxNesting = 0; - operators = hRules ? new Map() : null; - operands = hRules ? new Map() : null; - halsteadSkipDepth = 0; - } - - function collectResult(funcNode) { - const halstead = - hRules && operators && operands ? computeHalsteadDerived(operators, operands) : null; - const loc = computeLOCMetrics(funcNode, langId); - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, cyclomatic, loc.sloc, commentRatio); - - return { cognitive, cyclomatic, maxNesting, halstead, loc, mi }; - } - return { name: 'complexity', functionNodeTypes: cRules.functionNodes, @@ -72,17 +157,14 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { enterFunction(funcNode, funcName, _context) { if (fileLevelWalk) { if (!activeFuncNode) { - // Top-level function: start fresh - reset(); + acc = resetAccumulators(hRules); activeFuncNode = funcNode; activeFuncName = funcName; funcDepth = 0; } else { - // Nested function: increase nesting for complexity funcDepth++; } } else { - // Function-level mode: track nested functions for correct nesting depth funcDepth++; } }, @@ -90,11 +172,10 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { exitFunction(funcNode, _funcName, _context) { if (fileLevelWalk) { if (funcNode === activeFuncNode) { - // Leaving the top-level function: emit result results.push({ funcNode, funcName: activeFuncName, - metrics: collectResult(funcNode), + metrics: 
collectResult(funcNode, acc, hRules, langId), }); activeFuncNode = null; activeFuncName = null; @@ -107,137 +188,52 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { }, enterNode(node, context) { - // In file-level mode, skip nodes outside any function if (fileLevelWalk && !activeFuncNode) return; const type = node.type; const nestingLevel = fileLevelWalk ? context.nestingLevel + funcDepth : context.nestingLevel; - // ── Halstead classification ── - if (hRules) { - if (hRules.skipTypes.has(type)) halsteadSkipDepth++; - if (halsteadSkipDepth === 0) { - if (hRules.compoundOperators.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } - if (node.childCount === 0) { - if (hRules.operatorLeafTypes.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } else if (hRules.operandLeafTypes.has(type)) { - const text = node.text; - operands.set(text, (operands.get(text) || 0) + 1); - } - } - } - } + if (hRules) classifyHalstead(node, hRules, acc); - // ── Complexity: track nesting depth ── - if (nestingLevel > maxNesting) maxNesting = nestingLevel; + if (nestingLevel > acc.maxNesting) acc.maxNesting = nestingLevel; - // Handle logical operators in binary expressions + // Logical operators in binary expressions if (type === cRules.logicalNodeType) { const op = node.child(1)?.type; if (op && cRules.logicalOperators.has(op)) { - cyclomatic++; + acc.cyclomatic++; const parent = node.parent; let sameSequence = false; if (parent && parent.type === cRules.logicalNodeType) { const parentOp = parent.child(1)?.type; if (parentOp === op) sameSequence = true; } - if (!sameSequence) cognitive++; - // Don't skip children — walker handles recursion + if (!sameSequence) acc.cognitive++; } } - // Handle optional chaining (cyclomatic only) - if (type === cRules.optionalChainType) { - cyclomatic++; - } + // Optional chaining (cyclomatic only) + if (type === cRules.optionalChainType) acc.cyclomatic++; - // Handle branch/control flow 
nodes (skip keyword leaf tokens) + // Branch/control flow nodes (skip keyword leaf tokens) if (cRules.branchNodes.has(type) && node.childCount > 0) { - // Pattern A: else clause wraps if (JS/C#/Rust) - if (cRules.elseNodeType && type === cRules.elseNodeType) { - const firstChild = node.namedChild(0); - if (firstChild && firstChild.type === cRules.ifNodeType) { - // else-if: the if_statement child handles its own increment - return; - } - cognitive++; - return; - } - - // Pattern B: explicit elif node (Python/Ruby/PHP) - if (cRules.elifNodeType && type === cRules.elifNodeType) { - cognitive++; - cyclomatic++; - return; - } - - // Detect else-if via Pattern A or C - let isElseIf = false; - if (type === cRules.ifNodeType) { - if (cRules.elseViaAlternative) { - isElseIf = - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id; - } else if (cRules.elseNodeType) { - isElseIf = node.parent?.type === cRules.elseNodeType; - } - } - - if (isElseIf) { - cognitive++; - cyclomatic++; - return; - } - - // Regular branch node - cognitive += 1 + nestingLevel; - cyclomatic++; - - if (cRules.switchLikeNodes?.has(type)) { - cyclomatic--; - } - - // Nesting nodes are handled by the walker's nestingNodeTypes option - // But we still need them to count in complexity — they already do above - } - - // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) - if ( - cRules.elseViaAlternative && - type !== cRules.ifNodeType && - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id - ) { - cognitive++; + classifyBranchNode(node, type, nestingLevel, cRules, acc); } - // Handle case nodes (cyclomatic only, skip keyword leaves) - if (cRules.caseNodes.has(type) && node.childCount > 0) { - cyclomatic++; - } + // Pattern C plain else (Go/Java) + classifyPlainElse(node, type, cRules, acc); - // Handle nested function definitions (increase nesting) - // In file-level 
mode funcDepth handles this; in function-level mode the - // nestingNodeTypes option should include function nodes + // Case nodes (cyclomatic only, skip keyword leaves) + if (cRules.caseNodes.has(type) && node.childCount > 0) acc.cyclomatic++; }, exitNode(node) { - // Decrement skip depth when leaving a skip-type subtree - if (hRules?.skipTypes.has(node.type)) { - halsteadSkipDepth--; - } + if (hRules?.skipTypes.has(node.type)) acc.halsteadSkipDepth--; }, finish() { - if (fileLevelWalk) { - return results; - } - // Function-level mode: return single result (no funcNode reference needed) - return collectResult({ text: '' }); + if (fileLevelWalk) return results; + return collectResult({ text: '' }, acc, hRules, langId); }, }; } diff --git a/src/ast-analysis/visitors/dataflow-visitor.js b/src/ast-analysis/visitors/dataflow-visitor.js index c6fe9fa9..644490be 100644 --- a/src/ast-analysis/visitors/dataflow-visitor.js +++ b/src/ast-analysis/visitors/dataflow-visitor.js @@ -21,254 +21,280 @@ import { truncate, } from '../visitor-utils.js'; -/** - * Create a dataflow visitor for use with walkWithVisitors. - * - * @param {object} rules - DATAFLOW_RULES for the language - * @returns {Visitor} - */ -export function createDataflowVisitor(rules) { - const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; - - const parameters = []; - const returns = []; - const assignments = []; - const argFlows = []; - const mutations = []; +// ── Scope helpers ─────────────────────────────────────────────────────── - const scopeStack = []; +function currentScope(scopeStack) { + return scopeStack.length > 0 ? scopeStack[scopeStack.length - 1] : null; +} - function currentScope() { - return scopeStack.length > 0 ? 
scopeStack[scopeStack.length - 1] : null; +function findBinding(name, scopeStack) { + for (let i = scopeStack.length - 1; i >= 0; i--) { + const scope = scopeStack[i]; + if (scope.params.has(name)) + return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; + if (scope.locals.has(name)) + return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; } + return null; +} - function findBinding(name) { - for (let i = scopeStack.length - 1; i >= 0; i--) { - const scope = scopeStack[i]; - if (scope.params.has(name)) - return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; - if (scope.locals.has(name)) - return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; - } - return null; +function bindingConfidence(binding) { + if (!binding) return 0.5; + if (binding.type === 'param') return 1.0; + if (binding.type === 'local') { + if (binding.source?.type === 'call_return') return 0.9; + if (binding.source?.type === 'destructured') return 0.8; + return 0.9; } + return 0.5; +} - function bindingConfidence(binding) { - if (!binding) return 0.5; - if (binding.type === 'param') return 1.0; - if (binding.type === 'local') { - if (binding.source?.type === 'call_return') return 0.9; - if (binding.source?.type === 'destructured') return 0.8; - return 0.9; - } - return 0.5; - } +// ── Node helpers ──────────────────────────────────────────────────────── - function unwrapAwait(node) { - if (rules.awaitNode && node.type === rules.awaitNode) { - return node.namedChildren[0] || node; - } - return node; +function unwrapAwait(node, rules) { + if (rules.awaitNode && node.type === rules.awaitNode) { + return node.namedChildren[0] || node; } + return node; +} - function isCall(node) { - return node && isCallNode(node.type); - } +function isCall(node, isCallNode) { + return node && isCallNode(node.type); +} - function handleVarDeclarator(node) { - let nameNode = 
node.childForFieldName(rules.varNameField); - let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; +// ── Node handlers ─────────────────────────────────────────────────────── - if (!valueNode && rules.equalsClauseType) { - for (const child of node.namedChildren) { - if (child.type === rules.equalsClauseType) { - valueNode = child.childForFieldName('value') || child.namedChildren[0]; - break; - } - } - } +function handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode) { + let nameNode = node.childForFieldName(rules.varNameField); + let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; - if (!valueNode) { - for (const child of node.namedChildren) { - if (child !== nameNode && isCall(unwrapAwait(child))) { - valueNode = child; - break; - } + if (!valueNode && rules.equalsClauseType) { + for (const child of node.namedChildren) { + if (child.type === rules.equalsClauseType) { + valueNode = child.childForFieldName('value') || child.namedChildren[0]; + break; } } + } - if (rules.expressionListType) { - if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; - if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + if (!valueNode) { + for (const child of node.namedChildren) { + if (child !== nameNode && isCall(unwrapAwait(child, rules), isCallNode)) { + valueNode = child; + break; + } } + } - const scope = currentScope(); - if (!nameNode || !valueNode || !scope) return; - - const unwrapped = unwrapAwait(valueNode); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; + if (rules.expressionListType) { + if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; + if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + } - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee && scope.funcName) { - if ( - (rules.objectDestructType && nameNode.type === rules.objectDestructType) || - (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) - ) { - const names = extractParamNames(nameNode, rules); - for (const n of names) { - assignments.push({ - varName: n, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(n, { type: 'destructured', callee }); - } - } else { - const varName = - nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier - ? nameNode.text - : nameNode.text; + const scope = currentScope(scopeStack); + if (!nameNode || !valueNode || !scope) return; + + const unwrapped = unwrapAwait(valueNode, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee && scope.funcName) { + if ( + (rules.objectDestructType && nameNode.type === rules.objectDestructType) || + (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) + ) { + const names = extractParamNames(nameNode, rules); + for (const n of names) { assignments.push({ - varName, + varName: n, callerFunc: scope.funcName, sourceCallName: callee, expression: truncate(node.text), line: node.startPosition.row + 1, }); - scope.locals.set(varName, { type: 'call_return', callee }); + scope.locals.set(n, { type: 'destructured', callee }); } + } else { + const varName = + nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier + ? 
nameNode.text + : nameNode.text; + assignments.push({ + varName, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(varName, { type: 'call_return', callee }); } } } +} - function handleAssignment(node) { - const left = node.childForFieldName(rules.assignLeftField); - const right = node.childForFieldName(rules.assignRightField); - const scope = currentScope(); - if (!scope?.funcName) return; - - if (left && rules.memberNode && left.type === rules.memberNode) { - const receiver = memberReceiver(left, rules); - if (receiver) { - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(node.text), - line: node.startPosition.row + 1, - }); - } +function handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode) { + const left = node.childForFieldName(rules.assignLeftField); + const right = node.childForFieldName(rules.assignRightField); + const scope = currentScope(scopeStack); + if (!scope?.funcName) return; + + if (left && rules.memberNode && left.type === rules.memberNode) { + const receiver = memberReceiver(left, rules); + if (receiver) { + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(node.text), + line: node.startPosition.row + 1, + }); } } + } - if (left && isIdent(left.type, rules) && right) { - const unwrapped = unwrapAwait(right); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee) { - assignments.push({ - varName: left.text, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(left.text, { type: 'call_return', callee }); - } + if (left && isIdent(left.type, rules) && right) { + const unwrapped = unwrapAwait(right, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee) { + assignments.push({ + varName: left.text, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(left.text, { type: 'call_return', callee }); } } } +} - function handleCallExpr(node) { - const callee = resolveCalleeName(node, rules); - const argsNode = node.childForFieldName(rules.callArgsField); - const scope = currentScope(); - if (!callee || !argsNode || !scope?.funcName) return; +function handleCallExpr(node, rules, scopeStack, argFlows) { + const callee = resolveCalleeName(node, rules); + const argsNode = node.childForFieldName(rules.callArgsField); + const scope = currentScope(scopeStack); + if (!callee || !argsNode || !scope?.funcName) return; - let argIndex = 0; - for (let arg of argsNode.namedChildren) { - if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { - arg = arg.namedChildren[0] || arg; - } - const unwrapped = - rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg; - if (!unwrapped) { - argIndex++; - continue; - } + let argIndex = 0; + for (let arg of argsNode.namedChildren) { + if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { + arg = arg.namedChildren[0] || arg; + } + const unwrapped = + rules.spreadType && arg.type === rules.spreadType ? 
arg.namedChildren[0] || arg : arg; + if (!unwrapped) { + argIndex++; + continue; + } - const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; - const argMember = - rules.memberNode && unwrapped.type === rules.memberNode - ? memberReceiver(unwrapped, rules) - : null; - const trackedName = argName || argMember; - - if (trackedName) { - const binding = findBinding(trackedName); - if (binding) { - argFlows.push({ - callerFunc: scope.funcName, - calleeName: callee, - argIndex, - argName: trackedName, - binding, - confidence: bindingConfidence(binding), - expression: truncate(arg.text), - line: node.startPosition.row + 1, - }); - } + const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; + const argMember = + rules.memberNode && unwrapped.type === rules.memberNode + ? memberReceiver(unwrapped, rules) + : null; + const trackedName = argName || argMember; + + if (trackedName) { + const binding = findBinding(trackedName, scopeStack); + if (binding) { + argFlows.push({ + callerFunc: scope.funcName, + calleeName: callee, + argIndex, + argName: trackedName, + binding, + confidence: bindingConfidence(binding), + expression: truncate(arg.text), + line: node.startPosition.row + 1, + }); } - argIndex++; } + argIndex++; } +} - function handleExprStmtMutation(node) { - if (rules.mutatingMethods.size === 0) return; - const expr = node.namedChildren[0]; - if (!expr || !isCall(expr)) return; +function handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode) { + if (rules.mutatingMethods.size === 0) return; + const expr = node.namedChildren[0]; + if (!expr || !isCall(expr, isCallNode)) return; - let methodName = null; - let receiver = null; + let methodName = null; + let receiver = null; - const fn = expr.childForFieldName(rules.callFunctionField); - if (fn && fn.type === rules.memberNode) { - const prop = fn.childForFieldName(rules.memberPropertyField); - methodName = prop ? 
prop.text : null; - receiver = memberReceiver(fn, rules); - } + const fn = expr.childForFieldName(rules.callFunctionField); + if (fn && fn.type === rules.memberNode) { + const prop = fn.childForFieldName(rules.memberPropertyField); + methodName = prop ? prop.text : null; + receiver = memberReceiver(fn, rules); + } - if (!receiver && rules.callObjectField) { - const obj = expr.childForFieldName(rules.callObjectField); - const name = expr.childForFieldName(rules.callFunctionField); - if (obj && name) { - methodName = name.text; - receiver = isIdent(obj.type, rules) ? obj.text : null; - } + if (!receiver && rules.callObjectField) { + const obj = expr.childForFieldName(rules.callObjectField); + const name = expr.childForFieldName(rules.callFunctionField); + if (obj && name) { + methodName = name.text; + receiver = isIdent(obj.type, rules) ? obj.text : null; } + } - if (!methodName || !rules.mutatingMethods.has(methodName)) return; + if (!methodName || !rules.mutatingMethods.has(methodName)) return; - const scope = currentScope(); - if (!receiver || !scope?.funcName) return; + const scope = currentScope(scopeStack); + if (!receiver || !scope?.funcName) return; - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(expr.text), - line: node.startPosition.row + 1, - }); - } + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(expr.text), + line: node.startPosition.row + 1, + }); } +} + +// ── Return statement handler ──────────────────────────────────────────── + +function handleReturn(node, rules, scopeStack, returns) { + if (node.parent?.type === rules.returnNode) return; // keyword token, not statement + + const scope = currentScope(scopeStack); + if (scope?.funcName) { + const expr = node.namedChildren[0]; + const referencedNames = []; 
+ if (expr) collectIdentifiers(expr, referencedNames, rules); + returns.push({ + funcName: scope.funcName, + expression: truncate(expr ? expr.text : ''), + referencedNames, + line: node.startPosition.row + 1, + }); + } +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a dataflow visitor for use with walkWithVisitors. + * + * @param {object} rules - DATAFLOW_RULES for the language + * @returns {Visitor} + */ +export function createDataflowVisitor(rules) { + const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; + + const parameters = []; + const returns = []; + const assignments = []; + const argFlows = []; + const mutations = []; + const scopeStack = []; return { name: 'dataflow', @@ -300,54 +326,34 @@ export function createDataflowVisitor(rules) { enterNode(node, _context) { const t = node.type; - // Skip function nodes — handled by enterFunction/exitFunction if (rules.functionNodes.has(t)) return; - // Return statements (skip keyword tokens inside return statements, e.g. Ruby's - // `return` node nests a `return` keyword child with the same type string) if (rules.returnNode && t === rules.returnNode) { - if (node.parent?.type === rules.returnNode) return; // keyword token, not statement - - const scope = currentScope(); - if (scope?.funcName) { - const expr = node.namedChildren[0]; - const referencedNames = []; - if (expr) collectIdentifiers(expr, referencedNames, rules); - returns.push({ - funcName: scope.funcName, - expression: truncate(expr ? 
expr.text : ''), - referencedNames, - line: node.startPosition.row + 1, - }); - } + handleReturn(node, rules, scopeStack, returns); return; } - // Variable declarations if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } if (rules.varDeclaratorNodes?.has(t)) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } - // Call expressions if (isCallNode(t)) { - handleCallExpr(node); + handleCallExpr(node, rules, scopeStack, argFlows); return; } - // Assignment expressions if (rules.assignmentNode && t === rules.assignmentNode) { - handleAssignment(node); + handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode); return; } - // Mutation detection via expression_statement if (rules.expressionStmtNode && t === rules.expressionStmtNode) { - handleExprStmtMutation(node); + handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode); } }, From 46a95aeaa4b5d8c1a62a9c8dbdc465c8890b6dad Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:24:42 -0600 Subject: [PATCH 12/37] refactor: decompose domain builder stages into focused helpers Extract edge-building by type (import, call-native, call-JS, class hierarchy) from buildEdges. Extract per-phase insertion logic from insertNodes. Extract scoped/incremental/full-build paths and reverse-dep cascade from detectChanges. Extract setup, engine init, alias loading from pipeline.js. Extract node/edge-building helpers from incremental.js rebuildFile. 
Impact: 44 functions changed, 19 affected --- src/domain/graph/builder/incremental.js | 206 ++++--- src/domain/graph/builder/pipeline.js | 186 +++--- .../graph/builder/stages/build-edges.js | 557 ++++++++++-------- .../graph/builder/stages/detect-changes.js | 372 ++++++------ .../graph/builder/stages/insert-nodes.js | 286 ++++----- 5 files changed, 889 insertions(+), 718 deletions(-) diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.js index f04a136e..63694385 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.js @@ -12,10 +12,121 @@ import { parseFileIncremental } from '../../parser.js'; import { computeConfidence, resolveImportPath } from '../resolve.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; +// ── Node insertion ────────────────────────────────────────────────────── + +function insertFileNodes(stmts, relPath, symbols) { + stmts.insertNode.run(relPath, 'file', relPath, 0, null); + for (const def of symbols.definitions) { + stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + } + for (const exp of symbols.exports) { + stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + } +} + +// ── Import edge building ──────────────────────────────────────────────── + +function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases) { + let edgesAdded = 0; + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); + if (targetRow) { + const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 
'imports-type' : 'imports'; + stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); + edgesAdded++; + } + } + return edgesAdded; +} + +function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +// ── Call edge building ────────────────────────────────────────────────── + +function findCaller(call, definitions, relPath, stmts) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; + } + } + } else if (!caller) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller; +} + +function resolveCallTargets(stmts, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + if (importedFrom) { + targets = stmts.findNodeInFile.all(call.name, importedFrom); + } + if (!targets || targets.length === 0) { + targets = stmts.findNodeInFile.all(call.name, relPath); + if (targets.length === 0) { + targets = stmts.findNodeByName.all(call.name); + } + } + return { targets, importedFrom }; +} + +function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { + let edgesAdded = 0; + for (const call of symbols.calls) { + if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, stmts) || fileNodeRow; + const { targets, importedFrom } = 
resolveCallTargets(stmts, call, relPath, importedNames); + + for (const t of targets) { + if (t.id !== caller.id) { + const confidence = computeConfidence(relPath, t.file, importedFrom ?? null); + stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); + edgesAdded++; + } + } + } + return edgesAdded; +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * Parse a single file and update the database incrementally. * - * @param {import('better-sqlite3').Database} db + * @param {import('better-sqlite3').Database} _db * @param {string} rootDir - Absolute root directory * @param {string} filePath - Absolute file path * @param {object} stmts - Prepared DB statements @@ -61,105 +172,20 @@ export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cac const symbols = await parseFileIncremental(cache, filePath, code, engineOpts); if (!symbols) return null; - // Insert nodes - stmts.insertNode.run(relPath, 'file', relPath, 0, null); - for (const def of symbols.definitions) { - stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); - } - for (const exp of symbols.exports) { - stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); - } + insertFileNodes(stmts, relPath, symbols); const newNodes = stmts.countNodes.get(relPath)?.c || 0; const newSymbols = diffSymbols ? 
stmts.listSymbols.all(relPath) : []; - let edgesAdded = 0; const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0); if (!fileNodeRow) return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 }; - const fileNodeId = fileNodeRow.id; - // Load aliases for import resolution const aliases = { baseUrl: null, paths: {} }; - // Import edges - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports'; - stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); - edgesAdded++; - } - } - - // Build import name → resolved file mapping - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - for (const name of imp.names) { - importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); - } - } - - // Call edges - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const importedFrom = importedNames.get(call.name); - let targets; - if (importedFrom) { - targets = stmts.findNodeInFile.all(call.name, importedFrom); - } - 
if (!targets || targets.length === 0) { - targets = stmts.findNodeInFile.all(call.name, relPath); - if (targets.length === 0) { - targets = stmts.findNodeByName.all(call.name); - } - } - - for (const t of targets) { - if (t.id !== caller.id) { - const confidence = importedFrom - ? computeConfidence(relPath, t.file, importedFrom) - : computeConfidence(relPath, t.file, null); - stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); - edgesAdded++; - } - } - } + let edgesAdded = buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases); + const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases); + edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames); const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null; const event = oldNodes === 0 ? 'added' : 'modified'; diff --git a/src/domain/graph/builder/pipeline.js b/src/domain/graph/builder/pipeline.js index ea9848c5..963a0086 100644 --- a/src/domain/graph/builder/pipeline.js +++ b/src/domain/graph/builder/pipeline.js @@ -23,94 +23,73 @@ import { parseFiles } from './stages/parse-files.js'; import { resolveImports } from './stages/resolve-imports.js'; import { runAnalyses } from './stages/run-analyses.js'; -/** - * Build the dependency graph for a codebase. - * - * Signature and return value are identical to the original monolithic buildGraph(). 
- * - * @param {string} rootDir - Root directory to scan - * @param {object} [opts] - Build options - * @returns {Promise<{ phases: object } | undefined>} - */ -export async function buildGraph(rootDir, opts = {}) { - const ctx = new PipelineContext(); - ctx.buildStart = performance.now(); - ctx.opts = opts; +// ── Setup helpers ─────────────────────────────────────────────────────── - // ── Setup (creates DB, loads config, selects engine) ────────────── - ctx.rootDir = path.resolve(rootDir); - ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); - ctx.db = openDb(ctx.dbPath); - try { - initSchema(ctx.db); - - ctx.config = loadConfig(ctx.rootDir); - ctx.incremental = - opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - - ctx.engineOpts = { - engine: opts.engine || 'auto', - dataflow: opts.dataflow !== false, - ast: opts.ast !== false, - }; - const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); - ctx.engineName = engineName; - ctx.engineVersion = engineVersion; - info(`Using ${engineName} engine${engineVersion ? 
` (v${engineVersion})` : ''}`); - - // Engine/schema mismatch detection - ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; - ctx.forceFullRebuild = false; - if (ctx.incremental) { - const prevEngine = getBuildMeta(ctx.db, 'engine'); - if (prevEngine && prevEngine !== engineName) { - info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`); - ctx.forceFullRebuild = true; - } - const prevSchema = getBuildMeta(ctx.db, 'schema_version'); - if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { - info( - `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, - ); - ctx.forceFullRebuild = true; - } - } +function initializeEngine(ctx) { + ctx.engineOpts = { + engine: ctx.opts.engine || 'auto', + dataflow: ctx.opts.dataflow !== false, + ast: ctx.opts.ast !== false, + }; + const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); + ctx.engineName = engineName; + ctx.engineVersion = engineVersion; + info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); +} - // Path aliases - ctx.aliases = loadPathAliases(ctx.rootDir); - if (ctx.config.aliases) { - for (const [key, value] of Object.entries(ctx.config.aliases)) { - const pattern = key.endsWith('/') ? `${key}*` : key; - const target = path.resolve(ctx.rootDir, value); - ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; - } - } - if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { - info( - `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, - ); +function checkEngineSchemaMismatch(ctx) { + ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; + ctx.forceFullRebuild = false; + if (!ctx.incremental) return; + + const prevEngine = getBuildMeta(ctx.db, 'engine'); + if (prevEngine && prevEngine !== ctx.engineName) { + info(`Engine changed (${prevEngine} → ${ctx.engineName}), promoting to full rebuild.`); + ctx.forceFullRebuild = true; + } + const prevSchema = getBuildMeta(ctx.db, 'schema_version'); + if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { + info( + `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, + ); + ctx.forceFullRebuild = true; + } +} + +function loadAliases(ctx) { + ctx.aliases = loadPathAliases(ctx.rootDir); + if (ctx.config.aliases) { + for (const [key, value] of Object.entries(ctx.config.aliases)) { + const pattern = key.endsWith('/') ? `${key}*` : key; + const target = path.resolve(ctx.rootDir, value); + ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; } + } + if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { + info( + `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, + ); + } +} - ctx.timing.setupMs = performance.now() - ctx.buildStart; +function setupPipeline(ctx) { + ctx.rootDir = path.resolve(ctx.rootDir); + ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); + ctx.db = openDb(ctx.dbPath); + initSchema(ctx.db); - // ── Pipeline stages ───────────────────────────────────────────── - await collectFiles(ctx); - await detectChanges(ctx); + ctx.config = loadConfig(ctx.rootDir); + ctx.incremental = + ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - if (ctx.earlyExit) return; + initializeEngine(ctx); + checkEngineSchemaMismatch(ctx); + loadAliases(ctx); - await parseFiles(ctx); - await insertNodes(ctx); - await resolveImports(ctx); - await buildEdges(ctx); - await buildStructure(ctx); - await runAnalyses(ctx); - await finalize(ctx); - } catch (err) { - if (!ctx.earlyExit) closeDb(ctx.db); - throw err; - } + ctx.timing.setupMs = performance.now() - ctx.buildStart; +} +function formatTimingResult(ctx) { return { phases: { setupMs: +ctx.timing.setupMs.toFixed(1), @@ -128,3 +107,50 @@ export async function buildGraph(rootDir, opts = {}) { }, }; } + +// ── Pipeline stages execution ─────────────────────────────────────────── + +async function runPipelineStages(ctx) { + await collectFiles(ctx); + await detectChanges(ctx); + + if (ctx.earlyExit) return; + + await parseFiles(ctx); + await insertNodes(ctx); + await resolveImports(ctx); + await buildEdges(ctx); + await buildStructure(ctx); + await runAnalyses(ctx); + await finalize(ctx); +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * Build the dependency graph for a codebase. 
+ * + * Signature and return value are identical to the original monolithic buildGraph(). + * + * @param {string} rootDir - Root directory to scan + * @param {object} [opts] - Build options + * @returns {Promise<{ phases: object } | undefined>} + */ +export async function buildGraph(rootDir, opts = {}) { + const ctx = new PipelineContext(); + ctx.buildStart = performance.now(); + ctx.opts = opts; + ctx.rootDir = rootDir; + + try { + setupPipeline(ctx); + await runPipelineStages(ctx); + } catch (err) { + if (!ctx.earlyExit) closeDb(ctx.db); + throw err; + } + + if (ctx.earlyExit) return; + + return formatTimingResult(ctx); +} diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index a8879b62..f830ed1c 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -12,25 +12,18 @@ import { computeConfidence } from '../../resolve.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExport } from './resolve-imports.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildEdges(ctx) { - const { db, fileSymbols, barrelOnlyFiles, rootDir, engineName } = ctx; +// ── Node lookup setup ─────────────────────────────────────────────────── - const getNodeIdStmt = { +function makeGetNodeIdStmt(db) { + return { get: (name, kind, file, line) => { const id = getNodeId(db, name, kind, file, line); return id != null ? 
{ id } : undefined; }, }; +} - // Pre-load all nodes into lookup maps - const allNodes = db - .prepare( - `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, - ) - .all(); +function setupNodeLookups(ctx, allNodes) { ctx.nodesByName = new Map(); for (const node of allNodes) { if (!ctx.nodesByName.has(node.name)) ctx.nodesByName.set(node.name, []); @@ -42,253 +35,339 @@ export async function buildEdges(ctx) { if (!ctx.nodesByNameAndFile.has(key)) ctx.nodesByNameAndFile.set(key, []); ctx.nodesByNameAndFile.get(key).push(node); } +} - const t0 = performance.now(); - const buildEdgesTx = db.transaction(() => { - const allEdgeRows = []; +// ── Import edges ──────────────────────────────────────────────────────── - // ── Import edges ──────────────────────────────────────────────── - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - const fileNodeId = fileNodeRow.id; - - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport - ? 'reexports' - : imp.typeOnly - ? 'imports-type' - : imp.dynamicImport - ? 
'dynamic-imports' - : 'imports'; - allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); - - if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { - const resolvedSources = new Set(); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); - if ( - actualSource && - actualSource !== resolvedPath && - !resolvedSources.has(actualSource) - ) { - resolvedSources.add(actualSource); - const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); - if (actualRow) { - allEdgeRows.push([ - fileNodeId, - actualRow.id, - edgeKind === 'imports-type' - ? 'imports-type' - : edgeKind === 'dynamic-imports' - ? 'dynamic-imports' - : 'imports', - 0.9, - 0, - ]); - } - } - } - } - } +function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + const fileNodeId = fileNodeRow.id; + + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); + if (!targetRow) continue; + + const edgeKind = imp.reexport + ? 'reexports' + : imp.typeOnly + ? 'imports-type' + : imp.dynamicImport + ? 'dynamic-imports' + : 'imports'; + allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); + + if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { + buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows); } } + } +} - // ── Call/receiver/extends/implements edges ─────────────────────── - const native = engineName === 'native' ? 
loadNative() : null; - if (native?.buildCallEdges) { - const nativeFiles = []; - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = []; - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - let targetFile = resolvedPath; - if (isBarrelFile(ctx, resolvedPath)) { - const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); - if (actual) targetFile = actual; - } - importedNames.push({ name: cleanName, file: targetFile }); +function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, edgeRows) { + const resolvedSources = new Set(); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) { + resolvedSources.add(actualSource); + const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); + if (actualRow) { + const kind = + edgeKind === 'imports-type' + ? 'imports-type' + : edgeKind === 'dynamic-imports' + ? 
'dynamic-imports' + : 'imports'; + edgeRows.push([fileNodeId, actualRow.id, kind, 0.9, 0]); + } + } + } +} + +// ── Call edges (native engine) ────────────────────────────────────────── + +function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + const nativeFiles = []; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + nativeFiles.push({ + file: relPath, + fileNodeId: fileNodeRow.id, + definitions: symbols.definitions.map((d) => ({ + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + })), + calls: symbols.calls, + importedNames, + classes: symbols.classes, + }); + } + + const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); + for (const e of nativeEdges) { + allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + } +} + +function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { + const importedNames = []; + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + let targetFile = resolvedPath; + if (isBarrelFile(ctx, resolvedPath)) { + const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actual) targetFile = actual; + } + importedNames.push({ name: cleanName, file: targetFile }); + } + } + return importedNames; +} + +// ── Call edges (JS fallback) ──────────────────────────────────────────── + +function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if 
(barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); + const seenCallEdges = new Set(); + + buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, + ); + buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows); + } +} + +function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; } } + } else if (!caller) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller || fileNodeRow; +} - nativeFiles.push({ - file: relPath, - fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => ({ - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? 
null, - })), - calls: symbols.calls, - importedNames, - classes: symbols.classes, - }); +function resolveCallTargets(ctx, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + + if (importedFrom) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; + if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { + const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); + if (actualSource) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; } + } + } - const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); - for (const e of nativeEdges) { - allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + if (!targets || targets.length === 0) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; + if (targets.length === 0) { + targets = resolveByMethodOrGlobal(ctx, call, relPath); + } + } + + if (targets.length > 1) { + targets.sort((a, b) => { + const confA = computeConfidence(relPath, a.file, importedFrom); + const confB = computeConfidence(relPath, b.file, importedFrom); + return confB - confA; + }); + } + + return { targets, importedFrom }; +} + +function resolveByMethodOrGlobal(ctx, call, relPath) { + const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( + (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', + ); + if (methodCandidates.length > 0) return methodCandidates; + + if ( + !call.receiver || + call.receiver === 'this' || + call.receiver === 'self' || + call.receiver === 'super' + ) { + return (ctx.nodesByName.get(call.name) || []).filter( + (n) => computeConfidence(relPath, n.file, null) >= 0.5, + ); + } + return []; +} + +function buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, +) { + for (const call of symbols.calls) { + if (call.receiver && 
BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); + const isDynamic = call.dynamic ? 1 : 0; + const { targets, importedFrom } = resolveCallTargets(ctx, call, relPath, importedNames); + + for (const t of targets) { + const edgeKey = `${caller.id}|${t.id}`; + if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { + seenCallEdges.add(edgeKey); + const confidence = computeConfidence(relPath, t.file, importedFrom); + allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); } - } else { - // JS fallback - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - importedNames.set(cleanName, resolvedPath); - } - } + } - const seenCallEdges = new Set(); - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const isDynamic = call.dynamic ? 
1 : 0; - let targets; - const importedFrom = importedNames.get(call.name); - - if (importedFrom) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; - if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { - const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); - if (actualSource) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; - } - } - } - if (!targets || targets.length === 0) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; - if (targets.length === 0) { - const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( - (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', - ); - if (methodCandidates.length > 0) { - targets = methodCandidates; - } else if ( - !call.receiver || - call.receiver === 'this' || - call.receiver === 'self' || - call.receiver === 'super' - ) { - targets = (ctx.nodesByName.get(call.name) || []).filter( - (n) => computeConfidence(relPath, n.file, null) >= 0.5, - ); - } - } - } + // Receiver edge + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows); + } + } +} - if (targets.length > 1) { - targets.sort((a, b) => { - const confA = computeConfidence(relPath, a.file, importedFrom); - const confB = computeConfidence(relPath, b.file, importedFrom); - return confB - confA; - }); - } +function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); + } + } +} - for (const t of targets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { - seenCallEdges.add(edgeKey); - const confidence = computeConfidence(relPath, t.file, importedFrom); - allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); - } - } +// ── Class hierarchy edges ─────────────────────────────────────────────── - // Receiver edge - if ( - call.receiver && - !BUILTIN_RECEIVERS.has(call.receiver) && - call.receiver !== 'this' && - call.receiver !== 'self' && - call.receiver !== 'super' - ) { - const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; - const candidates = - samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; - const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); - if (receiverNodes.length > 0 && caller) { - const recvTarget = receiverNodes[0]; - const recvKey = `recv|${caller.id}|${recvTarget.id}`; - if (!seenCallEdges.has(recvKey)) { - seenCallEdges.add(recvKey); - allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); - } - } - } +function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { + for (const cls of symbols.classes) { + if (cls.extends) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = (ctx.nodesByName.get(cls.extends) || []).filter((n) => n.kind === 'class'); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); } + } + } - // Class extends edges - for (const cls of symbols.classes) { - if (cls.extends) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.extends) || []; - const targetRows = targetCandidates.filter((n) => n.kind === 'class'); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); - } - } - } - - if (cls.implements) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.implements) || []; - const targetRows = targetCandidates.filter( - (n) => n.kind === 'interface' || n.kind === 'class', - ); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); - } - } - } + if (cls.implements) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = 
(ctx.nodesByName.get(cls.implements) || []).filter( + (n) => n.kind === 'interface' || n.kind === 'class', + ); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); } } } + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function buildEdges(ctx) { + const { db, engineName } = ctx; + + const getNodeIdStmt = makeGetNodeIdStmt(db); + + const allNodes = db + .prepare( + `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, + ) + .all(); + setupNodeLookups(ctx, allNodes); + + const t0 = performance.now(); + const buildEdgesTx = db.transaction(() => { + const allEdgeRows = []; + + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + + const native = engineName === 'native' ? loadNative() : null; + if (native?.buildCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native); + } else { + buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows); + } batchInsertEdges(db, allEdgeRows); }); diff --git a/src/domain/graph/builder/stages/detect-changes.js b/src/domain/graph/builder/stages/detect-changes.js index 50ffbd1d..23d15245 100644 --- a/src/domain/graph/builder/stages/detect-changes.js +++ b/src/domain/graph/builder/stages/detect-changes.js @@ -13,12 +13,13 @@ import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; import { fileHash, fileStat, purgeFilesFromGraph, readFileSafe } from '../helpers.js'; +// ── Three-tier change detection ───────────────────────────────────────── + /** * Determine which files have changed since last build. 
- * Three-tier cascade: - * Tier 0 — Journal: O(changed) when watcher was running - * Tier 1 — mtime+size: O(n) stats, O(changed) reads - * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) + * Tier 0 — Journal: O(changed) when watcher was running + * Tier 1 — mtime+size: O(n) stats, O(changed) reads + * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) */ function getChangedFiles(db, allFiles, rootDir) { let hasTable = false; @@ -44,6 +45,17 @@ function getChangedFiles(db, allFiles, rootDir) { .map((r) => [r.file, r]), ); + const removed = detectRemovedFiles(existing, allFiles, rootDir); + + // Tier 0: Journal + const journalResult = tryJournalTier(db, existing, rootDir, removed); + if (journalResult) return journalResult; + + // Tier 1 + 2: mtime/size fast-path → hash comparison + return mtimeAndHashTiers(existing, allFiles, rootDir, removed); +} + +function detectRemovedFiles(existing, allFiles, rootDir) { const currentFiles = new Set(); for (const file of allFiles) { currentFiles.add(normalizePath(path.relative(rootDir, file))); @@ -55,51 +67,57 @@ function getChangedFiles(db, allFiles, rootDir) { removed.push(existingFile); } } + return removed; +} - // ── Tier 0: Journal ────────────────────────────────────────────── +function tryJournalTier(db, existing, rootDir, removed) { const journal = readJournal(rootDir); - if (journal.valid) { - const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); - const latestDbMtime = dbMtimes?.latest || 0; - const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - - if (hasJournalEntries && journal.timestamp >= latestDbMtime) { - debug( - `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, - ); - const changed = []; - - for (const relPath of journal.changed) { - const absPath = path.join(rootDir, relPath); - const stat = fileStat(absPath); - if (!stat) continue; - - let content; - try { - content = 
readFileSafe(absPath); - } catch { - continue; - } - const hash = fileHash(content); - const record = existing.get(relPath); - if (!record || record.hash !== hash) { - changed.push({ file: absPath, content, hash, relPath, stat }); - } - } + if (!journal.valid) return null; - const removedSet = new Set(removed); - for (const relPath of journal.removed) { - if (existing.has(relPath)) removedSet.add(relPath); - } + const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); + const latestDbMtime = dbMtimes?.latest || 0; + const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - return { changed, removed: [...removedSet], isFullBuild: false }; - } + if (!hasJournalEntries || journal.timestamp < latestDbMtime) { debug( `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`, ); + return null; } - // ── Tier 1: mtime+size fast-path ───────────────────────────────── + debug( + `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, + ); + const changed = []; + + for (const relPath of journal.changed) { + const absPath = path.join(rootDir, relPath); + const stat = fileStat(absPath); + if (!stat) continue; + + let content; + try { + content = readFileSafe(absPath); + } catch { + continue; + } + const hash = fileHash(content); + const record = existing.get(relPath); + if (!record || record.hash !== hash) { + changed.push({ file: absPath, content, hash, relPath, stat }); + } + } + + const removedSet = new Set(removed); + for (const relPath of journal.removed) { + if (existing.has(relPath)) removedSet.add(relPath); + } + + return { changed, removed: [...removedSet], isFullBuild: false }; +} + +function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { + // Tier 1: mtime+size fast-path const needsHash = []; const skipped = []; @@ -130,7 +148,7 @@ function getChangedFiles(db, allFiles, rootDir) { debug(`Tier 1: ${skipped.length} skipped by 
mtime+size, ${needsHash.length} need hash check`); } - // ── Tier 2: Hash comparison ────────────────────────────────────── + // Tier 2: Hash comparison const changed = []; for (const item of needsHash) { @@ -168,9 +186,10 @@ function getChangedFiles(db, allFiles, rootDir) { return { changed, removed, isFullBuild: false }; } +// ── Pending analysis ──────────────────────────────────────────────────── + /** * Run pending analysis pass when no file changes but analysis tables are empty. - * @returns {boolean} true if analysis was run and we should early-exit */ async function runPendingAnalysis(ctx) { const { db, opts, engineOpts, allFiles, rootDir } = ctx; @@ -213,9 +232,8 @@ async function runPendingAnalysis(ctx) { return true; } -/** - * Self-heal metadata-only updates (mtime/size) without re-parsing. - */ +// ── Metadata self-heal ────────────────────────────────────────────────── + function healMetadata(ctx) { const { db, metadataUpdates } = ctx; if (!metadataUpdates || metadataUpdates.length === 0) return; @@ -237,104 +255,91 @@ function healMetadata(ctx) { } } -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function detectChanges(ctx) { - const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; - - // Scoped builds already set parseChanges in collectFiles. - // Still need to purge removed files and set hasEmbeddings. 
- if (opts.scope) { - let hasEmbeddings = false; - try { - db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); - hasEmbeddings = true; - } catch { - /* table doesn't exist */ - } - ctx.hasEmbeddings = hasEmbeddings; +// ── Reverse-dependency cascade ────────────────────────────────────────── - // Reverse-dependency cascade BEFORE purging (needs existing edges to find importers) - const changePaths = ctx.parseChanges.map( - (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), - ); - const reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set([...changePaths, ...ctx.removed]); - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } +function findReverseDependencies(db, changedRelPaths, rootDir) { + const reverseDeps = new Set(); + if (changedRelPaths.size === 0) return reverseDeps; + + const findReverseDepsStmt = db.prepare(` + SELECT DISTINCT n_src.file FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' + `); + for (const relPath of changedRelPaths) { + for (const row of findReverseDepsStmt.all(relPath)) { + if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { + const absPath = path.join(rootDir, row.file); + if (fs.existsSync(absPath)) { + reverseDeps.add(row.file); } } } + } + return reverseDeps; +} - // Now purge changed + removed files - if (changePaths.length > 0 || ctx.removed.length > 0) { - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); - } +function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { + const { db, rootDir } = ctx; - // Delete outgoing edges for reverse-dep files and add to parse list - if (reverseDeps.size > 0) { - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); - } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); - } - info( - `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, - ); + if (changePaths.length > 0 || ctx.removed.length > 0) { + purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + } + + if (reverseDeps.size > 0) { + const deleteOutgoingEdgesForFile = db.prepare( + 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + for (const relPath of reverseDeps) { + deleteOutgoingEdgesForFile.run(relPath); + } + for (const relPath of reverseDeps) { + const absPath = path.join(rootDir, relPath); + ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); } - return; } +} - const increResult = - incremental && !forceFullRebuild - ? 
getChangedFiles(db, allFiles, rootDir) - : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; +// ── Scoped build path ─────────────────────────────────────────────────── - ctx.removed = increResult.removed; - ctx.isFullBuild = increResult.isFullBuild; - ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); - ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); +function handleScopedBuild(ctx) { + const { db, rootDir, opts } = ctx; - // Early exit: no changes detected - if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { - const ranAnalysis = await runPendingAnalysis(ctx); - if (ranAnalysis) { - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; - } + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ + } + ctx.hasEmbeddings = hasEmbeddings; - healMetadata(ctx); - info('No changes detected. 
Graph is up to date.'); - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; + const changePaths = ctx.parseChanges.map( + (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), + ); + + let reverseDeps = new Set(); + if (!opts.noReverseDeps) { + const changedRelPaths = new Set([...changePaths, ...ctx.removed]); + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); + } + + // Purge changed + removed files, then add reverse-deps + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); + + if (reverseDeps.size > 0) { + info( + `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, + ); } +} + +// ── Full/incremental build path ───────────────────────────────────────── + +function handleFullBuild(ctx) { + const { db } = ctx; - // ── Full build: truncate all tables ────────────────────────────── let hasEmbeddings = false; try { db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); @@ -344,19 +349,28 @@ export async function detectChanges(ctx) { } ctx.hasEmbeddings = hasEmbeddings; - if (ctx.isFullBuild) { - const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; - db.exec( - hasEmbeddings - ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` - : deletions, - ); - return; + const deletions = + 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + db.exec( + hasEmbeddings + ? 
`${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` + : deletions, + ); +} + +function handleIncrementalBuild(ctx) { + const { db, rootDir, opts } = ctx; + + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ } + ctx.hasEmbeddings = hasEmbeddings; - // ── Reverse-dependency cascade (incremental) ───────────────────── - const reverseDeps = new Set(); + let reverseDeps = new Set(); if (!opts.noReverseDeps) { const changedRelPaths = new Set(); for (const item of ctx.parseChanges) { @@ -365,25 +379,7 @@ export async function detectChanges(ctx) { for (const relPath of ctx.removed) { changedRelPaths.add(relPath); } - - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } - } - } + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } info( @@ -393,21 +389,57 @@ export async function detectChanges(ctx) { debug(`Changed files: ${ctx.parseChanges.map((c) => c.relPath).join(', ')}`); if (ctx.removed.length > 0) debug(`Removed files: ${ctx.removed.join(', ')}`); - // Purge changed and removed files const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); +} - // Delete outgoing edges for reverse-dep files, then add them to parse list - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function detectChanges(ctx) { + const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; + + // Scoped builds already set parseChanges in collectFiles + if (opts.scope) { + handleScopedBuild(ctx); + return; } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); + + const increResult = + incremental && !forceFullRebuild + ? 
getChangedFiles(db, allFiles, rootDir) + : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; + + ctx.removed = increResult.removed; + ctx.isFullBuild = increResult.isFullBuild; + ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); + ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); + + // Early exit: no changes detected + if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { + const ranAnalysis = await runPendingAnalysis(ctx); + if (ranAnalysis) { + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + healMetadata(ctx); + info('No changes detected. Graph is up to date.'); + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + if (ctx.isFullBuild) { + handleFullBuild(ctx); + } else { + handleIncrementalBuild(ctx); } } diff --git a/src/domain/graph/builder/stages/insert-nodes.js b/src/domain/graph/builder/stages/insert-nodes.js index 2eaf6a73..6e22c966 100644 --- a/src/domain/graph/builder/stages/insert-nodes.js +++ b/src/domain/graph/builder/stages/insert-nodes.js @@ -15,23 +15,159 @@ import { readFileSafe, } from '../helpers.js'; +// ── Phase 1: Insert file nodes, definitions, exports ──────────────────── + +function insertDefinitionsAndExports(db, allSymbols) { + const phase1Rows = []; + for (const [relPath, symbols] of allSymbols) { + phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); + for (const def of symbols.definitions) { + const dotIdx = def.name.lastIndexOf('.'); + const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; + phase1Rows.push([ + def.name, + def.kind, + relPath, + def.line, + def.endLine || null, + null, + def.name, + scope, + def.visibility || null, + ]); + } + for (const exp of symbols.exports) { + phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); + } + } + batchInsertNodes(db, phase1Rows); + + // Mark exported symbols + const markExported = db.prepare( + 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', + ); + for (const [relPath, symbols] of allSymbols) { + for (const exp of symbols.exports) { + markExported.run(exp.name, exp.kind, relPath, exp.line); + } + } +} + +// ── Phase 2: Insert children (needs parent IDs) ──────────────────────── + +function insertChildren(db, allSymbols) { + const childRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + for (const def of symbols.definitions) { + if (!def.children?.length) continue; + const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (!defId) continue; + for (const child of def.children) { + const qualifiedName = `${def.name}.${child.name}`; + childRows.push([ + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defId, + qualifiedName, + def.name, + child.visibility || null, + ]); + } + } + } + batchInsertNodes(db, childRows); +} + +// ── Phase 3: Insert containment + parameter_of edges ──────────────────── + +function insertContainmentEdges(db, allSymbols) { + const edgeRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + const fileId = nodeIdMap.get(`${relPath}|file|0`); + for (const def of symbols.definitions) { + const defId = 
nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (fileId && defId) { + edgeRows.push([fileId, defId, 'contains', 1.0, 0]); + } + if (def.children?.length && defId) { + for (const child of def.children) { + const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); + if (childId) { + edgeRows.push([defId, childId, 'contains', 1.0, 0]); + if (child.kind === 'parameter') { + edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); + } + } + } + } + } + } + batchInsertEdges(db, edgeRows); +} + +// ── Phase 4: Update file hashes ───────────────────────────────────────── + +function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash) { + if (!upsertHash) return; + + for (const [relPath] of allSymbols) { + const precomputed = precomputedData.get(relPath); + if (precomputed?._reverseDepOnly) { + // no-op: file unchanged, hash already correct + } else if (precomputed?.hash) { + const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, precomputed.hash, mtime, size); + } else { + const absPath = path.join(rootDir, relPath); + let code; + try { + code = readFileSafe(absPath); + } catch { + code = null; + } + if (code !== null) { + const stat = fileStat(absPath); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, fileHash(code), mtime, size); + } + } + } + + // Also update metadata-only entries (self-heal mtime/size without re-parse) + for (const item of metadataUpdates) { + const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const size = item.stat ? 
item.stat.size : 0; + upsertHash.run(item.relPath, item.hash, mtime, size); + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * @param {import('../context.js').PipelineContext} ctx */ export async function insertNodes(ctx) { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - // Build lookup from incremental data (pre-computed hashes + stats) const precomputedData = new Map(); for (const item of filesToParse) { - if (item.relPath) { - precomputedData.set(item.relPath, item); - } + if (item.relPath) precomputedData.set(item.relPath, item); } - const bulkGetNodeIds = { all: (file) => bulkNodeIdsByFile(db, file) }; - - // Prepare hash upsert let upsertHash; try { upsertHash = db.prepare( @@ -42,143 +178,15 @@ export async function insertNodes(ctx) { } // Populate fileSymbols before the transaction so it is a pure input - // to (rather than a side-effect of) the DB write — avoids partial - // population if the transaction rolls back. for (const [relPath, symbols] of allSymbols) { ctx.fileSymbols.set(relPath, symbols); } const insertAll = db.transaction(() => { - // Phase 1: Batch insert all file nodes + definitions + exports - // Row format: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] - const phase1Rows = []; - for (const [relPath, symbols] of allSymbols) { - phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); - for (const def of symbols.definitions) { - // Methods already have 'Class.method' as name — use as qualified_name. - // For methods, scope is the class portion; for top-level defs, scope is null. - const dotIdx = def.name.lastIndexOf('.'); - const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; - phase1Rows.push([ - def.name, - def.kind, - relPath, - def.line, - def.endLine || null, - null, - def.name, - scope, - def.visibility || null, - ]); - } - for (const exp of symbols.exports) { - phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); - } - } - batchInsertNodes(db, phase1Rows); - - // Phase 1b: Mark exported symbols - const markExported = db.prepare( - 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ); - for (const [relPath, symbols] of allSymbols) { - for (const exp of symbols.exports) { - markExported.run(exp.name, exp.kind, relPath, exp.line); - } - } - - // Phase 3: Batch insert children (needs parent IDs from Phase 2) - const childRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - for (const def of symbols.definitions) { - if (!def.children?.length) continue; - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (!defId) continue; - for (const child of def.children) { - const qualifiedName = `${def.name}.${child.name}`; - childRows.push([ - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - defId, - qualifiedName, - def.name, - child.visibility || null, - ]); - } - } - } - batchInsertNodes(db, childRows); - - // Phase 5: Batch insert contains/parameter_of edges - const edgeRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - const fileId = nodeIdMap.get(`${relPath}|file|0`); - for (const def of symbols.definitions) { - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (fileId && defId) { - edgeRows.push([fileId, defId, 'contains', 1.0, 0]); - } - if 
(def.children?.length && defId) { - for (const child of def.children) { - const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); - if (childId) { - edgeRows.push([defId, childId, 'contains', 1.0, 0]); - if (child.kind === 'parameter') { - edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); - } - } - } - } - } - - // Update file hash — skip reverse-dep files (unchanged) - if (upsertHash) { - const precomputed = precomputedData.get(relPath); - if (precomputed?._reverseDepOnly) { - // no-op: file unchanged, hash already correct - } else if (precomputed?.hash) { - const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, precomputed.hash, mtime, size); - } else { - const absPath = path.join(rootDir, relPath); - let code; - try { - code = readFileSafe(absPath); - } catch { - code = null; - } - if (code !== null) { - const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, fileHash(code), mtime, size); - } - } - } - } - batchInsertEdges(db, edgeRows); - - // Also update metadata-only entries (self-heal mtime/size without re-parse) - if (upsertHash) { - for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; - const size = item.stat ? 
item.stat.size : 0; - upsertHash.run(item.relPath, item.hash, mtime, size); - } - } + insertDefinitionsAndExports(db, allSymbols); + insertChildren(db, allSymbols); + insertContainmentEdges(db, allSymbols); + updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); const t0 = performance.now(); From 0a3fbc7d6aee22111cdd3408df68bb42871c955a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:30:02 -0600 Subject: [PATCH 13/37] refactor: decompose domain analysis functions into focused helpers Impact: 37 functions changed, 29 affected --- src/domain/analysis/context.js | 361 ++++++++++----------- src/domain/analysis/dependencies.js | 346 ++++++++++++--------- src/domain/analysis/impact.js | 418 ++++++++++++++++--------- src/domain/analysis/module-map.js | 467 +++++++++++++++------------- 4 files changed, 884 insertions(+), 708 deletions(-) diff --git a/src/domain/analysis/context.js b/src/domain/analysis/context.js index a97e5419..a8f3261f 100644 --- a/src/domain/analysis/context.js +++ b/src/domain/analysis/context.js @@ -27,6 +27,149 @@ import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; import { findMatchingNodes } from './symbol-lookup.js'; +function buildCallees(db, node, repoRoot, getFileLines, opts) { + const { noTests, depth } = opts; + const calleeRows = findCallees(db, node.id); + const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; + + const callees = filteredCallees.map((c) => { + const cLines = getFileLines(c.file); + const summary = cLines ? 
extractSummary(cLines, c.line) : null; + let calleeSource = null; + if (depth >= 1) { + calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); + } + return { + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary, + source: calleeSource, + }; + }); + + if (depth > 1) { + const visited = new Set(filteredCallees.map((c) => c.id)); + visited.add(node.id); + let frontier = filteredCallees.map((c) => c.id); + const maxDepth = Math.min(depth, 5); + for (let d = 2; d <= maxDepth; d++) { + const nextFrontier = []; + for (const fid of frontier) { + const deeper = findCallees(db, fid); + for (const c of deeper) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + const cLines = getFileLines(c.file); + callees.push({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary: cLines ? extractSummary(cLines, c.line) : null, + source: readSourceRange(repoRoot, c.file, c.line, c.end_line), + }); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + } + + return callees; +} + +function buildCallers(db, node, noTests) { + let callerRows = findCallers(db, node.id); + + if (node.kind === 'method' && node.name.includes('.')) { + const methodName = node.name.split('.').pop(); + const relatedMethods = resolveMethodViaHierarchy(db, methodName); + for (const rm of relatedMethods) { + if (rm.id === node.id) continue; + const extraCallers = findCallers(db, rm.id); + callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); + } + } + if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); + + return callerRows.map((c) => ({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + viaHierarchy: c.viaHierarchy || undefined, + })); +} + +function buildRelatedTests(db, node, getFileLines, includeTests) { + const testCallerRows = findCallers(db, 
node.id); + const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); + + const testsByFile = new Map(); + for (const tc of testCallers) { + if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); + testsByFile.get(tc.file).push(tc); + } + + const relatedTests = []; + for (const [file] of testsByFile) { + const tLines = getFileLines(file); + const testNames = []; + if (tLines) { + for (const tl of tLines) { + const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); + if (tm) testNames.push(tm[1]); + } + } + const testSource = includeTests && tLines ? tLines.join('\n') : undefined; + relatedTests.push({ + file, + testCount: testNames.length, + testNames, + source: testSource, + }); + } + + return relatedTests; +} + +function getComplexityMetrics(db, nodeId) { + try { + const cRow = getComplexityForNode(db, nodeId); + if (!cRow) return null; + return { + cognitive: cRow.cognitive, + cyclomatic: cRow.cyclomatic, + maxNesting: cRow.max_nesting, + maintainabilityIndex: cRow.maintainability_index || 0, + halsteadVolume: cRow.halstead_volume || 0, + }; + } catch (e) { + debug(`complexity lookup failed for node ${nodeId}: ${e.message}`); + return null; + } +} + +function getNodeChildrenSafe(db, nodeId) { + try { + return findNodeChildren(db, nodeId).map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })); + } catch (e) { + debug(`findNodeChildren failed for node ${nodeId}: ${e.message}`); + return []; + } +} + function explainFileImpl(db, target, getFileLines) { const fileNodes = findFileNodes(db, `%${target}%`); if (fileNodes.length === 0) return []; @@ -50,14 +193,10 @@ function explainFileImpl(db, target, getFileLines) { const publicApi = symbols.filter((s) => publicIds.has(s.id)).map(mapSymbol); const internal = symbols.filter((s) => !publicIds.has(s.id)).map(mapSymbol); - // Imports / importedBy const imports = findImportTargets(db, fn.id).map((r) => ({ file: r.file })); - const importedBy = 
findImportSources(db, fn.id).map((r) => ({ file: r.file })); - // Intra-file data flow const intraEdges = findIntraFileCallEdges(db, fn.file); - const dataFlowMap = new Map(); for (const edge of intraEdges) { if (!dataFlowMap.has(edge.caller_name)) dataFlowMap.set(edge.caller_name, []); @@ -68,7 +207,6 @@ function explainFileImpl(db, target, getFileLines) { callees, })); - // Line count: prefer node_metrics (actual), fall back to MAX(end_line) const metric = db .prepare(`SELECT nm.line_count FROM node_metrics nm WHERE nm.node_id = ?`) .get(fn.id); @@ -130,29 +268,12 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { .filter((r) => isTestFile(r.file) && !seenFiles.has(r.file) && seenFiles.add(r.file)) .map((r) => ({ file: r.file })); - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - return { ...normalizeSymbol(node, db, hc), lineCount, summary, signature, - complexity: complexityMetrics, + complexity: getComplexityMetrics(db, node.id), callees, callers, relatedTests, @@ -160,6 +281,28 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { }); } +function explainCallees(parentResults, currentDepth, visited, db, noTests, getFileLines) { + if (currentDepth <= 0) return; + for (const r of parentResults) { + const newCallees = []; + for (const callee of r.callees) { + const key = `${callee.name}:${callee.file}:${callee.line}`; + if (visited.has(key)) continue; + visited.add(key); + const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); + const exact = calleeResults.find((cr) => cr.file === callee.file && cr.line === 
callee.line); + if (exact) { + exact._depth = (r._depth || 0) + 1; + newCallees.push(exact); + } + } + if (newCallees.length > 0) { + r.depDetails = newCallees; + explainCallees(newCallees, currentDepth - 1, visited, db, noTests, getFileLines); + } + } +} + // ─── Exported functions ────────────────────────────────────────────────── export function contextData(name, customDbPath, opts = {}) { @@ -178,156 +321,22 @@ export function contextData(name, customDbPath, opts = {}) { return { name, results: [] }; } - // No hardcoded slice — pagination handles bounding via limit/offset - const getFileLines = createFileLinesReader(repoRoot); const results = nodes.map((node) => { const fileLines = getFileLines(node.file); - // Source const source = noSource ? null : readSourceRange(repoRoot, node.file, node.line, node.end_line); - // Signature const signature = fileLines ? extractSignature(fileLines, node.line) : null; - // Callees - const calleeRows = findCallees(db, node.id); - const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; - - const callees = filteredCallees.map((c) => { - const cLines = getFileLines(c.file); - const summary = cLines ? 
extractSummary(cLines, c.line) : null; - let calleeSource = null; - if (depth >= 1) { - calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); - } - return { - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary, - source: calleeSource, - }; - }); - - // Deep callee expansion via BFS (depth > 1, capped at 5) - if (depth > 1) { - const visited = new Set(filteredCallees.map((c) => c.id)); - visited.add(node.id); - let frontier = filteredCallees.map((c) => c.id); - const maxDepth = Math.min(depth, 5); - for (let d = 2; d <= maxDepth; d++) { - const nextFrontier = []; - for (const fid of frontier) { - const deeper = findCallees(db, fid); - for (const c of deeper) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - const cLines = getFileLines(c.file); - callees.push({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary: cLines ? 
extractSummary(cLines, c.line) : null, - source: readSourceRange(repoRoot, c.file, c.line, c.end_line), - }); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } - - // Callers - let callerRows = findCallers(db, node.id); - - // Method hierarchy resolution - if (node.kind === 'method' && node.name.includes('.')) { - const methodName = node.name.split('.').pop(); - const relatedMethods = resolveMethodViaHierarchy(db, methodName); - for (const rm of relatedMethods) { - if (rm.id === node.id) continue; - const extraCallers = findCallers(db, rm.id); - callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); - } - } - if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); - - const callers = callerRows.map((c) => ({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - viaHierarchy: c.viaHierarchy || undefined, - })); - - // Related tests: callers that live in test files - const testCallerRows = findCallers(db, node.id); - const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); - - const testsByFile = new Map(); - for (const tc of testCallers) { - if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); - testsByFile.get(tc.file).push(tc); - } - - const relatedTests = []; - for (const [file] of testsByFile) { - const tLines = getFileLines(file); - const testNames = []; - if (tLines) { - for (const tl of tLines) { - const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); - if (tm) testNames.push(tm[1]); - } - } - const testSource = includeTests && tLines ? 
tLines.join('\n') : undefined; - relatedTests.push({ - file, - testCount: testNames.length, - testNames, - source: testSource, - }); - } - - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - - // Children (parameters, properties, constants) - let nodeChildren = []; - try { - nodeChildren = findNodeChildren(db, node.id).map((c) => ({ - name: c.name, - kind: c.kind, - line: c.line, - endLine: c.end_line || null, - })); - } catch (e) { - debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); - } + const callees = buildCallees(db, node, repoRoot, getFileLines, { noTests, depth }); + const callers = buildCallers(db, node, noTests); + const relatedTests = buildRelatedTests(db, node, getFileLines, includeTests); + const complexityMetrics = getComplexityMetrics(db, node.id); + const nodeChildren = getNodeChildrenSafe(db, node.id); return { name: node.name, @@ -370,35 +379,9 @@ export function explainData(target, customDbPath, opts = {}) { ? 
explainFileImpl(db, target, getFileLines) : explainFunctionImpl(db, target, noTests, getFileLines); - // Recursive dependency explanation for function targets if (kind === 'function' && depth > 0 && results.length > 0) { const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`)); - - function explainCallees(parentResults, currentDepth) { - if (currentDepth <= 0) return; - for (const r of parentResults) { - const newCallees = []; - for (const callee of r.callees) { - const key = `${callee.name}:${callee.file}:${callee.line}`; - if (visited.has(key)) continue; - visited.add(key); - const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); - const exact = calleeResults.find( - (cr) => cr.file === callee.file && cr.line === callee.line, - ); - if (exact) { - exact._depth = (r._depth || 0) + 1; - newCallees.push(exact); - } - } - if (newCallees.length > 0) { - r.depDetails = newCallees; - explainCallees(newCallees, currentDepth - 1); - } - } - } - - explainCallees(results, depth); + explainCallees(results, depth, visited, db, noTests, getFileLines); } const base = { target, kind, results }; diff --git a/src/domain/analysis/dependencies.js b/src/domain/analysis/dependencies.js index e632470f..867cd5bd 100644 --- a/src/domain/analysis/dependencies.js +++ b/src/domain/analysis/dependencies.js @@ -46,6 +46,61 @@ export function fileDepsData(file, customDbPath, opts = {}) { } } +/** + * BFS transitive caller traversal starting from `callers` of `nodeId`. + * Returns an object keyed by depth (2..depth) → array of caller descriptors. + */ +function buildTransitiveCallers(db, callers, nodeId, depth, noTests) { + const transitiveCallers = {}; + if (depth <= 1) return transitiveCallers; + + const visited = new Set([nodeId]); + let frontier = callers + .map((c) => { + const row = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(c.name, c.kind, c.file, c.line); + return row ? 
{ ...c, id: row.id } : null; + }) + .filter(Boolean); + + for (let d = 2; d <= depth; d++) { + const nextFrontier = []; + for (const f of frontier) { + if (visited.has(f.id)) continue; + visited.add(f.id); + const upstream = db + .prepare(` + SELECT n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls' + `) + .all(f.id); + for (const u of upstream) { + if (noTests && isTestFile(u.file)) continue; + const uid = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(u.name, u.kind, u.file, u.line)?.id; + if (uid && !visited.has(uid)) { + nextFrontier.push({ ...u, id: uid }); + } + } + } + if (nextFrontier.length > 0) { + transitiveCallers[d] = nextFrontier.map((n) => ({ + name: n.name, + kind: n.kind, + file: n.file, + line: n.line, + })); + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + + return transitiveCallers; +} + export function fnDepsData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -75,55 +130,7 @@ export function fnDepsData(name, customDbPath, opts = {}) { } if (noTests) callers = callers.filter((c) => !isTestFile(c.file)); - // Transitive callers - const transitiveCallers = {}; - if (depth > 1) { - const visited = new Set([node.id]); - let frontier = callers - .map((c) => { - const row = db - .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') - .get(c.name, c.kind, c.file, c.line); - return row ? { ...c, id: row.id } : null; - }) - .filter(Boolean); - - for (let d = 2; d <= depth; d++) { - const nextFrontier = []; - for (const f of frontier) { - if (visited.has(f.id)) continue; - visited.add(f.id); - const upstream = db - .prepare(` - SELECT n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? 
AND e.kind = 'calls' - `) - .all(f.id); - for (const u of upstream) { - if (noTests && isTestFile(u.file)) continue; - const uid = db - .prepare( - 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ) - .get(u.name, u.kind, u.file, u.line)?.id; - if (uid && !visited.has(uid)) { - nextFrontier.push({ ...u, id: uid }); - } - } - } - if (nextFrontier.length > 0) { - transitiveCallers[d] = nextFrontier.map((n) => ({ - name: n.name, - kind: n.kind, - file: n.file, - line: n.line, - })); - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } + const transitiveCallers = buildTransitiveCallers(db, callers, node.id, depth, noTests); return { ...normalizeSymbol(node, db, hc), @@ -151,37 +158,40 @@ export function fnDepsData(name, customDbPath, opts = {}) { } } -export function pathData(from, to, customDbPath, opts = {}) { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - const maxDepth = opts.maxDepth || 10; - const edgeKinds = opts.edgeKinds || ['calls']; - const reverse = opts.reverse || false; +/** + * Resolve from/to symbol names to node records. + * Returns { sourceNode, targetNode, fromCandidates, toCandidates } on success, + * or { earlyResult } when a caller-facing error/not-found response should be returned immediately. 
+ */ +function resolveEndpoints(db, from, to, opts) { + const { noTests = false } = opts; - const fromNodes = findMatchingNodes(db, from, { - noTests, - file: opts.fromFile, - kind: opts.kind, - }); - if (fromNodes.length === 0) { - return { + const fromNodes = findMatchingNodes(db, from, { + noTests, + file: opts.fromFile, + kind: opts.kind, + }); + if (fromNodes.length === 0) { + return { + earlyResult: { from, to, found: false, error: `No symbol matching "${from}"`, fromCandidates: [], toCandidates: [], - }; - } + }, + }; + } - const toNodes = findMatchingNodes(db, to, { - noTests, - file: opts.toFile, - kind: opts.kind, - }); - if (toNodes.length === 0) { - return { + const toNodes = findMatchingNodes(db, to, { + noTests, + file: opts.toFile, + kind: opts.kind, + }); + if (toNodes.length === 0) { + return { + earlyResult: { from, to, found: false, @@ -190,18 +200,118 @@ export function pathData(from, to, customDbPath, opts = {}) { .slice(0, 5) .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })), toCandidates: [], - }; + }, + }; + } + + const fromCandidates = fromNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const toCandidates = toNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + + return { + sourceNode: fromNodes[0], + targetNode: toNodes[0], + fromCandidates, + toCandidates, + }; +} + +/** + * BFS from sourceId toward targetId. + * Returns { found, parent, alternateCount, foundDepth }. + * `parent` maps nodeId → { parentId, edgeKind }. + */ +function bfsShortestPath(db, sourceId, targetId, edgeKinds, reverse, maxDepth, noTests) { + const kindPlaceholders = edgeKinds.map(() => '?').join(', '); + + // Forward: source_id → target_id (A calls... calls B) + // Reverse: target_id → source_id (B is called by... called by A) + const neighborQuery = reverse + ? 
`SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` + : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.target_id = n.id + WHERE e.source_id = ? AND e.kind IN (${kindPlaceholders})`; + const neighborStmt = db.prepare(neighborQuery); + + const visited = new Set([sourceId]); + const parent = new Map(); + let queue = [sourceId]; + let found = false; + let alternateCount = 0; + let foundDepth = -1; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue = []; + for (const currentId of queue) { + const neighbors = neighborStmt.all(currentId, ...edgeKinds); + for (const n of neighbors) { + if (noTests && isTestFile(n.file)) continue; + if (n.id === targetId) { + if (!found) { + found = true; + foundDepth = depth; + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + } + alternateCount++; + continue; + } + if (!visited.has(n.id)) { + visited.add(n.id); + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + nextQueue.push(n.id); + } + } } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + return { found, parent, alternateCount, foundDepth }; +} + +/** + * Walk the parent map from targetId back to sourceId and return an ordered + * array of node IDs source → target. + */ +function reconstructPath(db, pathIds, parent) { + const nodeCache = new Map(); + const getNode = (id) => { + if (nodeCache.has(id)) return nodeCache.get(id); + const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); + nodeCache.set(id, row); + return row; + }; + + return pathIds.map((id, idx) => { + const node = getNode(id); + const edgeKind = idx === 0 ? 
null : parent.get(id).edgeKind; + return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; + }); +} + +export function pathData(from, to, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const maxDepth = opts.maxDepth || 10; + const edgeKinds = opts.edgeKinds || ['calls']; + const reverse = opts.reverse || false; - const sourceNode = fromNodes[0]; - const targetNode = toNodes[0]; + const resolved = resolveEndpoints(db, from, to, { + noTests, + fromFile: opts.fromFile, + toFile: opts.toFile, + kind: opts.kind, + }); + if (resolved.earlyResult) return resolved.earlyResult; - const fromCandidates = fromNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); - const toCandidates = toNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const { sourceNode, targetNode, fromCandidates, toCandidates } = resolved; // Self-path if (sourceNode.id === targetNode.id) { @@ -228,55 +338,12 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // Build edge kind filter - const kindPlaceholders = edgeKinds.map(() => '?').join(', '); - - // BFS — direction depends on `reverse` flag - // Forward: source_id → target_id (A calls... calls B) - // Reverse: target_id → source_id (B is called by... called by A) - const neighborQuery = reverse - ? `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` - : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.target_id = n.id - WHERE e.source_id = ? 
AND e.kind IN (${kindPlaceholders})`; - const neighborStmt = db.prepare(neighborQuery); - - const visited = new Set([sourceNode.id]); - // parent map: nodeId → { parentId, edgeKind } - const parent = new Map(); - let queue = [sourceNode.id]; - let found = false; - let alternateCount = 0; - let foundDepth = -1; - - for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue = []; - for (const currentId of queue) { - const neighbors = neighborStmt.all(currentId, ...edgeKinds); - for (const n of neighbors) { - if (noTests && isTestFile(n.file)) continue; - if (n.id === targetNode.id) { - if (!found) { - found = true; - foundDepth = depth; - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - } - alternateCount++; - continue; - } - if (!visited.has(n.id)) { - visited.add(n.id); - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - nextQueue.push(n.id); - } - } - } - if (found) break; - queue = nextQueue; - if (queue.length === 0) break; - } + const { + found, + parent, + alternateCount: rawAlternateCount, + foundDepth, + } = bfsShortestPath(db, sourceNode.id, targetNode.id, edgeKinds, reverse, maxDepth, noTests); if (!found) { return { @@ -294,8 +361,8 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // alternateCount includes the one we kept; subtract 1 for "alternates" - alternateCount = Math.max(0, alternateCount - 1); + // rawAlternateCount includes the one we kept; subtract 1 for "alternates" + const alternateCount = Math.max(0, rawAlternateCount - 1); // Reconstruct path from target back to source const pathIds = [targetNode.id]; @@ -307,20 +374,7 @@ export function pathData(from, to, customDbPath, opts = {}) { } pathIds.reverse(); - // Build path with node info - const nodeCache = new Map(); - const getNode = (id) => { - if (nodeCache.has(id)) return nodeCache.get(id); - const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); - nodeCache.set(id, row); - return row; - 
}; - - const resultPath = pathIds.map((id, idx) => { - const node = getNode(id); - const edgeKind = idx === 0 ? null : parent.get(id).edgeKind; - return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; - }); + const resultPath = reconstructPath(db, pathIds, parent); return { from, diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index bd3bbe1d..6bdd5464 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -134,6 +134,251 @@ export function fnImpactData(name, customDbPath, opts = {}) { } } +// ─── diffImpactData helpers ───────────────────────────────────────────── + +/** + * Walk up from repoRoot until a .git directory is found. + * Returns true if a git root exists, false otherwise. + * + * @param {string} repoRoot + * @returns {boolean} + */ +function findGitRoot(repoRoot) { + let checkDir = repoRoot; + while (checkDir) { + if (fs.existsSync(path.join(checkDir, '.git'))) { + return true; + } + const parent = path.dirname(checkDir); + if (parent === checkDir) break; + checkDir = parent; + } + return false; +} + +/** + * Execute git diff and return the raw output string. + * Returns `{ output: string }` on success or `{ error: string }` on failure. + * + * @param {string} repoRoot + * @param {{ staged?: boolean, ref?: string }} opts + * @returns {{ output: string } | { error: string }} + */ +function runGitDiff(repoRoot, opts) { + try { + const args = opts.staged + ? ['diff', '--cached', '--unified=0', '--no-color'] + : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; + const output = execFileSync('git', args, { + cwd: repoRoot, + encoding: 'utf-8', + maxBuffer: 10 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'], + }); + return { output }; + } catch (e) { + return { error: `Failed to run git diff: ${e.message}` }; + } +} + +/** + * Parse raw git diff output into a changedRanges map and newFiles set. 
+ * + * @param {string} diffOutput + * @returns {{ changedRanges: Map<string, Array<{ start: number, end: number }>>, newFiles: Set<string> }} + */ +function parseGitDiff(diffOutput) { + const changedRanges = new Map(); + const newFiles = new Set(); + let currentFile = null; + let prevIsDevNull = false; + + for (const line of diffOutput.split('\n')) { + if (line.startsWith('--- /dev/null')) { + prevIsDevNull = true; + continue; + } + if (line.startsWith('--- ')) { + prevIsDevNull = false; + continue; + } + const fileMatch = line.match(/^\+\+\+ b\/(.+)/); + if (fileMatch) { + currentFile = fileMatch[1]; + if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); + if (prevIsDevNull) newFiles.add(currentFile); + prevIsDevNull = false; + continue; + } + const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); + if (hunkMatch && currentFile) { + const start = parseInt(hunkMatch[1], 10); + const count = parseInt(hunkMatch[2] || '1', 10); + changedRanges.get(currentFile).push({ start, end: start + count - 1 }); + } + } + + return { changedRanges, newFiles }; +} + +/** + * Find all function/method/class nodes whose line ranges overlap any changed range. + * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @returns {Array} + */ +function findAffectedFunctions(db, changedRanges, noTests) { + const affectedFunctions = []; + for (const [file, ranges] of changedRanges) { + if (noTests && isTestFile(file)) continue; + const defs = db + .prepare( + `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`, + ) + .all(file); + for (let i = 0; i < defs.length; i++) { + const def = defs[i]; + const endLine = def.end_line || (defs[i + 1] ? 
defs[i + 1].line - 1 : 999999); + for (const range of ranges) { + if (range.start <= endLine && range.end >= def.line) { + affectedFunctions.push(def); + break; + } + } + } + } + return affectedFunctions; +} + +/** + * Run BFS per affected function, collecting per-function results and the full affected set. + * + * @param {import('better-sqlite3').Database} db + * @param {Array} affectedFunctions + * @param {boolean} noTests + * @param {number} maxDepth + * @returns {{ functionResults: Array, allAffected: Set }} + */ +function buildFunctionImpactResults(db, affectedFunctions, noTests, maxDepth) { + const allAffected = new Set(); + const functionResults = affectedFunctions.map((fn) => { + const edges = []; + const idToKey = new Map(); + idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); + + const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { + noTests, + maxDepth, + onVisit(c, parentId) { + allAffected.add(`${c.file}:${c.name}`); + const callerKey = `${c.file}::${c.name}:${c.line}`; + idToKey.set(c.id, callerKey); + edges.push({ from: idToKey.get(parentId), to: callerKey }); + }, + }); + + return { + name: fn.name, + kind: fn.kind, + file: fn.file, + line: fn.line, + transitiveCallers: totalDependents, + levels, + edges, + }; + }); + + return { functionResults, allAffected }; +} + +/** + * Look up historically co-changed files for the set of changed files. + * Returns an empty array if the co_changes table is unavailable. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {boolean} noTests + * @returns {Array} + */ +function lookupCoChanges(db, changedRanges, affectedFiles, noTests) { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + const changedFilesList = [...changedRanges.keys()]; + const coResults = coChangeForFiles(changedFilesList, db, { + minJaccard: 0.3, + limit: 20, + noTests, + }); + return coResults.filter((r) => !affectedFiles.has(r.file)); + } catch (e) { + debug(`co_changes lookup skipped: ${e.message}`); + return []; + } +} + +/** + * Look up CODEOWNERS for changed and affected files. + * Returns null if no owners are found or lookup fails. + * + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {string} repoRoot + * @returns {{ owners: object, affectedOwners: Array, suggestedReviewers: Array } | null} + */ +function lookupOwnership(changedRanges, affectedFiles, repoRoot) { + try { + const allFilePaths = [...new Set([...changedRanges.keys(), ...affectedFiles])]; + const ownerResult = ownersForFiles(allFilePaths, repoRoot); + if (ownerResult.affectedOwners.length > 0) { + return { + owners: Object.fromEntries(ownerResult.owners), + affectedOwners: ownerResult.affectedOwners, + suggestedReviewers: ownerResult.suggestedReviewers, + }; + } + return null; + } catch (e) { + debug(`CODEOWNERS lookup skipped: ${e.message}`); + return null; + } +} + +/** + * Check manifesto boundary violations scoped to the changed files. + * Returns `{ boundaryViolations, boundaryViolationCount }`. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @param {object} opts — full diffImpactData opts (may contain `opts.config`) + * @param {string} repoRoot + * @returns {{ boundaryViolations: Array, boundaryViolationCount: number }} + */ +function checkBoundaryViolations(db, changedRanges, noTests, opts, repoRoot) { + try { + const cfg = opts.config || loadConfig(repoRoot); + const boundaryConfig = cfg.manifesto?.boundaries; + if (boundaryConfig) { + const result = evaluateBoundaries(db, boundaryConfig, { + scopeFiles: [...changedRanges.keys()], + noTests, + }); + return { + boundaryViolations: result.violations, + boundaryViolationCount: result.violationCount, + }; + } + } catch (e) { + debug(`boundary check skipped: ${e.message}`); + } + return { boundaryViolations: [], boundaryViolationCount: 0 }; +} + +// ─── diffImpactData ───────────────────────────────────────────────────── + /** * Fix #2: Shell injection vulnerability. * Uses execFileSync instead of execSync to prevent shell interpretation of user input. @@ -147,38 +392,14 @@ export function diffImpactData(customDbPath, opts = {}) { const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); - // Verify we're in a git repository before running git diff - let checkDir = repoRoot; - let isGitRepo = false; - while (checkDir) { - if (fs.existsSync(path.join(checkDir, '.git'))) { - isGitRepo = true; - break; - } - const parent = path.dirname(checkDir); - if (parent === checkDir) break; - checkDir = parent; - } - if (!isGitRepo) { + if (!findGitRoot(repoRoot)) { return { error: `Not a git repository: ${repoRoot}` }; } - let diffOutput; - try { - const args = opts.staged - ? 
['diff', '--cached', '--unified=0', '--no-color'] - : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; - diffOutput = execFileSync('git', args, { - cwd: repoRoot, - encoding: 'utf-8', - maxBuffer: 10 * 1024 * 1024, - stdio: ['pipe', 'pipe', 'pipe'], - }); - } catch (e) { - return { error: `Failed to run git diff: ${e.message}` }; - } + const gitResult = runGitDiff(repoRoot, opts); + if (gitResult.error) return { error: gitResult.error }; - if (!diffOutput.trim()) { + if (!gitResult.output.trim()) { return { changedFiles: 0, newFiles: [], @@ -188,34 +409,7 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const changedRanges = new Map(); - const newFiles = new Set(); - let currentFile = null; - let prevIsDevNull = false; - for (const line of diffOutput.split('\n')) { - if (line.startsWith('--- /dev/null')) { - prevIsDevNull = true; - continue; - } - if (line.startsWith('--- ')) { - prevIsDevNull = false; - continue; - } - const fileMatch = line.match(/^\+\+\+ b\/(.+)/); - if (fileMatch) { - currentFile = fileMatch[1]; - if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); - if (prevIsDevNull) newFiles.add(currentFile); - prevIsDevNull = false; - continue; - } - const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); - if (hunkMatch && currentFile) { - const start = parseInt(hunkMatch[1], 10); - const count = parseInt(hunkMatch[2] || '1', 10); - changedRanges.get(currentFile).push({ start, end: start + count - 1 }); - } - } + const { changedRanges, newFiles } = parseGitDiff(gitResult.output); if (changedRanges.size === 0) { return { @@ -227,106 +421,26 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const affectedFunctions = []; - for (const [file, ranges] of changedRanges) { - if (noTests && isTestFile(file)) continue; - const defs = db - .prepare( - `SELECT * FROM nodes WHERE file = ? 
AND kind IN ('function', 'method', 'class') ORDER BY line`, - ) - .all(file); - for (let i = 0; i < defs.length; i++) { - const def = defs[i]; - const endLine = def.end_line || (defs[i + 1] ? defs[i + 1].line - 1 : 999999); - for (const range of ranges) { - if (range.start <= endLine && range.end >= def.line) { - affectedFunctions.push(def); - break; - } - } - } - } - - const allAffected = new Set(); - const functionResults = affectedFunctions.map((fn) => { - const edges = []; - const idToKey = new Map(); - idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); - - const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { - noTests, - maxDepth, - onVisit(c, parentId) { - allAffected.add(`${c.file}:${c.name}`); - const callerKey = `${c.file}::${c.name}:${c.line}`; - idToKey.set(c.id, callerKey); - edges.push({ from: idToKey.get(parentId), to: callerKey }); - }, - }); - - return { - name: fn.name, - kind: fn.kind, - file: fn.file, - line: fn.line, - transitiveCallers: totalDependents, - levels, - edges, - }; - }); + const affectedFunctions = findAffectedFunctions(db, changedRanges, noTests); + const { functionResults, allAffected } = buildFunctionImpactResults( + db, + affectedFunctions, + noTests, + maxDepth, + ); const affectedFiles = new Set(); for (const key of allAffected) affectedFiles.add(key.split(':')[0]); - // Look up historically coupled files from co-change data - let historicallyCoupled = []; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - const changedFilesList = [...changedRanges.keys()]; - const coResults = coChangeForFiles(changedFilesList, db, { - minJaccard: 0.3, - limit: 20, - noTests, - }); - // Exclude files already found via static analysis - historicallyCoupled = coResults.filter((r) => !affectedFiles.has(r.file)); - } catch (e) { - debug(`co_changes lookup skipped: ${e.message}`); - } - - // Look up CODEOWNERS for changed + affected files - let ownership = null; - try { - const allFilePaths = [...new 
Set([...changedRanges.keys(), ...affectedFiles])]; - const ownerResult = ownersForFiles(allFilePaths, repoRoot); - if (ownerResult.affectedOwners.length > 0) { - ownership = { - owners: Object.fromEntries(ownerResult.owners), - affectedOwners: ownerResult.affectedOwners, - suggestedReviewers: ownerResult.suggestedReviewers, - }; - } - } catch (e) { - debug(`CODEOWNERS lookup skipped: ${e.message}`); - } - - // Check boundary violations scoped to changed files - let boundaryViolations = []; - let boundaryViolationCount = 0; - try { - const cfg = opts.config || loadConfig(repoRoot); - const boundaryConfig = cfg.manifesto?.boundaries; - if (boundaryConfig) { - const result = evaluateBoundaries(db, boundaryConfig, { - scopeFiles: [...changedRanges.keys()], - noTests, - }); - boundaryViolations = result.violations; - boundaryViolationCount = result.violationCount; - } - } catch (e) { - debug(`boundary check skipped: ${e.message}`); - } + const historicallyCoupled = lookupCoChanges(db, changedRanges, affectedFiles, noTests); + const ownership = lookupOwnership(changedRanges, affectedFiles, repoRoot); + const { boundaryViolations, boundaryViolationCount } = checkBoundaryViolations( + db, + changedRanges, + noTests, + opts, + repoRoot, + ); const base = { changedFiles: changedRanges.size, diff --git a/src/domain/analysis/module-map.js b/src/domain/analysis/module-map.js index d2bc613b..daf09b33 100644 --- a/src/domain/analysis/module-map.js +++ b/src/domain/analysis/module-map.js @@ -37,6 +37,241 @@ export const FALSE_POSITIVE_NAMES = new Set([ ]); export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; +// --------------------------------------------------------------------------- +// Section helpers +// --------------------------------------------------------------------------- + +function buildTestFileIds(db) { + const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); + const testFileIds = new Set(); + const testFiles = new Set(); + for 
(const n of allFileNodes) { + if (isTestFile(n.file)) { + testFileIds.add(n.id); + testFiles.add(n.file); + } + } + const allNodes = db.prepare('SELECT id, file FROM nodes').all(); + for (const n of allNodes) { + if (testFiles.has(n.file)) testFileIds.add(n.id); + } + return testFileIds; +} + +function countNodesByKind(db, testFileIds) { + let nodeRows; + if (testFileIds) { + const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); + const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); + const counts = {}; + for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; + nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of nodeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countEdgesByKind(db, testFileIds) { + let edgeRows; + if (testFileIds) { + const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); + const filtered = allEdges.filter( + (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), + ); + const counts = {}; + for (const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; + edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of edgeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countFilesByLanguage(db, noTests) { + const extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); + const byLanguage = {}; 
+ for (const row of fileNodes) { + const ext = path.extname(row.file).toLowerCase(); + const lang = extToLang.get(ext) || 'other'; + byLanguage[lang] = (byLanguage[lang] || 0) + 1; + } + return { total: fileNodes.length, languages: Object.keys(byLanguage).length, byLanguage }; +} + +function findHotspots(db, noTests, limit) { + const testFilter = testFilterSQL('n.file', noTests); + const hotspotRows = db + .prepare(` + SELECT n.file, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out + FROM nodes n + WHERE n.kind = 'file' ${testFilter} + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) + + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC + `) + .all(); + const filtered = noTests ? hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; + return filtered.slice(0, limit).map((r) => ({ + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + })); +} + +function getEmbeddingsInfo(db) { + try { + const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); + if (count && count.c > 0) { + const meta = {}; + const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); + for (const r of metaRows) meta[r.key] = r.value; + return { + count: count.c, + model: meta.model || null, + dim: meta.dim ? 
parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, + }; + } + } catch (e) { + debug(`embeddings lookup skipped: ${e.message}`); + } + return null; +} + +function computeQualityMetrics(db, testFilter) { + const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); + + const totalCallable = db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, + ) + .get().c; + const callableWithCallers = db + .prepare(` + SELECT COUNT(DISTINCT e.target_id) as c FROM edges e + JOIN nodes n ON e.target_id = n.id + WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} + `) + .get().c; + const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; + + const totalCallEdges = db.prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'").get().c; + const highConfCallEdges = db + .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") + .get().c; + const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; + + const fpRows = db + .prepare(` + SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count + FROM nodes n + LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' + WHERE n.kind IN ('function', 'method') + GROUP BY n.id + HAVING caller_count > ? + ORDER BY caller_count DESC + `) + .all(FALSE_POSITIVE_CALLER_THRESHOLD); + const falsePositiveWarnings = fpRows + .filter((r) => + FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), + ) + .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); + + let fpEdgeCount = 0; + for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; + const falsePositiveRatio = totalCallEdges > 0 ? 
fpEdgeCount / totalCallEdges : 0; + + const score = Math.round( + callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, + ); + + return { + score, + callerCoverage: { + ratio: callerCoverage, + covered: callableWithCallers, + total: totalCallable, + }, + callConfidence: { + ratio: callConfidence, + highConf: highConfCallEdges, + total: totalCallEdges, + }, + falsePositiveWarnings, + }; +} + +function countRoles(db, noTests) { + let roleRows; + if (noTests) { + const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); + const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); + const counts = {}; + for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; + roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); + } else { + roleRows = db + .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') + .all(); + } + const roles = {}; + for (const r of roleRows) roles[r.role] = r.c; + return roles; +} + +function getComplexitySummary(db, testFilter) { + try { + const cRows = db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') ${testFilter}`, + ) + .all(); + if (cRows.length > 0) { + const miValues = cRows.map((r) => r.maintainability_index || 0); + return { + analyzed: cRows.length, + avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / cRows.length).toFixed(1), + avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), + maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), + maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), + avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), + minMI: +Math.min(...miValues).toFixed(1), + }; + } + } catch (e) { + debug(`complexity summary skipped: ${e.message}`); + } + return null; +} + +// 
--------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + export function moduleMapData(customDbPath, limit = 20, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -79,237 +314,27 @@ export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { const noTests = opts.noTests || false; + const testFilter = testFilterSQL('n.file', noTests); - // Build set of test file IDs for filtering nodes and edges - let testFileIds = null; - if (noTests) { - const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); - testFileIds = new Set(); - const testFiles = new Set(); - for (const n of allFileNodes) { - if (isTestFile(n.file)) { - testFileIds.add(n.id); - testFiles.add(n.file); - } - } - - // Also collect non-file node IDs that belong to test files - const allNodes = db.prepare('SELECT id, file FROM nodes').all(); - for (const n of allNodes) { - if (testFiles.has(n.file)) testFileIds.add(n.id); - } - } - - // Node breakdown by kind - let nodeRows; - if (noTests) { - const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); - const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); - const counts = {}; - for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; - nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); - } - const nodesByKind = {}; - let totalNodes = 0; - for (const r of nodeRows) { - nodesByKind[r.kind] = r.c; - totalNodes += r.c; - } - - // Edge breakdown by kind - let edgeRows; - if (noTests) { - const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); - const filtered = allEdges.filter( - (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), - ); - const counts = {}; - for 
(const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; - edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); - } - const edgesByKind = {}; - let totalEdges = 0; - for (const r of edgeRows) { - edgesByKind[r.kind] = r.c; - totalEdges += r.c; - } + const testFileIds = noTests ? buildTestFileIds(db) : null; - // File/language distribution — map extensions via LANGUAGE_REGISTRY - const extToLang = new Map(); - for (const entry of LANGUAGE_REGISTRY) { - for (const ext of entry.extensions) { - extToLang.set(ext, entry.id); - } - } - let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); - if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); - const byLanguage = {}; - for (const row of fileNodes) { - const ext = path.extname(row.file).toLowerCase(); - const lang = extToLang.get(ext) || 'other'; - byLanguage[lang] = (byLanguage[lang] || 0) + 1; - } - const langCount = Object.keys(byLanguage).length; + const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, testFileIds); + const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, testFileIds); + const files = countFilesByLanguage(db, noTests); - // Cycles const fileCycles = findCycles(db, { fileLevel: true, noTests }); const fnCycles = findCycles(db, { fileLevel: false, noTests }); - // Top 5 coupling hotspots (fan-in + fan-out, file nodes) - const testFilter = testFilterSQL('n.file', noTests); - const hotspotRows = db - .prepare(` - SELECT n.file, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out - FROM nodes n - WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) - + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC - `) - .all(); - const filteredHotspots = noTests ? 
hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; - const hotspots = filteredHotspots.slice(0, 5).map((r) => ({ - file: r.file, - fanIn: r.fan_in, - fanOut: r.fan_out, - })); - - // Embeddings metadata - let embeddings = null; - try { - const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); - if (count && count.c > 0) { - const meta = {}; - const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); - for (const r of metaRows) meta[r.key] = r.value; - embeddings = { - count: count.c, - model: meta.model || null, - dim: meta.dim ? parseInt(meta.dim, 10) : null, - builtAt: meta.built_at || null, - }; - } - } catch (e) { - debug(`embeddings lookup skipped: ${e.message}`); - } - - // Graph quality metrics - const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); - const totalCallable = db - .prepare( - `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, - ) - .get().c; - const callableWithCallers = db - .prepare(` - SELECT COUNT(DISTINCT e.target_id) as c FROM edges e - JOIN nodes n ON e.target_id = n.id - WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} - `) - .get().c; - const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; - - const totalCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'") - .get().c; - const highConfCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") - .get().c; - const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; - - // False-positive warnings: generic names with > threshold callers - const fpRows = db - .prepare(` - SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count - FROM nodes n - LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' - WHERE n.kind IN ('function', 'method') - GROUP BY n.id - HAVING caller_count > ? 
- ORDER BY caller_count DESC - `) - .all(FALSE_POSITIVE_CALLER_THRESHOLD); - const falsePositiveWarnings = fpRows - .filter((r) => - FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), - ) - .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); - - // Edges from suspicious nodes - let fpEdgeCount = 0; - for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; - const falsePositiveRatio = totalCallEdges > 0 ? fpEdgeCount / totalCallEdges : 0; - - const score = Math.round( - callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, - ); - - const quality = { - score, - callerCoverage: { - ratio: callerCoverage, - covered: callableWithCallers, - total: totalCallable, - }, - callConfidence: { - ratio: callConfidence, - highConf: highConfCallEdges, - total: totalCallEdges, - }, - falsePositiveWarnings, - }; - - // Role distribution - let roleRows; - if (noTests) { - const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); - const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); - const counts = {}; - for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; - roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); - } else { - roleRows = db - .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') - .all(); - } - const roles = {}; - for (const r of roleRows) roles[r.role] = r.c; - - // Complexity summary - let complexity = null; - try { - const cRows = db - .prepare( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') ${testFilter}`, - ) - .all(); - if (cRows.length > 0) { - const miValues = cRows.map((r) => r.maintainability_index || 0); - complexity = { - analyzed: cRows.length, - avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / 
cRows.length).toFixed(1), - avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), - maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - }; - } - } catch (e) { - debug(`complexity summary skipped: ${e.message}`); - } + const hotspots = findHotspots(db, noTests, 5); + const embeddings = getEmbeddingsInfo(db); + const quality = computeQualityMetrics(db, testFilter); + const roles = countRoles(db, noTests); + const complexity = getComplexitySummary(db, testFilter); return { nodes: { total: totalNodes, byKind: nodesByKind }, edges: { total: totalEdges, byKind: edgesByKind }, - files: { total: fileNodes.length, languages: langCount, byLanguage }, + files, cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length }, hotspots, embeddings, From b2f89f13448946ade2876e5f7ac0dd05534bf010 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:35:59 -0600 Subject: [PATCH 14/37] refactor: decompose buildComplexityMetrics Impact: 5 functions changed, 3 affected --- src/features/complexity.js | 246 +++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/src/features/complexity.js b/src/features/complexity.js index 12f5acf1..4f82e5ef 100644 --- a/src/features/complexity.js +++ b/src/features/complexity.js @@ -330,41 +330,138 @@ export function computeAllMetrics(functionNode, langId) { */ export { _findFunctionNode as findFunctionNode }; -/** - * Re-parse changed files with WASM tree-sitter, find function AST subtrees, - * compute complexity, and upsert into function_complexity table. 
- * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { - // Only initialize WASM parsers if some files lack both a cached tree AND pre-computed complexity - let parsers = null; - let extToLang = null; - let needsFallback = false; +async function initWasmParsersIfNeeded(fileSymbols) { for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { - // Only consider files whose language actually has complexity rules const ext = path.extname(relPath).toLowerCase(); if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; - // Check if all function/method defs have pre-computed complexity (native engine) const hasPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, ); if (!hasPrecomputed) { - needsFallback = true; - break; + const { createParsers } = await import('../domain/parser.js'); + const parsers = await createParsers(); + const extToLang = buildExtToLangMap(); + return { parsers, extToLang }; } } } - if (needsFallback) { - const { createParsers } = await import('../domain/parser.js'); - parsers = await createParsers(); - extToLang = buildExtToLangMap(); + return { parsers: null, extToLang: null }; +} + +function getTreeForFile(symbols, relPath, rootDir, parsers, extToLang, getParser) { + let tree = symbols._tree; + let langId = symbols._langId; + + const allPrecomputed = symbols.definitions.every( + (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, + ); + + if (!allPrecomputed && !tree) { + const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) return null; + if (!extToLang) return null; + langId = extToLang.get(ext); + if (!langId) return null; + + const absPath = 
path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`complexity: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParser(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`complexity: parse failed for ${relPath}: ${e.message}`); + return null; + } } + return { tree, langId }; +} + +function upsertPrecomputedComplexity(db, upsert, def, relPath) { + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; + upsert.run( + nodeId, + def.complexity.cognitive, + def.complexity.cyclomatic, + def.complexity.maxNesting ?? 0, + cl ? cl.loc : 0, + cl ? cl.sloc : 0, + cl ? cl.commentLines : 0, + ch ? ch.n1 : 0, + ch ? ch.n2 : 0, + ch ? ch.bigN1 : 0, + ch ? ch.bigN2 : 0, + ch ? ch.vocabulary : 0, + ch ? ch.length : 0, + ch ? ch.volume : 0, + ch ? ch.difficulty : 0, + ch ? ch.effort : 0, + ch ? ch.bugs : 0, + def.complexity.maintainabilityIndex ?? 0, + ); + return 1; +} + +function upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules) { + if (!tree || !rules) return 0; + + const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); + if (!funcNode) return 0; + + const metrics = computeAllMetrics(funcNode, langId); + if (!metrics) return 0; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + + const h = metrics.halstead; + upsert.run( + nodeId, + metrics.cognitive, + metrics.cyclomatic, + metrics.maxNesting, + metrics.loc.loc, + metrics.loc.sloc, + metrics.loc.commentLines, + h ? h.n1 : 0, + h ? h.n2 : 0, + h ? h.bigN1 : 0, + h ? h.bigN2 : 0, + h ? h.vocabulary : 0, + h ? h.length : 0, + h ? h.volume : 0, + h ? h.difficulty : 0, + h ? h.effort : 0, + h ? 
h.bugs : 0, + metrics.mi, + ); + return 1; +} + +/** + * Re-parse changed files with WASM tree-sitter, find function AST subtrees, + * compute complexity, and upsert into function_complexity table. + * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { + const { parsers, extToLang } = await initWasmParsersIfNeeded(fileSymbols); const { getParser } = await import('../domain/parser.js'); const upsert = db.prepare( @@ -381,113 +478,20 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp const tx = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { - // Check if all function/method defs have pre-computed complexity - const allPrecomputed = symbols.definitions.every( - (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, - ); - - let tree = symbols._tree; - let langId = symbols._langId; - - // Only attempt WASM fallback if we actually need AST-based computation - if (!allPrecomputed && !tree) { - const ext = path.extname(relPath).toLowerCase(); - if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Language has no complexity rules - if (!extToLang) continue; // No WASM parsers available - langId = extToLang.get(ext); - if (!langId) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`complexity: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParser(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`complexity: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - + const result = getTreeForFile(symbols, relPath, rootDir, parsers, 
extToLang, getParser); + const tree = result ? result.tree : null; + const langId = result ? result.langId : null; const rules = langId ? COMPLEXITY_RULES.get(langId) : null; for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; if (!def.line) continue; - // Use pre-computed complexity from native engine if available if (def.complexity) { - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - const ch = def.complexity.halstead; - const cl = def.complexity.loc; - upsert.run( - nodeId, - def.complexity.cognitive, - def.complexity.cyclomatic, - def.complexity.maxNesting ?? 0, - cl ? cl.loc : 0, - cl ? cl.sloc : 0, - cl ? cl.commentLines : 0, - ch ? ch.n1 : 0, - ch ? ch.n2 : 0, - ch ? ch.bigN1 : 0, - ch ? ch.bigN2 : 0, - ch ? ch.vocabulary : 0, - ch ? ch.length : 0, - ch ? ch.volume : 0, - ch ? ch.difficulty : 0, - ch ? ch.effort : 0, - ch ? ch.bugs : 0, - def.complexity.maintainabilityIndex ?? 0, - ); - analyzed++; - continue; + analyzed += upsertPrecomputedComplexity(db, upsert, def, relPath); + } else { + analyzed += upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules); } - - // Fallback: compute from AST tree - if (!tree || !rules) continue; - - const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); - if (!funcNode) continue; - - // Single-pass: complexity + Halstead + LOC + MI in one DFS walk - const metrics = computeAllMetrics(funcNode, langId); - if (!metrics) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - const h = metrics.halstead; - upsert.run( - nodeId, - metrics.cognitive, - metrics.cyclomatic, - metrics.maxNesting, - metrics.loc.loc, - metrics.loc.sloc, - metrics.loc.commentLines, - h ? h.n1 : 0, - h ? h.n2 : 0, - h ? h.bigN1 : 0, - h ? h.bigN2 : 0, - h ? h.vocabulary : 0, - h ? h.length : 0, - h ? h.volume : 0, - h ? h.difficulty : 0, - h ? h.effort : 0, - h ? 
h.bugs : 0, - metrics.mi, - ); - analyzed++; } } }); From cb822587f285542e09fa4d9d0a4486ece5d01683 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:05 -0600 Subject: [PATCH 15/37] refactor: decompose buildStructure into traversal, cohesion, and classification Impact: 8 functions changed, 3 affected --- src/features/structure.js | 208 ++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 97 deletions(-) diff --git a/src/features/structure.js b/src/features/structure.js index 4ba9ee0a..7f582076 100644 --- a/src/features/structure.js +++ b/src/features/structure.js @@ -5,73 +5,41 @@ import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; -// ─── Build-time: insert directory nodes, contains edges, and metrics ──── +// ─── Build-time helpers ─────────────────────────────────────────────── -/** - * Build directory structure nodes, containment edges, and compute metrics. - * Called from builder.js after edge building. - * - * @param {import('better-sqlite3').Database} db - Open read-write database - * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } - * @param {string} rootDir - Absolute root directory - * @param {Map} lineCountMap - Map of relPath → line count - * @param {Set} directories - Set of relative directory paths - */ -export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { - const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', - ); - const getNodeIdStmt = { - get: (name, kind, file, line) => { - const id = getNodeId(db, name, kind, file, line); - return id != null ? 
{ id } : undefined; - }, - }; - const insertEdge = db.prepare( - 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', - ); - const upsertMetric = db.prepare(` - INSERT OR REPLACE INTO node_metrics - (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - - const isIncremental = changedFiles != null && changedFiles.length > 0; +function getAncestorDirs(filePaths) { + const dirs = new Set(); + for (const f of filePaths) { + let d = normalizePath(path.dirname(f)); + while (d && d !== '.') { + dirs.add(d); + d = normalizePath(path.dirname(d)); + } + } + return dirs; +} +function cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles) { if (isIncremental) { - // Incremental: only clean up data for changed files and their ancestor directories - const affectedDirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - affectedDirs.add(d); - d = normalizePath(path.dirname(d)); - } - } + const affectedDirs = getAncestorDirs(changedFiles); const deleteContainsForDir = db.prepare( "DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE name = ? 
AND kind = 'directory')", ); const deleteMetricForNode = db.prepare('DELETE FROM node_metrics WHERE node_id = ?'); db.transaction(() => { - // Delete contains edges only from affected directories for (const dir of affectedDirs) { deleteContainsForDir.run(dir); } - // Delete metrics for changed files for (const f of changedFiles) { const fileRow = getNodeIdStmt.get(f, 'file', f, 0); if (fileRow) deleteMetricForNode.run(fileRow.id); } - // Delete metrics for affected directories for (const dir of affectedDirs) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (dirRow) deleteMetricForNode.run(dirRow.id); } })(); } else { - // Full rebuild: clean previous directory nodes/edges (idempotent) - // Scope contains-edge delete to directory-sourced edges only, - // preserving symbol-level contains edges (file→def, class→method, etc.) db.exec(` DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); @@ -79,8 +47,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director DELETE FROM nodes WHERE kind = 'directory'; `); } +} - // Step 1: Ensure all directories are represented (including intermediate parents) +function collectAllDirectories(directories, fileSymbols) { const allDirs = new Set(); for (const dir of directories) { let d = dir; @@ -89,7 +58,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } - // Also add dirs derived from file paths for (const relPath of fileSymbols.keys()) { let d = normalizePath(path.dirname(relPath)); while (d && d !== '.') { @@ -97,37 +65,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } + return allDirs; +} - // Step 2: Insert directory nodes (INSERT OR IGNORE — safe for incremental) - const insertDirs = db.transaction(() => { - for (const dir of allDirs) { - insertNode.run(dir, 'directory', dir, 0, null); 
- } - }); - insertDirs(); - - // Step 3: Insert 'contains' edges (dir → file, dir → subdirectory) - // On incremental, only re-insert for affected directories (others are intact) - const affectedDirs = isIncremental - ? (() => { - const dirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - dirs.add(d); - d = normalizePath(path.dirname(d)); - } - } - return dirs; - })() - : null; +function insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles) { + const isIncremental = changedFiles != null && changedFiles.length > 0; + const affectedDirs = isIncremental ? getAncestorDirs(changedFiles) : null; - const insertContains = db.transaction(() => { - // dir → file + db.transaction(() => { for (const relPath of fileSymbols.keys()) { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') continue; - // On incremental, skip dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(dir)) continue; const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); @@ -135,11 +83,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(dirRow.id, fileRow.id, 'contains', 1.0, 0); } } - // dir → subdirectory for (const dir of allDirs) { const parent = normalizePath(path.dirname(dir)); if (!parent || parent === '.' 
|| parent === dir) continue; - // On incremental, skip parent dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(parent)) continue; const parentRow = getNodeIdStmt.get(parent, 'directory', parent, 0); const childRow = getNodeIdStmt.get(dir, 'directory', dir, 0); @@ -147,11 +93,10 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(parentRow.id, childRow.id, 'contains', 1.0, 0); } } - }); - insertContains(); + })(); +} - // Step 4: Compute per-file metrics - // Pre-compute fan-in/fan-out per file from import edges +function computeImportEdgeMaps(db) { const fanInMap = new Map(); const fanOutMap = new Map(); const importEdges = db @@ -169,14 +114,24 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fanOutMap.set(source_file, (fanOutMap.get(source_file) || 0) + 1); fanInMap.set(target_file, (fanInMap.get(target_file) || 0) + 1); } + return { fanInMap, fanOutMap, importEdges }; +} - const computeFileMetrics = db.transaction(() => { +function computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, +) { + db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); if (!fileRow) continue; const lineCount = lineCountMap.get(relPath) || 0; - // Deduplicate definitions by name+kind+line const seen = new Set(); let symbolCount = 0; for (const d of symbols.definitions) { @@ -203,11 +158,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director null, ); } - }); - computeFileMetrics(); + })(); +} - // Step 5: Compute per-directory metrics - // Build a map of dir → descendant files +function computeDirectoryMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + allDirs, + importEdges, +) { const dirFiles = new Map(); for (const dir of allDirs) { dirFiles.set(dir, []); @@ -222,7 +183,6 @@ export function 
buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Build reverse index: file → set of ancestor directories (O(files × depth)) const fileToAncestorDirs = new Map(); for (const [dir, files] of dirFiles) { for (const f of files) { @@ -231,7 +191,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Single O(E) pass: pre-aggregate edge counts per directory const dirEdgeCounts = new Map(); for (const dir of allDirs) { dirEdgeCounts.set(dir, { intra: 0, fanIn: 0, fanOut: 0 }); @@ -241,7 +200,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director const tgtDirs = fileToAncestorDirs.get(target_file); if (!srcDirs && !tgtDirs) continue; - // For each directory that contains the source file if (srcDirs) { for (const dir of srcDirs) { const counts = dirEdgeCounts.get(dir); @@ -253,10 +211,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } } - // For each directory that contains the target but NOT the source if (tgtDirs) { for (const dir of tgtDirs) { - if (srcDirs?.has(dir)) continue; // already counted as intra + if (srcDirs?.has(dir)) continue; const counts = dirEdgeCounts.get(dir); if (!counts) continue; counts.fanIn++; @@ -264,7 +221,7 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - const computeDirMetrics = db.transaction(() => { + db.transaction(() => { for (const [dir, files] of dirFiles) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (!dirRow) continue; @@ -286,7 +243,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // O(1) lookup from pre-aggregated edge counts const counts = dirEdgeCounts.get(dir) || { intra: 0, fanIn: 0, fanOut: 0 }; const totalEdges = counts.intra + counts.fanIn + counts.fanOut; const cohesion = totalEdges > 0 ? 
counts.intra / totalEdges : null; @@ -303,11 +259,69 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fileCount, ); } - }); - computeDirMetrics(); + })(); +} + +// ─── Build-time: insert directory nodes, contains edges, and metrics ──── + +/** + * Build directory structure nodes, containment edges, and compute metrics. + * Called from builder.js after edge building. + * + * @param {import('better-sqlite3').Database} db - Open read-write database + * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } + * @param {string} rootDir - Absolute root directory + * @param {Map} lineCountMap - Map of relPath → line count + * @param {Set} directories - Set of relative directory paths + */ +export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { + const insertNode = db.prepare( + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + const getNodeIdStmt = { + get: (name, kind, file, line) => { + const id = getNodeId(db, name, kind, file, line); + return id != null ? { id } : undefined; + }, + }; + const insertEdge = db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', + ); + const upsertMetric = db.prepare(` + INSERT OR REPLACE INTO node_metrics + (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + + const isIncremental = changedFiles != null && changedFiles.length > 0; + + cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles); + + const allDirs = collectAllDirectories(directories, fileSymbols); + + db.transaction(() => { + for (const dir of allDirs) { + insertNode.run(dir, 'directory', dir, 0, null); + } + })(); + + insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles); + + const { fanInMap, fanOutMap, importEdges } = computeImportEdgeMaps(db); + + computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, + ); + + computeDirectoryMetrics(db, upsertMetric, getNodeIdStmt, fileSymbols, allDirs, importEdges); - const dirCount = allDirs.size; - debug(`Structure: ${dirCount} directories, ${fileSymbols.size} files with metrics`); + debug(`Structure: ${allDirs.size} directories, ${fileSymbols.size} files with metrics`); } // ─── Node role classification ───────────────────────────────────────── From 54b0067b0e27e9ec97ec0270b1cd34516c3c319c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:16 -0600 Subject: [PATCH 16/37] refactor: decompose buildCFGData and buildDataflowEdges Impact: 10 functions changed, 5 affected --- src/features/cfg.js | 246 ++++++++++++++++++++------------------- src/features/dataflow.js | 240 +++++++++++++++++++------------------- 2 files changed, 244 insertions(+), 242 deletions(-) diff --git a/src/features/cfg.js b/src/features/cfg.js index ae1b8564..3f029274 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -68,30 +68,15 @@ export function buildFunctionCFG(functionNode, langId) { return { blocks: r.blocks, edges: r.edges, cyclomatic: r.cyclomatic }; } -// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── +// ─── Build-Time Helpers ───────────────────────────────────────────────── -/** - * Build CFG data for all function/method 
definitions and persist to DB. - * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initCfgParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. - const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { const ext = path.extname(relPath).toLowerCase(); if (CFG_EXTENSIONS.has(ext)) { - // Check if all function/method defs already have native CFG data const hasNativeCfg = symbols.definitions .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) .every((d) => d.cfg === null || d.cfg?.blocks?.length); @@ -103,18 +88,131 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } - // findFunctionNode imported from ./ast-analysis/shared.js at module level + return { parsers, getParserFn }; +} + +function getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + const ext = path.extname(relPath).toLowerCase(); + let tree = symbols._tree; + let langId = symbols._langId; + + const allNative = symbols.definitions + .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) + .every((d) => d.cfg === null || d.cfg?.blocks?.length); + + if (!tree && !allNative) { + if 
(!getParserFn) return null; + langId = extToLang.get(ext); + if (!langId || !CFG_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`cfg: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`cfg: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + langId = extToLang.get(ext); + if (!langId) return null; + } + + return { tree, langId }; +} + +function buildVisitorCfgMap(tree, cfgRules, symbols, langId) { + const needsVisitor = + tree && + symbols.definitions.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !d.cfg?.blocks?.length, + ); + if (!needsVisitor) return null; + + const visitor = createCfgVisitor(cfgRules); + const walkerOpts = { + functionNodeTypes: new Set(cfgRules.functionNodes), + nestingNodeTypes: new Set(), + getFunctionName: (node) => { + const nameNode = node.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + }, + }; + const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); + const cfgResults = walkResults.cfg || []; + const visitorCfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); + visitorCfgByLine.get(line).push(r); + } + } + return visitorCfgByLine; +} + +function persistCfg(cfg, nodeId, insertBlock, insertEdge) { + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + nodeId, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); + } + } +} + +// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── + +/** + * Build CFG data for all function/method definitions and persist to DB. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initCfgParsers(fileSymbols); const insertBlock = db.prepare( `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) @@ -131,81 +229,14 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const ext = path.extname(relPath).toLowerCase(); if (!CFG_EXTENSIONS.has(ext)) continue; - let tree = symbols._tree; - let langId = symbols._langId; - - // Check if all defs already have native CFG — skip WASM parse if so - const allNative = symbols.definitions - .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) - .every((d) => d.cfg === null || d.cfg?.blocks?.length); - - // WASM fallback if no cached tree and not all native - if (!tree && !allNative) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !CFG_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`cfg: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`cfg: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } + const treeLang = getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!treeLang) continue; + const { tree, langId } = treeLang; const cfgRules = CFG_RULES.get(langId); if (!cfgRules) continue; - // WASM fallback: run 
file-level visitor walk to compute CFG for all functions - // that don't already have pre-computed data (from native engine or unified walk) - let visitorCfgByLine = null; - const needsVisitor = - tree && - symbols.definitions.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !d.cfg?.blocks?.length, - ); - if (needsVisitor) { - const visitor = createCfgVisitor(cfgRules); - const walkerOpts = { - functionNodeTypes: new Set(cfgRules.functionNodes), - nestingNodeTypes: new Set(), - getFunctionName: (node) => { - const nameNode = node.childForFieldName('name'); - return nameNode ? nameNode.text : null; - }, - }; - const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); - const cfgResults = walkResults.cfg || []; - visitorCfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); - visitorCfgByLine.get(line).push(r); - } - } - } + const visitorCfgByLine = buildVisitorCfgMap(tree, cfgRules, symbols, langId); for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; @@ -214,7 +245,6 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); if (!nodeId) continue; - // Use pre-computed CFG (native engine or unified walk), then visitor fallback let cfg = null; if (def.cfg?.blocks?.length) { cfg = def.cfg; @@ -233,36 +263,10 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { if (!cfg || cfg.blocks.length === 0) continue; - // Clear old CFG data for this function deleteCfgForNode(db, nodeId); - - // Insert blocks and build index→dbId mapping - const blockDbIds = new Map(); - for (const block of cfg.blocks) { - const result = insertBlock.run( - nodeId, - block.index, - block.type, - block.startLine, - 
block.endLine, - block.label, - ); - blockDbIds.set(block.index, result.lastInsertRowid); - } - - // Insert edges - for (const edge of cfg.edges) { - const sourceDbId = blockDbIds.get(edge.sourceIndex); - const targetDbId = blockDbIds.get(edge.targetIndex); - if (sourceDbId && targetDbId) { - insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); - } - } - + persistCfg(cfg, nodeId, insertBlock, insertEdge); analyzed++; } - - // Don't release _tree here — complexity/dataflow may still need it } }); diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 695afa95..2dee25b6 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -58,26 +58,11 @@ export function extractDataflow(tree, _filePath, _definitions, langId = 'javascr return results.dataflow; } -// ── buildDataflowEdges ────────────────────────────────────────────────────── +// ── Build-Time Helpers ────────────────────────────────────────────────────── -/** - * Build dataflow edges and insert them into the database. - * Called during graph build when --dataflow is enabled. - * - * @param {object} db - better-sqlite3 database instance - * @param {Map} fileSymbols - map of relPath → symbols - * @param {string} rootDir - absolute root directory - * @param {object} engineOpts - engine options - */ -export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initDataflowParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. 
- const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree && !symbols.dataflow) { const ext = path.extname(relPath).toLowerCase(); @@ -88,25 +73,130 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } + return { parsers, getParserFn }; +} + +function getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + if (symbols.dataflow) return symbols.dataflow; + + let tree = symbols._tree; + let langId = symbols._langId; + + if (!tree) { + if (!getParserFn) return null; + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId || !DATAFLOW_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`dataflow: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`dataflow: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId) return null; + } + + if (!DATAFLOW_RULES.has(langId)) return null; + + return extractDataflow(tree, relPath, symbols.definitions, langId); +} + +function insertDataflowEdges(insert, data, resolveNode) { + let edgeCount = 0; + + for (const flow of data.argFlows) { + const sourceNode = resolveNode(flow.callerFunc); + const targetNode = resolveNode(flow.calleeName); + if (sourceNode && targetNode) { + insert.run( + sourceNode.id, + 
targetNode.id, + 'flows_to', + flow.argIndex, + flow.expression, + flow.line, + flow.confidence, + ); + edgeCount++; + } + } + + for (const assignment of data.assignments) { + const producerNode = resolveNode(assignment.sourceCallName); + const consumerNode = resolveNode(assignment.callerFunc); + if (producerNode && consumerNode) { + insert.run( + producerNode.id, + consumerNode.id, + 'returns', + null, + assignment.expression, + assignment.line, + 1.0, + ); + edgeCount++; + } + } + + for (const mut of data.mutations) { + const mutatorNode = resolveNode(mut.funcName); + if (mutatorNode && mut.binding?.type === 'param') { + insert.run(mutatorNode.id, mutatorNode.id, 'mutates', null, mut.mutatingExpr, mut.line, 1.0); + edgeCount++; + } + } + + return edgeCount; +} + +// ── buildDataflowEdges ────────────────────────────────────────────────────── + +/** + * Build dataflow edges and insert them into the database. + * Called during graph build when --dataflow is enabled. + * + * @param {object} db - better-sqlite3 database instance + * @param {Map} fileSymbols - map of relPath → symbols + * @param {string} rootDir - absolute root directory + * @param {object} engineOpts - engine options + */ +export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initDataflowParsers(fileSymbols); + const insert = db.prepare( `INSERT INTO dataflow (source_id, target_id, kind, param_index, expression, line, confidence) VALUES (?, ?, ?, ?, ?, ?, ?)`, ); - // MVP scope: only resolve function/method nodes for dataflow edges. - // Future expansion: add 'parameter', 'property', 'constant' kinds to track - // data flow through property accessors or constant references. const getNodeByNameAndFile = db.prepare( `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND file = ? 
AND kind IN ('function', 'method')`, @@ -125,109 +215,17 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) const ext = path.extname(relPath).toLowerCase(); if (!DATAFLOW_EXTENSIONS.has(ext)) continue; - // Use native dataflow data if available — skip WASM extraction - let data = symbols.dataflow; - if (!data) { - let tree = symbols._tree; - let langId = symbols._langId; - - // WASM fallback if no cached tree - if (!tree) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !DATAFLOW_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`dataflow: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`dataflow: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } - - if (!DATAFLOW_RULES.has(langId)) continue; - - data = extractDataflow(tree, relPath, symbols.definitions, langId); - } + const data = getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!data) continue; - // Resolve function names to node IDs in this file first, then globally - function resolveNode(funcName) { + const resolveNode = (funcName) => { const local = getNodeByNameAndFile.all(funcName, relPath); if (local.length > 0) return local[0]; const global = getNodeByName.all(funcName); return global.length > 0 ? 
global[0] : null; - } - - // flows_to: parameter/variable passed as argument to another function - for (const flow of data.argFlows) { - const sourceNode = resolveNode(flow.callerFunc); - const targetNode = resolveNode(flow.calleeName); - if (sourceNode && targetNode) { - insert.run( - sourceNode.id, - targetNode.id, - 'flows_to', - flow.argIndex, - flow.expression, - flow.line, - flow.confidence, - ); - totalEdges++; - } - } - - // returns: call return value captured in caller - for (const assignment of data.assignments) { - const producerNode = resolveNode(assignment.sourceCallName); - const consumerNode = resolveNode(assignment.callerFunc); - if (producerNode && consumerNode) { - insert.run( - producerNode.id, - consumerNode.id, - 'returns', - null, - assignment.expression, - assignment.line, - 1.0, - ); - totalEdges++; - } - } + }; - // mutates: parameter-derived value is mutated - for (const mut of data.mutations) { - const mutatorNode = resolveNode(mut.funcName); - if (mutatorNode && mut.binding?.type === 'param') { - // The mutation in this function affects the parameter source - insert.run( - mutatorNode.id, - mutatorNode.id, - 'mutates', - null, - mut.mutatingExpr, - mut.line, - 1.0, - ); - totalEdges++; - } - } + totalEdges += insertDataflowEdges(insert, data, resolveNode); } }); From 7030e7f369312ce9bb0cb3e164bc1b78023f3026 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:21 -0600 Subject: [PATCH 17/37] refactor: decompose sequenceData into BFS and message construction Impact: 5 functions changed, 2 affected --- src/features/sequence.js | 293 ++++++++++++++++++++------------------- 1 file changed, 152 insertions(+), 141 deletions(-) diff --git a/src/features/sequence.js b/src/features/sequence.js index 271d2ea2..cf59ddc3 100644 --- a/src/features/sequence.js +++ b/src/features/sequence.js @@ -68,6 +68,148 @@ function buildAliases(files) { return aliases; } +// ─── Helpers 
───────────────────────────────────────────────────────── + +function findEntryNode(repo, name, opts) { + let matchNode = findMatchingNodes(repo, name, opts)[0] ?? null; + if (!matchNode) { + for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { + matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; + if (matchNode) break; + } + } + return matchNode; +} + +function bfsCallees(repo, matchNode, maxDepth, noTests) { + const visited = new Set([matchNode.id]); + let frontier = [matchNode.id]; + const messages = []; + const fileSet = new Set([matchNode.file]); + const idToNode = new Map(); + idToNode.set(matchNode.id, matchNode); + let truncated = false; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier = []; + + for (const fid of frontier) { + const callees = repo.findCallees(fid); + const caller = idToNode.get(fid); + + for (const c of callees) { + if (noTests && isTestFile(c.file)) continue; + + fileSet.add(c.file); + messages.push({ + from: caller.file, + to: c.file, + label: c.name, + type: 'call', + depth: d, + }); + + if (visited.has(c.id)) continue; + + visited.add(c.id); + nextFrontier.push(c.id); + idToNode.set(c.id, c); + } + } + + frontier = nextFrontier; + if (frontier.length === 0) break; + + if (d === maxDepth && frontier.length > 0) { + const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); + if (hasMoreCalls) truncated = true; + } + } + + return { messages, fileSet, idToNode, truncated }; +} + +function annotateDataflow(repo, messages, idToNode) { + const hasTable = repo.hasDataflowTable(); + + if (!hasTable || !(repo instanceof SqliteRepository)) return; + + const db = repo.db; + const nodeByNameFile = new Map(); + for (const n of idToNode.values()) { + nodeByNameFile.set(`${n.name}|${n.file}`, n); + } + + const getReturns = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.source_id = ? 
AND d.kind = 'returns'`, + ); + const getFlowsTo = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.target_id = ? AND d.kind = 'flows_to' + ORDER BY d.param_index`, + ); + + const seenReturns = new Set(); + for (const msg of [...messages]) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const returnKey = `${msg.to}->${msg.from}:${msg.label}`; + if (seenReturns.has(returnKey)) continue; + + const returns = getReturns.all(targetNode.id); + + if (returns.length > 0) { + seenReturns.add(returnKey); + const expr = returns[0].expression || 'result'; + messages.push({ + from: msg.to, + to: msg.from, + label: expr, + type: 'return', + depth: msg.depth, + }); + } + } + + for (const msg of messages) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const params = getFlowsTo.all(targetNode.id); + + if (params.length > 0) { + const paramNames = params + .map((p) => p.expression) + .filter(Boolean) + .slice(0, 3); + if (paramNames.length > 0) { + msg.label = `${msg.label}(${paramNames.join(', ')})`; + } + } + } +} + +function buildParticipants(fileSet, entryFile) { + const aliases = buildAliases([...fileSet]); + const participants = [...fileSet].map((file) => ({ + id: aliases.get(file), + label: file.split('/').pop(), + file, + })); + + participants.sort((a, b) => { + if (a.file === entryFile) return -1; + if (b.file === entryFile) return 1; + return a.file.localeCompare(b.file); + }); + + return { participants, aliases }; +} + // ─── Core data function ────────────────────────────────────────────── /** @@ -90,19 +232,8 @@ export function sequenceData(name, dbPath, opts = {}) { try { const maxDepth = opts.depth || 10; const noTests = opts.noTests || false; - const withDataflow = opts.dataflow || false; - - // Phase 1: Direct LIKE match - let matchNode = findMatchingNodes(repo, name, 
opts)[0] ?? null; - - // Phase 2: Prefix-stripped matching - if (!matchNode) { - for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { - matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; - if (matchNode) break; - } - } + const matchNode = findEntryNode(repo, name, opts); if (!matchNode) { return { entry: null, @@ -121,123 +252,17 @@ export function sequenceData(name, dbPath, opts = {}) { line: matchNode.line, }; - // BFS forward — track edges, not just nodes - const visited = new Set([matchNode.id]); - let frontier = [matchNode.id]; - const messages = []; - const fileSet = new Set([matchNode.file]); - const idToNode = new Map(); - idToNode.set(matchNode.id, matchNode); - let truncated = false; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier = []; - - for (const fid of frontier) { - const callees = repo.findCallees(fid); - - const caller = idToNode.get(fid); - - for (const c of callees) { - if (noTests && isTestFile(c.file)) continue; - - // Always record the message (even for visited nodes — different caller path) - fileSet.add(c.file); - messages.push({ - from: caller.file, - to: c.file, - label: c.name, - type: 'call', - depth: d, - }); - - if (visited.has(c.id)) continue; - - visited.add(c.id); - nextFrontier.push(c.id); - idToNode.set(c.id, c); - } - } - - frontier = nextFrontier; - if (frontier.length === 0) break; - - if (d === maxDepth && frontier.length > 0) { - // Only mark truncated if at least one frontier node has further callees - const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); - if (hasMoreCalls) truncated = true; - } - } - - // Dataflow annotations: add return arrows - if (withDataflow && messages.length > 0) { - const hasTable = repo.hasDataflowTable(); - - if (hasTable && repo instanceof SqliteRepository) { - const db = repo.db; - // Build name|file lookup for O(1) target node access - const nodeByNameFile = new Map(); - for (const n of idToNode.values()) { - 
nodeByNameFile.set(`${n.name}|${n.file}`, n); - } - - const getReturns = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.source_id = ? AND d.kind = 'returns'`, - ); - const getFlowsTo = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.target_id = ? AND d.kind = 'flows_to' - ORDER BY d.param_index`, - ); - - // For each called function, check if it has return edges - const seenReturns = new Set(); - for (const msg of [...messages]) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const returnKey = `${msg.to}->${msg.from}:${msg.label}`; - if (seenReturns.has(returnKey)) continue; - - const returns = getReturns.all(targetNode.id); - - if (returns.length > 0) { - seenReturns.add(returnKey); - const expr = returns[0].expression || 'result'; - messages.push({ - from: msg.to, - to: msg.from, - label: expr, - type: 'return', - depth: msg.depth, - }); - } - } + const { messages, fileSet, idToNode, truncated } = bfsCallees( + repo, + matchNode, + maxDepth, + noTests, + ); - // Annotate call messages with parameter names - for (const msg of messages) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const params = getFlowsTo.all(targetNode.id); - - if (params.length > 0) { - const paramNames = params - .map((p) => p.expression) - .filter(Boolean) - .slice(0, 3); - if (paramNames.length > 0) { - msg.label = `${msg.label}(${paramNames.join(', ')})`; - } - } - } - } + if (opts.dataflow && messages.length > 0) { + annotateDataflow(repo, messages, idToNode); } - // Sort messages by depth, then call before return messages.sort((a, b) => { if (a.depth !== b.depth) return a.depth - b.depth; if (a.type === 'call' && b.type === 'return') return -1; @@ -245,22 +270,8 @@ export function sequenceData(name, dbPath, opts = {}) { return 0; }); - // Build participant list from files - const 
aliases = buildAliases([...fileSet]); - const participants = [...fileSet].map((file) => ({ - id: aliases.get(file), - label: file.split('/').pop(), - file, - })); - - // Sort participants: entry file first, then alphabetically - participants.sort((a, b) => { - if (a.file === entry.file) return -1; - if (b.file === entry.file) return 1; - return a.file.localeCompare(b.file); - }); + const { participants, aliases } = buildParticipants(fileSet, entry.file); - // Replace file paths with alias IDs in messages for (const msg of messages) { msg.from = aliases.get(msg.from); msg.to = aliases.get(msg.to); From b4d8a0dc642a7eefc5df717644bd4fdaf28f02a1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:38 -0600 Subject: [PATCH 18/37] refactor: decompose explain() into section renderers Impact: 5 functions changed, 2 affected --- src/presentation/queries-cli/inspect.js | 373 ++++++++++++------------ 1 file changed, 184 insertions(+), 189 deletions(-) diff --git a/src/presentation/queries-cli/inspect.js b/src/presentation/queries-cli/inspect.js index 5a3ddcb7..59b85d63 100644 --- a/src/presentation/queries-cli/inspect.js +++ b/src/presentation/queries-cli/inspect.js @@ -96,96 +96,7 @@ export function context(name, customDbPath, opts = {}) { } for (const r of data.results) { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const roleTag = r.role ? 
` [${r.role}]` : ''; - console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - - // Signature - if (r.signature) { - console.log('## Type/Shape Info'); - if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); - console.log(); - } - - // Children - if (r.children && r.children.length > 0) { - console.log(`## Children (${r.children.length})`); - for (const c of r.children) { - console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); - } - console.log(); - } - - // Complexity - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; - console.log('## Complexity'); - console.log( - ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, - ); - console.log(); - } - - // Source - if (r.source) { - console.log('## Source'); - for (const line of r.source.split('\n')) { - console.log(` ${line}`); - } - console.log(); - } - - // Callees - if (r.callees.length > 0) { - console.log(`## Direct Dependencies (${r.callees.length})`); - for (const c of r.callees) { - const summary = c.summary ? ` — ${c.summary}` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); - if (c.source) { - for (const line of c.source.split('\n').slice(0, 10)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - // Callers - if (r.callers.length > 0) { - console.log(`## Callers (${r.callers.length})`); - for (const c of r.callers) { - const via = c.viaHierarchy ? 
` (via ${c.viaHierarchy})` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); - } - console.log(); - } - - // Related tests - if (r.relatedTests.length > 0) { - console.log('## Related Tests'); - for (const t of r.relatedTests) { - console.log(` ${t.file} — ${t.testCount} tests`); - for (const tn of t.testNames) { - console.log(` - ${tn}`); - } - if (t.source) { - console.log(' Source:'); - for (const line of t.source.split('\n').slice(0, 20)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { - console.log( - ' (no call edges or tests found — may be invoked dynamically or via re-exports)', - ); - console.log(); - } + renderContextResult(r); } } @@ -209,126 +120,210 @@ export function children(name, customDbPath, opts = {}) { } } -export function explain(target, customDbPath, opts = {}) { - const data = explainData(target, customDbPath, opts); - if (outputResult(data, 'results', opts)) return; +function renderContextResult(r) { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const roleTag = r.role ? ` [${r.role}]` : ''; + console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - if (data.results.length === 0) { - console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); - return; + if (r.signature) { + console.log('## Type/Shape Info'); + if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); + console.log(); } - if (data.kind === 'file') { - for (const r of data.results) { - const publicCount = r.publicApi.length; - const internalCount = r.internal.length; - const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; - console.log(`\n# ${r.file}`); - console.log( - ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, - ); + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } - if (r.imports.length > 0) { - console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); - } - if (r.importedBy.length > 0) { - console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); - } + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; + console.log('## Complexity'); + console.log( + ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, + ); + console.log(); + } - if (r.publicApi.length > 0) { - console.log(`\n## Exported`); - for (const s of r.publicApi) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); - } - } + if (r.source) { + console.log('## Source'); + for (const line of r.source.split('\n')) { + console.log(` ${line}`); + } + console.log(); + } - if (r.internal.length > 0) { - console.log(`\n## Internal`); - for (const s of r.internal) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + if (r.callees.length > 0) { + console.log(`## Direct Dependencies (${r.callees.length})`); + for (const c of r.callees) { + const summary = c.summary ? 
` — ${c.summary}` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); + if (c.source) { + for (const line of c.source.split('\n').slice(0, 10)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.dataFlow.length > 0) { - console.log(`\n## Data Flow`); - for (const df of r.dataFlow) { - console.log(` ${df.caller} -> ${df.callees.join(', ')}`); - } - } - console.log(); + if (r.callers.length > 0) { + console.log(`## Callers (${r.callers.length})`); + for (const c of r.callers) { + const via = c.viaHierarchy ? ` (via ${c.viaHierarchy})` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); } - } else { - function printFunctionExplain(r, indent = '') { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; - const summaryPart = r.summary ? ` | ${r.summary}` : ''; - const roleTag = r.role ? ` [${r.role}]` : ''; - const depthLevel = r._depth || 0; - const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); - console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); - if (lineInfo || r.summary) { - console.log(`${indent} ${lineInfo}${summaryPart}`); - } - if (r.signature) { - if (r.signature.params != null) - console.log(`${indent} Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); - } + console.log(); + } - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? 
` MI=${cx.maintainabilityIndex}` : ''; - console.log( - `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, - ); + if (r.relatedTests.length > 0) { + console.log('## Related Tests'); + for (const t of r.relatedTests) { + console.log(` ${t.file} — ${t.testCount} tests`); + for (const tn of t.testNames) { + console.log(` - ${tn}`); } - - if (r.callees.length > 0) { - console.log(`\n${indent} Calls (${r.callees.length}):`); - for (const c of r.callees) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + if (t.source) { + console.log(' Source:'); + for (const line of t.source.split('\n').slice(0, 20)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.callers.length > 0) { - console.log(`\n${indent} Called by (${r.callers.length}):`); - for (const c of r.callers) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } + if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { + console.log(' (no call edges or tests found — may be invoked dynamically or via re-exports)'); + console.log(); + } +} - if (r.relatedTests.length > 0) { - const label = r.relatedTests.length === 1 ? 'file' : 'files'; - console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); - for (const t of r.relatedTests) { - console.log(`${indent} ${t.file}`); - } - } +function renderFileExplain(r) { + const publicCount = r.publicApi.length; + const internalCount = r.internal.length; + const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; + console.log(`\n# ${r.file}`); + console.log( + ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, + ); + + if (r.imports.length > 0) { + console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); + } + if (r.importedBy.length > 0) { + console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); + } - if (r.callees.length === 0 && r.callers.length === 0) { - console.log( - `${indent} (no call edges found -- may be invoked dynamically or via re-exports)`, - ); - } + if (r.publicApi.length > 0) { + console.log(`\n## Exported`); + for (const s of r.publicApi) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } - // Render recursive dependency details - if (r.depDetails && r.depDetails.length > 0) { - console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); - for (const dep of r.depDetails) { - printFunctionExplain(dep, `${indent} `); - } - } - console.log(); + if (r.internal.length > 0) { + console.log(`\n## Internal`); + for (const s of r.internal) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } + + if (r.dataFlow.length > 0) { + console.log(`\n## Data Flow`); + for (const df of r.dataFlow) { + console.log(` ${df.caller} -> ${df.callees.join(', ')}`); + } + } + console.log(); +} + +function renderFunctionExplain(r, indent = '') { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; + const summaryPart = r.summary ? 
` | ${r.summary}` : ''; + const roleTag = r.role ? ` [${r.role}]` : ''; + const depthLevel = r._depth || 0; + const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); + console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); + if (lineInfo || r.summary) { + console.log(`${indent} ${lineInfo}${summaryPart}`); + } + if (r.signature) { + if (r.signature.params != null) console.log(`${indent} Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); + } + + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` MI=${cx.maintainabilityIndex}` : ''; + console.log( + `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, + ); + } + + if (r.callees.length > 0) { + console.log(`\n${indent} Calls (${r.callees.length}):`); + for (const c of r.callees) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.callers.length > 0) { + console.log(`\n${indent} Called by (${r.callers.length}):`); + for (const c of r.callers) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.relatedTests.length > 0) { + const label = r.relatedTests.length === 1 ? 
'file' : 'files'; + console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); + for (const t of r.relatedTests) { + console.log(`${indent} ${t.file}`); } + } + + if (r.callees.length === 0 && r.callers.length === 0) { + console.log(`${indent} (no call edges found -- may be invoked dynamically or via re-exports)`); + } + if (r.depDetails && r.depDetails.length > 0) { + console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); + for (const dep of r.depDetails) { + renderFunctionExplain(dep, `${indent} `); + } + } + console.log(); +} + +export function explain(target, customDbPath, opts = {}) { + const data = explainData(target, customDbPath, opts); + if (outputResult(data, 'results', opts)) return; + + if (data.results.length === 0) { + console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); + return; + } + + if (data.kind === 'file') { + for (const r of data.results) { + renderFileExplain(r); + } + } else { for (const r of data.results) { - printFunctionExplain(r); + renderFunctionExplain(r); } } } From ae805d5fc65a45b36532cea6b50ab3fa67f27c0f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:44 -0600 Subject: [PATCH 19/37] refactor: decompose stats() into section printers Impact: 12 functions changed, 6 affected --- src/presentation/queries-cli/overview.js | 115 +++++++++++------------ 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 88409da2..29a4f6e9 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -2,64 +2,42 @@ import path from 'node:path'; import { kindIcon, moduleMapData, rolesData, statsData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; -export async function stats(customDbPath, opts = {}) { - const data = 
statsData(customDbPath, { noTests: opts.noTests }); - - // Community detection summary (async import for lazy-loading) - try { - const { communitySummaryForStats } = await import('../../features/communities.js'); - data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); - } catch { - /* graphology may not be available */ - } - - if (outputResult(data, null, opts)) return; - - // Human-readable output - console.log('\n# Codegraph Stats\n'); - - // Nodes - console.log(`Nodes: ${data.nodes.total} total`); - const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); - const kindParts = kindEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < kindParts.length; i += 3) { - const row = kindParts +function printCountGrid(entries, padWidth) { + const parts = entries.map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < parts.length; i += 3) { + const row = parts .slice(i, i + 3) - .map((p) => p.padEnd(18)) + .map((p) => p.padEnd(padWidth)) .join(''); console.log(` ${row}`); } +} - // Edges +function printNodes(data) { + console.log(`Nodes: ${data.nodes.total} total`); + const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); + printCountGrid(kindEntries, 18); +} + +function printEdges(data) { console.log(`\nEdges: ${data.edges.total} total`); const edgeEntries = Object.entries(data.edges.byKind).sort((a, b) => b[1] - a[1]); - const edgeParts = edgeEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < edgeParts.length; i += 3) { - const row = edgeParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(edgeEntries, 18); +} - // Files +function printFiles(data) { console.log(`\nFiles: ${data.files.total} (${data.files.languages} languages)`); const langEntries = Object.entries(data.files.byLanguage).sort((a, b) => b[1] - a[1]); - const langParts = langEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < langParts.length; i 
+= 3) { - const row = langParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(langEntries, 18); +} - // Cycles +function printCycles(data) { console.log( `\nCycles: ${data.cycles.fileLevel} file-level, ${data.cycles.functionLevel} function-level`, ); +} - // Hotspots +function printHotspots(data) { if (data.hotspots.length > 0) { console.log(`\nTop ${data.hotspots.length} coupling hotspots:`); for (let i = 0; i < data.hotspots.length; i++) { @@ -69,8 +47,9 @@ export async function stats(customDbPath, opts = {}) { ); } } +} - // Embeddings +function printEmbeddings(data) { if (data.embeddings) { const e = data.embeddings; console.log( @@ -79,8 +58,9 @@ export async function stats(customDbPath, opts = {}) { } else { console.log('\nEmbeddings: not built'); } +} - // Quality +function printQuality(data) { if (data.quality) { const q = data.quality; const cc = q.callerCoverage; @@ -99,24 +79,18 @@ export async function stats(customDbPath, opts = {}) { } } } +} - // Roles +function printRoles(data) { if (data.roles && Object.keys(data.roles).length > 0) { const total = Object.values(data.roles).reduce((a, b) => a + b, 0); console.log(`\nRoles: ${total} classified symbols`); - const roleParts = Object.entries(data.roles) - .sort((a, b) => b[1] - a[1]) - .map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < roleParts.length; i += 3) { - const row = roleParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + const roleEntries = Object.entries(data.roles).sort((a, b) => b[1] - a[1]); + printCountGrid(roleEntries, 18); } +} - // Complexity +function printComplexity(data) { if (data.complexity) { const cx = data.complexity; const miPart = cx.avgMI != null ? 
` | avg MI: ${cx.avgMI} | min MI: ${cx.minMI}` : ''; @@ -124,15 +98,40 @@ export async function stats(customDbPath, opts = {}) { `\nComplexity: ${cx.analyzed} functions | avg cognitive: ${cx.avgCognitive} | avg cyclomatic: ${cx.avgCyclomatic} | max cognitive: ${cx.maxCognitive}${miPart}`, ); } +} - // Communities +function printCommunities(data) { if (data.communities) { const cm = data.communities; console.log( `\nCommunities: ${cm.communityCount} detected | modularity: ${cm.modularity} | drift: ${cm.driftScore}%`, ); } +} + +export async function stats(customDbPath, opts = {}) { + const data = statsData(customDbPath, { noTests: opts.noTests }); + + try { + const { communitySummaryForStats } = await import('../../features/communities.js'); + data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); + } catch { + /* graphology may not be available */ + } + if (outputResult(data, null, opts)) return; + + console.log('\n# Codegraph Stats\n'); + printNodes(data); + printEdges(data); + printFiles(data); + printCycles(data); + printHotspots(data); + printEmbeddings(data); + printQuality(data); + printRoles(data); + printComplexity(data); + printCommunities(data); console.log(); } From 3aa2e4b69d20e8798c9bd8b4892812c87d11bce2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:53:11 -0600 Subject: [PATCH 20/37] fix: address quality issues in features (boundaries, communities, triage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract per-section validators from validateBoundaryConfig (cog 101→2). Extract buildCommunityObjects and analyzeDrift from communitiesData (cog 32→4). Extract buildTriageItems and computeTriageSummary from triageData (bugs 1.4→0.48). 
Impact: 13 functions changed, 11 affected --- src/features/boundaries.js | 181 +++++++++++++++++++----------------- src/features/communities.js | 121 +++++++++++++----------- src/features/triage.js | 151 ++++++++++++++++-------------- 3 files changed, 249 insertions(+), 204 deletions(-) diff --git a/src/features/boundaries.js b/src/features/boundaries.js index 7a357ebd..536dbafa 100644 --- a/src/features/boundaries.js +++ b/src/features/boundaries.js @@ -94,104 +94,119 @@ export function resolveModules(boundaryConfig) { // ─── Validation ────────────────────────────────────────────────────── /** - * Validate a boundary configuration object. - * @param {object} config - The `manifesto.boundaries` config - * @returns {{ valid: boolean, errors: string[] }} + * Validate the `modules` section of a boundary config. + * @param {object} modules + * @param {string[]} errors - Mutated: push any validation errors */ -export function validateBoundaryConfig(config) { - const errors = []; +function validateModules(modules, errors) { + if (!modules || typeof modules !== 'object' || Object.keys(modules).length === 0) { + errors.push('boundaries.modules must be a non-empty object'); + return; + } + for (const [name, value] of Object.entries(modules)) { + if (typeof value === 'string') continue; + if (value && typeof value === 'object' && typeof value.match === 'string') continue; + errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); + } +} - if (!config || typeof config !== 'object') { - return { valid: false, errors: ['boundaries config must be an object'] }; +/** + * Validate the `preset` field of a boundary config. 
+ * @param {string|null|undefined} preset + * @param {string[]} errors - Mutated: push any validation errors + */ +function validatePreset(preset, errors) { + if (preset == null) return; + if (typeof preset !== 'string' || !PRESETS[preset]) { + errors.push( + `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${preset}")`, + ); } +} - // Validate modules - if ( - !config.modules || - typeof config.modules !== 'object' || - Object.keys(config.modules).length === 0 - ) { - errors.push('boundaries.modules must be a non-empty object'); - } else { - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'string') continue; - if (value && typeof value === 'object' && typeof value.match === 'string') continue; - errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); +/** + * Validate a single rule's target list (`notTo` or `onlyTo`). + * @param {*} list - The target list value + * @param {string} field - "notTo" or "onlyTo" + * @param {number} idx - Rule index for error messages + * @param {Set} moduleNames + * @param {string[]} errors - Mutated + */ +function validateTargetList(list, field, idx, moduleNames, errors) { + if (!Array.isArray(list)) { + errors.push(`boundaries.rules[${idx}]: "${field}" must be an array`); + return; + } + for (const target of list) { + if (!moduleNames.has(target)) { + errors.push(`boundaries.rules[${idx}]: "${field}" references unknown module "${target}"`); } } +} - // Validate preset - if (config.preset != null) { - if (typeof config.preset !== 'string' || !PRESETS[config.preset]) { - errors.push( - `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${config.preset}")`, - ); +/** + * Validate the `rules` array of a boundary config. 
+ * @param {Array} rules + * @param {object|undefined} modules - The modules config (for cross-referencing names) + * @param {string[]} errors - Mutated + */ +function validateRules(rules, modules, errors) { + if (!rules) return; + if (!Array.isArray(rules)) { + errors.push('boundaries.rules must be an array'); + return; + } + const moduleNames = modules ? new Set(Object.keys(modules)) : new Set(); + for (let i = 0; i < rules.length; i++) { + const rule = rules[i]; + if (!rule.from) { + errors.push(`boundaries.rules[${i}]: missing "from" field`); + } else if (!moduleNames.has(rule.from)) { + errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); + } + if (rule.notTo && rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); + } + if (!rule.notTo && !rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); } + if (rule.notTo) validateTargetList(rule.notTo, 'notTo', i, moduleNames, errors); + if (rule.onlyTo) validateTargetList(rule.onlyTo, 'onlyTo', i, moduleNames, errors); } +} - // Validate rules - if (config.rules) { - if (!Array.isArray(config.rules)) { - errors.push('boundaries.rules must be an array'); - } else { - const moduleNames = config.modules ? 
new Set(Object.keys(config.modules)) : new Set(); - for (let i = 0; i < config.rules.length; i++) { - const rule = config.rules[i]; - if (!rule.from) { - errors.push(`boundaries.rules[${i}]: missing "from" field`); - } else if (!moduleNames.has(rule.from)) { - errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); - } - if (rule.notTo && rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); - } - if (!rule.notTo && !rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); - } - if (rule.notTo) { - if (!Array.isArray(rule.notTo)) { - errors.push(`boundaries.rules[${i}]: "notTo" must be an array`); - } else { - for (const target of rule.notTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "notTo" references unknown module "${target}"`, - ); - } - } - } - } - if (rule.onlyTo) { - if (!Array.isArray(rule.onlyTo)) { - errors.push(`boundaries.rules[${i}]: "onlyTo" must be an array`); - } else { - for (const target of rule.onlyTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "onlyTo" references unknown module "${target}"`, - ); - } - } - } - } - } +/** + * Validate that module layer assignments match preset layers. 
+ * @param {object} config + * @param {string[]} errors - Mutated + */ +function validateLayerAssignments(config, errors) { + if (!config.preset || !PRESETS[config.preset] || !config.modules) return; + const presetLayers = new Set(PRESETS[config.preset].layers); + for (const [name, value] of Object.entries(config.modules)) { + if (typeof value === 'object' && value.layer && !presetLayers.has(value.layer)) { + errors.push( + `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, + ); } } +} - // Validate preset + layer assignments - if (config.preset && PRESETS[config.preset] && config.modules) { - const presetLayers = new Set(PRESETS[config.preset].layers); - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'object' && value.layer) { - if (!presetLayers.has(value.layer)) { - errors.push( - `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, - ); - } - } - } +/** + * Validate a boundary configuration object. + * @param {object} config - The `manifesto.boundaries` config + * @returns {{ valid: boolean, errors: string[] }} + */ +export function validateBoundaryConfig(config) { + if (!config || typeof config !== 'object') { + return { valid: false, errors: ['boundaries config must be an object'] }; } + const errors = []; + validateModules(config.modules, errors); + validatePreset(config.preset, errors); + validateRules(config.rules, config.modules, errors); + validateLayerAssignments(config, errors); return { valid: errors.length === 0, errors }; } diff --git a/src/features/communities.js b/src/features/communities.js index 062a89b5..f850dc8d 100644 --- a/src/features/communities.js +++ b/src/features/communities.js @@ -11,48 +11,18 @@ function getDirectory(filePath) { return dir === '.' ? 
'(root)' : dir; } -// ─── Core Analysis ──────────────────────────────────────────────────── +// ─── Community Building ────────────────────────────────────────────── /** - * Run Louvain community detection and return structured data. - * - * @param {string} [customDbPath] - Path to graph.db - * @param {object} [opts] - * @param {boolean} [opts.functions] - Function-level instead of file-level - * @param {number} [opts.resolution] - Louvain resolution (default 1.0) - * @param {boolean} [opts.noTests] - Exclude test files - * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) - * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) - * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + * Group graph nodes by Louvain community assignment and build structured objects. + * @param {object} graph - The dependency graph + * @param {Map} assignments - Node key → community ID + * @param {object} opts + * @param {boolean} [opts.drift] - If true, omit member lists + * @returns {{ communities: object[], communityDirs: Map> }} */ -export function communitiesData(customDbPath, opts = {}) { - const { repo, close } = openRepo(customDbPath, opts); - let graph; - try { - graph = buildDependencyGraph(repo, { - fileLevel: !opts.functions, - noTests: opts.noTests, - }); - } finally { - close(); - } - - // Handle empty or trivial graphs - if (graph.nodeCount === 0 || graph.edgeCount === 0) { - return { - communities: [], - modularity: 0, - drift: { splitCandidates: [], mergeCandidates: [] }, - summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, - }; - } - - // Run Louvain - const resolution = opts.resolution ?? 
1.0; - const { assignments, modularity } = louvainCommunities(graph, { resolution }); - - // Group nodes by community - const communityMap = new Map(); // community id → node keys[] +function buildCommunityObjects(graph, assignments, opts) { + const communityMap = new Map(); for (const [key] of graph.nodes()) { const cid = assignments.get(key); if (cid == null) continue; @@ -60,9 +30,8 @@ export function communitiesData(customDbPath, opts = {}) { communityMap.get(cid).push(key); } - // Build community objects const communities = []; - const communityDirs = new Map(); // community id → Set + const communityDirs = new Map(); for (const [cid, members] of communityMap) { const dirCounts = {}; @@ -88,19 +57,27 @@ export function communitiesData(customDbPath, opts = {}) { }); } - // Sort by size descending communities.sort((a, b) => b.size - a.size); + return { communities, communityDirs }; +} - // ─── Drift Analysis ───────────────────────────────────────────── +// ─── Drift Analysis ────────────────────────────────────────────────── - // Split candidates: directories with members in 2+ communities - const dirToCommunities = new Map(); // dir → Set +/** + * Compute split/merge candidates and drift score from community directory data. 
+ * @param {object[]} communities - Community objects with `directories` + * @param {Map>} communityDirs - Community ID → directory set + * @returns {{ splitCandidates: object[], mergeCandidates: object[], driftScore: number }} + */ +function analyzeDrift(communities, communityDirs) { + const dirToCommunities = new Map(); for (const [cid, dirs] of communityDirs) { for (const dir of dirs) { if (!dirToCommunities.has(dir)) dirToCommunities.set(dir, new Set()); dirToCommunities.get(dir).add(cid); } } + const splitCandidates = []; for (const [dir, cids] of dirToCommunities) { if (cids.size >= 2) { @@ -109,7 +86,6 @@ export function communitiesData(customDbPath, opts = {}) { } splitCandidates.sort((a, b) => b.communityCount - a.communityCount); - // Merge candidates: communities spanning 2+ directories const mergeCandidates = []; for (const c of communities) { const dirCount = Object.keys(c.directories).length; @@ -124,17 +100,56 @@ export function communitiesData(customDbPath, opts = {}) { } mergeCandidates.sort((a, b) => b.directoryCount - a.directoryCount); - // Drift score: 0-100 based on how much directory structure diverges from communities const totalDirs = dirToCommunities.size; - const splitDirs = splitCandidates.length; - const splitRatio = totalDirs > 0 ? splitDirs / totalDirs : 0; - + const splitRatio = totalDirs > 0 ? splitCandidates.length / totalDirs : 0; const totalComms = communities.length; - const mergeComms = mergeCandidates.length; - const mergeRatio = totalComms > 0 ? mergeComms / totalComms : 0; - + const mergeRatio = totalComms > 0 ? mergeCandidates.length / totalComms : 0; const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100); + return { splitCandidates, mergeCandidates, driftScore }; +} + +// ─── Core Analysis ──────────────────────────────────────────────────── + +/** + * Run Louvain community detection and return structured data. 
+ * + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] + * @param {boolean} [opts.functions] - Function-level instead of file-level + * @param {number} [opts.resolution] - Louvain resolution (default 1.0) + * @param {boolean} [opts.noTests] - Exclude test files + * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) + * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) + * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + */ +export function communitiesData(customDbPath, opts = {}) { + const { repo, close } = openRepo(customDbPath, opts); + let graph; + try { + graph = buildDependencyGraph(repo, { + fileLevel: !opts.functions, + noTests: opts.noTests, + }); + } finally { + close(); + } + + if (graph.nodeCount === 0 || graph.edgeCount === 0) { + return { + communities: [], + modularity: 0, + drift: { splitCandidates: [], mergeCandidates: [] }, + summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, + }; + } + + const resolution = opts.resolution ?? 1.0; + const { assignments, modularity } = louvainCommunities(graph, { resolution }); + + const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); + const { splitCandidates, mergeCandidates, driftScore } = analyzeDrift(communities, communityDirs); + const base = { communities: opts.drift ? 
[] : communities, modularity: +modularity.toFixed(4), diff --git a/src/features/triage.js b/src/features/triage.js index 00b35ccd..8c23875a 100644 --- a/src/features/triage.js +++ b/src/features/triage.js @@ -4,8 +4,83 @@ import { warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +// ─── Scoring ───────────────────────────────────────────────────────── + +const SORT_FNS = { + risk: (a, b) => b.riskScore - a.riskScore, + complexity: (a, b) => b.cognitive - a.cognitive, + churn: (a, b) => b.churn - a.churn, + 'fan-in': (a, b) => b.fanIn - a.fanIn, + mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, +}; + +/** + * Build scored triage items from raw rows and risk metrics. + * @param {object[]} rows - Raw DB rows + * @param {object[]} riskMetrics - Per-row risk metric objects from scoreRisk + * @returns {object[]} + */ +function buildTriageItems(rows, riskMetrics) { + return rows.map((r, i) => ({ + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + role: r.role || null, + fanIn: r.fan_in, + cognitive: r.cognitive, + churn: r.churn, + maintainabilityIndex: r.mi, + normFanIn: riskMetrics[i].normFanIn, + normComplexity: riskMetrics[i].normComplexity, + normChurn: riskMetrics[i].normChurn, + normMI: riskMetrics[i].normMI, + roleWeight: riskMetrics[i].roleWeight, + riskScore: riskMetrics[i].riskScore, + })); +} + +/** + * Compute signal coverage and summary statistics. 
+ * @param {object[]} filtered - All filtered rows + * @param {object[]} scored - Scored and filtered items + * @param {object} weights - Active weights + * @returns {object} + */ +function computeTriageSummary(filtered, scored, weights) { + const signalCoverage = { + complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), + churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), + fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), + mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), + }; + + const scores = scored.map((it) => it.riskScore); + const avgScore = + scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; + const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + + return { + total: filtered.length, + analyzed: scored.length, + avgScore, + maxScore, + weights, + signalCoverage, + }; +} + // ─── Data Function ──────────────────────────────────────────────────── +const EMPTY_SUMMARY = (weights) => ({ + total: 0, + analyzed: 0, + avgScore: 0, + maxScore: 0, + weights, + signalCoverage: {}, +}); + /** * Compute composite risk scores for all symbols. * @@ -17,9 +92,6 @@ export function triageData(customDbPath, opts = {}) { const { repo, close } = openRepo(customDbPath, opts); try { const noTests = opts.noTests || false; - const fileFilter = opts.file || null; - const kindFilter = opts.kind || null; - const roleFilter = opts.role || null; const minScore = opts.minScore != null ? 
Number(opts.minScore) : null; const sort = opts.sort || 'risk'; const weights = { ...DEFAULT_WEIGHTS, ...(opts.weights || {}) }; @@ -28,86 +100,29 @@ export function triageData(customDbPath, opts = {}) { try { rows = repo.findNodesForTriage({ noTests, - file: fileFilter, - kind: kindFilter, - role: roleFilter, + file: opts.file || null, + kind: opts.kind || null, + role: opts.role || null, }); } catch (err) { warn(`triage query failed: ${err.message}`); - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Post-filter test files (belt-and-suspenders) const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - if (filtered.length === 0) { - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Delegate scoring to classifier const riskMetrics = scoreRisk(filtered, weights); + const items = buildTriageItems(filtered, riskMetrics); - // Compute risk scores - const items = filtered.map((r, i) => ({ - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - role: r.role || null, - fanIn: r.fan_in, - cognitive: r.cognitive, - churn: r.churn, - maintainabilityIndex: r.mi, - normFanIn: riskMetrics[i].normFanIn, - normComplexity: riskMetrics[i].normComplexity, - normChurn: riskMetrics[i].normChurn, - normMI: riskMetrics[i].normMI, - roleWeight: riskMetrics[i].roleWeight, - riskScore: riskMetrics[i].riskScore, - })); - - // Apply minScore filter const scored = minScore != null ? 
items.filter((it) => it.riskScore >= minScore) : items; - - // Sort - const sortFns = { - risk: (a, b) => b.riskScore - a.riskScore, - complexity: (a, b) => b.cognitive - a.cognitive, - churn: (a, b) => b.churn - a.churn, - 'fan-in': (a, b) => b.fanIn - a.fanIn, - mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, - }; - scored.sort(sortFns[sort] || sortFns.risk); - - // Signal coverage: % of items with non-zero signal - const signalCoverage = { - complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), - churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), - fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), - mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), - }; - - const scores = scored.map((it) => it.riskScore); - const avgScore = - scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; - const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + scored.sort(SORT_FNS[sort] || SORT_FNS.risk); const result = { items: scored, - summary: { - total: filtered.length, - analyzed: scored.length, - avgScore, - maxScore, - weights, - signalCoverage, - }, + summary: computeTriageSummary(filtered, scored, weights), }; return paginateResult(result, 'items', { From 246fc21a76fc287f37d13dd172cbc5645f41ea77 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:56:08 -0600 Subject: [PATCH 21/37] fix: split data fetching from formatting in presentation queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract printDiffFunctions/Coupled/Ownership/Boundaries/Summary from diffImpact (cog 28→6, cyc 21→7). Extract printExportHeader/Symbols from fileExports. Extract printNotFound/PathSteps from symbolPath. 
Impact: 12 functions changed, 7 affected --- src/presentation/queries-cli/exports.js | 35 +++++---- src/presentation/queries-cli/impact.js | 94 +++++++++++++++---------- src/presentation/queries-cli/path.js | 65 +++++++++-------- 3 files changed, 112 insertions(+), 82 deletions(-) diff --git a/src/presentation/queries-cli/exports.js b/src/presentation/queries-cli/exports.js index ea7dcade..fe06f731 100644 --- a/src/presentation/queries-cli/exports.js +++ b/src/presentation/queries-cli/exports.js @@ -1,19 +1,7 @@ import { exportsData, kindIcon } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; -export function fileExports(file, customDbPath, opts = {}) { - const data = exportsData(file, customDbPath, opts); - if (outputResult(data, 'results', opts)) return; - - if (data.results.length === 0) { - if (opts.unused) { - console.log(`No unused exports found for "${file}".`); - } else { - console.log(`No exported symbols found for "${file}". Run "codegraph build" first.`); - } - return; - } - +function printExportHeader(data, opts) { if (opts.unused) { console.log( `\n# ${data.file} — ${data.totalUnused} unused export${data.totalUnused !== 1 ? 's' : ''} (of ${data.totalExported} exported)\n`, @@ -24,8 +12,10 @@ export function fileExports(file, customDbPath, opts = {}) { `\n# ${data.file} — ${data.totalExported} exported${unusedNote}, ${data.totalInternal} internal\n`, ); } +} - for (const sym of data.results) { +function printExportSymbols(results) { + for (const sym of results) { const icon = kindIcon(sym.kind); const sig = sym.signature?.params ? `(${sym.signature.params})` : ''; const role = sym.role ? 
` [${sym.role}]` : ''; @@ -38,6 +28,23 @@ export function fileExports(file, customDbPath, opts = {}) { } } } +} + +export function fileExports(file, customDbPath, opts = {}) { + const data = exportsData(file, customDbPath, opts); + if (outputResult(data, 'results', opts)) return; + + if (data.results.length === 0) { + if (opts.unused) { + console.log(`No unused exports found for "${file}".`); + } else { + console.log(`No exported symbols found for "${file}". Run "codegraph build" first.`); + } + return; + } + + printExportHeader(data, opts); + printExportSymbols(data.results); if (data.reexports.length > 0) { console.log(`\n Re-exports: ${data.reexports.map((r) => r.file).join(', ')}`); diff --git a/src/presentation/queries-cli/impact.js b/src/presentation/queries-cli/impact.js index 176172be..511cb42e 100644 --- a/src/presentation/queries-cli/impact.js +++ b/src/presentation/queries-cli/impact.js @@ -132,6 +132,56 @@ export function fnImpact(name, customDbPath, opts = {}) { } } +function printDiffFunctions(data) { + console.log(`\ndiff-impact: ${data.changedFiles} files changed\n`); + console.log(` ${data.affectedFunctions.length} functions changed:\n`); + for (const fn of data.affectedFunctions) { + console.log(` ${kindIcon(fn.kind)} ${fn.name} -- ${fn.file}:${fn.line}`); + if (fn.transitiveCallers > 0) console.log(` ^ ${fn.transitiveCallers} transitive callers`); + } +} + +function printDiffCoupled(data) { + if (!data.historicallyCoupled?.length) return; + console.log('\n Historically coupled (not in static graph):\n'); + for (const c of data.historicallyCoupled) { + const pct = `${(c.jaccard * 100).toFixed(0)}%`; + console.log( + ` ${c.file} <- coupled with ${c.coupledWith} (${pct}, ${c.commitCount} commits)`, + ); + } +} + +function printDiffOwnership(data) { + if (!data.ownership) return; + console.log(`\n Affected owners: ${data.ownership.affectedOwners.join(', ')}`); + console.log(` Suggested reviewers: ${data.ownership.suggestedReviewers.join(', ')}`); +} 
+ +function printDiffBoundaries(data) { + if (!data.boundaryViolations?.length) return; + console.log(`\n Boundary violations (${data.boundaryViolationCount}):\n`); + for (const v of data.boundaryViolations) { + console.log(` [${v.name}] ${v.file} -> ${v.targetFile}`); + if (v.message) console.log(` ${v.message}`); + } +} + +function printDiffSummary(summary) { + if (!summary) return; + let line = `\n Summary: ${summary.functionsChanged} functions changed -> ${summary.callersAffected} callers affected across ${summary.filesAffected} files`; + if (summary.historicallyCoupledCount > 0) { + line += `, ${summary.historicallyCoupledCount} historically coupled`; + } + if (summary.ownersAffected > 0) { + line += `, ${summary.ownersAffected} owners affected`; + } + if (summary.boundaryViolationCount > 0) { + line += `, ${summary.boundaryViolationCount} boundary violations`; + } + console.log(`${line}\n`); +} + export function diffImpact(customDbPath, opts = {}) { if (opts.format === 'mermaid') { console.log(diffImpactMermaid(customDbPath, opts)); @@ -156,43 +206,9 @@ export function diffImpact(customDbPath, opts = {}) { return; } - console.log(`\ndiff-impact: ${data.changedFiles} files changed\n`); - console.log(` ${data.affectedFunctions.length} functions changed:\n`); - for (const fn of data.affectedFunctions) { - console.log(` ${kindIcon(fn.kind)} ${fn.name} -- ${fn.file}:${fn.line}`); - if (fn.transitiveCallers > 0) console.log(` ^ ${fn.transitiveCallers} transitive callers`); - } - if (data.historicallyCoupled && data.historicallyCoupled.length > 0) { - console.log('\n Historically coupled (not in static graph):\n'); - for (const c of data.historicallyCoupled) { - const pct = `${(c.jaccard * 100).toFixed(0)}%`; - console.log( - ` ${c.file} <- coupled with ${c.coupledWith} (${pct}, ${c.commitCount} commits)`, - ); - } - } - if (data.ownership) { - console.log(`\n Affected owners: ${data.ownership.affectedOwners.join(', ')}`); - console.log(` Suggested reviewers: 
${data.ownership.suggestedReviewers.join(', ')}`); - } - if (data.boundaryViolations && data.boundaryViolations.length > 0) { - console.log(`\n Boundary violations (${data.boundaryViolationCount}):\n`); - for (const v of data.boundaryViolations) { - console.log(` [${v.name}] ${v.file} -> ${v.targetFile}`); - if (v.message) console.log(` ${v.message}`); - } - } - if (data.summary) { - let summaryLine = `\n Summary: ${data.summary.functionsChanged} functions changed -> ${data.summary.callersAffected} callers affected across ${data.summary.filesAffected} files`; - if (data.summary.historicallyCoupledCount > 0) { - summaryLine += `, ${data.summary.historicallyCoupledCount} historically coupled`; - } - if (data.summary.ownersAffected > 0) { - summaryLine += `, ${data.summary.ownersAffected} owners affected`; - } - if (data.summary.boundaryViolationCount > 0) { - summaryLine += `, ${data.summary.boundaryViolationCount} boundary violations`; - } - console.log(`${summaryLine}\n`); - } + printDiffFunctions(data); + printDiffCoupled(data); + printDiffOwnership(data); + printDiffBoundaries(data); + printDiffSummary(data.summary); } diff --git a/src/presentation/queries-cli/path.js b/src/presentation/queries-cli/path.js index fbdaafa5..9d61b1a6 100644 --- a/src/presentation/queries-cli/path.js +++ b/src/presentation/queries-cli/path.js @@ -1,6 +1,40 @@ import { kindIcon, pathData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; +function printNotFound(from, to, data) { + const dir = data.reverse ? 
'reverse ' : ''; + console.log(`No ${dir}path from "${from}" to "${to}" within ${data.maxDepth} hops.`); + if (data.fromCandidates.length > 1) { + console.log( + `\n "${from}" matched ${data.fromCandidates.length} symbols — using top match: ${data.fromCandidates[0].name} (${data.fromCandidates[0].file}:${data.fromCandidates[0].line})`, + ); + } + if (data.toCandidates.length > 1) { + console.log( + ` "${to}" matched ${data.toCandidates.length} symbols — using top match: ${data.toCandidates[0].name} (${data.toCandidates[0].file}:${data.toCandidates[0].line})`, + ); + } +} + +function printPathSteps(data) { + for (let i = 0; i < data.path.length; i++) { + const n = data.path[i]; + const indent = ' '.repeat(i + 1); + if (i === 0) { + console.log(`${indent}${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`); + } else { + console.log( + `${indent}--[${n.edgeKind}]--> ${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`, + ); + } + } + if (data.alternateCount > 0) { + console.log( + `\n (${data.alternateCount} alternate shortest ${data.alternateCount === 1 ? 'path' : 'paths'} at same depth)`, + ); + } +} + export function symbolPath(from, to, customDbPath, opts = {}) { const data = pathData(from, to, customDbPath, opts); if (outputResult(data, null, opts)) return; @@ -11,18 +45,7 @@ export function symbolPath(from, to, customDbPath, opts = {}) { } if (!data.found) { - const dir = data.reverse ? 
'reverse ' : ''; - console.log(`No ${dir}path from "${from}" to "${to}" within ${data.maxDepth} hops.`); - if (data.fromCandidates.length > 1) { - console.log( - `\n "${from}" matched ${data.fromCandidates.length} symbols — using top match: ${data.fromCandidates[0].name} (${data.fromCandidates[0].file}:${data.fromCandidates[0].line})`, - ); - } - if (data.toCandidates.length > 1) { - console.log( - ` "${to}" matched ${data.toCandidates.length} symbols — using top match: ${data.toCandidates[0].name} (${data.toCandidates[0].file}:${data.toCandidates[0].line})`, - ); - } + printNotFound(from, to, data); return; } @@ -37,22 +60,6 @@ export function symbolPath(from, to, customDbPath, opts = {}) { console.log( `\nPath from ${from} to ${to} (${data.hops} ${data.hops === 1 ? 'hop' : 'hops'})${dir}:\n`, ); - for (let i = 0; i < data.path.length; i++) { - const n = data.path[i]; - const indent = ' '.repeat(i + 1); - if (i === 0) { - console.log(`${indent}${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`); - } else { - console.log( - `${indent}--[${n.edgeKind}]--> ${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`, - ); - } - } - - if (data.alternateCount > 0) { - console.log( - `\n (${data.alternateCount} alternate shortest ${data.alternateCount === 1 ? 'path' : 'paths'} at same depth)`, - ); - } + printPathSteps(data); console.log(); } From 3d601597f6011ac49ef474a0451f71ce729599f6 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:59:35 -0600 Subject: [PATCH 22/37] fix: extract subcommand dispatch in check, triage CLI and MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract runManifesto/validateKind from check execute (cyc 14→10). Extract runHotspots/validateFilters/parseWeights from triage execute (cyc 13→4). 
Extract loadMCPSdk/createLazyLoaders/resolveDbPath/validateMultiRepoAccess from startMCPServer (cog 34→13, cyc 19→7). Impact: 14 functions changed, 4 affected --- src/cli/commands/check.js | 40 ++++++------ src/cli/commands/triage.js | 62 +++++++++++-------- src/mcp/server.js | 121 ++++++++++++++++++++----------------- 3 files changed, 119 insertions(+), 104 deletions(-) diff --git a/src/cli/commands/check.js b/src/cli/commands/check.js index 24cd9a63..501e4aa4 100644 --- a/src/cli/commands/check.js +++ b/src/cli/commands/check.js @@ -2,6 +2,22 @@ import { EVERY_SYMBOL_KIND } from '../../domain/queries.js'; import { ConfigError } from '../../shared/errors.js'; import { config } from '../shared/options.js'; +function validateKind(kind) { + if (kind && !EVERY_SYMBOL_KIND.includes(kind)) { + throw new ConfigError(`Invalid kind "${kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + } +} + +async function runManifesto(opts, qOpts) { + validateKind(opts.kind); + const { manifesto } = await import('../../presentation/manifesto.js'); + manifesto(opts.db, { + file: opts.file, + kind: opts.kind, + ...qOpts, + }); +} + export const command = { name: 'check [ref]', description: @@ -29,17 +45,7 @@ export const command = { const qOpts = ctx.resolveQueryOpts(opts); if (!isDiffMode && !opts.rules) { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError( - `Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`, - ); - } - const { manifesto } = await import('../../presentation/manifesto.js'); - manifesto(opts.db, { - file: opts.file, - kind: opts.kind, - ...qOpts, - }); + await runManifesto(opts, qOpts); return; } @@ -58,17 +64,7 @@ export const command = { }); if (opts.rules) { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError( - `Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`, - ); - } - const { manifesto } = await import('../../presentation/manifesto.js'); - manifesto(opts.db, { - file: opts.file, - kind: opts.kind, - ...qOpts, - }); + await runManifesto(opts, qOpts); } }, }; diff --git a/src/cli/commands/triage.js b/src/cli/commands/triage.js index 5a8a570f..828b5623 100644 --- a/src/cli/commands/triage.js +++ b/src/cli/commands/triage.js @@ -1,6 +1,39 @@ import { EVERY_SYMBOL_KIND, VALID_ROLES } from '../../domain/queries.js'; import { ConfigError } from '../../shared/errors.js'; +function validateFilters(opts) { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + throw new ConfigError(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + } + if (opts.role && !VALID_ROLES.includes(opts.role)) { + throw new ConfigError(`Invalid role "${opts.role}". Valid: ${VALID_ROLES.join(', ')}`); + } +} + +function parseWeights(raw) { + if (!raw) return undefined; + try { + return JSON.parse(raw); + } catch (err) { + throw new ConfigError('Invalid --weights JSON', { cause: err }); + } +} + +async function runHotspots(opts, ctx) { + const { hotspotsData, formatHotspots } = await import('../../presentation/structure.js'); + const metric = opts.sort === 'risk' ? 'fan-in' : opts.sort; + const data = hotspotsData(opts.db, { + metric, + level: opts.level, + limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + noTests: ctx.resolveNoTests(opts), + }); + if (!ctx.outputResult(data, 'hotspots', opts)) { + console.log(formatHotspots(data)); + } +} + export const command = { name: 'triage', description: @@ -31,35 +64,12 @@ export const command = { ], async execute(_args, opts, ctx) { if (opts.level === 'file' || opts.level === 'directory') { - const { hotspotsData, formatHotspots } = await import('../../presentation/structure.js'); - const metric = opts.sort === 'risk' ? 
'fan-in' : opts.sort; - const data = hotspotsData(opts.db, { - metric, - level: opts.level, - limit: parseInt(opts.limit, 10), - offset: opts.offset ? parseInt(opts.offset, 10) : undefined, - noTests: ctx.resolveNoTests(opts), - }); - if (!ctx.outputResult(data, 'hotspots', opts)) { - console.log(formatHotspots(data)); - } + await runHotspots(opts, ctx); return; } - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); - } - if (opts.role && !VALID_ROLES.includes(opts.role)) { - throw new ConfigError(`Invalid role "${opts.role}". Valid: ${VALID_ROLES.join(', ')}`); - } - let weights; - if (opts.weights) { - try { - weights = JSON.parse(opts.weights); - } catch (err) { - throw new ConfigError('Invalid --weights JSON', { cause: err }); - } - } + validateFilters(opts); + const weights = parseWeights(opts.weights); const { triage } = await import('../../presentation/triage.js'); triage(opts.db, { limit: parseInt(opts.limit, 10), diff --git a/src/mcp/server.js b/src/mcp/server.js index 464fafaf..bae0bbbf 100644 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -21,45 +21,84 @@ import { TOOL_HANDLERS } from './tools/index.js'; * @param {boolean} [options.multiRepo] - Enable multi-repo access (default: false) * @param {string[]} [options.allowedRepos] - Restrict access to these repo names only */ -export async function startMCPServer(customDbPath, options = {}) { - const { allowedRepos } = options; - const multiRepo = options.multiRepo || !!allowedRepos; - let Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema; +async function loadMCPSdk() { try { const sdk = await import('@modelcontextprotocol/sdk/server/index.js'); - Server = sdk.Server; const transport = await import('@modelcontextprotocol/sdk/server/stdio.js'); - StdioServerTransport = transport.StdioServerTransport; const types = await import('@modelcontextprotocol/sdk/types.js'); - 
ListToolsRequestSchema = types.ListToolsRequestSchema; - CallToolRequestSchema = types.CallToolRequestSchema; + return { + Server: sdk.Server, + StdioServerTransport: transport.StdioServerTransport, + ListToolsRequestSchema: types.ListToolsRequestSchema, + CallToolRequestSchema: types.CallToolRequestSchema, + }; } catch { throw new ConfigError( 'MCP server requires @modelcontextprotocol/sdk.\nInstall it with: npm install @modelcontextprotocol/sdk', ); } +} - // Connect transport FIRST so the server can receive the client's - // `initialize` request while heavy modules (queries, better-sqlite3) - // are still loading. These are lazy-loaded on the first tool call - // and cached for subsequent calls. +function createLazyLoaders() { let _queries; let _Database; + return { + async getQueries() { + if (!_queries) _queries = await import('../domain/queries.js'); + return _queries; + }, + getDatabase() { + if (!_Database) { + const require = createRequire(import.meta.url); + _Database = require('better-sqlite3'); + } + return _Database; + }, + }; +} - async function getQueries() { - if (!_queries) { - _queries = await import('../domain/queries.js'); +async function resolveDbPath(customDbPath, args, allowedRepos) { + let dbPath = customDbPath || undefined; + if (args.repo) { + if (allowedRepos && !allowedRepos.includes(args.repo)) { + throw new ConfigError(`Repository "${args.repo}" is not in the allowed repos list.`); } - return _queries; + const { resolveRepoDbPath } = await import('../infrastructure/registry.js'); + const resolved = resolveRepoDbPath(args.repo); + if (!resolved) + throw new ConfigError( + `Repository "${args.repo}" not found in registry or its database is missing.`, + ); + dbPath = resolved; } + return dbPath; +} - function getDatabase() { - if (!_Database) { - const require = createRequire(import.meta.url); - _Database = require('better-sqlite3'); - } - return _Database; +function validateMultiRepoAccess(multiRepo, name, args) { + if (!multiRepo && 
args.repo) { + throw new ConfigError( + 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.', + ); + } + if (!multiRepo && name === 'list_repos') { + throw new ConfigError( + 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to list repositories.', + ); } +} + +export async function startMCPServer(customDbPath, options = {}) { + const { allowedRepos } = options; + const multiRepo = options.multiRepo || !!allowedRepos; + + const { Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema } = + await loadMCPSdk(); + + // Connect transport FIRST so the server can receive the client's + // `initialize` request while heavy modules (queries, better-sqlite3) + // are still loading. These are lazy-loaded on the first tool call + // and cached for subsequent calls. + const { getQueries, getDatabase } = createLazyLoaders(); const server = new Server( { name: 'codegraph', version: '1.0.0' }, @@ -73,47 +112,17 @@ export async function startMCPServer(customDbPath, options = {}) { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; try { - if (!multiRepo && args.repo) { - throw new ConfigError( - 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.', - ); - } - if (!multiRepo && name === 'list_repos') { - throw new ConfigError( - 'Multi-repo access is disabled. 
Restart with `codegraph mcp --multi-repo` to list repositories.', - ); - } - - let dbPath = customDbPath || undefined; - if (args.repo) { - if (allowedRepos && !allowedRepos.includes(args.repo)) { - throw new ConfigError(`Repository "${args.repo}" is not in the allowed repos list.`); - } - const { resolveRepoDbPath } = await import('../infrastructure/registry.js'); - const resolved = resolveRepoDbPath(args.repo); - if (!resolved) - throw new ConfigError( - `Repository "${args.repo}" not found in registry or its database is missing.`, - ); - dbPath = resolved; - } + validateMultiRepoAccess(multiRepo, name, args); + const dbPath = await resolveDbPath(customDbPath, args, allowedRepos); const toolEntry = TOOL_HANDLERS.get(name); if (!toolEntry) { return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true }; } - const ctx = { - dbPath, - getQueries, - getDatabase, - findDbPath, - allowedRepos, - MCP_MAX_LIMIT, - }; - + const ctx = { dbPath, getQueries, getDatabase, findDbPath, allowedRepos, MCP_MAX_LIMIT }; const result = await toolEntry.handler(args, ctx); - if (result?.content) return result; // pass-through MCP responses + if (result?.content) return result; return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }; } catch (err) { const code = err instanceof CodegraphError ? 
err.code : 'UNKNOWN_ERROR'; From fc721f3d09d247d2b1ec5323879d943eafdcd808 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 04:44:35 -0600 Subject: [PATCH 23/37] fix: move startMCPServer JSDoc to correct function location --- src/mcp/server.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mcp/server.js b/src/mcp/server.js index bae0bbbf..777a6844 100644 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -12,15 +12,6 @@ import { MCP_MAX_LIMIT } from '../shared/paginate.js'; import { buildToolList } from './tool-registry.js'; import { TOOL_HANDLERS } from './tools/index.js'; -/** - * Start the MCP server. - * This function requires @modelcontextprotocol/sdk to be installed. - * - * @param {string} [customDbPath] - Path to a specific graph.db - * @param {object} [options] - * @param {boolean} [options.multiRepo] - Enable multi-repo access (default: false) - * @param {string[]} [options.allowedRepos] - Restrict access to these repo names only - */ async function loadMCPSdk() { try { const sdk = await import('@modelcontextprotocol/sdk/server/index.js'); @@ -87,6 +78,15 @@ function validateMultiRepoAccess(multiRepo, name, args) { } } +/** + * Start the MCP server. + * This function requires @modelcontextprotocol/sdk to be installed. 
+ * + * @param {string} [customDbPath] - Path to a specific graph.db + * @param {object} [options] + * @param {boolean} [options.multiRepo] - Enable multi-repo access (default: false) + * @param {string[]} [options.allowedRepos] - Restrict access to these repo names only + */ export async function startMCPServer(customDbPath, options = {}) { const { allowedRepos } = options; const multiRepo = options.multiRepo || !!allowedRepos; From 22ae887df17b536df73d120aca1a38ac9686de16 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 05:32:29 -0600 Subject: [PATCH 24/37] fix: reorder imports in MCP server for lint compliance Move createRequire assignment after all import declarations to satisfy Biome's import sorting rule. Impact: 3 functions changed, 1 affected --- src/mcp/server.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mcp/server.js b/src/mcp/server.js index 777a6844..8e16dffe 100644 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -12,6 +12,8 @@ import { MCP_MAX_LIMIT } from '../shared/paginate.js'; import { buildToolList } from './tool-registry.js'; import { TOOL_HANDLERS } from './tools/index.js'; +const require = createRequire(import.meta.url); + async function loadMCPSdk() { try { const sdk = await import('@modelcontextprotocol/sdk/server/index.js'); @@ -40,7 +42,6 @@ function createLazyLoaders() { }, getDatabase() { if (!_Database) { - const require = createRequire(import.meta.url); _Database = require('better-sqlite3'); } return _Database; @@ -49,7 +50,7 @@ function createLazyLoaders() { } async function resolveDbPath(customDbPath, args, allowedRepos) { - let dbPath = customDbPath || undefined; + let dbPath = customDbPath; if (args.repo) { if (allowedRepos && !allowedRepos.includes(args.repo)) { throw new ConfigError(`Repository "${args.repo}" is not in the allowed repos list.`); From a21840f7f4e0cf1c05e96a3e65c2dd5b501b33c3 Mon Sep 17 00:00:00 2001 From: carlos-alm 
<127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 07:51:24 -0600 Subject: [PATCH 25/37] chore: release v3.2.0 --- CHANGELOG.md | 22 ++++++++++++++++++++++ README.md | 5 +++-- docs/roadmap/BACKLOG.md | 4 ++-- docs/roadmap/ROADMAP.md | 2 +- package-lock.json | 13 ++----------- package.json | 2 +- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 991f0c26..6bf7f42c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,28 @@ All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines. +## [3.2.0](https://github.com/optave/codegraph/compare/v3.1.5...v3.2.0) (2026-03-17) + +**Post-Phase 3 decomposition and dead code accuracy.** This release completes a thorough decomposition of the remaining monolithic modules — language extractors, AST analysis visitors, domain analysis functions, and feature modules are all broken into focused, single-responsibility helpers. Dead code detection now correctly classifies symbols that are only referenced by tests as "test-only" instead of "dead", and constants are properly included in edge building so they no longer appear as false-positive dead exports. A new `brief` command provides token-efficient file summaries designed for AI hook context injection. The native engine gains a MAX_WALK_DEPTH guard to prevent stack overflows on deeply nested ASTs. 
+ +### Features + +* **cli:** `codegraph brief ` command — token-efficient file summary with symbols, roles, caller counts, and risk tiers; designed for hook-based context injection ([#480](https://github.com/optave/codegraph/pull/480)) + +### Bug Fixes + +* **roles:** classify test-only-called symbols as "test-only" instead of "dead" — reduces false positives in dead code detection ([#497](https://github.com/optave/codegraph/pull/497)) +* **builder:** include constant nodes in edge building — constants no longer appear as false-positive dead exports ([#495](https://github.com/optave/codegraph/pull/495)) +* **native:** add MAX_WALK_DEPTH guard to native engine AST walkers — prevents stack overflows on deeply nested files ([#484](https://github.com/optave/codegraph/pull/484)) +* **versioning:** use semver-compliant dev version numbering (`-dev.0` suffix instead of non-standard format) ([#479](https://github.com/optave/codegraph/pull/479)) + +### Refactors + +* **extractors:** decompose monolithic language extractors (JS/TS, Python, Java) into per-category handlers ([#490](https://github.com/optave/codegraph/pull/490)) +* **ast-analysis:** decompose AST analysis visitors and domain builder stages into focused helpers ([#491](https://github.com/optave/codegraph/pull/491)) +* **domain:** decompose domain analysis and feature modules into single-responsibility functions ([#492](https://github.com/optave/codegraph/pull/492)) +* **cleanup:** dead code removal, shared abstractions, and empty catch block replacement across all layers ([#489](https://github.com/optave/codegraph/pull/489)) + ## [3.1.5](https://github.com/optave/codegraph/compare/v3.1.4...v3.1.5) (2026-03-16) **Phase 3 architectural refactoring completes.** This release finishes the remaining two Phase 3 roadmap tasks — domain directory grouping (3.15) and CLI composability (3.16) — bringing Phase 3 to 14 of 14 tasks complete. 
The `src/` directory is now reorganized into `domain/`, `features/`, and `presentation/` layers. A new `openGraph()` helper eliminates DB-open/close boilerplate across CLI commands, and a universal output formatter adds `--table` and `--csv` output to all commands. Several post-reorganization bugs are fixed: complexity/CFG/dataflow analysis restored after the move, MCP server imports corrected, worktree boundary escapes prevented, CJS `require()` support added, and LIKE wildcard injection in queries patched. diff --git a/README.md b/README.md index 5861d19e..a0df31c6 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ That's it. The graph is ready. Now connect your AI agent. Connect directly via MCP — your agent gets 30 tools to query the graph: ```bash -codegraph mcp # 30-tool MCP server — AI queries the graph directly +codegraph mcp # 33-tool MCP server — AI queries the graph directly ``` Or add codegraph to your agent's instructions (e.g. `CLAUDE.md`): @@ -183,7 +183,7 @@ cd codegraph && npm install && npm link | | Feature | Description | |---|---|---| -| 🤖 | **MCP server** | 30-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | +| 🤖 | **MCP server** | 33-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | | 🎯 | **Deep context** | `context` gives agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries | | 🏷️ | **Node role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on connectivity — agents instantly know architectural role | | 📦 | **Batch querying** | Accept a list of targets and return all results in one JSON payload — enables multi-agent parallel dispatch | @@ -258,6 +258,7 @@ codegraph children # List parameters, properties, constants of a ```bash codegraph context # Full context: source, deps, callers, signature, tests codegraph context --depth 2 --no-tests # Include callee 
source 2 levels deep +codegraph brief # Token-efficient file summary: symbols, roles, risk tiers codegraph audit --quick # Structural summary: public API, internals, data flow codegraph audit --quick # Function summary: signature, calls, callers, tests ``` diff --git a/docs/roadmap/BACKLOG.md b/docs/roadmap/BACKLOG.md index c5876a18..f017a518 100644 --- a/docs/roadmap/BACKLOG.md +++ b/docs/roadmap/BACKLOG.md @@ -1,6 +1,6 @@ # Codegraph Feature Backlog -**Last updated:** 2026-03-16 +**Last updated:** 2026-03-17 **Source:** Features derived from [COMPETITIVE_ANALYSIS.md](../../generated/competitive/COMPETITIVE_ANALYSIS.md) and internal roadmap discussions. --- @@ -29,7 +29,7 @@ These two items directly improve agent experience and graph accuracy today, with | ID | Title | Description | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | Depends on | |----|-------|-------------|----------|---------|----------|-------------------|-------------------|----------|------------| -| 83 | Hook-optimized `codegraph brief` command | New `codegraph brief ` command designed for Claude Code hook context injection. Returns a compact, token-efficient summary per file: each symbol with its role and caller count (e.g. `buildGraph [core, 12 callers]`), blast radius count on importers (`Imported by: src/cli.js (+8 transitive)`), and overall file risk tier. Current `deps --json` output used by `enrich-context.sh` is shallow — just file-level imports/importedBy and symbol names with no role or blast radius info. The `brief` command would include: **(a)** symbol roles in the output — knowing a file defines `core` vs `leaf` symbols changes editing caution; **(b)** per-symbol transitive caller counts — makes blast radius visible without a separate `fn-impact` call; **(c)** file-level risk tier (high/medium/low based on max fan-in and role composition). Output optimized for `additionalContext` injection — single compact block, not verbose JSON. 
Also add `--brief` flag to `deps` as an alias. | Embeddability | The `enrich-context.sh` hook is the only codegraph context agents actually see (they ignore CLAUDE.md instructions to run commands manually). Making that passively-injected context richer — with roles, caller counts, and risk tiers — directly reduces blind edits to high-impact code. Currently the hook shows `Defines: function buildGraph` but not that it's a core symbol with 12 transitive callers | ✓ | ✓ | 4 | No | — | +| 83 | ~~Hook-optimized `codegraph brief` command~~ | New `codegraph brief ` command designed for Claude Code hook context injection. Returns a compact, token-efficient summary per file: each symbol with its role and caller count (e.g. `buildGraph [core, 12 callers]`), blast radius count on importers (`Imported by: src/cli.js (+8 transitive)`), and overall file risk tier. Current `deps --json` output used by `enrich-context.sh` is shallow — just file-level imports/importedBy and symbol names with no role or blast radius info. The `brief` command would include: **(a)** symbol roles in the output — knowing a file defines `core` vs `leaf` symbols changes editing caution; **(b)** per-symbol transitive caller counts — makes blast radius visible without a separate `fn-impact` call; **(c)** file-level risk tier (high/medium/low based on max fan-in and role composition). Output optimized for `additionalContext` injection — single compact block, not verbose JSON. Also add `--brief` flag to `deps` as an alias. | Embeddability | The `enrich-context.sh` hook is the only codegraph context agents actually see (they ignore CLAUDE.md instructions to run commands manually). Making that passively-injected context richer — with roles, caller counts, and risk tiers — directly reduces blind edits to high-impact code. 
Currently the hook shows `Defines: function buildGraph` but not that it's a core symbol with 12 transitive callers | ✓ | ✓ | 4 | No | — | **DONE** — `codegraph brief ` command with symbol roles, caller counts, and risk tiers. CLI command, MCP tool, and presentation layer. ([#480](https://github.com/optave/codegraph/pull/480)) | | 71 | Basic type inference for typed languages | Extract type annotations from TypeScript and Java AST nodes (variable declarations, function parameters, return types, generics) to resolve method calls through typed references. Currently `const x: Router = express.Router(); x.get(...)` produces no edge because `x.get` can't be resolved without knowing `x` is a `Router`. Tree-sitter already parses type annotations — we just don't use them for resolution. Start with declared types (no flow inference), which covers the majority of TS/Java code. | Resolution | Dramatically improves call graph completeness for TypeScript and Java — the two languages where developers annotate types explicitly and expect tooling to use them. Directly prevents hallucinated "no callers" results for methods called through typed variables | ✓ | ✓ | 5 | No | — | ### Tier 1 — Zero-dep + Foundation-aligned (build these first) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index f4297e85..01f082b9 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1,6 +1,6 @@ # Codegraph Roadmap -> **Current version:** 3.1.5 | **Status:** Active development | **Updated:** March 2026 +> **Current version:** 3.2.0 | **Status:** Active development | **Updated:** March 2026 Codegraph is a strong local-first code graph CLI. This roadmap describes planned improvements across eleven phases -- closing gaps with commercial code intelligence platforms while preserving codegraph's core strengths: fully local, open source, zero cloud dependency by default. 
diff --git a/package-lock.json b/package-lock.json index 8e69ef25..f10056a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@optave/codegraph", - "version": "3.1.5", + "version": "3.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@optave/codegraph", - "version": "3.1.5", + "version": "3.2.0", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", @@ -1276,9 +1276,6 @@ "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1292,9 +1289,6 @@ "cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1308,9 +1302,6 @@ "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "Apache-2.0", "optional": true, "os": [ diff --git a/package.json b/package.json index d2984eab..215688a6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@optave/codegraph", - "version": "3.1.5", + "version": "3.2.0", "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them", "type": "module", "main": "src/index.js", From d4f94909b67aa4dc814dd1fa129f90dd948bfc8d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 08:05:59 -0600 Subject: [PATCH 26/37] fix: add missing changelog entries for #498 and #493, restore libc fields in lockfile --- CHANGELOG.md | 2 ++ package-lock.json | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bf7f42c..5c8edf3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. 
See [commit * **roles:** classify test-only-called symbols as "test-only" instead of "dead" — reduces false positives in dead code detection ([#497](https://github.com/optave/codegraph/pull/497)) * **builder:** include constant nodes in edge building — constants no longer appear as false-positive dead exports ([#495](https://github.com/optave/codegraph/pull/495)) * **native:** add MAX_WALK_DEPTH guard to native engine AST walkers — prevents stack overflows on deeply nested files ([#484](https://github.com/optave/codegraph/pull/484)) +* **cli:** support repeated `--file` flag for multi-file scoping across all commands ([#498](https://github.com/optave/codegraph/pull/498)) * **versioning:** use semver-compliant dev version numbering (`-dev.0` suffix instead of non-standard format) ([#479](https://github.com/optave/codegraph/pull/479)) ### Refactors @@ -22,6 +23,7 @@ All notable changes to this project will be documented in this file. See [commit * **extractors:** decompose monolithic language extractors (JS/TS, Python, Java) into per-category handlers ([#490](https://github.com/optave/codegraph/pull/490)) * **ast-analysis:** decompose AST analysis visitors and domain builder stages into focused helpers ([#491](https://github.com/optave/codegraph/pull/491)) * **domain:** decompose domain analysis and feature modules into single-responsibility functions ([#492](https://github.com/optave/codegraph/pull/492)) +* **presentation:** split data fetching from formatting and extract CLI/MCP subcommand dispatch ([#493](https://github.com/optave/codegraph/pull/493)) * **cleanup:** dead code removal, shared abstractions, and empty catch block replacement across all layers ([#489](https://github.com/optave/codegraph/pull/489)) ## [3.1.5](https://github.com/optave/codegraph/compare/v3.1.4...v3.1.5) (2026-03-16) diff --git a/package-lock.json b/package-lock.json index f10056a4..86368ae5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1276,6 +1276,9 @@ "cpu": [ "arm64" ], 
+ "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1289,6 +1292,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1302,6 +1308,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ From fe45c555e342b7d8771cb7ee355ec4c17138e8d8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:50:16 -0600 Subject: [PATCH 27/37] feat: add type inference for all typed languages (WASM + native) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract per-file typeMap (varName → typeName) from type annotations, new expressions, and typed parameters for JS/TS, Java, Go, Rust, C#, PHP, and Python. The edge resolver uses typeMap to connect variable receiver calls (x.method()) to their declared types (Type.method()) with 0.9 confidence. Implemented in both WASM (JS extractors) and native (Rust extractors + edge builder) engines for full parity. Updated README language table with symbols-extracted, type-inference, and engine-parity columns. 
Impact: 73 functions changed, 54 affected --- README.md | 26 +++-- crates/codegraph-core/src/edge_builder.rs | 46 +++++++- .../codegraph-core/src/extractors/csharp.rs | 70 ++++++++++++ crates/codegraph-core/src/extractors/go.rs | 68 ++++++++++++ crates/codegraph-core/src/extractors/java.rs | 59 ++++++++++ .../src/extractors/javascript.rs | 105 ++++++++++++++++++ crates/codegraph-core/src/extractors/php.rs | 48 ++++++++ .../codegraph-core/src/extractors/python.rs | 73 ++++++++++++ .../src/extractors/rust_lang.rs | 68 ++++++++++++ crates/codegraph-core/src/types.rs | 11 ++ src/domain/graph/builder/incremental.js | 19 +++- .../graph/builder/stages/build-edges.js | 46 ++++++-- src/extractors/csharp.js | 62 +++++++++++ src/extractors/go.js | 65 +++++++++++ src/extractors/java.js | 32 +++++- src/extractors/javascript.js | 86 +++++++++++++- src/extractors/php.js | 43 +++++++ src/extractors/python.js | 52 +++++++++ src/extractors/rust.js | 60 ++++++++++ tests/integration/build-parity.test.js | 4 + tests/integration/build.test.js | 63 +++++++++++ tests/parsers/java.test.js | 22 ++++ tests/parsers/javascript.test.js | 51 +++++++++ 23 files changed, 1147 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index a0df31c6..01371097 100644 --- a/README.md +++ b/README.md @@ -486,18 +486,20 @@ codegraph registry remove # Unregister ## 🌐 Language Support -| Language | Extensions | Coverage | -|---|---|---| -| ![JavaScript](https://img.shields.io/badge/-JavaScript-F7DF1E?style=flat-square&logo=javascript&logoColor=black) | `.js`, `.jsx`, `.mjs`, `.cjs` | Full — functions, classes, imports, call sites, dataflow | -| ![TypeScript](https://img.shields.io/badge/-TypeScript-3178C6?style=flat-square&logo=typescript&logoColor=white) | `.ts`, `.tsx` | Full — interfaces, type aliases, `.d.ts`, dataflow | -| ![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py` | Functions, classes, methods, imports, decorators, dataflow | 
-| ![Go](https://img.shields.io/badge/-Go-00ADD8?style=flat-square&logo=go&logoColor=white) | `.go` | Functions, methods, structs, interfaces, imports, call sites, dataflow | -| ![Rust](https://img.shields.io/badge/-Rust-000000?style=flat-square&logo=rust&logoColor=white) | `.rs` | Functions, methods, structs, traits, `use` imports, call sites, dataflow | -| ![Java](https://img.shields.io/badge/-Java-ED8B00?style=flat-square&logo=openjdk&logoColor=white) | `.java` | Classes, methods, constructors, interfaces, imports, call sites, dataflow | -| ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | Classes, structs, records, interfaces, enums, methods, constructors, using directives, invocations, dataflow | -| ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php` | Functions, classes, interfaces, traits, enums, methods, namespace use, calls, dataflow | -| ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb` | Classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | -| ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | Resource, data, variable, module, output blocks | +| Language | Extensions | Symbols Extracted | Type Inference | Parity | +|---|---|---|:---:|:---:| +| ![JavaScript](https://img.shields.io/badge/-JavaScript-F7DF1E?style=flat-square&logo=javascript&logoColor=black) | `.js`, `.jsx`, `.mjs`, `.cjs` | functions, classes, methods, imports, exports, call sites, constants, dataflow | ✅ | ✅ | +| ![TypeScript](https://img.shields.io/badge/-TypeScript-3178C6?style=flat-square&logo=typescript&logoColor=white) | `.ts`, `.tsx` | functions, classes, interfaces, type aliases, methods, imports, exports, call sites, dataflow | ✅ | ✅ | +| 
![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py` | functions, classes, methods, imports, decorators, constants, call sites, dataflow | ✅ | ✅ | +| ![Go](https://img.shields.io/badge/-Go-00ADD8?style=flat-square&logo=go&logoColor=white) | `.go` | functions, methods, structs, interfaces, constants, imports, call sites, dataflow | ✅ | ✅ | +| ![Rust](https://img.shields.io/badge/-Rust-000000?style=flat-square&logo=rust&logoColor=white) | `.rs` | functions, methods, structs, enums, traits, constants, `use` imports, call sites, dataflow | ✅ | ✅ | +| ![Java](https://img.shields.io/badge/-Java-ED8B00?style=flat-square&logo=openjdk&logoColor=white) | `.java` | classes, methods, constructors, interfaces, enums, imports, call sites, dataflow | ✅ | ✅ | +| ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | classes, structs, records, interfaces, enums, methods, constructors, properties, using directives, call sites, dataflow | ✅ | ✅ | +| ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php` | functions, classes, interfaces, traits, enums, methods, namespace use, call sites, dataflow | ✅ | ✅ | +| ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb` | classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | — | ✅ | +| ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | resource, data, variable, module, output blocks | — | ✅ | + +> **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. **Parity** = WASM and native Rust engines produce identical output. 
## ⚙️ How It Works diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 4549acfb..522ce768 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -43,6 +43,13 @@ pub struct DefInfo { pub end_line: Option, } +#[napi(object)] +pub struct TypeMapInput { + pub name: String, + #[napi(js_name = "typeName")] + pub type_name: String, +} + #[napi(object)] pub struct FileEdgeInput { pub file: String, @@ -53,6 +60,8 @@ pub struct FileEdgeInput { #[napi(js_name = "importedNames")] pub imported_names: Vec, pub classes: Vec, + #[napi(js_name = "typeMap")] + pub type_map: Vec, } #[napi(object)] @@ -108,6 +117,13 @@ pub fn build_call_edges( .map(|im| (im.name.as_str(), im.file.as_str())) .collect(); + // Build type map (variable name → declared type name) + let type_map: HashMap<&str, &str> = file_input + .type_map + .iter() + .map(|tm| (tm.name.as_str(), tm.type_name.as_str())) + .collect(); + // Build def → node ID map for caller resolution (match by name+kind+file+line) let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == *rel_path).collect(); @@ -204,10 +220,25 @@ pub fn build_call_edges( if !method_candidates.is_empty() { targets = method_candidates; - } else if call.receiver.is_none() + } else if let Some(ref receiver) = call.receiver { + // Type-aware resolution: translate variable receiver to declared type + if let Some(type_name) = type_map.get(receiver.as_str()) { + let qualified = format!("{}.{}", type_name, call.name); + let typed: Vec<&NodeInfo> = nodes_by_name + .get(qualified.as_str()) + .map(|v| v.iter().filter(|n| n.kind == "method").copied().collect()) + .unwrap_or_default(); + if !typed.is_empty() { + targets = typed; + } + } + } + + if targets.is_empty() + && (call.receiver.is_none() || call.receiver.as_deref() == Some("this") || call.receiver.as_deref() == Some("self") - || call.receiver.as_deref() == Some("super") + || 
call.receiver.as_deref() == Some("super")) { // Scoped fallback — same-dir or parent-dir only targets = nodes_by_name @@ -263,15 +294,19 @@ pub fn build_call_edges( && receiver != "self" && receiver != "super" { + // Resolve variable to its declared type via typeMap + let effective_receiver = type_map.get(receiver.as_str()).copied().unwrap_or(receiver.as_str()); + let type_resolved = effective_receiver != receiver.as_str(); + let samefile = nodes_by_name_and_file - .get(&(receiver.as_str(), rel_path.as_str())) + .get(&(effective_receiver, rel_path.as_str())) .cloned() .unwrap_or_default(); let candidates = if !samefile.is_empty() { samefile } else { nodes_by_name - .get(receiver.as_str()) + .get(effective_receiver) .cloned() .unwrap_or_default() }; @@ -286,11 +321,12 @@ pub fn build_call_edges( (1u64 << 63) | ((caller_id as u64) << 32) | (recv_target.id as u64); if !seen_edges.contains(&recv_key) { seen_edges.insert(recv_key); + let confidence = if type_resolved { 0.9 } else { 0.7 }; edges.push(ComputedEdge { source_id: caller_id, target_id: recv_target.id, kind: "receiver".to_string(), - confidence: 0.7, + confidence, dynamic: 0, }); } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 8ef0cc16..e2243c2f 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -12,6 +12,7 @@ impl SymbolExtractor for CSharpExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &CSHARP_AST_CONFIG); + extract_csharp_type_map(&tree.root_node(), source, &mut symbols); symbols } } @@ -469,3 +470,72 @@ fn extract_csharp_base_types( } } } + +// ── Type map extraction ───────────────────────────────────────────────────── + +fn extract_csharp_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + match 
type_node.kind() { + "identifier" | "qualified_name" => Some(node_text(type_node, source)), + "predefined_type" => None, // skip int, string, etc. + "generic_name" => type_node.child(0).map(|n| node_text(&n, source)), + "nullable_type" => { + type_node.child(0).and_then(|inner| extract_csharp_type_name(&inner, source)) + } + _ => None, + } +} + +fn extract_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_csharp_type_map_depth(node, source, symbols, 0); +} + +fn extract_csharp_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "variable_declaration" => { + let type_node = node.child_by_field_name("type").or_else(|| node.child(0)); + if let Some(type_node) = type_node { + if type_node.kind() != "var_keyword" && type_node.kind() != "implicit_type" { + if let Some(type_name) = extract_csharp_type_name(&type_node, source) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "variable_declarator" { + let name_node = child.child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(name_node) = name_node { + if name_node.kind() == "identifier" { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + } + } + } + "parameter" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_csharp_type_name(&type_node, source) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_csharp_type_map_depth(&child, source, symbols, depth + 1); + } + } +} diff --git 
a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index b823935c..d9f0c0d6 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -12,6 +12,7 @@ impl SymbolExtractor for GoExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &GO_AST_CONFIG); + extract_go_type_map(&tree.root_node(), source, &mut symbols); symbols } } @@ -310,6 +311,73 @@ fn extract_go_import_spec(spec: &Node, source: &[u8], symbols: &mut FileSymbols) } } +// ── Type map extraction ───────────────────────────────────────────────────── + +fn extract_go_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + match type_node.kind() { + "type_identifier" | "identifier" | "qualified_type" => Some(node_text(type_node, source)), + "pointer_type" => { + // *MyType → MyType + for i in 0..type_node.child_count() { + if let Some(child) = type_node.child(i) { + if child.kind() == "type_identifier" || child.kind() == "identifier" { + return Some(node_text(&child, source)); + } + } + } + None + } + "generic_type" => type_node.child(0).map(|n| node_text(&n, source)), + _ => None, + } +} + +fn extract_go_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_go_type_map_depth(node, source, symbols, 0); +} + +fn extract_go_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "var_spec" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_go_type_name(&type_node, source) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + 
"parameter_declaration" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_go_type_name(&type_node, source) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" { + symbols.type_map.push(TypeMapEntry { + name: node_text(&child, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_go_type_map_depth(&child, source, symbols, depth + 1); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 2fefde95..fd8faaa7 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -12,10 +12,69 @@ impl SymbolExtractor for JavaExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &JAVA_AST_CONFIG); + extract_java_type_map(&tree.root_node(), source, &mut symbols); symbols } } +// ── Type inference helpers ────────────────────────────────────────────────── + +fn extract_java_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + if type_node.kind() == "generic_type" { + type_node.child(0).map(|n| node_text(&n, source)) + } else { + Some(node_text(type_node, source)) + } +} + +fn extract_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_java_type_map_depth(node, source, symbols, 0); +} + +fn extract_java_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "local_variable_declaration" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = 
extract_java_type_name(&type_node, source) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "variable_declarator" { + if let Some(name_node) = child.child_by_field_name("name") { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + } + "formal_parameter" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_java_type_name(&type_node, source) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_java_type_map_depth(&child, source, symbols, depth + 1); + } + } +} + fn find_java_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { let mut current = node.parent(); while let Some(parent) = current { diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 51e94a40..30a032b8 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -12,10 +12,115 @@ impl SymbolExtractor for JsExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes(&tree.root_node(), source, &mut symbols.ast_nodes); + extract_type_map(&tree.root_node(), source, &mut symbols); symbols } } +// ── Type inference helpers ────────────────────────────────────────────────── + +/// Extract simple type name from a type_annotation node. +/// Returns the type name for simple types and generics, None for unions/intersections/arrays. 
+fn extract_simple_type_name<'a>(node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "type_identifier" | "identifier" => return Some(node_text(&child, source)), + "generic_type" => { + return child.child(0).map(|n| node_text(&n, source)); + } + "parenthesized_type" => return extract_simple_type_name(&child, source), + _ => {} + } + } + } + None +} + +/// Extract constructor type name from a new_expression node. +fn extract_new_expr_type_name<'a>(node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + if node.kind() != "new_expression" { + return None; + } + let ctor = node.child_by_field_name("constructor").or_else(|| node.child(1))?; + match ctor.kind() { + "identifier" => Some(node_text(&ctor, source)), + "member_expression" => { + ctor.child_by_field_name("property").map(|p| node_text(&p, source)) + } + _ => None, + } +} + +/// Walk the entire tree to extract type annotations and new-expression type inferences. 
+fn extract_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_type_map_depth(node, source, symbols, 0); +} + +fn extract_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "variable_declarator" => { + if let Some(name_n) = node.child_by_field_name("name") { + if name_n.kind() == "identifier" { + let var_name = node_text(&name_n, source); + // Type annotation takes priority + if let Some(type_anno) = find_child(node, "type_annotation") { + if let Some(type_name) = extract_simple_type_name(&type_anno, source) { + symbols.type_map.push(TypeMapEntry { + name: var_name.to_string(), + type_name: type_name.to_string(), + }); + // Skip new_expression check — annotation wins + return walk_type_map_children(node, source, symbols, depth); + } + } + // Fall back to new expression inference + if let Some(value_n) = node.child_by_field_name("value") { + if value_n.kind() == "new_expression" { + if let Some(type_name) = extract_new_expr_type_name(&value_n, source) { + symbols.type_map.push(TypeMapEntry { + name: var_name.to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + "required_parameter" | "optional_parameter" => { + let name_node = node.child_by_field_name("pattern") + .or_else(|| node.child_by_field_name("left")) + .or_else(|| node.child(0)); + if let Some(name_node) = name_node { + if name_node.kind() == "identifier" { + if let Some(type_anno) = find_child(node, "type_annotation") { + if let Some(type_name) = extract_simple_type_name(&type_anno, source) { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + _ => {} + } + walk_type_map_children(node, source, symbols, depth); +} + +fn walk_type_map_children(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + for i in 0..node.child_count() { + if 
let Some(child) = node.child(i) { + extract_type_map_depth(&child, source, symbols, depth + 1); + } + } +} + fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { walk_node_depth(node, source, symbols, 0); } diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs index 432f3c3e..c692e2e0 100644 --- a/crates/codegraph-core/src/extractors/php.rs +++ b/crates/codegraph-core/src/extractors/php.rs @@ -11,6 +11,7 @@ impl SymbolExtractor for PhpExtractor { fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); + extract_php_type_map(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &PHP_AST_CONFIG); symbols } @@ -397,3 +398,50 @@ fn extract_php_enum_cases(node: &Node, source: &[u8]) -> Vec { } cases } + +fn extract_php_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + match type_node.kind() { + "named_type" | "name" | "qualified_name" => Some(node_text(type_node, source)), + "optional_type" => { + // ?MyType → skip the ? 
and get inner type + type_node.child(1) + .or_else(|| type_node.child(0)) + .and_then(|inner| extract_php_type_name(&inner, source)) + } + // Skip union/intersection types (too ambiguous) + "union_type" | "intersection_type" => None, + _ => None, + } +} + +fn extract_php_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_php_type_map_depth(node, source, symbols, 0); +} + +fn extract_php_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "simple_parameter" | "variadic_parameter" | "property_promotion_parameter" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_php_type_name(&type_node, source) { + let name_node = node.child_by_field_name("name") + .or_else(|| find_child(node, "variable_name")); + if let Some(name_node) = name_node { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_php_type_map_depth(&child, source, symbols, depth + 1); + } + } +} diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index 01f3df7b..2af7af5d 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -12,6 +12,7 @@ impl SymbolExtractor for PythonExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &PYTHON_AST_CONFIG); + extract_python_type_map(&tree.root_node(), source, &mut symbols); symbols } } @@ -354,6 +355,78 @@ fn find_python_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + match 
type_node.kind() { + "identifier" | "attribute" => Some(node_text(type_node, source)), + "subscript" => { + // List[int] → List + type_node + .child_by_field_name("value") + .map(|n| node_text(&n, source)) + } + _ => None, + } +} + +fn extract_python_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_python_type_map_depth(node, source, symbols, 0); +} + +fn extract_python_type_map_depth( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + depth: usize, +) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "typed_parameter" => { + // first child is identifier, type field is the type + if let Some(name_node) = node.child(0) { + if name_node.kind() == "identifier" { + let name = node_text(&name_node, source); + if name != "self" && name != "cls" { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = + extract_python_type_name(&type_node, source) + { + symbols.type_map.push(TypeMapEntry { + name: name.to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + "typed_default_parameter" => { + if let Some(name_node) = node.child_by_field_name("name") { + if name_node.kind() == "identifier" { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = + extract_python_type_name(&type_node, source) + { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_python_type_map_depth(&child, source, symbols, depth + 1); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index 1a1e2d25..550fc5db 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -12,6 +12,7 @@ impl 
SymbolExtractor for RustExtractor { let mut symbols = FileSymbols::new(file_path.to_string()); walk_node(&tree.root_node(), source, &mut symbols); walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &RUST_AST_CONFIG); + extract_rust_type_map(&tree.root_node(), source, &mut symbols); symbols } } @@ -381,6 +382,73 @@ fn extract_rust_use_path(node: &Node, source: &[u8]) -> Vec<(String, Vec } } +fn extract_rust_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + match type_node.kind() { + "type_identifier" | "identifier" | "scoped_type_identifier" => Some(node_text(type_node, source)), + "reference_type" => { + for i in 0..type_node.child_count() { + if let Some(child) = type_node.child(i) { + if child.kind() == "type_identifier" || child.kind() == "scoped_type_identifier" { + return Some(node_text(&child, source)); + } + } + } + None + } + "generic_type" => type_node.child(0).map(|n| node_text(&n, source)), + _ => None, + } +} + +fn extract_rust_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + extract_rust_type_map_depth(node, source, symbols, 0); +} + +fn extract_rust_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } + match node.kind() { + "let_declaration" => { + if let Some(pattern) = node.child_by_field_name("pattern") { + if pattern.kind() == "identifier" { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(type_name) = extract_rust_type_name(&type_node, source) { + symbols.type_map.push(TypeMapEntry { + name: node_text(&pattern, source).to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + "parameter" => { + if let Some(pattern) = node.child_by_field_name("pattern") { + if pattern.kind() == "identifier" { + let name = node_text(&pattern, source); + if name != "self" && name != "&self" && name != "&mut self" && name != "mut self" { + if let Some(type_node) = 
node.child_by_field_name("type") { + if let Some(type_name) = extract_rust_type_name(&type_node, source) { + symbols.type_map.push(TypeMapEntry { + name: name.to_string(), + type_name: type_name.to_string(), + }); + } + } + } + } + } + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + extract_rust_type_map_depth(&child, source, symbols, depth + 1); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index fa99291a..77ea559d 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -269,6 +269,14 @@ pub struct DataflowResult { pub mutations: Vec, } +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TypeMapEntry { + pub name: String, + #[napi(js_name = "typeName")] + pub type_name: String, +} + #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileSymbols { @@ -283,6 +291,8 @@ pub struct FileSymbols { pub dataflow: Option, #[napi(js_name = "lineCount")] pub line_count: Option, + #[napi(js_name = "typeMap")] + pub type_map: Vec, } impl FileSymbols { @@ -297,6 +307,7 @@ impl FileSymbols { ast_nodes: Vec::new(), dataflow: None, line_count: None, + type_map: Vec::new(), } } } diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.js index 63694385..c2ea1509 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.js @@ -87,7 +87,7 @@ function findCaller(call, definitions, relPath, stmts) { return caller; } -function resolveCallTargets(stmts, call, relPath, importedNames) { +function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) { const importedFrom = importedNames.get(call.name); let targets; if (importedFrom) { @@ -99,16 +99,31 @@ function resolveCallTargets(stmts, call, relPath, importedNames) { targets = stmts.findNodeByName.all(call.name); } } + // Type-aware 
resolution: translate variable receiver to declared type + if ((!targets || targets.length === 0) && call.receiver && typeMap) { + const typeName = typeMap.get(call.receiver); + if (typeName) { + const qualified = `${typeName}.${call.name}`; + targets = stmts.findNodeByName.all(qualified); + } + } return { targets, importedFrom }; } function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { + const typeMap = symbols.typeMap || new Map(); let edgesAdded = 0; for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; const caller = findCaller(call, symbols.definitions, relPath, stmts) || fileNodeRow; - const { targets, importedFrom } = resolveCallTargets(stmts, call, relPath, importedNames); + const { targets, importedFrom } = resolveCallTargets( + stmts, + call, + relPath, + importedNames, + typeMap, + ); for (const t of targets) { if (t.id !== caller.id) { diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index 0651c865..085717fa 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -102,6 +102,12 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) if (!fileNodeRow) continue; const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + const typeMap = + symbols.typeMap instanceof Map + ? [...symbols.typeMap.entries()].map(([name, typeName]) => ({ name, typeName })) + : Array.isArray(symbols.typeMap) + ? 
symbols.typeMap + : []; nativeFiles.push({ file: relPath, fileNodeId: fileNodeRow.id, @@ -114,6 +120,7 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) calls: symbols.calls, importedNames, classes: symbols.classes, + typeMap, }); } @@ -151,6 +158,7 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { if (!fileNodeRow) continue; const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); + const typeMap = symbols.typeMap || new Map(); const seenCallEdges = new Set(); buildFileCallEdges( @@ -162,6 +170,7 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { seenCallEdges, getNodeIdStmt, allEdgeRows, + typeMap, ); buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows); } @@ -202,7 +211,7 @@ function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { return caller || fileNodeRow; } -function resolveCallTargets(ctx, call, relPath, importedNames) { +function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { const importedFrom = importedNames.get(call.name); let targets; @@ -219,7 +228,7 @@ function resolveCallTargets(ctx, call, relPath, importedNames) { if (!targets || targets.length === 0) { targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; if (targets.length === 0) { - targets = resolveByMethodOrGlobal(ctx, call, relPath); + targets = resolveByMethodOrGlobal(ctx, call, relPath, typeMap); } } @@ -234,12 +243,22 @@ function resolveCallTargets(ctx, call, relPath, importedNames) { return { targets, importedFrom }; } -function resolveByMethodOrGlobal(ctx, call, relPath) { +function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', ); if (methodCandidates.length > 0) return methodCandidates; + // Type-aware resolution: translate variable receiver to its declared type + if (call.receiver && typeMap) { + 
const typeName = typeMap.get(call.receiver); + if (typeName) { + const qualifiedName = `${typeName}.${call.name}`; + const typed = (ctx.nodesByName.get(qualifiedName) || []).filter((n) => n.kind === 'method'); + if (typed.length > 0) return typed; + } + } + if ( !call.receiver || call.receiver === 'this' || @@ -262,13 +281,20 @@ function buildFileCallEdges( seenCallEdges, getNodeIdStmt, allEdgeRows, + typeMap, ) { for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); const isDynamic = call.dynamic ? 1 : 0; - const { targets, importedFrom } = resolveCallTargets(ctx, call, relPath, importedNames); + const { targets, importedFrom } = resolveCallTargets( + ctx, + call, + relPath, + importedNames, + typeMap, + ); for (const t of targets) { const edgeKey = `${caller.id}|${t.id}`; @@ -287,22 +313,24 @@ function buildFileCallEdges( call.receiver !== 'self' && call.receiver !== 'super' ) { - buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows); + buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap); } } } -function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows) { +function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap) { const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; - const candidates = samefile.length > 0 ? samefile : ctx.nodesByName.get(call.receiver) || []; + const effectiveReceiver = typeMap?.get(call.receiver) || call.receiver; + const samefile = ctx.nodesByNameAndFile.get(`${effectiveReceiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : ctx.nodesByName.get(effectiveReceiver) || []; const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); if (receiverNodes.length > 0 && caller) { const recvTarget = receiverNodes[0]; const recvKey = `recv|${caller.id}|${recvTarget.id}`; if (!seenCallEdges.has(recvKey)) { seenCallEdges.add(recvKey); - allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); + const confidence = effectiveReceiver !== call.receiver ? 0.9 : 0.7; + allEdgeRows.push([caller.id, recvTarget.id, 'receiver', confidence, 0]); } } } diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index bcea24b4..68fd3121 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -10,9 +10,11 @@ export function extractCSharpSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkCSharpNode(tree.rootNode, ctx); + extractCSharpTypeMap(tree.rootNode, ctx); return ctx; } @@ -308,6 +310,66 @@ function extractCSharpEnumMembers(enumNode) { return constants; } +// ── Type map extraction ────────────────────────────────────────────────────── + +function extractCSharpTypeMap(node, ctx) { + extractCSharpTypeMapDepth(node, ctx, 0); +} + +function extractCSharpTypeMapDepth(node, ctx, depth) { + if (depth >= 200) return; + + // local_declaration_statement → variable_declaration → type + variable_declarator(s) + if (node.type === 'variable_declaration') { + const typeNode = node.childForFieldName('type') || node.child(0); + if (typeNode && typeNode.type !== 'var_keyword') { + const typeName = extractCSharpTypeName(typeNode); + if (typeName) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'variable_declarator') { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 'identifier') { + ctx.typeMap.set(nameNode.text, typeName); + } + } + } + } + } + } + + // Method/constructor parameter: parameter node has type + 
name fields + if (node.type === 'parameter') { + const typeNode = node.childForFieldName('type'); + const nameNode = node.childForFieldName('name'); + if (typeNode && nameNode) { + const typeName = extractCSharpTypeName(typeNode); + if (typeName) ctx.typeMap.set(nameNode.text, typeName); + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) extractCSharpTypeMapDepth(child, ctx, depth + 1); + } +} + +function extractCSharpTypeName(typeNode) { + if (!typeNode) return null; + const t = typeNode.type; + if (t === 'identifier' || t === 'qualified_name') return typeNode.text; + if (t === 'predefined_type') return null; // skip int, string, etc + if (t === 'generic_name') { + const first = typeNode.child(0); + return first ? first.text : null; + } + if (t === 'nullable_type') { + const inner = typeNode.child(0); + return inner ? extractCSharpTypeName(inner) : null; + } + return null; +} + function extractCSharpBaseTypes(node, className, classes) { const baseList = node.childForFieldName('bases'); if (!baseList) return; diff --git a/src/extractors/go.js b/src/extractors/go.js index cadf65b7..33cf44e6 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -10,9 +10,11 @@ export function extractGoSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkGoNode(tree.rootNode, ctx); + extractGoTypeMap(tree.rootNode, ctx); return ctx; } @@ -200,6 +202,69 @@ function handleGoCallExpr(node, ctx) { } } +// ── Type map extraction ───────────────────────────────────────────────────── + +function extractGoTypeMap(node, ctx) { + extractGoTypeMapDepth(node, ctx, 0); +} + +function extractGoTypeMapDepth(node, ctx, depth) { + if (depth >= 200) return; + + // var x MyType = ... 
→ var_declaration > var_spec + if (node.type === 'var_spec') { + const nameNode = node.childForFieldName('name'); + const typeNode = node.childForFieldName('type'); + if (nameNode && typeNode) { + const typeName = extractGoTypeName(typeNode); + if (typeName) ctx.typeMap.set(nameNode.text, typeName); + } + } + + // Function/method parameter types: parameter_declaration has identifiers then a type + if (node.type === 'parameter_declaration') { + const typeNode = node.childForFieldName('type'); + if (typeNode) { + const typeName = extractGoTypeName(typeNode); + if (typeName) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'identifier') { + ctx.typeMap.set(child.text, typeName); + } + } + } + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) extractGoTypeMapDepth(child, ctx, depth + 1); + } +} + +function extractGoTypeName(typeNode) { + if (!typeNode) return null; + const t = typeNode.type; + if (t === 'type_identifier' || t === 'identifier') return typeNode.text; + if (t === 'qualified_type') return typeNode.text; + // pointer type: *MyType → MyType + if (t === 'pointer_type') { + for (let i = 0; i < typeNode.childCount; i++) { + const child = typeNode.child(i); + if (child && (child.type === 'type_identifier' || child.type === 'identifier')) { + return child.text; + } + } + } + // generic type: MyType[T] → MyType + if (t === 'generic_type') { + const first = typeNode.child(0); + return first ? 
first.text : null; + } + return null; +} + // ── Child extraction helpers ──────────────────────────────────────────────── function extractGoParameters(paramListNode) { diff --git a/src/extractors/java.js b/src/extractors/java.js index d4519a08..f1c024d6 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -10,6 +10,7 @@ export function extractJavaSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkJavaNode(tree.rootNode, ctx); @@ -42,6 +43,9 @@ function walkJavaNode(node, ctx) { case 'object_creation_expression': handleJavaObjectCreation(node, ctx); break; + case 'local_variable_declaration': + handleJavaLocalVarDecl(node, ctx); + break; } for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i), ctx); @@ -166,7 +170,7 @@ function handleJavaMethodDecl(node, ctx) { if (!nameNode) return; const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); + const params = extractJavaParameters(node.childForFieldName('parameters'), ctx.typeMap); ctx.definitions.push({ name: fullName, kind: 'method', @@ -182,7 +186,7 @@ function handleJavaConstructorDecl(node, ctx) { if (!nameNode) return; const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); + const params = extractJavaParameters(node.childForFieldName('parameters'), ctx.typeMap); ctx.definitions.push({ name: fullName, kind: 'method', @@ -222,6 +226,20 @@ function handleJavaMethodInvocation(node, ctx) { ctx.calls.push(call); } +function handleJavaLocalVarDecl(node, ctx) { + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = typeNode.type === 'generic_type' ? 
typeNode.child(0)?.text : typeNode.text; + if (!typeName) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child?.type === 'variable_declarator') { + const nameNode = child.childForFieldName('name'); + if (nameNode) ctx.typeMap.set(nameNode.text, typeName); + } + } +} + function handleJavaObjectCreation(node, ctx) { const typeNode = node.childForFieldName('type'); if (!typeNode) return; @@ -247,7 +265,7 @@ function findJavaParentClass(node) { // ── Child extraction helpers ──────────────────────────────────────────────── -function extractJavaParameters(paramListNode) { +function extractJavaParameters(paramListNode, typeMap) { const params = []; if (!paramListNode) return params; for (let i = 0; i < paramListNode.childCount; i++) { @@ -257,6 +275,14 @@ function extractJavaParameters(paramListNode) { const nameNode = param.childForFieldName('name'); if (nameNode) { params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + if (typeMap) { + const typeNode = param.childForFieldName('type'); + if (typeNode) { + const typeName = + typeNode.type === 'generic_type' ? 
typeNode.child(0)?.text : typeNode.text; + if (typeName) typeMap.set(nameNode.text, typeName); + } + } } } } diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index 997c8ea6..f86cefd3 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -19,6 +19,7 @@ function extractSymbolsQuery(tree, query) { const imports = []; const classes = []; const exps = []; + const typeMap = new Map(); const matches = query.matches(tree.rootNode); @@ -179,7 +180,10 @@ function extractSymbolsQuery(tree, query) { // Extract dynamic import() calls via targeted walk (query patterns don't match `import` function type) extractDynamicImportsWalk(tree.rootNode, imports); - return { definitions, calls, imports, classes, exports: exps }; + // Extract typeMap from type annotations and new expressions + extractTypeMapWalk(tree.rootNode, typeMap); + + return { definitions, calls, imports, classes, exports: exps, typeMap }; } /** @@ -326,9 +330,12 @@ function extractSymbolsWalk(tree) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkJavaScriptNode(tree.rootNode, ctx); + // Populate typeMap for parameter type annotations (walk path handles variables inline) + extractTypeMapWalk(tree.rootNode, ctx.typeMap); return ctx; } @@ -472,6 +479,19 @@ function handleVariableDecl(node, ctx) { if (declarator && declarator.type === 'variable_declarator') { const nameN = declarator.childForFieldName('name'); const valueN = declarator.childForFieldName('value'); + + // Populate typeMap from type annotations or new expressions + if (nameN && nameN.type === 'identifier') { + const typeAnno = findChild(declarator, 'type_annotation'); + if (typeAnno) { + const typeName = extractSimpleTypeName(typeAnno); + if (typeName) ctx.typeMap.set(nameN.text, typeName); + } else if (valueN && valueN.type === 'new_expression') { + const ctorType = extractNewExprTypeName(valueN); + if (ctorType) ctx.typeMap.set(nameN.text, ctorType); + } + } + if (nameN && valueN) { 
const valType = valueN.type; if ( @@ -788,6 +808,70 @@ function extractImplementsFromNode(node) { return result; } +// ── Type inference helpers ─────────────────────────────────────────────── + +function extractSimpleTypeName(typeAnnotationNode) { + if (!typeAnnotationNode) return null; + for (let i = 0; i < typeAnnotationNode.childCount; i++) { + const child = typeAnnotationNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'type_identifier' || t === 'identifier') return child.text; + if (t === 'generic_type') return child.child(0)?.text || null; + if (t === 'parenthesized_type') return extractSimpleTypeName(child); + // Skip union, intersection, and array types — too ambiguous + } + return null; +} + +function extractNewExprTypeName(newExprNode) { + if (!newExprNode || newExprNode.type !== 'new_expression') return null; + const ctor = newExprNode.childForFieldName('constructor') || newExprNode.child(1); + if (!ctor) return null; + if (ctor.type === 'identifier') return ctor.text; + if (ctor.type === 'member_expression') { + const prop = ctor.childForFieldName('property'); + return prop ? 
prop.text : null; + } + return null; +} + +function extractTypeMapWalk(rootNode, typeMap) { + function walk(node) { + const t = node.type; + if (t === 'variable_declarator') { + const nameN = node.childForFieldName('name'); + if (nameN && nameN.type === 'identifier') { + const typeAnno = findChild(node, 'type_annotation'); + if (typeAnno) { + const typeName = extractSimpleTypeName(typeAnno); + if (typeName) typeMap.set(nameN.text, typeName); + } else { + const valueN = node.childForFieldName('value'); + if (valueN && valueN.type === 'new_expression') { + const ctorType = extractNewExprTypeName(valueN); + if (ctorType) typeMap.set(nameN.text, ctorType); + } + } + } + } else if (t === 'required_parameter' || t === 'optional_parameter') { + const nameNode = + node.childForFieldName('pattern') || node.childForFieldName('left') || node.child(0); + if (nameNode && nameNode.type === 'identifier') { + const typeAnno = findChild(node, 'type_annotation'); + if (typeAnno) { + const typeName = extractSimpleTypeName(typeAnno); + if (typeName) typeMap.set(nameNode.text, typeName); + } + } + } + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i)); + } + } + walk(rootNode); +} + function extractReceiverName(objNode) { if (!objNode) return undefined; const t = objNode.type; diff --git a/src/extractors/php.js b/src/extractors/php.js index 686c9031..fa1cfe04 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -82,9 +82,11 @@ export function extractPHPSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkPhpNode(tree.rootNode, ctx); + extractPhpTypeMap(tree.rootNode, ctx); return ctx; } @@ -320,6 +322,47 @@ function handlePhpObjectCreation(node, ctx) { } } +function extractPhpTypeMap(node, ctx) { + extractPhpTypeMapDepth(node, ctx, 0); +} + +function extractPhpTypeMapDepth(node, ctx, depth) { + if (depth >= 200) return; + + // Function/method parameters with type hints + if ( + node.type === 'simple_parameter' || + 
node.type === 'variadic_parameter' || + node.type === 'property_promotion_parameter' + ) { + const typeNode = node.childForFieldName('type'); + const nameNode = node.childForFieldName('name') || findChild(node, 'variable_name'); + if (typeNode && nameNode) { + const typeName = extractPhpTypeName(typeNode); + if (typeName) ctx.typeMap.set(nameNode.text, typeName); + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) extractPhpTypeMapDepth(child, ctx, depth + 1); + } +} + +function extractPhpTypeName(typeNode) { + if (!typeNode) return null; + const t = typeNode.type; + if (t === 'named_type' || t === 'name' || t === 'qualified_name') return typeNode.text; + // Nullable: ?MyType + if (t === 'optional_type') { + const inner = typeNode.child(1) || typeNode.child(0); + return inner ? extractPhpTypeName(inner) : null; + } + // Skip union types (too ambiguous) + if (t === 'union_type' || t === 'intersection_type') return null; + return null; +} + function findPHPParentClass(node) { let current = node.parent; while (current) { diff --git a/src/extractors/python.js b/src/extractors/python.js index 053a07ca..d5e41f80 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -10,9 +10,11 @@ export function extractPythonSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkPythonNode(tree.rootNode, ctx); + extractPythonTypeMap(tree.rootNode, ctx); return ctx; } @@ -284,6 +286,56 @@ function walkInitBody(bodyNode, seen, props) { } } +function extractPythonTypeMap(node, ctx) { + extractPythonTypeMapDepth(node, ctx, 0); +} + +function extractPythonTypeMapDepth(node, ctx, depth) { + if (depth >= 200) return; + + // typed_parameter: identifier : type + if (node.type === 'typed_parameter') { + const nameNode = node.child(0); + const typeNode = node.childForFieldName('type'); + if (nameNode && nameNode.type === 'identifier' && typeNode) { + const typeName = 
extractPythonTypeName(typeNode); + if (typeName && typeName !== 'self' && typeName !== 'cls') { + ctx.typeMap.set(nameNode.text, typeName); + } + } + } + + // typed_default_parameter: name : type = default + if (node.type === 'typed_default_parameter') { + const nameNode = node.childForFieldName('name'); + const typeNode = node.childForFieldName('type'); + if (nameNode && nameNode.type === 'identifier' && typeNode) { + const typeName = extractPythonTypeName(typeNode); + if (typeName) ctx.typeMap.set(nameNode.text, typeName); + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) extractPythonTypeMapDepth(child, ctx, depth + 1); + } +} + +function extractPythonTypeName(typeNode) { + if (!typeNode) return null; + const t = typeNode.type; + if (t === 'identifier') return typeNode.text; + if (t === 'attribute') return typeNode.text; // module.Type + // Generic: List[int] → subscript → value is identifier + if (t === 'subscript') { + const value = typeNode.childForFieldName('value'); + return value ? value.text : null; + } + // None type, string, etc → skip + if (t === 'none' || t === 'string') return null; + return null; +} + function findPythonParentClass(node) { let current = node.parent; while (current) { diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 8d46d3a6..e0f8fe33 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -10,9 +10,11 @@ export function extractRustSymbols(tree, _filePath) { imports: [], classes: [], exports: [], + typeMap: new Map(), }; walkRustNode(tree.rootNode, ctx); + extractRustTypeMap(tree.rootNode, ctx); return ctx; } @@ -257,6 +259,64 @@ function extractEnumVariants(enumNode) { return variants; } +function extractRustTypeMap(node, ctx) { + extractRustTypeMapDepth(node, ctx, 0); +} + +function extractRustTypeMapDepth(node, ctx, depth) { + if (depth >= 200) return; + + // let x: MyType = ... 
+ if (node.type === 'let_declaration') { + const pattern = node.childForFieldName('pattern'); + const typeNode = node.childForFieldName('type'); + if (pattern && pattern.type === 'identifier' && typeNode) { + const typeName = extractRustTypeName(typeNode); + if (typeName) ctx.typeMap.set(pattern.text, typeName); + } + } + + // fn foo(x: MyType) — parameter node has pattern + type fields + if (node.type === 'parameter') { + const pattern = node.childForFieldName('pattern'); + const typeNode = node.childForFieldName('type'); + if (pattern && typeNode) { + const name = pattern.type === 'identifier' ? pattern.text : null; + if (name && name !== 'self' && name !== '&self' && name !== '&mut self') { + const typeName = extractRustTypeName(typeNode); + if (typeName) ctx.typeMap.set(name, typeName); + } + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) extractRustTypeMapDepth(child, ctx, depth + 1); + } +} + +function extractRustTypeName(typeNode) { + if (!typeNode) return null; + const t = typeNode.type; + if (t === 'type_identifier' || t === 'identifier') return typeNode.text; + if (t === 'scoped_type_identifier') return typeNode.text; + // Reference: &MyType or &mut MyType → MyType + if (t === 'reference_type') { + for (let i = 0; i < typeNode.childCount; i++) { + const child = typeNode.child(i); + if (child && (child.type === 'type_identifier' || child.type === 'scoped_type_identifier')) { + return child.text; + } + } + } + // Generic: Vec → Vec + if (t === 'generic_type') { + const first = typeNode.child(0); + return first ? 
first.text : null; + } + return null; +} + function extractRustUsePath(node) { if (!node) return []; diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 86ef5043..98ce843d 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -4,6 +4,10 @@ * Build the same fixture project with both WASM and native engines, * then compare the resulting nodes/edges in SQLite. * + * IMPORTANT: Every feature MUST be implemented for BOTH engines (WASM and native). + * This test is a hard gate — if it fails, the feature is incomplete. Do not weaken, + * skip, or filter this test to work around missing engine parity. Fix the code instead. + * * Skipped when the native engine is not installed. */ diff --git a/tests/integration/build.test.js b/tests/integration/build.test.js index d7bee6bc..0d0b3d64 100644 --- a/tests/integration/build.test.js +++ b/tests/integration/build.test.js @@ -478,3 +478,66 @@ describe('version/engine mismatch auto-promotes to full rebuild', () => { expect(output).not.toContain('No changes detected'); }); }); + +describe('typed method call resolution', () => { + let typedDir, typedDbPath; + + beforeAll(async () => { + typedDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-typed-')); + fs.writeFileSync( + path.join(typedDir, 'typed.ts'), + [ + 'class Router {', + ' get(path: string) {}', + ' post(path: string) {}', + '}', + 'const app: Router = new Router();', + 'app.get("/users");', + 'app.post("/items");', + '', + ].join('\n'), + ); + // Force WASM engine — typeMap resolution is JS-only (native deferred) + await buildGraph(typedDir, { skipRegistry: true, engine: 'wasm' }); + typedDbPath = path.join(typedDir, '.codegraph', 'graph.db'); + }); + + afterAll(() => { + if (typedDir) fs.rmSync(typedDir, { recursive: true, force: true }); + }); + + test('typed variable call produces call edge to the declared type method', () => { + const db = new Database(typedDbPath, { 
readonly: true }); + const edges = db + .prepare(` + SELECT s.name as caller, t.name as callee FROM edges e + JOIN nodes s ON e.source_id = s.id + JOIN nodes t ON e.target_id = t.id + WHERE e.kind = 'calls' + `) + .all(); + db.close(); + const callees = edges.map((e) => e.callee); + // The key assertion: typed receiver 'app' resolves to Router, producing + // call edges to Router.get and Router.post + expect(callees).toContain('Router.get'); + expect(callees).toContain('Router.post'); + }); + + test('typed variable produces receiver edge to the class', () => { + const db = new Database(typedDbPath, { readonly: true }); + const edges = db + .prepare(` + SELECT s.name as caller, t.name as target, e.confidence FROM edges e + JOIN nodes s ON e.source_id = s.id + JOIN nodes t ON e.target_id = t.id + WHERE e.kind = 'receiver' + `) + .all(); + db.close(); + const receiverEdges = edges.filter((e) => e.target === 'Router'); + expect(receiverEdges.length).toBeGreaterThan(0); + // Type-resolved receiver edges should have 0.9 confidence + expect(receiverEdges[0].confidence).toBe(0.9); + }); +}); diff --git a/tests/parsers/java.test.js b/tests/parsers/java.test.js index 79486a04..83d05683 100644 --- a/tests/parsers/java.test.js +++ b/tests/parsers/java.test.js @@ -109,4 +109,26 @@ public class Foo {}`); }`); expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'User' })); }); + + describe('typeMap extraction', () => { + it('extracts typeMap from local variables', () => { + const symbols = parseJava(`public class Foo { + void run() { + List items = new ArrayList<>(); + Router router = new Router(); + } +}`); + expect(symbols.typeMap).toBeInstanceOf(Map); + expect(symbols.typeMap.get('items')).toBe('List'); + expect(symbols.typeMap.get('router')).toBe('Router'); + }); + + it('extracts typeMap from method parameters', () => { + const symbols = parseJava(`public class Foo { + void handle(Request req, Response res) {} +}`); + 
expect(symbols.typeMap.get('req')).toBe('Request'); + expect(symbols.typeMap.get('res')).toBe('Response'); + }); + }); }); diff --git a/tests/parsers/javascript.test.js b/tests/parsers/javascript.test.js index 63875fc8..00a04547 100644 --- a/tests/parsers/javascript.test.js +++ b/tests/parsers/javascript.test.js @@ -96,6 +96,57 @@ describe('JavaScript parser', () => { expect(c.receiver).toBe('a.b'); }); + describe('typeMap extraction', () => { + function parseTS(code) { + const parser = parsers.get('typescript'); + const tree = parser.parse(code); + return extractSymbols(tree, 'test.ts'); + } + + it('extracts typeMap from type annotations', () => { + const symbols = parseTS(`const x: Router = express.Router();`); + expect(symbols.typeMap).toBeInstanceOf(Map); + expect(symbols.typeMap.get('x')).toBe('Router'); + }); + + it('extracts typeMap from generic types', () => { + const symbols = parseTS(`const m: Map = new Map();`); + expect(symbols.typeMap.get('m')).toBe('Map'); + }); + + it('infers type from new expressions', () => { + const symbols = parseTS(`const r = new Router();`); + expect(symbols.typeMap.get('r')).toBe('Router'); + }); + + it('extracts parameter types into typeMap', () => { + const symbols = parseTS(`function process(req: Request, res: Response) {}`); + expect(symbols.typeMap.get('req')).toBe('Request'); + expect(symbols.typeMap.get('res')).toBe('Response'); + }); + + it('returns empty typeMap when no annotations', () => { + const symbols = parseJS(`const x = 42; function foo(a, b) {}`); + expect(symbols.typeMap).toBeInstanceOf(Map); + expect(symbols.typeMap.size).toBe(0); + }); + + it('skips union and intersection types', () => { + const symbols = parseTS(`const x: string | number = 42;`); + expect(symbols.typeMap.has('x')).toBe(false); + }); + + it('handles let/var declarations with type annotations', () => { + const symbols = parseTS(`let app: Express = createApp();`); + expect(symbols.typeMap.get('app')).toBe('Express'); + }); + + it('prefers 
type annotation over new expression', () => { + const symbols = parseTS(`const x: Base = new Derived();`); + expect(symbols.typeMap.get('x')).toBe('Base'); + }); + }); + it('does not set receiver for .call()/.apply()/.bind() unwrapped calls', () => { const symbols = parseJS(`fn.call(null, arg);`); const fnCall = symbols.calls.find((c) => c.name === 'fn'); From 9cdb93174bf224c6f9d2672522a6b40eebcdbbc9 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:17:05 -0600 Subject: [PATCH 28/37] fix: check parameter name not type name for self/cls filter in Python extractor (#501) Impact: 1 functions changed, 2 affected --- src/extractors/python.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/extractors/python.js b/src/extractors/python.js index d5e41f80..28c8d308 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -299,7 +299,7 @@ function extractPythonTypeMapDepth(node, ctx, depth) { const typeNode = node.childForFieldName('type'); if (nameNode && nameNode.type === 'identifier' && typeNode) { const typeName = extractPythonTypeName(typeNode); - if (typeName && typeName !== 'self' && typeName !== 'cls') { + if (typeName && nameNode.text !== 'self' && nameNode.text !== 'cls') { ctx.typeMap.set(nameNode.text, typeName); } } @@ -311,7 +311,9 @@ function extractPythonTypeMapDepth(node, ctx, depth) { const typeNode = node.childForFieldName('type'); if (nameNode && nameNode.type === 'identifier' && typeNode) { const typeName = extractPythonTypeName(typeNode); - if (typeName) ctx.typeMap.set(nameNode.text, typeName); + if (typeName && nameNode.text !== 'self' && nameNode.text !== 'cls') { + ctx.typeMap.set(nameNode.text, typeName); + } } } From 1c961466edb5adb864572c189b4bc3dcb72c966c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:17:15 -0600 Subject: [PATCH 29/37] refactor: remove redundant variable 
typeMap extraction in walk path (#501) Impact: 2 functions changed, 3 affected --- src/extractors/javascript.js | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index f86cefd3..fc52d117 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -334,7 +334,7 @@ function extractSymbolsWalk(tree) { }; walkJavaScriptNode(tree.rootNode, ctx); - // Populate typeMap for parameter type annotations (walk path handles variables inline) + // Populate typeMap for variables and parameter type annotations extractTypeMapWalk(tree.rootNode, ctx.typeMap); return ctx; } @@ -480,18 +480,6 @@ function handleVariableDecl(node, ctx) { const nameN = declarator.childForFieldName('name'); const valueN = declarator.childForFieldName('value'); - // Populate typeMap from type annotations or new expressions - if (nameN && nameN.type === 'identifier') { - const typeAnno = findChild(declarator, 'type_annotation'); - if (typeAnno) { - const typeName = extractSimpleTypeName(typeAnno); - if (typeName) ctx.typeMap.set(nameN.text, typeName); - } else if (valueN && valueN.type === 'new_expression') { - const ctorType = extractNewExprTypeName(valueN); - if (ctorType) ctx.typeMap.set(nameN.text, ctorType); - } - } - if (nameN && valueN) { const valType = valueN.type; if ( From 9192b4107adb2694f15d5c945882b1aefc76116f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:43:03 -0600 Subject: [PATCH 30/37] feat: add .pyi, .phtml, .rake, .gemspec extensions; condense comparison table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Python: .pyi (type stubs), PHP: .phtml, Ruby: .rake/.gemspec - Updated WASM registry, native Rust registry, and import resolution - Condensed README comparison table from 28×8 to 12×5, verified claims against actual repos, added security scanning row, grouped 
features - Marked backlog #71 (type inference) as done (#501) - Ruby/HCL type inference column: N/A (dynamic/declarative) Impact: 3 functions changed, 3 affected --- README.md | 63 ++++++++----------- .../codegraph-core/src/import_resolution.rs | 1 + crates/codegraph-core/src/parser_registry.rs | 6 +- docs/roadmap/BACKLOG.md | 6 +- src/domain/graph/resolve.js | 1 + src/domain/parser.js | 6 +- 6 files changed, 36 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 01371097..18732d51 100644 --- a/README.md +++ b/README.md @@ -76,39 +76,26 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a ### Feature comparison -Comparison last verified: March 2026. Full analysis: COMPETITIVE_ANALYSIS.md - -| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [code-graph-rag](https://github.com/vitali87/code-graph-rag) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | [axon](https://github.com/harshkedia177/axon) | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | -| Batch querying | **Yes** | — | — | — | — | — | — | — | -| Composite audit command | **Yes** | — | — | — | — | — | — | — | -| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | -| Multi-language | **11** | **14** | **32** | **11** | **~10** | **12** | **12** | **3** | -| Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | — | **Yes** | -| Hybrid BM25 + semantic | **Yes** | — | — | — | — | **Yes** | — | **Yes** | -| CODEOWNERS integration | **Yes** | — | — | — | — | — | — | — | -| Architecture boundary rules | **Yes** | — | — | — | — | — | — | — | -| CI validation predicates | **Yes** | — | — | — | — | — | — | — | -| 
Graph snapshots | **Yes** | — | — | — | — | — | — | — | -| Git diff impact | **Yes** | — | — | — | — | **Yes** | **Yes** | **Yes** | -| Branch structural diff | **Yes** | — | — | — | — | — | — | **Yes** | -| Git co-change analysis | **Yes** | — | — | — | — | — | — | **Yes** | -| Watch mode | **Yes** | — | **Yes** | **Yes** | — | — | **Yes** | **Yes** | -| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | **Yes** | **Yes** | -| Cycle detection | **Yes** | — | — | — | — | — | — | — | -| Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | Go only | **Yes** | -| Zero config | **Yes** | — | **Yes** | — | — | **Yes** | — | **Yes** | -| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — | -| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | -| Dataflow analysis | **Yes** | **Yes** | — | — | **Yes** | — | — | — | -| Control flow graph (CFG) | **Yes** | **Yes** | — | — | **Yes** | — | — | — | -| AST node querying | **Yes** | **Yes** | — | — | **Yes** | — | — | — | -| Expanded node/edge types | **Yes** | **Yes** | — | — | **Yes** | — | — | — | -| GraphML / Neo4j export | **Yes** | **Yes** | — | — | — | — | — | — | -| Interactive graph viewer | **Yes** | — | — | — | — | — | — | — | -| Commercial use allowed | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | No | Paid | **Yes** | -| Open source | **Yes** | Yes | Yes | Yes | Yes | No | No | Yes | +Comparison last verified: March 2026. Claims verified against each repo's README/docs. 
Full analysis: COMPETITIVE_ANALYSIS.md + +| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | +|---|:---:|:---:|:---:|:---:|:---:| +| Languages | **11** | ~12 | **32** | ~10 | 3 | +| MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | +| Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | +| Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | +| Git-aware (diff impact, co-change, branch diff) | **All 3** | — | — | — | **All 3** | +| Dead code / role classification | **Yes** | — | **Yes** | — | **Yes** | +| Incremental rebuilds | **O(changed)** | — | O(n) | — | **Yes** | +| Architecture rules + CI gate | **Yes** | — | — | — | — | +| Security scanning (SAST / vuln detection) | Intentionally out of scope² | **Yes** | **Yes** | **Yes** | — | +| Zero config, `npm install` | **Yes** | — | **Yes** | — | **Yes** | +| Graph export (GraphML / Neo4j / DOT) | **Yes** | **Yes** | — | — | — | +| Open source + commercial use | **Yes** (MIT) | **Yes** (Apache-2.0) | **Yes** (MIT/Apache-2.0) | **Yes** (Apache-2.0) | Source-available³ | + +¹ narsil-mcp added CFG and dataflow in recent versions. ² Codegraph focuses on structural understanding, not vulnerability detection — use dedicated SAST tools (Semgrep, CodeQL, Snyk) for that. ³ axon claims MIT in pyproject.toml but has no LICENSE file in the repo. + +> **Other tools evaluated:** [code-graph-rag](https://github.com/vitali87/code-graph-rag) (7 languages, requires Docker + Memgraph + API keys), [GitNexus](https://github.com/abhigyanpatwari/GitNexus) (13 languages, non-commercial license), [CodeMCP](https://github.com/SimplyLiz/CodeMCP) (12 languages, freemium/paid). See [COMPETITIVE_ANALYSIS.md](generated/competitive/COMPETITIVE_ANALYSIS.md) for details. 
### What makes codegraph different @@ -490,16 +477,16 @@ codegraph registry remove # Unregister |---|---|---|:---:|:---:| | ![JavaScript](https://img.shields.io/badge/-JavaScript-F7DF1E?style=flat-square&logo=javascript&logoColor=black) | `.js`, `.jsx`, `.mjs`, `.cjs` | functions, classes, methods, imports, exports, call sites, constants, dataflow | ✅ | ✅ | | ![TypeScript](https://img.shields.io/badge/-TypeScript-3178C6?style=flat-square&logo=typescript&logoColor=white) | `.ts`, `.tsx` | functions, classes, interfaces, type aliases, methods, imports, exports, call sites, dataflow | ✅ | ✅ | -| ![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py` | functions, classes, methods, imports, decorators, constants, call sites, dataflow | ✅ | ✅ | +| ![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py`, `.pyi` | functions, classes, methods, imports, decorators, constants, call sites, dataflow | ✅ | ✅ | | ![Go](https://img.shields.io/badge/-Go-00ADD8?style=flat-square&logo=go&logoColor=white) | `.go` | functions, methods, structs, interfaces, constants, imports, call sites, dataflow | ✅ | ✅ | | ![Rust](https://img.shields.io/badge/-Rust-000000?style=flat-square&logo=rust&logoColor=white) | `.rs` | functions, methods, structs, enums, traits, constants, `use` imports, call sites, dataflow | ✅ | ✅ | | ![Java](https://img.shields.io/badge/-Java-ED8B00?style=flat-square&logo=openjdk&logoColor=white) | `.java` | classes, methods, constructors, interfaces, enums, imports, call sites, dataflow | ✅ | ✅ | | ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | classes, structs, records, interfaces, enums, methods, constructors, properties, using directives, call sites, dataflow | ✅ | ✅ | -| ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php` | functions, classes, interfaces, 
traits, enums, methods, namespace use, call sites, dataflow | ✅ | ✅ | -| ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb` | classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | — | ✅ | -| ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | resource, data, variable, module, output blocks | — | ✅ | +| ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php`, `.phtml` | functions, classes, interfaces, traits, enums, methods, namespace use, call sites, dataflow | ✅ | ✅ | +| ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb`, `.rake`, `.gemspec` | classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow | N/A⁴ | ✅ | +| ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | resource, data, variable, module, output blocks | N/A⁴ | ✅ | -> **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. **Parity** = WASM and native Rust engines produce identical output. +> **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. **Parity** = WASM and native Rust engines produce identical output. ⁴ Ruby and HCL are dynamically typed / declarative — type inference does not apply. 
## ⚙️ How It Works diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs index f0071502..69480151 100644 --- a/crates/codegraph-core/src/import_resolution.rs +++ b/crates/codegraph-core/src/import_resolution.rs @@ -140,6 +140,7 @@ fn resolve_import_path_inner( ".jsx", ".mjs", ".py", + ".pyi", "/index.ts", "/index.tsx", "/index.js", diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index f800b275..0dde0bd6 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -50,14 +50,14 @@ impl LanguageKind { } match ext { "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), - "py" => Some(Self::Python), + "py" | "pyi" => Some(Self::Python), "tf" | "hcl" => Some(Self::Hcl), "go" => Some(Self::Go), "rs" => Some(Self::Rust), "java" => Some(Self::Java), "cs" => Some(Self::CSharp), - "rb" => Some(Self::Ruby), - "php" => Some(Self::Php), + "rb" | "rake" | "gemspec" => Some(Self::Ruby), + "php" | "phtml" => Some(Self::Php), _ => None, } } diff --git a/docs/roadmap/BACKLOG.md b/docs/roadmap/BACKLOG.md index f017a518..23f84128 100644 --- a/docs/roadmap/BACKLOG.md +++ b/docs/roadmap/BACKLOG.md @@ -23,14 +23,14 @@ Each item has a short title, description, category, expected benefit, and four a ### Tier 0 — Promote before Phase 4-5 (highest immediate impact) -These two items directly improve agent experience and graph accuracy today, without requiring Rust porting or TypeScript migration. They should be implemented before any Phase 4+ roadmap work begins. +Both items are now **DONE**. These directly improved agent experience and graph accuracy without requiring Rust porting or TypeScript migration. -**Rationale:** Item #83 enriches the *passively-injected* context that agents actually see via hooks — the single highest-leverage surface for reducing blind edits. 
Item #71 closes the biggest accuracy gap in the graph for TypeScript and Java, where missing type-aware resolution causes hallucinated "no callers" results. +**Rationale:** Item #83 enriches the *passively-injected* context that agents actually see via hooks — the single highest-leverage surface for reducing blind edits. Item #71 closed the biggest accuracy gap in the graph for TypeScript and Java, where missing type-aware resolution caused hallucinated "no callers" results. | ID | Title | Description | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | Depends on | |----|-------|-------------|----------|---------|----------|-------------------|-------------------|----------|------------| | 83 | ~~Hook-optimized `codegraph brief` command~~ | New `codegraph brief ` command designed for Claude Code hook context injection. Returns a compact, token-efficient summary per file: each symbol with its role and caller count (e.g. `buildGraph [core, 12 callers]`), blast radius count on importers (`Imported by: src/cli.js (+8 transitive)`), and overall file risk tier. Current `deps --json` output used by `enrich-context.sh` is shallow — just file-level imports/importedBy and symbol names with no role or blast radius info. The `brief` command would include: **(a)** symbol roles in the output — knowing a file defines `core` vs `leaf` symbols changes editing caution; **(b)** per-symbol transitive caller counts — makes blast radius visible without a separate `fn-impact` call; **(c)** file-level risk tier (high/medium/low based on max fan-in and role composition). Output optimized for `additionalContext` injection — single compact block, not verbose JSON. Also add `--brief` flag to `deps` as an alias. | Embeddability | The `enrich-context.sh` hook is the only codegraph context agents actually see (they ignore CLAUDE.md instructions to run commands manually). 
Making that passively-injected context richer — with roles, caller counts, and risk tiers — directly reduces blind edits to high-impact code. Currently the hook shows `Defines: function buildGraph` but not that it's a core symbol with 12 transitive callers | ✓ | ✓ | 4 | No | — | **DONE** — `codegraph brief ` command with symbol roles, caller counts, and risk tiers. CLI command, MCP tool, and presentation layer. ([#480](https://github.com/optave/codegraph/pull/480)) | -| 71 | Basic type inference for typed languages | Extract type annotations from TypeScript and Java AST nodes (variable declarations, function parameters, return types, generics) to resolve method calls through typed references. Currently `const x: Router = express.Router(); x.get(...)` produces no edge because `x.get` can't be resolved without knowing `x` is a `Router`. Tree-sitter already parses type annotations — we just don't use them for resolution. Start with declared types (no flow inference), which covers the majority of TS/Java code. | Resolution | Dramatically improves call graph completeness for TypeScript and Java — the two languages where developers annotate types explicitly and expect tooling to use them. Directly prevents hallucinated "no callers" results for methods called through typed variables | ✓ | ✓ | 5 | No | — | +| 71 | ~~Basic type inference for typed languages~~ | Extract type annotations from TypeScript and Java AST nodes (variable declarations, function parameters, return types, generics) to resolve method calls through typed references. Currently `const x: Router = express.Router(); x.get(...)` produces no edge because `x.get` can't be resolved without knowing `x` is a `Router`. Tree-sitter already parses type annotations — we just don't use them for resolution. Start with declared types (no flow inference), which covers the majority of TS/Java code. 
| Resolution | Dramatically improves call graph completeness for TypeScript and Java — the two languages where developers annotate types explicitly and expect tooling to use them. Directly prevents hallucinated "no callers" results for methods called through typed variables | ✓ | ✓ | 5 | No | — | **DONE** — Type inference for all typed languages (TS, Java, Go, Rust, C#, PHP, Python). WASM + native engines. ([#501](https://github.com/optave/codegraph/pull/501)) | ### Tier 1 — Zero-dep + Foundation-aligned (build these first) diff --git a/src/domain/graph/resolve.js b/src/domain/graph/resolve.js index 5e0ab1d3..5a82a5c6 100644 --- a/src/domain/graph/resolve.js +++ b/src/domain/graph/resolve.js @@ -78,6 +78,7 @@ function resolveImportPathJS(fromFile, importSource, rootDir, aliases) { '.jsx', '.mjs', '.py', + '.pyi', '/index.ts', '/index.tsx', '/index.js', diff --git a/src/domain/parser.js b/src/domain/parser.js index 476e6184..e77f1b4a 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -320,7 +320,7 @@ export const LANGUAGE_REGISTRY = [ }, { id: 'python', - extensions: ['.py'], + extensions: ['.py', '.pyi'], grammarFile: 'tree-sitter-python.wasm', extractor: extractPythonSymbols, required: false, @@ -355,14 +355,14 @@ export const LANGUAGE_REGISTRY = [ }, { id: 'ruby', - extensions: ['.rb'], + extensions: ['.rb', '.rake', '.gemspec'], grammarFile: 'tree-sitter-ruby.wasm', extractor: extractRubySymbols, required: false, }, { id: 'php', - extensions: ['.php'], + extensions: ['.php', '.phtml'], grammarFile: 'tree-sitter-php.wasm', extractor: extractPHPSymbols, required: false, From 9094589120e48c83d5ff95fc140490dfe5a64710 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:56:45 -0600 Subject: [PATCH 31/37] feat: add GitNexus to comparison table, fix license and roadmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add GitNexus column to feature 
comparison table (verified against repo) - Fix codegraph license: MIT → Apache-2.0 - Remove "Other tools evaluated" footnote (GitNexus now in table) - Update Limitations: reflect that type inference exists, clarify tsc gap - Sync Roadmap section with actual ROADMAP.md (11 phases, correct versions) --- README.md | 55 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 18732d51..0bb6948e 100644 --- a/README.md +++ b/README.md @@ -78,24 +78,22 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a Comparison last verified: March 2026. Claims verified against each repo's README/docs. Full analysis: COMPETITIVE_ANALYSIS.md -| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | -|---|:---:|:---:|:---:|:---:|:---:| -| Languages | **11** | ~12 | **32** | ~10 | 3 | -| MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | -| Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | -| Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | -| Git-aware (diff impact, co-change, branch diff) | **All 3** | — | — | — | **All 3** | -| Dead code / role classification | **Yes** | — | **Yes** | — | **Yes** | -| Incremental rebuilds | **O(changed)** | — | O(n) | — | **Yes** | -| Architecture rules + CI gate | **Yes** | — | — | — | — | -| Security scanning (SAST / vuln detection) | Intentionally out of scope² | **Yes** | **Yes** | **Yes** | — | -| Zero config, `npm install` | **Yes** | — | **Yes** | — | **Yes** | -| Graph export (GraphML / Neo4j / DOT) | **Yes** | **Yes** | — | — | — | -| Open source + commercial use | **Yes** (MIT) | **Yes** (Apache-2.0) | **Yes** (MIT/Apache-2.0) | **Yes** (Apache-2.0) | Source-available³ | - -¹ narsil-mcp added CFG and dataflow in 
recent versions. ² Codegraph focuses on structural understanding, not vulnerability detection — use dedicated SAST tools (Semgrep, CodeQL, Snyk) for that. ³ axon claims MIT in pyproject.toml but has no LICENSE file in the repo. - -> **Other tools evaluated:** [code-graph-rag](https://github.com/vitali87/code-graph-rag) (7 languages, requires Docker + Memgraph + API keys), [GitNexus](https://github.com/abhigyanpatwari/GitNexus) (13 languages, non-commercial license), [CodeMCP](https://github.com/SimplyLiz/CodeMCP) (12 languages, freemium/paid). See [COMPETITIVE_ANALYSIS.md](generated/competitive/COMPETITIVE_ANALYSIS.md) for details. +| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | +|---|:---:|:---:|:---:|:---:|:---:|:---:| +| Languages | **11** | ~12 | **32** | ~10 | 3 | 13 | +| MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | +| Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | — | +| Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | **Yes** | +| Git-aware (diff impact, co-change, branch diff) | **All 3** | — | — | — | **All 3** | — | +| Dead code / role classification | **Yes** | — | **Yes** | — | **Yes** | — | +| Incremental rebuilds | **O(changed)** | — | O(n) | — | **Yes** | Commit-level⁴ | +| Architecture rules + CI gate | **Yes** | — | — | — | — | — | +| Security scanning (SAST / vuln detection) | Intentionally out of scope² | **Yes** | **Yes** | **Yes** | — | — | +| Zero config, `npm install` | **Yes** | — | **Yes** | — | **Yes** | **Yes** | +| Graph export (GraphML / Neo4j / DOT) | **Yes** | **Yes** | — | — | — | — | +| Open source + commercial use | **Yes** (Apache-2.0) | **Yes** (Apache-2.0) | **Yes** (MIT/Apache-2.0) | **Yes** (Apache-2.0) | Source-available³ | 
Non-commercial⁵ | + +¹ narsil-mcp added CFG and dataflow in recent versions. ² Codegraph focuses on structural understanding, not vulnerability detection — use dedicated SAST tools (Semgrep, CodeQL, Snyk) for that. ³ axon claims MIT in pyproject.toml but has no LICENSE file in the repo. ⁴ GitNexus skips re-index if the git commit hasn't changed, but re-processes the entire repo when it does — no per-file incremental parsing. ⁵ GitNexus uses the PolyForm Noncommercial 1.0.0 license. ### What makes codegraph different @@ -755,7 +753,7 @@ const { results: fused } = await multiSearchData( ## ⚠️ Limitations -- **No full type inference** — parses `.d.ts` interfaces but doesn't use TypeScript's type checker for overload resolution +- **No TypeScript type-checker integration** — type inference resolves annotations, `new` expressions, and assignment chains, but does not invoke `tsc` for overload resolution or complex generics - **Dynamic calls are best-effort** — complex computed property access and `eval` patterns are not resolved - **Python imports** — resolves relative imports but doesn't follow `sys.path` or virtual environment packages - **Dataflow analysis** — intraprocedural (single-function scope), not interprocedural @@ -765,13 +763,18 @@ const { results: fused } = await multiSearchData( See **[ROADMAP.md](docs/roadmap/ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan: 1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust -2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry -3. 
~~**Deep Analysis**~~ — **Complete** (v3.0.0) — dataflow analysis (flows_to, returns, mutates), intraprocedural CFG for all 11 languages, stored AST nodes, expanded node/edge types (parameter, property, constant, contains, parameter_of, receiver), GraphML/GraphSON/Neo4j CSV export, interactive HTML viewer, CLI consolidation, stable JSON schema -4. ~~**Architectural Refactoring**~~ — **Complete** (v3.1.5) — unified AST analysis, composable MCP, domain errors, builder pipeline, embedder subsystem, graph model, qualified names, presentation layer, InMemoryRepository, domain directory grouping, CLI composability -5. **Natural Language Queries** — `codegraph ask` command, conversational sessions -6. **Expanded Language Support** — 8 new languages (12 → 20) -7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output -8. **TypeScript Migration** — gradual migration from JS to TypeScript +2. ~~**Foundation Hardening**~~ — **Complete** (v1.5.0) — parser registry, complete MCP, test coverage, enhanced config, multi-repo MCP +3. ~~**Analysis Expansion**~~ — **Complete** (v2.7.0) — complexity metrics, community detection, flow tracing, co-change, manifesto, boundary rules, check, triage, audit, batch, hybrid search +4. ~~**Deep Analysis & Graph Enrichment**~~ — **Complete** (v3.0.0) — dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, interactive viewer, exports command +5. ~~**Architectural Refactoring**~~ — **Complete** (v3.1.5) — unified AST analysis, composable MCP, domain errors, builder pipeline, graph model, qualified names, presentation layer, CLI composability +6. **Native Analysis Acceleration** — move JS-only build phases to Rust, sub-100ms 1-file rebuilds +7. **TypeScript Migration** — project setup, core type definitions, leaf → core → orchestration migration +8. **Runtime & Extensibility** — event-driven pipeline, plugin system, query caching, pagination +9. 
**Intelligent Embeddings** — LLM-generated descriptions, enhanced embeddings, module summaries +10. **Natural Language Queries** — `codegraph ask` command, conversational sessions +11. **Expanded Language Support** — 8 new languages (11 → 19) +12. **GitHub Integration & CI** — reusable GitHub Action, LLM-enhanced PR review, SARIF output +13. **Visualization & Advanced** — web UI, dead code detection, monorepo, agentic search ## 🤝 Contributing From 3ff783a9eb31973bf97f27b138258a6aab5b8442 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:17:58 -0600 Subject: [PATCH 32/37] fix: supplement receiver edges for older native binaries Native binaries < 3.2.0 don't emit typeMap from parsing or handle receiver-based method calls in buildCallEdges. This adds two fallbacks: 1. extractTypeMapRegex: when native parse results lack typeMap, extract `const x = new Foo()` patterns via regex so the edge builder can resolve variable receivers to their declared types. 2. supplementReceiverEdges: after native buildCallEdges runs, if no receiver edges were produced, create them JS-side using the same buildReceiverEdge + type-resolved method-call logic as the WASM path. Both fallbacks are gated on the absence of the feature and will be no-ops once native binaries >= 3.2.0 are published. Fixes build-parity test failure on CI (native 31 edges vs WASM 33). 
Impact: 6 functions changed, 15 affected Impact: 2 functions changed, 5 affected --- .../graph/builder/stages/build-edges.js | 52 +++++++++++++++++++ src/domain/parser.js | 40 ++++++++++++-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index 085717fa..25528976 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -128,6 +128,14 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) for (const e of nativeEdges) { allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); } + + // Older native binaries (< 3.2.0) don't emit receiver or type-resolved method-call + // edges. Supplement them on the JS side if the native binary missed them. + // TODO: Remove once all published native binaries handle receivers (>= 3.2.0) + const hasReceiver = nativeEdges.some((e) => e.kind === 'receiver'); + if (!hasReceiver) { + supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows); + } } function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { @@ -147,6 +155,50 @@ function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { return importedNames; } +// ── Receiver edge supplement for older native binaries ────────────────── + +function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { + const seenCallEdges = new Set(); + // Collect existing edges to avoid duplicates + for (const row of allEdgeRows) { + seenCallEdges.add(`${row[0]}|${row[1]}|${row[2]}`); + } + + for (const nf of nativeFiles) { + const relPath = nf.file; + const typeMap = new Map(nf.typeMap.map((t) => [t.name, t.typeName])); + const fileNodeRow = { id: nf.fileNodeId }; + + for (const call of nf.calls) { + if (!call.receiver || BUILTIN_RECEIVERS.has(call.receiver)) continue; + if (call.receiver === 'this' || call.receiver === 'self' || 
call.receiver === 'super') + continue; + + const caller = findCaller(call, nf.definitions, relPath, getNodeIdStmt, fileNodeRow); + + // Receiver edge: caller → receiver type node + buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap); + + // Type-resolved method call: caller → Type.method + const typeName = typeMap.get(call.receiver); + if (typeName) { + const qualifiedName = `${typeName}.${call.name}`; + const targets = (ctx.nodesByName.get(qualifiedName) || []).filter( + (n) => n.kind === 'method', + ); + for (const t of targets) { + const key = `${caller.id}|${t.id}|calls`; + if (t.id !== caller.id && !seenCallEdges.has(key)) { + seenCallEdges.add(key); + const confidence = computeConfidence(relPath, t.file, null); + allEdgeRows.push([caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0]); + } + } + } + } + } +} + // ── Call edges (JS fallback) ──────────────────────────────────────────── function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { diff --git a/src/domain/parser.js b/src/domain/parser.js index e77f1b4a..5eff1497 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -378,6 +378,25 @@ for (const entry of LANGUAGE_REGISTRY) { export const SUPPORTED_EXTENSIONS = new Set(_extToLang.keys()); +/** + * Regex-based typeMap extraction for older native binaries that don't emit typeMap. + * Handles `const x = new Foo()` patterns. Removes the need for tree-sitter. + * TODO: Remove once all published native binaries include typeMap extraction (>= 3.2.0) + */ +function extractTypeMapRegex(filePath) { + let code; + try { + code = fs.readFileSync(filePath, 'utf-8'); + } catch { + return []; + } + const entries = []; + for (const m of code.matchAll(/(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)/g)) { + entries.push({ name: m[1], typeName: m[2] }); + } + return entries; +} + /** * WASM extraction helper: picks the right extractor based on file extension. 
*/ @@ -414,7 +433,12 @@ export async function parseFileAuto(filePath, source, opts = {}) { if (native) { const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false); - return result ? patchNativeResult(result) : null; + if (!result) return null; + const patched = patchNativeResult(result); + if (!patched.typeMap || patched.typeMap.length === 0) { + patched.typeMap = extractTypeMapRegex(filePath); + } + return patched; } // WASM path @@ -444,8 +468,13 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) { ); for (const r of nativeResults) { if (!r) continue; + const patched = patchNativeResult(r); + // Older native binaries (< 3.2.0) don't extract typeMap; supplement from source + if (!patched.typeMap || patched.typeMap.length === 0) { + patched.typeMap = extractTypeMapRegex(r.file); + } const relPath = path.relative(rootDir, r.file).split(path.sep).join('/'); - result.set(relPath, patchNativeResult(r)); + result.set(relPath, patched); } return result; } @@ -519,7 +548,12 @@ export function createParseTreeCache() { export async function parseFileIncremental(cache, filePath, source, opts = {}) { if (cache) { const result = cache.parseFile(filePath, source); - return result ? patchNativeResult(result) : null; + if (!result) return null; + const patched = patchNativeResult(result); + if (!patched.typeMap || patched.typeMap.length === 0) { + patched.typeMap = extractTypeMapRegex(filePath); + } + return patched; } return parseFileAuto(filePath, source, opts); } From e34aff6d91ae3110d447a9e0ab0387709a8d634c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:21:38 -0600 Subject: [PATCH 33/37] fix: correct misleading WASM engine test comment (#502) The comment incorrectly stated native type-map resolution was deferred, but native Rust support is fully implemented in this PR. The actual reason is that the native binary may not be present in CI. 
--- tests/integration/build.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/build.test.js b/tests/integration/build.test.js index 0d0b3d64..a4148642 100644 --- a/tests/integration/build.test.js +++ b/tests/integration/build.test.js @@ -497,7 +497,7 @@ describe('typed method call resolution', () => { '', ].join('\n'), ); - // Force WASM engine — typeMap resolution is JS-only (native deferred) + // Force WASM engine — native binary may not be present in all test environments await buildGraph(typedDir, { skipRegistry: true, engine: 'wasm' }); typedDbPath = path.join(typedDir, '.codegraph', 'graph.db'); }); From f940fda6cf0b2d32314a95429daa63a67c048802 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:21:50 -0600 Subject: [PATCH 34/37] fix: add recursion depth guard to extractTypeMapWalk (#502) Every other type-map extractor guards at depth 200, but the JS/TS walker recursed unconditionally. This could cause stack overflow on pathologically large or deeply nested auto-generated files. 
Impact: 2 functions changed, 3 affected --- src/extractors/javascript.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index fc52d117..7762959c 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -825,7 +825,8 @@ function extractNewExprTypeName(newExprNode) { } function extractTypeMapWalk(rootNode, typeMap) { - function walk(node) { + function walk(node, depth) { + if (depth >= 200) return; const t = node.type; if (t === 'variable_declarator') { const nameN = node.childForFieldName('name'); @@ -854,10 +855,10 @@ function extractTypeMapWalk(rootNode, typeMap) { } } for (let i = 0; i < node.childCount; i++) { - walk(node.child(i)); + walk(node.child(i), depth + 1); } } - walk(rootNode); + walk(rootNode, 0); } function extractReceiverName(objNode) { From d8cd057617b86b1331d271807d9c7c278cc6db4d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:22:01 -0600 Subject: [PATCH 35/37] fix: remove dead methodCandidates guard in resolveByMethodOrGlobal (#502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nodesByName is keyed by fully-qualified name (e.g. Router.get), so get(call.name) with a simple name like 'get' never returns class methods — the filter was always empty. Removed the unreachable block so the type-aware resolution path is the clear entry point. 
--- src/domain/graph/builder/stages/build-edges.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index 25528976..47d75320 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -296,11 +296,6 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { } function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { - const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( - (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', - ); - if (methodCandidates.length > 0) return methodCandidates; - // Type-aware resolution: translate variable receiver to its declared type if (call.receiver && typeMap) { const typeName = typeMap.get(call.receiver); From 5304622feab3e9926532a184b08f1125fd01fca6 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:22:11 -0600 Subject: [PATCH 36/37] fix: handle Go multi-name var declarations in type map (#502) var x, y MyService now registers all names in the type map, not just the first. Fixed in both JS (extractGoTypeMapDepth) and Rust (extract_go_type_map_depth) extractors by iterating identifier children instead of using child_by_field_name('name'). 
Impact: 2 functions changed, 4 affected --- crates/codegraph-core/src/extractors/go.rs | 14 +++++++++----- src/extractors/go.js | 14 ++++++++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index d9f0c0d6..f860dbac 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -344,11 +344,15 @@ fn extract_go_type_map_depth(node: &Node, source: &[u8], symbols: &mut FileSymbo "var_spec" => { if let Some(type_node) = node.child_by_field_name("type") { if let Some(type_name) = extract_go_type_name(&type_node, source) { - if let Some(name_node) = node.child_by_field_name("name") { - symbols.type_map.push(TypeMapEntry { - name: node_text(&name_node, source).to_string(), - type_name: type_name.to_string(), - }); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" { + symbols.type_map.push(TypeMapEntry { + name: node_text(&child, source).to_string(), + type_name: type_name.to_string(), + }); + } + } } } } diff --git a/src/extractors/go.js b/src/extractors/go.js index 33cf44e6..23b5f1b0 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -211,13 +211,19 @@ function extractGoTypeMap(node, ctx) { function extractGoTypeMapDepth(node, ctx, depth) { if (depth >= 200) return; - // var x MyType = ... → var_declaration > var_spec + // var x MyType = ... 
or var x, y MyType → var_declaration > var_spec if (node.type === 'var_spec') { - const nameNode = node.childForFieldName('name'); const typeNode = node.childForFieldName('type'); - if (nameNode && typeNode) { + if (typeNode) { const typeName = extractGoTypeName(typeNode); - if (typeName) ctx.typeMap.set(nameNode.text, typeName); + if (typeName) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'identifier') { + ctx.typeMap.set(child.text, typeName); + } + } + } } } From eeb6cf83f82c69f08adeff9cb8ec4187b8a4331f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:44:29 -0600 Subject: [PATCH 37/37] fix: replace regex typeMap backfill with WASM-based AST extraction (#502) The regex pattern matched inside comments and string literals, producing spurious type map entries that could create phantom call edges. Replace with WASM tree-sitter extraction which uses the AST and is immune to false positives from non-code contexts. Also eliminates redundant disk reads in parseFileAuto where source was already in memory. Impact: 4 functions changed, 2 affected --- src/domain/parser.js | 68 +++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/src/domain/parser.js b/src/domain/parser.js index 5eff1497..8ccbcd3b 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -379,22 +379,28 @@ for (const entry of LANGUAGE_REGISTRY) { export const SUPPORTED_EXTENSIONS = new Set(_extToLang.keys()); /** - * Regex-based typeMap extraction for older native binaries that don't emit typeMap. - * Handles `const x = new Foo()` patterns. Removes the need for tree-sitter. + * WASM-based typeMap backfill for older native binaries that don't emit typeMap. + * Uses tree-sitter AST extraction instead of regex to avoid false positives from + * matches inside comments and string literals. 
* TODO: Remove once all published native binaries include typeMap extraction (>= 3.2.0) */ -function extractTypeMapRegex(filePath) { - let code; - try { - code = fs.readFileSync(filePath, 'utf-8'); - } catch { - return []; - } - const entries = []; - for (const m of code.matchAll(/(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)/g)) { - entries.push({ name: m[1], typeName: m[2] }); +async function backfillTypeMap(filePath, source) { + let code = source; + if (!code) { + try { + code = fs.readFileSync(filePath, 'utf-8'); + } catch { + return { typeMap: [], backfilled: false }; + } } - return entries; + const parsers = await createParsers(); + const extracted = wasmExtractSymbols(parsers, filePath, code); + if (!extracted?.symbols?.typeMap) return { typeMap: [], backfilled: false }; + const tm = extracted.symbols.typeMap; + return { + typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])), + backfilled: true, + }; } /** @@ -436,7 +442,9 @@ export async function parseFileAuto(filePath, source, opts = {}) { if (!result) return null; const patched = patchNativeResult(result); if (!patched.typeMap || patched.typeMap.length === 0) { - patched.typeMap = extractTypeMapRegex(filePath); + const { typeMap, backfilled } = await backfillTypeMap(filePath, source); + patched.typeMap = typeMap; + if (backfilled) patched._typeMapBackfilled = true; } return patched; } @@ -466,15 +474,35 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) { !!opts.dataflow, opts.ast !== false, ); + const needsTypeMap = []; for (const r of nativeResults) { if (!r) continue; const patched = patchNativeResult(r); - // Older native binaries (< 3.2.0) don't extract typeMap; supplement from source - if (!patched.typeMap || patched.typeMap.length === 0) { - patched.typeMap = extractTypeMapRegex(r.file); - } const relPath = path.relative(rootDir, r.file).split(path.sep).join('/'); result.set(relPath, patched); + if (!patched.typeMap || patched.typeMap.length === 0) { + 
needsTypeMap.push({ filePath: r.file, relPath }); + } + } + // Backfill typeMap via WASM for native binaries that predate the type-map feature + if (needsTypeMap.length > 0) { + const parsers = await createParsers(); + for (const { filePath, relPath } of needsTypeMap) { + try { + const code = fs.readFileSync(filePath, 'utf-8'); + const extracted = wasmExtractSymbols(parsers, filePath, code); + if (extracted?.symbols?.typeMap) { + const symbols = result.get(relPath); + symbols.typeMap = + extracted.symbols.typeMap instanceof Map + ? extracted.symbols.typeMap + : new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName])); + symbols._typeMapBackfilled = true; + } + } catch { + /* skip — typeMap is a best-effort backfill */ + } + } } return result; } @@ -551,7 +579,9 @@ export async function parseFileIncremental(cache, filePath, source, opts = {}) { if (!result) return null; const patched = patchNativeResult(result); if (!patched.typeMap || patched.typeMap.length === 0) { - patched.typeMap = extractTypeMapRegex(filePath); + const { typeMap, backfilled } = await backfillTypeMap(filePath, source); + patched.typeMap = typeMap; + if (backfilled) patched._typeMapBackfilled = true; } return patched; }