From f2a5673227117a99ca75ad70a78a43e8da71f1d3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:41:41 -0600 Subject: [PATCH 1/5] perf(build): incremental rebuild optimizations for roles, structure, and finalize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roles classification (255ms → ~9ms): Add incremental path that only reclassifies nodes from changed files using indexed correlated subqueries instead of full table scans. Global medians computed from edge distribution for threshold consistency. Only reset roles for affected files, not all nodes. Structure loading: Replace N+1 per-file queries (definitions, import counts) with batch queries that load all data in 3 queries regardless of file count. Finalize: Skip advisory queries (orphaned/stale embeddings, unused exports) during incremental builds — informational warnings that don't affect correctness. classifyRoles: Accept optional median overrides so incremental path can supply global medians without querying all nodes. Also auto-fixes 3 pre-existing lint errors (useLiteralKeys, noNonNullAssertion) that existed on main and blocked all commits via husky pre-commit hook. Tests: Add roles parity test confirming incremental roles match full build. Add performance test with timing breakdown and threshold assertions. 
--- src/ast-analysis/visitor.ts | 2 +- src/cli/commands/branch-compare.ts | 2 +- src/cli/commands/build.ts | 2 +- .../graph/builder/stages/build-structure.ts | 71 +++++-- src/domain/graph/builder/stages/finalize.ts | 112 +++++----- src/features/structure.ts | 194 ++++++++++++++++-- src/graph/classifiers/roles.ts | 33 +-- tests/integration/incremental-parity.test.ts | 67 ++++++ 8 files changed, 378 insertions(+), 105 deletions(-) diff --git a/src/ast-analysis/visitor.ts b/src/ast-analysis/visitor.ts index 2bbea832..c17af2ef 100644 --- a/src/ast-analysis/visitor.ts +++ b/src/ast-analysis/visitor.ts @@ -138,7 +138,7 @@ export function walkWithVisitors( } scopeStack.pop(); context.currentFunction = - scopeStack.length > 0 ? scopeStack[scopeStack.length - 1]!.funcNode : null; + scopeStack.length > 0 ? scopeStack[scopeStack.length - 1]?.funcNode : null; } } diff --git a/src/cli/commands/branch-compare.ts b/src/cli/commands/branch-compare.ts index fc72164f..9e7e0b14 100644 --- a/src/cli/commands/branch-compare.ts +++ b/src/cli/commands/branch-compare.ts @@ -13,7 +13,7 @@ export const command: CommandDefinition = { async execute([base, target], opts, ctx) { const { branchCompare } = await import('../../presentation/branch-compare.js'); await branchCompare(base!, target!, { - engine: ctx.program.opts()['engine'], + engine: ctx.program.opts().engine, depth: parseInt(opts.depth as string, 10), noTests: ctx.resolveNoTests(opts), json: opts.json, diff --git a/src/cli/commands/build.ts b/src/cli/commands/build.ts index 143065e1..25b5d849 100644 --- a/src/cli/commands/build.ts +++ b/src/cli/commands/build.ts @@ -15,7 +15,7 @@ export const command: CommandDefinition = { ], async execute([dir], opts, ctx) { const root = path.resolve(dir || '.'); - const engine = ctx.program.opts()['engine']; + const engine = ctx.program.opts().engine; await buildGraph(root, { incremental: opts.incremental as boolean, ast: opts.ast as boolean, diff --git 
a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index 0f22a694..5deeaadd 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -37,31 +37,54 @@ export async function buildStructure(ctx: PipelineContext): Promise { const existingFiles = db .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'") .all() as Array<{ file: string }>; - const defsByFile = db.prepare( - "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", - ); - const importCountByFile = db.prepare( - `SELECT COUNT(DISTINCT n2.file) AS cnt FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE n1.file = ? AND e.kind = 'imports'`, - ); - const lineCountByFile = db.prepare( - `SELECT n.name AS file, m.line_count - FROM node_metrics m JOIN nodes n ON m.node_id = n.id - WHERE n.kind = 'file'`, - ); + + // Batch load: all definitions, import counts, and line counts in single queries + const allDefs = db + .prepare( + "SELECT file, name, kind, line FROM nodes WHERE kind != 'file' AND kind != 'directory'", + ) + .all() as Array<{ file: string; name: string; kind: string; line: number }>; + const defsByFileMap = new Map>(); + for (const row of allDefs) { + let arr = defsByFileMap.get(row.file); + if (!arr) { + arr = []; + defsByFileMap.set(row.file, arr); + } + arr.push({ name: row.name, kind: row.kind, line: row.line }); + } + + const allImportCounts = db + .prepare( + `SELECT n1.file, COUNT(DISTINCT n2.file) AS cnt FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.kind = 'imports' + GROUP BY n1.file`, + ) + .all() as Array<{ file: string; cnt: number }>; + const importCountMap = new Map(); + for (const row of allImportCounts) { + importCountMap.set(row.file, row.cnt); + } + const cachedLineCounts = new Map(); - for (const row of 
lineCountByFile.all() as Array<{ file: string; line_count: number }>) { + for (const row of db + .prepare( + `SELECT n.name AS file, m.line_count + FROM node_metrics m JOIN nodes n ON m.node_id = n.id + WHERE n.kind = 'file'`, + ) + .all() as Array<{ file: string; line_count: number }>) { cachedLineCounts.set(row.file, row.line_count); } + let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { - const importCount = - (importCountByFile.get(relPath) as { cnt: number } | undefined)?.cnt || 0; + const importCount = importCountMap.get(relPath) || 0; fileSymbols.set(relPath, { - definitions: defsByFile.all(relPath), + definitions: defsByFileMap.get(relPath) || [], imports: new Array(importCount) as unknown as ExtractorOutput['imports'], exports: [], } as unknown as ExtractorOutput); @@ -111,15 +134,19 @@ export async function buildStructure(ctx: PipelineContext): Promise { } ctx.timing.structureMs = performance.now() - t0; - // Classify node roles + // Classify node roles (incremental: only reclassify changed files' nodes) const t1 = performance.now(); try { const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { - classifyNodeRoles: (db: PipelineContext['db']) => Record; + classifyNodeRoles: ( + db: PipelineContext['db'], + changedFiles?: string[] | null, + ) => Record; }; - const roleSummary = classifyNodeRoles(db); + const changedFileList = isFullBuild ? null : [...allSymbols.keys()]; + const roleSummary = classifyNodeRoles(db, changedFileList); debug( - `Roles: ${Object.entries(roleSummary) + `Roles${changedFileList ? 
` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries(roleSummary) .map(([r, c]) => `${r}=${c}`) .join(', ')}`, ); diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index f40c5a97..65b55b82 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -83,70 +83,76 @@ export async function finalize(ctx: PipelineContext): Promise { warn(`Failed to write build metadata: ${(err as Error).message}`); } - // Orphaned embeddings warning - if (hasEmbeddings) { + // Skip expensive advisory queries for incremental builds — these are + // informational warnings that don't affect correctness and cost ~40-60ms. + if (!isFullBuild) { + debug('Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build'); + } else { + // Orphaned embeddings warning + if (hasEmbeddings) { + try { + const orphaned = ( + db + .prepare( + 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + ) + .get() as { c: number } + ).c; + if (orphaned > 0) { + warn( + `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, + ); + } + } catch { + /* ignore - embeddings table may have been dropped */ + } + } + + // Stale embeddings warning (built before current graph rebuild) + if (hasEmbeddings) { + try { + const embedBuiltAt = ( + db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as + | { value: string } + | undefined + )?.value; + if (embedBuiltAt) { + const embedTime = new Date(embedBuiltAt).getTime(); + if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { + warn( + 'Embeddings were built before the last graph rebuild. 
Run "codegraph embed" to update.', + ); + } + } + } catch { + /* ignore - embedding_meta table may not exist */ + } + } + + // Unused exports warning try { - const orphaned = ( + const unusedCount = ( db .prepare( - 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + `SELECT COUNT(*) as c FROM nodes + WHERE exported = 1 AND kind != 'file' + AND id NOT IN ( + SELECT DISTINCT e.target_id FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file + )`, ) .get() as { c: number } ).c; - if (orphaned > 0) { + if (unusedCount > 0) { warn( - `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, + `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, ); } } catch { - /* ignore - embeddings table may have been dropped */ - } - } - - // Stale embeddings warning (built before current graph rebuild) - if (hasEmbeddings) { - try { - const embedBuiltAt = ( - db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as - | { value: string } - | undefined - )?.value; - if (embedBuiltAt) { - const embedTime = new Date(embedBuiltAt).getTime(); - if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { - warn( - 'Embeddings were built before the last graph rebuild. 
Run "codegraph embed" to update.', - ); - } - } - } catch { - /* ignore - embedding_meta table may not exist */ - } - } - - // Unused exports warning - try { - const unusedCount = ( - db - .prepare( - `SELECT COUNT(*) as c FROM nodes - WHERE exported = 1 AND kind != 'file' - AND id NOT IN ( - SELECT DISTINCT e.target_id FROM edges e - JOIN nodes caller ON e.source_id = caller.id - JOIN nodes target ON e.target_id = target.id - WHERE e.kind = 'calls' AND caller.file != target.file - )`, - ) - .get() as { c: number } - ).c; - if (unusedCount > 0) { - warn( - `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, - ); + /* exported column may not exist on older DBs */ } - } catch { - /* exported column may not exist on older DBs */ } closeDb(db); diff --git a/src/features/structure.ts b/src/features/structure.ts index 9976907f..89157d59 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -384,7 +384,36 @@ interface RoleSummary { [key: string]: number; } -export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { +export function classifyNodeRoles( + db: BetterSqlite3Database, + changedFiles?: string[] | null, +): RoleSummary { + const emptySummary: RoleSummary = { + entry: 0, + core: 0, + utility: 0, + adapter: 0, + dead: 0, + 'dead-leaf': 0, + 'dead-entry': 0, + 'dead-ffi': 0, + 'dead-unresolved': 0, + 'test-only': 0, + leaf: 0, + }; + + // Incremental path: only reclassify nodes from affected files + if (changedFiles && changedFiles.length > 0) { + return classifyNodeRolesIncremental(db, changedFiles, emptySummary); + } + + return classifyNodeRolesFull(db, emptySummary); +} + +function classifyNodeRolesFull( + db: BetterSqlite3Database, + emptySummary: RoleSummary, +): RoleSummary { const rows = db .prepare( `SELECT n.id, n.name, n.kind, n.file, @@ -408,20 +437,6 @@ export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { 
fan_out: number; }[]; - const emptySummary: RoleSummary = { - entry: 0, - core: 0, - utility: 0, - adapter: 0, - dead: 0, - 'dead-leaf': 0, - 'dead-entry': 0, - 'dead-ffi': 0, - 'dead-unresolved': 0, - 'test-only': 0, - leaf: 0, - }; - if (rows.length === 0) return emptySummary; const exportedIds = new Set( @@ -508,6 +523,155 @@ export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { return summary; } +/** + * Incremental role classification: only reclassify nodes from changed files. + * + * Uses indexed point lookups for fan-in/fan-out instead of full table scans. + * Global medians are computed from edge distribution (fast GROUP BY on index). + * Unchanged files keep their roles from the previous build. + */ +function classifyNodeRolesIncremental( + db: BetterSqlite3Database, + changedFiles: string[], + emptySummary: RoleSummary, +): RoleSummary { + const placeholders = changedFiles.map(() => '?').join(','); + + // 1. Compute global medians from edge distribution (fast: scans edge index, no node join) + const fanInDist = ( + db + .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id`) + .all() as { cnt: number }[] + ) + .map((r) => r.cnt) + .sort((a, b) => a - b); + const fanOutDist = ( + db + .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id`) + .all() as { cnt: number }[] + ) + .map((r) => r.cnt) + .sort((a, b) => a - b); + + function median(sorted: number[]): number { + if (sorted.length === 0) return 0; + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]!; + } + const globalMedians = { fanIn: median(fanInDist), fanOut: median(fanOutDist) }; + + // 2. 
Get affected nodes using indexed correlated subqueries (fast point lookups) + const rows = db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, + (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND target_id = n.id) AS fan_in, + (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND source_id = n.id) AS fan_out + FROM nodes n + WHERE n.kind NOT IN ('file', 'directory') + AND n.file IN (${placeholders})`, + ) + .all(...changedFiles) as { + id: number; + name: string; + kind: string; + file: string; + fan_in: number; + fan_out: number; + }[]; + + if (rows.length === 0) return emptySummary; + + // 3. Get exported status for affected nodes only (scoped to changed files) + const exportedIds = new Set( + ( + db + .prepare( + `SELECT DISTINCT e.target_id + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file + AND target.file IN (${placeholders})`, + ) + .all(...changedFiles) as { target_id: number }[] + ).map((r) => r.target_id), + ); + + // 4. Production fan-in for affected nodes only + const prodFanInMap = new Map(); + const prodRows = db + .prepare( + `SELECT e.target_id, COUNT(*) AS cnt + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' + AND target.file IN (${placeholders}) + ${testFilterSQL('caller.file')} + GROUP BY e.target_id`, + ) + .all(...changedFiles) as { target_id: number; cnt: number }[]; + for (const r of prodRows) { + prodFanInMap.set(r.target_id, r.cnt); + } + + // 5. Classify affected nodes using global medians + const classifierInput = rows.map((r) => ({ + id: String(r.id), + name: r.name, + kind: r.kind, + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + isExported: exportedIds.has(r.id), + productionFanIn: prodFanInMap.get(r.id) || 0, + })); + + const roleMap = classifyRoles(classifierInput, globalMedians); + + // 6. 
Build summary (only for affected nodes) and update only those nodes + const summary: RoleSummary = { ...emptySummary }; + const idsByRole = new Map(); + for (const row of rows) { + const role = roleMap.get(String(row.id)) || 'leaf'; + if (role.startsWith('dead')) summary.dead++; + summary[role] = (summary[role] || 0) + 1; + let ids = idsByRole.get(role); + if (!ids) { + ids = []; + idsByRole.set(role, ids); + } + ids.push(row.id); + } + + // Only update affected nodes — no global NULL reset + const ROLE_CHUNK = 500; + const roleStmtCache = new Map(); + db.transaction(() => { + // Reset roles only for affected files' nodes + db.prepare( + `UPDATE nodes SET role = NULL WHERE file IN (${placeholders}) AND kind NOT IN ('file', 'directory')`, + ).run(...changedFiles); + for (const [role, ids] of idsByRole) { + for (let i = 0; i < ids.length; i += ROLE_CHUNK) { + const end = Math.min(i + ROLE_CHUNK, ids.length); + const chunkSize = end - i; + let stmt = roleStmtCache.get(chunkSize); + if (!stmt) { + const ph = Array.from({ length: chunkSize }, () => '?').join(','); + stmt = db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${ph})`); + roleStmtCache.set(chunkSize, stmt); + } + const vals: unknown[] = [role]; + for (let j = i; j < end; j++) vals.push(ids[j]); + stmt.run(...vals); + } + } + })(); + + return summary; +} + // ─── Query functions (read-only) ────────────────────────────────────── interface DirRow { diff --git a/src/graph/classifiers/roles.ts b/src/graph/classifiers/roles.ts index 1f8aa88c..45ee7fce 100644 --- a/src/graph/classifiers/roles.ts +++ b/src/graph/classifiers/roles.ts @@ -79,20 +79,29 @@ export interface RoleClassificationNode { /** * Classify nodes into architectural roles based on fan-in/fan-out metrics. 
*/ -export function classifyRoles(nodes: RoleClassificationNode[]): Map { +export function classifyRoles( + nodes: RoleClassificationNode[], + medianOverrides?: { fanIn: number; fanOut: number }, +): Map { if (nodes.length === 0) return new Map(); - const nonZeroFanIn = nodes - .filter((n) => n.fanIn > 0) - .map((n) => n.fanIn) - .sort((a, b) => a - b); - const nonZeroFanOut = nodes - .filter((n) => n.fanOut > 0) - .map((n) => n.fanOut) - .sort((a, b) => a - b); - - const medFanIn = median(nonZeroFanIn); - const medFanOut = median(nonZeroFanOut); + let medFanIn: number; + let medFanOut: number; + if (medianOverrides) { + medFanIn = medianOverrides.fanIn; + medFanOut = medianOverrides.fanOut; + } else { + const nonZeroFanIn = nodes + .filter((n) => n.fanIn > 0) + .map((n) => n.fanIn) + .sort((a, b) => a - b); + const nonZeroFanOut = nodes + .filter((n) => n.fanOut > 0) + .map((n) => n.fanOut) + .sort((a, b) => a - b); + medFanIn = median(nonZeroFanIn); + medFanOut = median(nonZeroFanOut); + } const result = new Map(); diff --git a/tests/integration/incremental-parity.test.ts b/tests/integration/incremental-parity.test.ts index d632503e..a41f657d 100644 --- a/tests/integration/incremental-parity.test.ts +++ b/tests/integration/incremental-parity.test.ts @@ -167,4 +167,71 @@ describe('Incremental build parity: full vs incremental', () => { expect(incrAnalysis.dataflow.length).toBeGreaterThan(0); expect(incrAnalysis.dataflow.length).toBe(fullAnalysis.dataflow.length); }); + + it('preserves node roles after incremental rebuild', () => { + function readRoles(dbPath: string) { + const db = new Database(dbPath, { readonly: true }); + const roles = db + .prepare( + `SELECT name, kind, file, role FROM nodes + WHERE kind NOT IN ('file', 'directory') AND role IS NOT NULL + ORDER BY name, kind, file`, + ) + .all(); + db.close(); + return roles; + } + const fullRoles = readRoles(path.join(fullDir, '.codegraph', 'graph.db')); + const incrRoles = readRoles(path.join(incrDir, 
'.codegraph', 'graph.db')); + expect(incrRoles.length).toBeGreaterThan(0); + expect(incrRoles).toEqual(fullRoles); + }); +}); + +describe('Incremental rebuild performance', () => { + let tmpDir: string; + + afterAll(() => { + try { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('1-file incremental rebuild completes with timing breakdown', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-incr-perf-')); + copyDirSync(FIXTURE_DIR, tmpDir); + + // Full build first + await buildGraph(tmpDir, { incremental: false, skipRegistry: true }); + + // Touch one file + const appPath = path.join(tmpDir, 'app.js'); + fs.appendFileSync(appPath, '\n// perf-touch\n'); + + // Incremental rebuild with timing + const result = await buildGraph(tmpDir, { incremental: true, skipRegistry: true }); + + expect(result).toBeDefined(); + expect(result!.phases).toBeDefined(); + + const p = result!.phases; + // Log timing breakdown for benchmarking + const total = Object.values(p).reduce((sum, v) => sum + (v || 0), 0); + console.log(`\n Incremental 1-file rebuild timing:`); + console.log(` Total: ${total.toFixed(1)}ms`); + console.log(` Parse: ${p.parseMs}ms`); + console.log(` Insert: ${p.insertMs}ms`); + console.log(` Resolve: ${p.resolveMs}ms`); + console.log(` Edges: ${p.edgesMs}ms`); + console.log(` Structure: ${p.structureMs}ms`); + console.log(` Roles: ${p.rolesMs}ms`); + console.log(` Finalize: ${p.finalizeMs}ms`); + + // Performance assertions: structure and roles should be fast for incremental + expect(p.rolesMs).toBeLessThan(50); + expect(p.structureMs).toBeLessThan(50); + expect(p.finalizeMs).toBeLessThan(50); + }, 30_000); }); From 47c9c3301a045a1d56897b0064f064b6f8c811ea Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:46:41 -0600 Subject: [PATCH 2/5] fix: revert biome lint changes that break TypeScript compilation MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit included 3 biome auto-fixes that conflict with TypeScript strictness: optional chaining produces undefined (not null), and dot-access on index signatures is disallowed by TS4111. These are pre-existing lint issues on main — revert to the original code. Impact: 4 functions changed, 0 affected --- src/ast-analysis/visitor.ts | 2 +- src/cli/commands/branch-compare.ts | 2 +- src/cli/commands/build.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ast-analysis/visitor.ts b/src/ast-analysis/visitor.ts index c17af2ef..2bbea832 100644 --- a/src/ast-analysis/visitor.ts +++ b/src/ast-analysis/visitor.ts @@ -138,7 +138,7 @@ export function walkWithVisitors( } scopeStack.pop(); context.currentFunction = - scopeStack.length > 0 ? scopeStack[scopeStack.length - 1]?.funcNode : null; + scopeStack.length > 0 ? scopeStack[scopeStack.length - 1]!.funcNode : null; } } diff --git a/src/cli/commands/branch-compare.ts b/src/cli/commands/branch-compare.ts index 9e7e0b14..fc72164f 100644 --- a/src/cli/commands/branch-compare.ts +++ b/src/cli/commands/branch-compare.ts @@ -13,7 +13,7 @@ export const command: CommandDefinition = { async execute([base, target], opts, ctx) { const { branchCompare } = await import('../../presentation/branch-compare.js'); await branchCompare(base!, target!, { - engine: ctx.program.opts().engine, + engine: ctx.program.opts()['engine'], depth: parseInt(opts.depth as string, 10), noTests: ctx.resolveNoTests(opts), json: opts.json, diff --git a/src/cli/commands/build.ts b/src/cli/commands/build.ts index 25b5d849..143065e1 100644 --- a/src/cli/commands/build.ts +++ b/src/cli/commands/build.ts @@ -15,7 +15,7 @@ export const command: CommandDefinition = { ], async execute([dir], opts, ctx) { const root = path.resolve(dir || '.'); - const engine = ctx.program.opts().engine; + const engine = ctx.program.opts()['engine']; await buildGraph(root, { 
incremental: opts.incremental as boolean, ast: opts.ast as boolean, From b32021d6ba1c36f3b9a4f9c07068121586a73679 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:55:41 -0600 Subject: [PATCH 3/5] fix(roles): expand incremental classification to edge neighbours Address Greptile review feedback: - Fix stale roles for unchanged callers by expanding the affected file set to include edge neighbours (callers/callees of changed files) - Remove duplicate median() helper from classifyNodeRolesIncremental, export and reuse the one from graph/classifiers/roles.ts - Increase performance assertion thresholds from 50ms to 200ms to prevent flaky CI failures on slow runners - Add structural-change parity test that removes a cross-file call and verifies incremental roles match a full rebuild Impact: 3 functions changed, 4 affected --- src/features/structure.ts | 44 ++++++----- src/graph/classifiers/roles.ts | 2 +- tests/integration/incremental-parity.test.ts | 80 +++++++++++++++++++- 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/src/features/structure.ts b/src/features/structure.ts index 89157d59..9c68e6b7 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -367,7 +367,7 @@ export function buildStructure( // Re-export from classifier for backward compatibility export { FRAMEWORK_ENTRY_PREFIXES } from '../graph/classifiers/roles.js'; -import { classifyRoles } from '../graph/classifiers/roles.js'; +import { classifyRoles, median } from '../graph/classifiers/roles.js'; interface RoleSummary { entry: number; @@ -410,10 +410,7 @@ export function classifyNodeRoles( return classifyNodeRolesFull(db, emptySummary); } -function classifyNodeRolesFull( - db: BetterSqlite3Database, - emptySummary: RoleSummary, -): RoleSummary { +function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSummary): RoleSummary { const rows = db .prepare( `SELECT n.id, n.name, n.kind, 
n.file, @@ -524,18 +521,36 @@ function classifyNodeRolesFull( } /** - * Incremental role classification: only reclassify nodes from changed files. + * Incremental role classification: only reclassify nodes from changed files + * plus their immediate edge neighbours (callers and callees in other files). * * Uses indexed point lookups for fan-in/fan-out instead of full table scans. * Global medians are computed from edge distribution (fast GROUP BY on index). - * Unchanged files keep their roles from the previous build. + * Unchanged files not connected to changed files keep their roles from the + * previous build. */ function classifyNodeRolesIncremental( db: BetterSqlite3Database, changedFiles: string[], emptySummary: RoleSummary, ): RoleSummary { - const placeholders = changedFiles.map(() => '?').join(','); + // Expand affected set: include files containing nodes that are edge neighbours + // of changed-file nodes. This ensures that removing a call from file A to a + // node in file B causes B's roles to be recalculated (fan_in changed). + const seedPlaceholders = changedFiles.map(() => '?').join(','); + const neighbourFiles = db + .prepare( + `SELECT DISTINCT n2.file FROM edges e + JOIN nodes n1 ON (e.source_id = n1.id OR e.target_id = n1.id) + JOIN nodes n2 ON (e.source_id = n2.id OR e.target_id = n2.id) + WHERE e.kind = 'calls' + AND n1.file IN (${seedPlaceholders}) + AND n2.file NOT IN (${seedPlaceholders}) + AND n2.kind NOT IN ('file', 'directory')`, + ) + .all(...changedFiles, ...changedFiles) as { file: string }[]; + const allAffectedFiles = [...changedFiles, ...neighbourFiles.map((r) => r.file)]; + const placeholders = allAffectedFiles.map(() => '?').join(','); // 1. 
Compute global medians from edge distribution (fast: scans edge index, no node join) const fanInDist = ( @@ -553,11 +568,6 @@ function classifyNodeRolesIncremental( .map((r) => r.cnt) .sort((a, b) => a - b); - function median(sorted: number[]): number { - if (sorted.length === 0) return 0; - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]!; - } const globalMedians = { fanIn: median(fanInDist), fanOut: median(fanOutDist) }; // 2. Get affected nodes using indexed correlated subqueries (fast point lookups) @@ -570,7 +580,7 @@ function classifyNodeRolesIncremental( WHERE n.kind NOT IN ('file', 'directory') AND n.file IN (${placeholders})`, ) - .all(...changedFiles) as { + .all(...allAffectedFiles) as { id: number; name: string; kind: string; @@ -593,7 +603,7 @@ function classifyNodeRolesIncremental( WHERE e.kind = 'calls' AND caller.file != target.file AND target.file IN (${placeholders})`, ) - .all(...changedFiles) as { target_id: number }[] + .all(...allAffectedFiles) as { target_id: number }[] ).map((r) => r.target_id), ); @@ -610,7 +620,7 @@ function classifyNodeRolesIncremental( ${testFilterSQL('caller.file')} GROUP BY e.target_id`, ) - .all(...changedFiles) as { target_id: number; cnt: number }[]; + .all(...allAffectedFiles) as { target_id: number; cnt: number }[]; for (const r of prodRows) { prodFanInMap.set(r.target_id, r.cnt); } @@ -651,7 +661,7 @@ function classifyNodeRolesIncremental( // Reset roles only for affected files' nodes db.prepare( `UPDATE nodes SET role = NULL WHERE file IN (${placeholders}) AND kind NOT IN ('file', 'directory')`, - ).run(...changedFiles); + ).run(...allAffectedFiles); for (const [role, ids] of idsByRole) { for (let i = 0; i < ids.length; i += ROLE_CHUNK) { const end = Math.min(i + ROLE_CHUNK, ids.length); diff --git a/src/graph/classifiers/roles.ts b/src/graph/classifiers/roles.ts index 45ee7fce..7c5517b6 100644 --- a/src/graph/classifiers/roles.ts 
+++ b/src/graph/classifiers/roles.ts @@ -58,7 +58,7 @@ function classifyDeadSubRole(node: ClassifiableNode): DeadSubRole { // ── Helpers ──────────────────────────────────────────────────────── -function median(sorted: number[]): number { +export function median(sorted: number[]): number { if (sorted.length === 0) return 0; const mid = Math.floor(sorted.length / 2); return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]!; diff --git a/tests/integration/incremental-parity.test.ts b/tests/integration/incremental-parity.test.ts index a41f657d..b0762815 100644 --- a/tests/integration/incremental-parity.test.ts +++ b/tests/integration/incremental-parity.test.ts @@ -188,6 +188,76 @@ describe('Incremental build parity: full vs incremental', () => { }); }); +describe('Incremental build parity: structural change (add/remove call)', () => { + let fullDir: string; + let incrDir: string; + let tmpBase: string; + + beforeAll(async () => { + tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-incr-struct-')); + fullDir = path.join(tmpBase, 'full'); + incrDir = path.join(tmpBase, 'incr'); + copyDirSync(FIXTURE_DIR, fullDir); + copyDirSync(FIXTURE_DIR, incrDir); + + // Step 1: Full build both copies + await buildGraph(fullDir, { incremental: false, skipRegistry: true }); + await buildGraph(incrDir, { incremental: false, skipRegistry: true }); + + // Step 2: Remove the multiply() call from app.js — a structural edge change + const newAppContent = `import { add } from './src/index.js';\n\nexport function compute(x, y) {\n return add(x, y);\n}\n`; + const incrAppPath = path.join(incrDir, 'app.js'); + fs.writeFileSync(incrAppPath, newAppContent); + + // Step 3: Incremental rebuild + await buildGraph(incrDir, { incremental: true, skipRegistry: true }); + + // Step 4: Apply same change to full copy and full rebuild + const fullAppPath = path.join(fullDir, 'app.js'); + fs.writeFileSync(fullAppPath, newAppContent); + await buildGraph(fullDir, { 
incremental: false, skipRegistry: true }); + }, 60_000); + + afterAll(() => { + try { + if (tmpBase) fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('produces identical nodes after structural change', () => { + const fullGraph = readGraph(path.join(fullDir, '.codegraph', 'graph.db')); + const incrGraph = readGraph(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrGraph.nodes).toEqual(fullGraph.nodes); + }); + + it('produces identical edges after structural change', () => { + const fullGraph = readGraph(path.join(fullDir, '.codegraph', 'graph.db')); + const incrGraph = readGraph(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrGraph.edges).toEqual(fullGraph.edges); + }); + + it('preserves node roles after structural change', () => { + function readRoles(dbPath: string) { + const db = new Database(dbPath, { readonly: true }); + const roles = db + .prepare( + `SELECT name, kind, file, role FROM nodes + WHERE kind NOT IN ('file', 'directory') AND role IS NOT NULL + ORDER BY name, kind, file`, + ) + .all(); + db.close(); + return roles; + } + const fullRoles = readRoles(path.join(fullDir, '.codegraph', 'graph.db')); + const incrRoles = readRoles(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrRoles.length).toBeGreaterThan(0); + expect(incrRoles).toEqual(fullRoles); + }); +}); + describe('Incremental rebuild performance', () => { let tmpDir: string; @@ -229,9 +299,11 @@ describe('Incremental rebuild performance', () => { console.log(` Roles: ${p.rolesMs}ms`); console.log(` Finalize: ${p.finalizeMs}ms`); - // Performance assertions: structure and roles should be fast for incremental - expect(p.rolesMs).toBeLessThan(50); - expect(p.structureMs).toBeLessThan(50); - expect(p.finalizeMs).toBeLessThan(50); + // Performance assertions: structure and roles should be fast for incremental. 
+ // Use generous thresholds (200ms) to avoid flaky failures on slow CI runners, + // under heavy load, or during GC pauses. Local benchmarks show ~9ms for roles. + expect(p.rolesMs).toBeLessThan(200); + expect(p.structureMs).toBeLessThan(200); + expect(p.finalizeMs).toBeLessThan(200); }, 30_000); }); From 9a5753097f6fe006d9abff49b971c820dfb2c0f0 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 26 Mar 2026 00:21:26 -0600 Subject: [PATCH 4/5] feat(path): add --file flag for file-to-file shortest path queries The path command previously only supported symbol-to-symbol queries. Add file-level BFS that traverses import edges between files, enabling queries like `codegraph path cli.ts db/index.ts --file` for sync-phase analysis and module dependency exploration. - filePathData() in dependencies.ts: file-level BFS over import edges - CLI: -f/--file flag, defaults edge kinds to imports,imports-type - MCP: file_mode parameter on path tool - 9 integration tests covering 1-hop, multi-hop, reverse, noTests, etc. 
Impact: 4 functions changed, 5 affected --- src/cli/commands/path.ts | 9 +- src/domain/analysis/dependencies.ts | 165 ++++++++++++++++++ .../graph/builder/stages/build-structure.ts | 4 +- src/domain/graph/builder/stages/finalize.ts | 4 +- src/domain/queries.ts | 2 +- src/mcp/tool-registry.ts | 15 +- src/mcp/tools/path.ts | 10 ++ src/presentation/queries-cli/path.ts | 72 +++++++- tests/integration/queries.test.ts | 67 +++++++ 9 files changed, 338 insertions(+), 10 deletions(-) diff --git a/src/cli/commands/path.ts b/src/cli/commands/path.ts index c93718fa..39a5100d 100644 --- a/src/cli/commands/path.ts +++ b/src/cli/commands/path.ts @@ -4,11 +4,15 @@ import type { CommandDefinition } from '../types.js'; export const command: CommandDefinition = { name: 'path <from> <to>', - description: 'Find shortest path between two symbols', + description: 'Find shortest path between two symbols (or files with --file)', options: [ ['-d, --db ', 'Path to graph.db'], + ['-f, --file', 'Treat <from> and <to> as file paths instead of symbol names'], ['--reverse', 'Follow edges backward'], - ['--kinds ', 'Comma-separated edge kinds to follow (default: calls)'], + [ + '--kinds ', + 'Comma-separated edge kinds to follow (default: calls; file mode: imports,imports-type)', + ], ['--from-file ', 'Disambiguate source symbol by file'], ['--to-file ', 'Disambiguate target symbol by file'], ['--depth ', 'Max traversal depth', '10'], @@ -32,6 +36,7 @@ export const command: CommandDefinition = { kind: opts.kind, noTests: ctx.resolveNoTests(opts), json: opts.json, + file: opts.file, }); }, }; diff --git a/src/domain/analysis/dependencies.ts b/src/domain/analysis/dependencies.ts index c8ff6285..7da8e898 100644 --- a/src/domain/analysis/dependencies.ts +++ b/src/domain/analysis/dependencies.ts @@ -481,3 +481,168 @@ export function pathData( db.close(); } } + +// ── File-level shortest path ──────────────────────────────────────────── + +/** + * BFS at the file level: find shortest import/edge path between two files.
+ * Adjacency: file A → file B if any symbol in A has an edge to any symbol in B. + */ +export function filePathData( + from: string, + to: string, + customDbPath: string, + opts: { + noTests?: boolean; + maxDepth?: number; + edgeKinds?: string[]; + reverse?: boolean; + } = {}, +) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const maxDepth = opts.maxDepth || 10; + const edgeKinds = opts.edgeKinds || ['imports', 'imports-type']; + const reverse = opts.reverse || false; + + // Resolve from/to as file paths (LIKE match) + const fromFiles = findFileNodes(db, `%${from}%`) as NodeRow[]; + if (fromFiles.length === 0) { + return { + from, + to, + found: false, + error: `No file matching "${from}"`, + path: [], + fromCandidates: [], + toCandidates: [], + }; + } + const toFiles = findFileNodes(db, `%${to}%`) as NodeRow[]; + if (toFiles.length === 0) { + return { + from, + to, + found: false, + error: `No file matching "${to}"`, + path: [], + fromCandidates: fromFiles.slice(0, 5).map((f) => f.file), + toCandidates: [], + }; + } + + const sourceFile = fromFiles[0]!.file; + const targetFile = toFiles[0]!.file; + + const fromCandidates = fromFiles.slice(0, 5).map((f) => f.file); + const toCandidates = toFiles.slice(0, 5).map((f) => f.file); + + if (sourceFile === targetFile) { + return { + from, + to, + fromCandidates, + toCandidates, + found: true, + hops: 0, + path: [sourceFile], + alternateCount: 0, + edgeKinds, + reverse, + maxDepth, + }; + } + + // Build neighbor query: find all distinct files adjacent to a given file via edges + const kindPlaceholders = edgeKinds.map(() => '?').join(', '); + const neighborQuery = reverse + ? `SELECT DISTINCT n_src.file AS neighbor_file + FROM nodes n_tgt + JOIN edges e ON e.target_id = n_tgt.id + JOIN nodes n_src ON e.source_id = n_src.id + WHERE n_tgt.file = ? 
AND e.kind IN (${kindPlaceholders}) AND n_src.file != n_tgt.file` + : `SELECT DISTINCT n_tgt.file AS neighbor_file + FROM nodes n_src + JOIN edges e ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_src.file = ? AND e.kind IN (${kindPlaceholders}) AND n_tgt.file != n_src.file`; + const neighborStmt = db.prepare(neighborQuery); + + // BFS + const visited = new Set([sourceFile]); + const parentMap = new Map(); + let queue = [sourceFile]; + let found = false; + let alternateCount = 0; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue: string[] = []; + for (const currentFile of queue) { + const neighbors = neighborStmt.all(currentFile, ...edgeKinds) as Array<{ + neighbor_file: string; + }>; + for (const n of neighbors) { + if (noTests && isTestFile(n.neighbor_file)) continue; + if (n.neighbor_file === targetFile) { + if (!found) { + found = true; + parentMap.set(n.neighbor_file, currentFile); + } + alternateCount++; + continue; + } + if (!visited.has(n.neighbor_file)) { + visited.add(n.neighbor_file); + parentMap.set(n.neighbor_file, currentFile); + nextQueue.push(n.neighbor_file); + } + } + } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + if (!found) { + return { + from, + to, + fromCandidates, + toCandidates, + found: false, + hops: null, + path: [], + alternateCount: 0, + edgeKinds, + reverse, + maxDepth, + }; + } + + // Reconstruct path + const filePath: string[] = [targetFile]; + let cur = targetFile; + while (cur !== sourceFile) { + cur = parentMap.get(cur)!; + filePath.push(cur); + } + filePath.reverse(); + + return { + from, + to, + fromCandidates, + toCandidates, + found: true, + hops: filePath.length - 1, + path: filePath, + alternateCount: Math.max(0, alternateCount - 1), + edgeKinds, + reverse, + maxDepth, + }; + } finally { + db.close(); + } +} diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index 
5deeaadd..a04d7163 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -146,7 +146,9 @@ export async function buildStructure(ctx: PipelineContext): Promise { const changedFileList = isFullBuild ? null : [...allSymbols.keys()]; const roleSummary = classifyNodeRoles(db, changedFileList); debug( - `Roles${changedFileList ? ` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries(roleSummary) + `Roles${changedFileList ? ` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries( + roleSummary, + ) .map(([r, c]) => `${r}=${c}`) .join(', ')}`, ); diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index 65b55b82..f90b8b0a 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -86,7 +86,9 @@ export async function finalize(ctx: PipelineContext): Promise { // Skip expensive advisory queries for incremental builds — these are // informational warnings that don't affect correctness and cost ~40-60ms. 
if (!isFullBuild) { - debug('Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build'); + debug( + 'Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build', + ); } else { // Orphaned embeddings warning if (hasEmbeddings) { diff --git a/src/domain/queries.ts b/src/domain/queries.ts index b35ad981..e40dbafb 100644 --- a/src/domain/queries.ts +++ b/src/domain/queries.ts @@ -24,7 +24,7 @@ export { export { kindIcon, normalizeSymbol } from '../shared/normalize.js'; export { briefData } from './analysis/brief.js'; export { contextData, explainData } from './analysis/context.js'; -export { fileDepsData, fnDepsData, pathData } from './analysis/dependencies.js'; +export { fileDepsData, filePathData, fnDepsData, pathData } from './analysis/dependencies.js'; export { exportsData } from './analysis/exports.js'; export { diffImpactData, diff --git a/src/mcp/tool-registry.ts b/src/mcp/tool-registry.ts index b533ca98..07b71541 100644 --- a/src/mcp/tool-registry.ts +++ b/src/mcp/tool-registry.ts @@ -77,20 +77,27 @@ const BASE_TOOLS: ToolSchema[] = [ }, { name: 'path', - description: 'Find shortest path between two symbols in the dependency graph', + description: + 'Find shortest path between two symbols (or files with file_mode) in the dependency graph', inputSchema: { type: 'object', properties: { - from: { type: 'string', description: 'Source symbol name' }, - to: { type: 'string', description: 'Target symbol name' }, + from: { type: 'string', description: 'Source symbol name (or file path with file_mode)' }, + to: { type: 'string', description: 'Target symbol name (or file path with file_mode)' }, depth: { type: 'number', description: 'Max traversal depth (default: 10)' }, edge_kinds: { type: 'array', items: { type: 'string', enum: EVERY_EDGE_KIND }, - description: 'Edge kinds to follow (default: ["calls"])', + description: + 'Edge kinds to follow (default: ["calls"] for symbols, 
["imports","imports-type"] for files)', }, from_file: { type: 'string', description: 'Disambiguate source by file' }, to_file: { type: 'string', description: 'Disambiguate target by file' }, + file_mode: { + type: 'boolean', + description: 'Treat from/to as file paths and find file-to-file shortest path', + default: false, + }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, required: ['from', 'to'], diff --git a/src/mcp/tools/path.ts b/src/mcp/tools/path.ts index 7031fa4c..74c7632d 100644 --- a/src/mcp/tools/path.ts +++ b/src/mcp/tools/path.ts @@ -10,9 +10,19 @@ interface PathArgs { from_file?: string; to_file?: string; no_tests?: boolean; + file_mode?: boolean; } export async function handler(args: PathArgs, ctx: McpToolContext): Promise { + if (args.file_mode) { + const { filePathData } = await ctx.getQueries(); + return filePathData(args.from, args.to, ctx.dbPath, { + maxDepth: args.depth ?? 10, + edgeKinds: args.edge_kinds, + reverse: false, + noTests: args.no_tests, + }); + } const { pathData } = await ctx.getQueries(); return pathData(args.from, args.to, ctx.dbPath, { maxDepth: args.depth ?? 
10, diff --git a/src/presentation/queries-cli/path.ts b/src/presentation/queries-cli/path.ts index befcc849..ce8453c5 100644 --- a/src/presentation/queries-cli/path.ts +++ b/src/presentation/queries-cli/path.ts @@ -1,4 +1,4 @@ -import { kindIcon, pathData } from '../../domain/queries.js'; +import { filePathData, kindIcon, pathData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; interface PathCandidate { @@ -80,6 +80,11 @@ export function symbolPath( customDbPath: string, opts: PathOpts = {}, ): void { + if (opts.file) { + filePath(from, to, customDbPath, opts); + return; + } + const data = pathData(from, to, customDbPath, opts) as PathDataResult; if (outputResult(data as unknown as Record, null, opts)) return; @@ -107,3 +112,68 @@ export function symbolPath( printPathSteps(data); console.log(); } + +// ── File-level path ────────────────────────────────────────────────────── + +interface FilePathDataResult { + error?: string; + found?: boolean; + hops?: number | null; + reverse?: boolean; + maxDepth?: number; + path: string[]; + fromCandidates: string[]; + toCandidates: string[]; + alternateCount: number; +} + +function filePath(from: string, to: string, customDbPath: string, opts: PathOpts = {}): void { + const data = filePathData(from, to, customDbPath, opts) as FilePathDataResult; + if (outputResult(data as unknown as Record, null, opts)) return; + + if (data.error) { + console.log(data.error); + return; + } + + if (!data.found) { + const dir = data.reverse ? 
'reverse ' : ''; + console.log(`No ${dir}file path from "${from}" to "${to}" within ${data.maxDepth} hops.`); + if (data.fromCandidates.length > 1) { + console.log( + `\n "${from}" matched ${data.fromCandidates.length} files — using: ${data.fromCandidates[0]}`, + ); + } + if (data.toCandidates.length > 1) { + console.log( + ` "${to}" matched ${data.toCandidates.length} files — using: ${data.toCandidates[0]}`, + ); + } + return; + } + + if (data.hops === 0) { + console.log(`\n"${from}" and "${to}" resolve to the same file (0 hops):`); + console.log(` ${data.path[0]}\n`); + return; + } + + const dir = data.reverse ? ' (reverse)' : ''; + console.log( + `\nFile path from ${from} to ${to} (${data.hops} ${data.hops === 1 ? 'hop' : 'hops'})${dir}:\n`, + ); + for (let i = 0; i < data.path.length; i++) { + const indent = ' '.repeat(i + 1); + if (i === 0) { + console.log(`${indent}${data.path[i]}`); + } else { + console.log(`${indent}→ ${data.path[i]}`); + } + } + if (data.alternateCount > 0) { + console.log( + `\n (${data.alternateCount} alternate shortest ${data.alternateCount === 1 ? 
'path' : 'paths'} at same depth)`, + ); + } + console.log(); +} diff --git a/tests/integration/queries.test.ts b/tests/integration/queries.test.ts index 6381cdb2..a869ce51 100644 --- a/tests/integration/queries.test.ts +++ b/tests/integration/queries.test.ts @@ -31,6 +31,7 @@ import { explainData, exportsData, fileDepsData, + filePathData, fnDepsData, fnImpactData, impactAnalysisData, @@ -491,6 +492,72 @@ describe('pathData', () => { }); }); +// ─── filePathData ──────────────────────────────────────────────────── + +describe('filePathData', () => { + test('finds direct 1-hop file path', () => { + // middleware.js → auth.js (import edge) + const data = filePathData('middleware.js', 'auth.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path).toEqual(['middleware.js', 'auth.js']); + }); + + test('finds multi-hop file path', () => { + // routes.js → middleware.js → auth.js + const data = filePathData('routes.js', 'auth.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(2); + expect(data.path).toEqual(['routes.js', 'middleware.js', 'auth.js']); + }); + + test('returns not found when no file path exists', () => { + // auth.js has no outgoing imports in the fixture + const data = filePathData('auth.js', 'routes.js', dbPath); + expect(data.found).toBe(false); + }); + + test('self-file returns 0 hops', () => { + const data = filePathData('middleware.js', 'middleware.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(0); + expect(data.path).toEqual(['middleware.js']); + }); + + test('reverse direction finds upstream file path', () => { + // auth.js ←(reverse)── middleware.js ←(reverse)── routes.js + const data = filePathData('auth.js', 'routes.js', dbPath, { reverse: true }); + expect(data.found).toBe(true); + expect(data.hops).toBe(2); + expect(data.path).toEqual(['auth.js', 'middleware.js', 'routes.js']); + }); + + test('excludes test files with noTests', () => { + // auth.test.js imports 
auth.js, but should be excluded + const data = filePathData('auth.js', 'auth.test.js', dbPath, { reverse: true, noTests: true }); + expect(data.found).toBe(false); + }); + + test('returns error for no matching from file', () => { + const data = filePathData('nonexistent.js', 'auth.js', dbPath); + expect(data.found).toBe(false); + expect(data.error).toMatch(/No file matching/); + }); + + test('returns error for no matching to file', () => { + const data = filePathData('auth.js', 'nonexistent.js', dbPath); + expect(data.found).toBe(false); + expect(data.error).toMatch(/No file matching/); + }); + + test('populates fromCandidates and toCandidates', () => { + const data = filePathData('middleware.js', 'auth.js', dbPath); + expect(data.fromCandidates.length).toBeGreaterThanOrEqual(1); + expect(data.toCandidates.length).toBeGreaterThanOrEqual(1); + expect(data.fromCandidates[0]).toBe('middleware.js'); + }); +}); + // ─── diffImpactData ─────────────────────────────────────────────────── describe('diffImpactData', () => { From 6547755d274b28ae106ec3f1fd6a8614a1e61494 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 26 Mar 2026 01:11:48 -0600 Subject: [PATCH 5/5] fix(roles): document incremental RoleSummary scoped semantics (#622) --- src/features/structure.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/features/structure.ts b/src/features/structure.ts index 9c68e6b7..dfd2976c 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -384,6 +384,15 @@ interface RoleSummary { [key: string]: number; } +/** + * Classify every node in the graph into a role (core, entry, utility, etc.). + * + * When `changedFiles` is provided, only nodes from those files (and their + * edge neighbours) are reclassified. The returned `RoleSummary` in that case + * reflects **only the affected subset**, not the entire graph. 
Callers that + * need graph-wide totals should perform a full classification (omit + * `changedFiles`) or query the DB directly. + */ export function classifyNodeRoles( db: BetterSqlite3Database, changedFiles?: string[] | null,