// ── File-level shortest path ────────────────────────────────────────────

/**
 * Order the files returned by a substring (LIKE) lookup so that the most
 * specific match for `query` comes first.
 *
 * Preference: a file equal to `query`, then a file ending in `/<query>`,
 * otherwise the lookup's own order is kept untouched.
 *
 * Why: the lookup pattern is `%query%`, so `auth.js` also matches `oauth.js`.
 * NOTE(review): the row order produced by `findFileNodes` is not visible from
 * here — confirm it does not already rank exact matches first.
 */
function rankFileMatches(matches: NodeRow[], query: string): string[] {
  const files = matches.map((m) => m.file);
  const best = files.find((f) => f === query) ?? files.find((f) => f.endsWith(`/${query}`));
  if (best === undefined) return files;
  return [best, ...files.filter((f) => f !== best)];
}

/**
 * Breadth-first search at file granularity: find a shortest chain of files
 * linking `from` to `to`.
 *
 * Adjacency: file A → file B when any node in A has an edge of one of the
 * requested kinds to any node in B (self-loops are ignored).
 *
 * @param from         Substring of the source file path.
 * @param to           Substring of the target file path.
 * @param customDbPath Path handed to `openReadonlyOrFail`.
 * @param opts.noTests   Skip neighbours for which `isTestFile` is true.
 * @param opts.maxDepth  Maximum number of hops explored (default 10).
 * @param opts.edgeKinds Edge kinds to follow; missing or empty falls back to
 *                       `imports` + `imports-type`.
 * @param opts.reverse   Walk edges target → source instead of source → target.
 * @returns A result object. `found` tells whether a path exists; `path` lists
 *          files from source to target; `fromCandidates` / `toCandidates` hold
 *          up to five matches each with the file actually used at index 0;
 *          `alternateCount` is the number of *additional* frontier files that
 *          also reach the target at the winning depth. When either pattern
 *          matches nothing, `error` is set and `found` is false.
 */
export function filePathData(
  from: string,
  to: string,
  customDbPath: string,
  opts: {
    noTests?: boolean;
    maxDepth?: number;
    edgeKinds?: string[];
    reverse?: boolean;
  } = {},
) {
  const db = openReadonlyOrFail(customDbPath);
  try {
    const noTests = opts.noTests ?? false;
    // `||` on purpose: 0 and NaN (e.g. an unparsable depth flag) fall back to 10.
    const maxDepth = opts.maxDepth || 10;
    // An empty list would render `IN ()` and can never match — treat it as "not given".
    const edgeKinds = opts.edgeKinds?.length ? opts.edgeKinds : ['imports', 'imports-type'];
    const reverse = opts.reverse ?? false;

    // Resolve from/to as file paths (LIKE match), most specific match first.
    const fromFiles = rankFileMatches(findFileNodes(db, `%${from}%`) as NodeRow[], from);
    if (fromFiles.length === 0) {
      return {
        from,
        to,
        found: false,
        error: `No file matching "${from}"`,
        path: [],
        fromCandidates: [],
        toCandidates: [],
      };
    }
    const toFiles = rankFileMatches(findFileNodes(db, `%${to}%`) as NodeRow[], to);
    if (toFiles.length === 0) {
      return {
        from,
        to,
        found: false,
        error: `No file matching "${to}"`,
        path: [],
        fromCandidates: fromFiles.slice(0, 5),
        toCandidates: [],
      };
    }

    const sourceFile = fromFiles[0]!;
    const targetFile = toFiles[0]!;

    // Index 0 of each list is the file the search really uses.
    const fromCandidates = fromFiles.slice(0, 5);
    const toCandidates = toFiles.slice(0, 5);

    if (sourceFile === targetFile) {
      return {
        from,
        to,
        fromCandidates,
        toCandidates,
        found: true,
        hops: 0,
        path: [sourceFile],
        alternateCount: 0,
        edgeKinds,
        reverse,
        maxDepth,
      };
    }

    // Neighbour query: all distinct files adjacent to a given file via the chosen edge kinds.
    const kindPlaceholders = edgeKinds.map(() => '?').join(', ');
    const neighborQuery = reverse
      ? `SELECT DISTINCT n_src.file AS neighbor_file
         FROM nodes n_tgt
         JOIN edges e ON e.target_id = n_tgt.id
         JOIN nodes n_src ON e.source_id = n_src.id
         WHERE n_tgt.file = ? AND e.kind IN (${kindPlaceholders}) AND n_src.file != n_tgt.file`
      : `SELECT DISTINCT n_tgt.file AS neighbor_file
         FROM nodes n_src
         JOIN edges e ON e.source_id = n_src.id
         JOIN nodes n_tgt ON e.target_id = n_tgt.id
         WHERE n_src.file = ? AND e.kind IN (${kindPlaceholders}) AND n_tgt.file != n_src.file`;
    const neighborStmt = db.prepare(neighborQuery);

    // Level-by-level BFS; `parentMap` records the first file that reached each file.
    const visited = new Set<string>([sourceFile]);
    const parentMap = new Map<string, string>();
    let queue: string[] = [sourceFile];
    let found = false;
    let alternateCount = 0;

    for (let depth = 1; depth <= maxDepth; depth++) {
      const nextQueue: string[] = [];
      for (const currentFile of queue) {
        const neighbors = neighborStmt.all(currentFile, ...edgeKinds) as Array<{
          neighbor_file: string;
        }>;
        for (const n of neighbors) {
          if (noTests && isTestFile(n.neighbor_file)) continue;
          if (n.neighbor_file === targetFile) {
            // Keep only the first parent; later hits at this level are alternates.
            if (!found) {
              found = true;
              parentMap.set(n.neighbor_file, currentFile);
            }
            alternateCount++;
            continue;
          }
          if (!visited.has(n.neighbor_file)) {
            visited.add(n.neighbor_file);
            parentMap.set(n.neighbor_file, currentFile);
            nextQueue.push(n.neighbor_file);
          }
        }
      }
      // Finish the whole level before stopping so every alternate is counted.
      if (found) break;
      queue = nextQueue;
      if (queue.length === 0) break;
    }

    if (!found) {
      return {
        from,
        to,
        fromCandidates,
        toCandidates,
        found: false,
        hops: null,
        path: [],
        alternateCount: 0,
        edgeKinds,
        reverse,
        maxDepth,
      };
    }

    // Walk parent links back from the target, then flip into source → target order.
    const filePath: string[] = [targetFile];
    let cur = targetFile;
    while (cur !== sourceFile) {
      cur = parentMap.get(cur)!;
      filePath.push(cur);
    }
    filePath.reverse();

    return {
      from,
      to,
      fromCandidates,
      toCandidates,
      found: true,
      hops: filePath.length - 1,
      path: filePath,
      // The first hit is the reported path itself, so it is not an "alternate".
      alternateCount: Math.max(0, alternateCount - 1),
      edgeKinds,
      reverse,
      maxDepth,
    };
  } finally {
    db.close();
  }
}
0f22a694..a04d7163 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -37,31 +37,54 @@ export async function buildStructure(ctx: PipelineContext): Promise { const existingFiles = db .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'") .all() as Array<{ file: string }>; - const defsByFile = db.prepare( - "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", - ); - const importCountByFile = db.prepare( - `SELECT COUNT(DISTINCT n2.file) AS cnt FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE n1.file = ? AND e.kind = 'imports'`, - ); - const lineCountByFile = db.prepare( - `SELECT n.name AS file, m.line_count - FROM node_metrics m JOIN nodes n ON m.node_id = n.id - WHERE n.kind = 'file'`, - ); + + // Batch load: all definitions, import counts, and line counts in single queries + const allDefs = db + .prepare( + "SELECT file, name, kind, line FROM nodes WHERE kind != 'file' AND kind != 'directory'", + ) + .all() as Array<{ file: string; name: string; kind: string; line: number }>; + const defsByFileMap = new Map>(); + for (const row of allDefs) { + let arr = defsByFileMap.get(row.file); + if (!arr) { + arr = []; + defsByFileMap.set(row.file, arr); + } + arr.push({ name: row.name, kind: row.kind, line: row.line }); + } + + const allImportCounts = db + .prepare( + `SELECT n1.file, COUNT(DISTINCT n2.file) AS cnt FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.kind = 'imports' + GROUP BY n1.file`, + ) + .all() as Array<{ file: string; cnt: number }>; + const importCountMap = new Map(); + for (const row of allImportCounts) { + importCountMap.set(row.file, row.cnt); + } + const cachedLineCounts = new Map(); - for (const row of lineCountByFile.all() as Array<{ file: string; line_count: number }>) { + for (const row of db + .prepare( + `SELECT n.name AS 
file, m.line_count + FROM node_metrics m JOIN nodes n ON m.node_id = n.id + WHERE n.kind = 'file'`, + ) + .all() as Array<{ file: string; line_count: number }>) { cachedLineCounts.set(row.file, row.line_count); } + let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { - const importCount = - (importCountByFile.get(relPath) as { cnt: number } | undefined)?.cnt || 0; + const importCount = importCountMap.get(relPath) || 0; fileSymbols.set(relPath, { - definitions: defsByFile.all(relPath), + definitions: defsByFileMap.get(relPath) || [], imports: new Array(importCount) as unknown as ExtractorOutput['imports'], exports: [], } as unknown as ExtractorOutput); @@ -111,15 +134,21 @@ export async function buildStructure(ctx: PipelineContext): Promise { } ctx.timing.structureMs = performance.now() - t0; - // Classify node roles + // Classify node roles (incremental: only reclassify changed files' nodes) const t1 = performance.now(); try { const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { - classifyNodeRoles: (db: PipelineContext['db']) => Record; + classifyNodeRoles: ( + db: PipelineContext['db'], + changedFiles?: string[] | null, + ) => Record; }; - const roleSummary = classifyNodeRoles(db); + const changedFileList = isFullBuild ? null : [...allSymbols.keys()]; + const roleSummary = classifyNodeRoles(db, changedFileList); debug( - `Roles: ${Object.entries(roleSummary) + `Roles${changedFileList ? 
` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries( + roleSummary, + ) .map(([r, c]) => `${r}=${c}`) .join(', ')}`, ); diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index f40c5a97..f90b8b0a 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -83,70 +83,78 @@ export async function finalize(ctx: PipelineContext): Promise { warn(`Failed to write build metadata: ${(err as Error).message}`); } - // Orphaned embeddings warning - if (hasEmbeddings) { + // Skip expensive advisory queries for incremental builds — these are + // informational warnings that don't affect correctness and cost ~40-60ms. + if (!isFullBuild) { + debug( + 'Finalize: skipping advisory queries (orphaned/stale embeddings, unused exports) for incremental build', + ); + } else { + // Orphaned embeddings warning + if (hasEmbeddings) { + try { + const orphaned = ( + db + .prepare( + 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + ) + .get() as { c: number } + ).c; + if (orphaned > 0) { + warn( + `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, + ); + } + } catch { + /* ignore - embeddings table may have been dropped */ + } + } + + // Stale embeddings warning (built before current graph rebuild) + if (hasEmbeddings) { + try { + const embedBuiltAt = ( + db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as + | { value: string } + | undefined + )?.value; + if (embedBuiltAt) { + const embedTime = new Date(embedBuiltAt).getTime(); + if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { + warn( + 'Embeddings were built before the last graph rebuild. 
Run "codegraph embed" to update.', + ); + } + } + } catch { + /* ignore - embedding_meta table may not exist */ + } + } + + // Unused exports warning try { - const orphaned = ( + const unusedCount = ( db .prepare( - 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + `SELECT COUNT(*) as c FROM nodes + WHERE exported = 1 AND kind != 'file' + AND id NOT IN ( + SELECT DISTINCT e.target_id FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file + )`, ) .get() as { c: number } ).c; - if (orphaned > 0) { + if (unusedCount > 0) { warn( - `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, + `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, ); } } catch { - /* ignore - embeddings table may have been dropped */ - } - } - - // Stale embeddings warning (built before current graph rebuild) - if (hasEmbeddings) { - try { - const embedBuiltAt = ( - db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as - | { value: string } - | undefined - )?.value; - if (embedBuiltAt) { - const embedTime = new Date(embedBuiltAt).getTime(); - if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { - warn( - 'Embeddings were built before the last graph rebuild. 
Run "codegraph embed" to update.', - ); - } - } - } catch { - /* ignore - embedding_meta table may not exist */ - } - } - - // Unused exports warning - try { - const unusedCount = ( - db - .prepare( - `SELECT COUNT(*) as c FROM nodes - WHERE exported = 1 AND kind != 'file' - AND id NOT IN ( - SELECT DISTINCT e.target_id FROM edges e - JOIN nodes caller ON e.source_id = caller.id - JOIN nodes target ON e.target_id = target.id - WHERE e.kind = 'calls' AND caller.file != target.file - )`, - ) - .get() as { c: number } - ).c; - if (unusedCount > 0) { - warn( - `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, - ); + /* exported column may not exist on older DBs */ } - } catch { - /* exported column may not exist on older DBs */ } closeDb(db); diff --git a/src/domain/queries.ts b/src/domain/queries.ts index aae22eaf..7a4413e9 100644 --- a/src/domain/queries.ts +++ b/src/domain/queries.ts @@ -25,7 +25,7 @@ export { export { kindIcon, normalizeSymbol } from '../shared/normalize.js'; export { briefData } from './analysis/brief.js'; export { contextData, explainData } from './analysis/context.js'; -export { fileDepsData, fnDepsData, pathData } from './analysis/dependencies.js'; +export { fileDepsData, filePathData, fnDepsData, pathData } from './analysis/dependencies.js'; export { exportsData } from './analysis/exports.js'; export { diffImpactData, diff --git a/src/features/structure.ts b/src/features/structure.ts index ec57dfd5..a4982948 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -364,7 +364,7 @@ export function buildStructure( // Re-export from classifier for backward compatibility export { FRAMEWORK_ENTRY_PREFIXES } from '../graph/classifiers/roles.js'; -import { classifyRoles } from '../graph/classifiers/roles.js'; +import { classifyRoles, median } from '../graph/classifiers/roles.js'; interface RoleSummary { entry: number; @@ -381,7 +381,42 @@ 
/**
 * Assign a role (core, entry, utility, …) to graph nodes and report how many
 * nodes landed in each role.
 *
 * With a non-empty `changedFiles` the work is delegated to the incremental
 * classifier, and the returned `RoleSummary` then counts **only the
 * reclassified subset**. With `changedFiles` omitted, `null`, or empty, the
 * full classifier runs over the whole graph. Callers that need graph-wide
 * totals should use the full form or query the DB directly.
 */
export function classifyNodeRoles(
  db: BetterSqlite3Database,
  changedFiles?: string[] | null,
): RoleSummary {
  // Zeroed tally handed to whichever strategy runs.
  const emptySummary: RoleSummary = {
    entry: 0,
    core: 0,
    utility: 0,
    adapter: 0,
    dead: 0,
    'dead-leaf': 0,
    'dead-entry': 0,
    'dead-ffi': 0,
    'dead-unresolved': 0,
    'test-only': 0,
    leaf: 0,
  };

  // Nothing specific to narrow to → classify the entire graph.
  if (!changedFiles || changedFiles.length === 0) {
    return classifyNodeRolesFull(db, emptySummary);
  }

  // Otherwise touch only the changed files and their call neighbours.
  return classifyNodeRolesIncremental(db, changedFiles, emptySummary);
}
/**
 * Incremental role classification: reclassify only the nodes that live in
 * `changedFiles` or in files that share a `calls` edge with them.
 *
 * Steps:
 *  1. Expand the seed files with every other file connected to them by an
 *     existing `calls` edge (either direction).
 *  2. Compute graph-wide fan-in / fan-out medians from the `edges` table.
 *  3. Load fan-in, fan-out, exported status and production fan-in for the
 *     affected nodes and run `classifyRoles` with the global medians.
 *  4. Inside one transaction, clear and rewrite `role` for affected nodes only.
 *
 * All file-keyed statements are issued in chunks so that a large change set
 * cannot exceed SQLite's bound-parameter limit.
 *
 * NOTE(review): neighbours are discovered through edges that exist *now*. If a
 * changed file dropped its only call into another file, that file is no longer
 * reachable from here and keeps its previous roles — confirm whether the
 * caller needs to pass such files in explicitly.
 * NOTE(review): medians here come from every `calls` edge, without joining
 * `nodes`; verify this matches the population the full classifier uses.
 *
 * @param db           Open database handle (written to).
 * @param changedFiles Files whose nodes were rebuilt; duplicates are ignored.
 * @param emptySummary Zeroed summary; returned as-is when no node is affected.
 * @returns Role counts for the reclassified nodes only.
 */
function classifyNodeRolesIncremental(
  db: BetterSqlite3Database,
  changedFiles: string[],
  emptySummary: RoleSummary,
): RoleSummary {
  const FILE_CHUNK = 500;
  const ROLE_CHUNK = 500;

  /** `?,?,…` with `count` placeholders. */
  const marks = (count: number): string => Array.from({ length: count }, () => '?').join(',');
  /** Split `items` into consecutive slices of at most `size` elements. */
  const inChunks = <T>(items: readonly T[], size: number): T[][] => {
    const out: T[][] = [];
    for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
    return out;
  };

  type AffectedRow = {
    id: number;
    name: string;
    kind: string;
    file: string;
    fan_in: number;
    fan_out: number;
  };

  // Expand the affected set with files that hold call neighbours of seed-file nodes,
  // because an added/removed call changes the neighbour's fan-in or fan-out.
  const seedFiles = [...new Set(changedFiles)];
  const seedSet = new Set(seedFiles);
  const neighbourSet = new Set<string>();
  for (const chunk of inChunks(seedFiles, FILE_CHUNK)) {
    const found = db
      .prepare(
        `SELECT DISTINCT n2.file FROM edges e
         JOIN nodes n1 ON (e.source_id = n1.id OR e.target_id = n1.id)
         JOIN nodes n2 ON (e.source_id = n2.id OR e.target_id = n2.id)
         WHERE e.kind = 'calls'
           AND n1.file IN (${marks(chunk.length)})
           AND n2.kind NOT IN ('file', 'directory')`,
      )
      .all(...chunk) as { file: string | null }[];
    // Seed files are excluded here rather than with a second (unbounded) IN-list.
    for (const { file } of found) {
      if (file != null && !seedSet.has(file)) neighbourSet.add(file);
    }
  }
  const allAffectedFiles = [...seedFiles, ...neighbourSet];
  const fileChunks = inChunks(allAffectedFiles, FILE_CHUNK);

  // 1. Global medians from the edge distribution (no node join).
  const fanInDist = (
    db
      .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id`)
      .all() as { cnt: number }[]
  )
    .map((r) => r.cnt)
    .sort((a, b) => a - b);
  const fanOutDist = (
    db
      .prepare(`SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id`)
      .all() as { cnt: number }[]
  )
    .map((r) => r.cnt)
    .sort((a, b) => a - b);

  const globalMedians = { fanIn: median(fanInDist), fanOut: median(fanOutDist) };

  // 2. Affected nodes with per-node fan-in / fan-out via correlated subqueries.
  const rows: AffectedRow[] = [];
  for (const chunk of fileChunks) {
    const part = db
      .prepare(
        `SELECT n.id, n.name, n.kind, n.file,
           (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND target_id = n.id) AS fan_in,
           (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND source_id = n.id) AS fan_out
         FROM nodes n
         WHERE n.kind NOT IN ('file', 'directory')
           AND n.file IN (${marks(chunk.length)})`,
      )
      .all(...chunk) as AffectedRow[];
    for (const r of part) rows.push(r);
  }

  if (rows.length === 0) return emptySummary;

  // 3. Exported status: affected nodes called from a different file.
  // 4. Production fan-in: callers that pass the test-file filter.
  // A node belongs to exactly one file, so per-chunk results never overlap.
  const exportedIds = new Set<number>();
  const prodFanInMap = new Map<number, number>();
  for (const chunk of fileChunks) {
    const ph = marks(chunk.length);

    const exportedRows = db
      .prepare(
        `SELECT DISTINCT e.target_id
         FROM edges e
         JOIN nodes caller ON e.source_id = caller.id
         JOIN nodes target ON e.target_id = target.id
         WHERE e.kind = 'calls' AND caller.file != target.file
           AND target.file IN (${ph})`,
      )
      .all(...chunk) as { target_id: number }[];
    for (const r of exportedRows) exportedIds.add(r.target_id);

    const prodRows = db
      .prepare(
        `SELECT e.target_id, COUNT(*) AS cnt
         FROM edges e
         JOIN nodes caller ON e.source_id = caller.id
         JOIN nodes target ON e.target_id = target.id
         WHERE e.kind = 'calls'
           AND target.file IN (${ph})
           ${testFilterSQL('caller.file')}
         GROUP BY e.target_id`,
      )
      .all(...chunk) as { target_id: number; cnt: number }[];
    for (const r of prodRows) prodFanInMap.set(r.target_id, r.cnt);
  }

  // 5. Classify affected nodes against the global medians.
  const classifierInput = rows.map((r) => ({
    id: String(r.id),
    name: r.name,
    kind: r.kind,
    file: r.file,
    fanIn: r.fan_in,
    fanOut: r.fan_out,
    isExported: exportedIds.has(r.id),
    productionFanIn: prodFanInMap.get(r.id) || 0,
  }));

  const roleMap = classifyRoles(classifierInput, globalMedians);

  // 6. Tally the summary (affected nodes only) and group node ids by role.
  const summary: RoleSummary = { ...emptySummary };
  const idsByRole = new Map<string, number[]>();
  for (const row of rows) {
    const role = roleMap.get(String(row.id)) || 'leaf';
    // Every `dead*` role also counts towards the aggregate `dead` bucket.
    if (role.startsWith('dead')) summary.dead++;
    summary[role] = (summary[role] || 0) + 1;
    let ids = idsByRole.get(role);
    if (!ids) {
      ids = [];
      idsByRole.set(role, ids);
    }
    ids.push(row.id);
  }

  // Only affected nodes are rewritten — no global NULL reset.
  const roleStmtCache = new Map<number, ReturnType<BetterSqlite3Database['prepare']>>();
  db.transaction(() => {
    // Clear first so a node that no longer receives a role does not keep a stale one.
    for (const chunk of fileChunks) {
      db.prepare(
        `UPDATE nodes SET role = NULL WHERE file IN (${marks(chunk.length)}) AND kind NOT IN ('file', 'directory')`,
      ).run(...chunk);
    }
    for (const [role, ids] of idsByRole) {
      for (let i = 0; i < ids.length; i += ROLE_CHUNK) {
        const end = Math.min(i + ROLE_CHUNK, ids.length);
        const chunkSize = end - i;
        // One prepared statement per distinct chunk size (full chunk + final remainder).
        let stmt = roleStmtCache.get(chunkSize);
        if (!stmt) {
          stmt = db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${marks(chunkSize)})`);
          roleStmtCache.set(chunkSize, stmt);
        }
        const vals: unknown[] = [role];
        for (let j = i; j < end; j++) vals.push(ids[j]);
        stmt.run(...vals);
      }
    }
  })();

  return summary;
}
// ── Helpers ────────────────────────────────────────────────────────

/**
 * Median of a list of numbers that is **already sorted ascending**.
 *
 * The input is never mutated, so read-only arrays are accepted. The function
 * does not sort or validate order; passing an unsorted list yields the middle
 * element(s) by position, not the statistical median.
 *
 * @param sorted Ascending-sorted values.
 * @returns `0` for an empty list, the middle value for an odd length, or the
 *          mean of the two middle values for an even length.
 */
export function median(sorted: readonly number[]): number {
  const count = sorted.length;
  if (count === 0) return 0;
  const mid = Math.floor(count / 2);
  const upper = sorted[mid]!;
  // Even length: average the two values straddling the centre.
  return count % 2 === 0 ? (sorted[mid - 1]! + upper) / 2 : upper;
}
name: 'path', - description: 'Find shortest path between two symbols in the dependency graph', + description: + 'Find shortest path between two symbols (or files with file_mode) in the dependency graph', inputSchema: { type: 'object', properties: { - from: { type: 'string', description: 'Source symbol name' }, - to: { type: 'string', description: 'Target symbol name' }, + from: { type: 'string', description: 'Source symbol name (or file path with file_mode)' }, + to: { type: 'string', description: 'Target symbol name (or file path with file_mode)' }, depth: { type: 'number', description: 'Max traversal depth (default: 10)' }, edge_kinds: { type: 'array', items: { type: 'string', enum: EVERY_EDGE_KIND }, - description: 'Edge kinds to follow (default: ["calls"])', + description: + 'Edge kinds to follow (default: ["calls"] for symbols, ["imports","imports-type"] for files)', }, from_file: { type: 'string', description: 'Disambiguate source by file' }, to_file: { type: 'string', description: 'Disambiguate target by file' }, + file_mode: { + type: 'boolean', + description: 'Treat from/to as file paths and find file-to-file shortest path', + default: false, + }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, }, required: ['from', 'to'], diff --git a/src/mcp/tools/path.ts b/src/mcp/tools/path.ts index 7031fa4c..74c7632d 100644 --- a/src/mcp/tools/path.ts +++ b/src/mcp/tools/path.ts @@ -10,9 +10,19 @@ interface PathArgs { from_file?: string; to_file?: string; no_tests?: boolean; + file_mode?: boolean; } export async function handler(args: PathArgs, ctx: McpToolContext): Promise { + if (args.file_mode) { + const { filePathData } = await ctx.getQueries(); + return filePathData(args.from, args.to, ctx.dbPath, { + maxDepth: args.depth ?? 10, + edgeKinds: args.edge_kinds, + reverse: false, + noTests: args.no_tests, + }); + } const { pathData } = await ctx.getQueries(); return pathData(args.from, args.to, ctx.dbPath, { maxDepth: args.depth ?? 
// ── File-level path ──────────────────────────────────────────────────────

/** Fields of the `filePathData` result that the CLI renderer reads. */
interface FilePathDataResult {
  error?: string;
  found?: boolean;
  hops?: number | null;
  reverse?: boolean;
  maxDepth?: number;
  path: string[];
  fromCandidates: string[];
  toCandidates: string[];
  alternateCount: number;
}

/**
 * Run a file-to-file shortest-path query and print the outcome.
 *
 * `outputResult` gets the first chance to emit the data; when it reports that
 * it handled the output, nothing else is printed. Otherwise the result is
 * rendered as text: the error message, a "no path" notice (plus which file
 * each ambiguous pattern was resolved to), the same-file notice, or the
 * indented chain of files followed by the alternate-path count.
 */
function filePath(from: string, to: string, customDbPath: string, opts: PathOpts = {}): void {
  const result = filePathData(from, to, customDbPath, opts) as FilePathDataResult;
  const handled = outputResult(result as unknown as Record<string, unknown>, null, opts);
  if (handled) return;

  if (result.error) {
    console.log(result.error);
    return;
  }

  if (!result.found) {
    const direction = result.reverse ? 'reverse ' : '';
    console.log(
      `No ${direction}file path from "${from}" to "${to}" within ${result.maxDepth} hops.`,
    );
    // Only mention the resolution when a pattern was ambiguous.
    const { fromCandidates, toCandidates } = result;
    if (fromCandidates.length > 1) {
      console.log(
        `\n "${from}" matched ${fromCandidates.length} files — using: ${fromCandidates[0]}`,
      );
    }
    if (toCandidates.length > 1) {
      console.log(` "${to}" matched ${toCandidates.length} files — using: ${toCandidates[0]}`);
    }
    return;
  }

  if (result.hops === 0) {
    console.log(`\n"${from}" and "${to}" resolve to the same file (0 hops):`);
    console.log(` ${result.path[0]}\n`);
    return;
  }

  const suffix = result.reverse ? ' (reverse)' : '';
  const hopWord = result.hops === 1 ? 'hop' : 'hops';
  console.log(`\nFile path from ${from} to ${to} (${result.hops} ${hopWord})${suffix}:\n`);

  // Each step is indented one level deeper; every step after the first gets an arrow.
  result.path.forEach((file, step) => {
    const pad = ' '.repeat(step + 1);
    const arrow = step === 0 ? '' : '→ ';
    console.log(`${pad}${arrow}${file}`);
  });

  const alternates = result.alternateCount;
  if (alternates > 0) {
    const noun = alternates === 1 ? 'path' : 'paths';
    console.log(`\n (${alternates} alternate shortest ${noun} at same depth)`);
  }
  console.log();
}
'app.js'); + fs.writeFileSync(incrAppPath, newAppContent); + + // Step 3: Incremental rebuild + await buildGraph(incrDir, { incremental: true, skipRegistry: true }); + + // Step 4: Apply same change to full copy and full rebuild + const fullAppPath = path.join(fullDir, 'app.js'); + fs.writeFileSync(fullAppPath, newAppContent); + await buildGraph(fullDir, { incremental: false, skipRegistry: true }); + }, 60_000); + + afterAll(() => { + try { + if (tmpBase) fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('produces identical nodes after structural change', () => { + const fullGraph = readGraph(path.join(fullDir, '.codegraph', 'graph.db')); + const incrGraph = readGraph(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrGraph.nodes).toEqual(fullGraph.nodes); + }); + + it('produces identical edges after structural change', () => { + const fullGraph = readGraph(path.join(fullDir, '.codegraph', 'graph.db')); + const incrGraph = readGraph(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrGraph.edges).toEqual(fullGraph.edges); + }); + + it('preserves node roles after structural change', () => { + function readRoles(dbPath: string) { + const db = new Database(dbPath, { readonly: true }); + const roles = db + .prepare( + `SELECT name, kind, file, role FROM nodes + WHERE kind NOT IN ('file', 'directory') AND role IS NOT NULL + ORDER BY name, kind, file`, + ) + .all(); + db.close(); + return roles; + } + const fullRoles = readRoles(path.join(fullDir, '.codegraph', 'graph.db')); + const incrRoles = readRoles(path.join(incrDir, '.codegraph', 'graph.db')); + expect(incrRoles.length).toBeGreaterThan(0); + expect(incrRoles).toEqual(fullRoles); + }); +}); + +describe('Incremental rebuild performance', () => { + let tmpDir: string; + + afterAll(() => { + try { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('1-file incremental rebuild completes with 
timing breakdown', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-incr-perf-')); + copyDirSync(FIXTURE_DIR, tmpDir); + + // Full build first + await buildGraph(tmpDir, { incremental: false, skipRegistry: true }); + + // Touch one file + const appPath = path.join(tmpDir, 'app.js'); + fs.appendFileSync(appPath, '\n// perf-touch\n'); + + // Incremental rebuild with timing + const result = await buildGraph(tmpDir, { incremental: true, skipRegistry: true }); + + expect(result).toBeDefined(); + expect(result!.phases).toBeDefined(); + + const p = result!.phases; + // Log timing breakdown for benchmarking + const total = Object.values(p).reduce((sum, v) => sum + (v || 0), 0); + console.log(`\n Incremental 1-file rebuild timing:`); + console.log(` Total: ${total.toFixed(1)}ms`); + console.log(` Parse: ${p.parseMs}ms`); + console.log(` Insert: ${p.insertMs}ms`); + console.log(` Resolve: ${p.resolveMs}ms`); + console.log(` Edges: ${p.edgesMs}ms`); + console.log(` Structure: ${p.structureMs}ms`); + console.log(` Roles: ${p.rolesMs}ms`); + console.log(` Finalize: ${p.finalizeMs}ms`); + + // Performance assertions: structure and roles should be fast for incremental. + // Use generous thresholds (200ms) to avoid flaky failures on slow CI runners, + // under heavy load, or during GC pauses. Local benchmarks show ~9ms for roles. 
+ expect(p.rolesMs).toBeLessThan(200); + expect(p.structureMs).toBeLessThan(200); + expect(p.finalizeMs).toBeLessThan(200); + }, 30_000); }); diff --git a/tests/integration/queries.test.ts b/tests/integration/queries.test.ts index 6381cdb2..a869ce51 100644 --- a/tests/integration/queries.test.ts +++ b/tests/integration/queries.test.ts @@ -31,6 +31,7 @@ import { explainData, exportsData, fileDepsData, + filePathData, fnDepsData, fnImpactData, impactAnalysisData, @@ -491,6 +492,72 @@ describe('pathData', () => { }); }); +// ─── filePathData ──────────────────────────────────────────────────── + +describe('filePathData', () => { + test('finds direct 1-hop file path', () => { + // middleware.js → auth.js (import edge) + const data = filePathData('middleware.js', 'auth.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path).toEqual(['middleware.js', 'auth.js']); + }); + + test('finds multi-hop file path', () => { + // routes.js → middleware.js → auth.js + const data = filePathData('routes.js', 'auth.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(2); + expect(data.path).toEqual(['routes.js', 'middleware.js', 'auth.js']); + }); + + test('returns not found when no file path exists', () => { + // auth.js has no outgoing imports in the fixture + const data = filePathData('auth.js', 'routes.js', dbPath); + expect(data.found).toBe(false); + }); + + test('self-file returns 0 hops', () => { + const data = filePathData('middleware.js', 'middleware.js', dbPath); + expect(data.found).toBe(true); + expect(data.hops).toBe(0); + expect(data.path).toEqual(['middleware.js']); + }); + + test('reverse direction finds upstream file path', () => { + // auth.js ←(reverse)── middleware.js ←(reverse)── routes.js + const data = filePathData('auth.js', 'routes.js', dbPath, { reverse: true }); + expect(data.found).toBe(true); + expect(data.hops).toBe(2); + expect(data.path).toEqual(['auth.js', 'middleware.js', 'routes.js']); + }); + 
+ test('excludes test files with noTests', () => { + // auth.test.js imports auth.js, but should be excluded + const data = filePathData('auth.js', 'auth.test.js', dbPath, { reverse: true, noTests: true }); + expect(data.found).toBe(false); + }); + + test('returns error for no matching from file', () => { + const data = filePathData('nonexistent.js', 'auth.js', dbPath); + expect(data.found).toBe(false); + expect(data.error).toMatch(/No file matching/); + }); + + test('returns error for no matching to file', () => { + const data = filePathData('auth.js', 'nonexistent.js', dbPath); + expect(data.found).toBe(false); + expect(data.error).toMatch(/No file matching/); + }); + + test('populates fromCandidates and toCandidates', () => { + const data = filePathData('middleware.js', 'auth.js', dbPath); + expect(data.fromCandidates.length).toBeGreaterThanOrEqual(1); + expect(data.toCandidates.length).toBeGreaterThanOrEqual(1); + expect(data.fromCandidates[0]).toBe('middleware.js'); + }); +}); + // ─── diffImpactData ─────────────────────────────────────────────────── describe('diffImpactData', () => {