diff --git a/__tests__/sync.test.ts b/__tests__/sync.test.ts index 708a92a42..9398b6319 100644 --- a/__tests__/sync.test.ts +++ b/__tests__/sync.test.ts @@ -149,6 +149,34 @@ describe('Sync Module', () => { expect(result.filesRemoved).toBe(0); expect(result.filesChecked).toBeGreaterThan(0); }); + + it('should re-resolve affected caller files when an exported symbol is renamed', async () => { + fs.writeFileSync( + path.join(testDir, 'src', 'api.ts'), + `export function hello() { return 'world'; }` + ); + fs.writeFileSync( + path.join(testDir, 'src', 'consumer.ts'), + `import { hello } from './api';\nexport function run() { return hello(); }` + ); + await cg.sync(); + + fs.writeFileSync( + path.join(testDir, 'src', 'api.ts'), + `export function goodbye() { return 'world'; }` + ); + fs.writeFileSync( + path.join(testDir, 'src', 'consumer.ts'), + `import { goodbye } from './api';\nexport function run() { return goodbye(); }` + ); + + const result = await cg.sync(); + + expect(result.filesModified).toBeGreaterThanOrEqual(1); + expect(result.filesReindexed).toBeGreaterThanOrEqual(result.filesModified); + expect(result.resolutionMode).toMatch(/changed-only|affected-set/); + expect(cg.searchNodes('goodbye').length).toBeGreaterThan(0); + }); }); }); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 9e7f98887..44eee6e9e 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -639,13 +639,21 @@ program if (totalChanges === 0) { clack.log.info('Already up to date'); + if (result.filesAffected > 0) { + clack.log.info( + `Re-resolved ${formatNumber(result.filesAffected)} affected files ` + + `(${result.resolutionMode}, ${result.detectionMode})` + ); + } } else { clack.log.success(`Synced ${formatNumber(totalChanges)} changed files`); const details: string[] = []; if (result.filesAdded > 0) details.push(`Added: ${result.filesAdded}`); if (result.filesModified > 0) details.push(`Modified: ${result.filesModified}`); if (result.filesRemoved > 0) details.push(`Removed: ${result.filesRemoved}`); + if (result.filesAffected > 0) details.push(`Affected: ${result.filesAffected}`); clack.log.info(`${details.join(', ')} ${getGlyphs().dash} ${formatNumber(result.nodesUpdated)} nodes in ${formatDuration(result.durationMs)}`); + clack.log.info(`Resolution: ${result.resolutionMode} ${getGlyphs().dash} Detection: ${result.detectionMode}`); } clack.outro('Done'); @@ -704,6 +712,7 @@ program dbSizeBytes: stats.dbSizeBytes, backend, journalMode, + syncDetection: 'fast-path', nodesByKind: stats.nodesByKind, languages: Object.entries(stats.filesByLanguage).filter(([, count]) => count > 0).map(([lang]) => lang), pendingChanges: { @@ -746,6 +755,7 @@ program ? chalk.green('wal') : chalk.yellow(`${journalMode || 'unknown'} ${getGlyphs().dash} WAL inactive; reads can block on writes`); console.log(` Journal: ${journalLabel}`); + console.log(` Sync: fast-path ${getGlyphs().dash} mtime/size prefilter + content hash confirm`); console.log(); // Node breakdown diff --git a/src/db/migrations.ts b/src/db/migrations.ts index 1a8d1c542..469818d99 100644 --- a/src/db/migrations.ts +++ b/src/db/migrations.ts @@ -9,7 +9,7 @@ import { SqliteDatabase } from './sqlite-adapter'; /** * Current schema version */ -export const CURRENT_SCHEMA_VERSION = 4; +export const CURRENT_SCHEMA_VERSION = 5; /** * Migration definition @@ -65,6 +65,33 @@ const migrations: Migration[] = [ `); }, }, + { + version: 5, + description: 'Add persisted reference facts for affected-file incremental re-resolution', + up: (db) => { + db.exec(` + CREATE TABLE IF NOT EXISTS reference_facts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + from_node_id TEXT NOT NULL, + reference_name TEXT NOT NULL, + reference_kind TEXT NOT NULL, + line INTEGER NOT NULL, + col INTEGER NOT NULL, + candidates TEXT, + file_path TEXT NOT NULL DEFAULT '', + language TEXT NOT NULL DEFAULT 'unknown', + FOREIGN KEY (from_node_id) REFERENCES nodes(id) ON DELETE CASCADE + ); + CREATE INDEX IF NOT EXISTS idx_reference_facts_from_node ON reference_facts(from_node_id); + CREATE INDEX IF NOT EXISTS idx_reference_facts_name ON reference_facts(reference_name); + CREATE INDEX IF NOT EXISTS idx_reference_facts_file_path ON reference_facts(file_path); + CREATE INDEX IF NOT EXISTS idx_reference_facts_from_name ON reference_facts(from_node_id, reference_name); + INSERT INTO reference_facts (from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language) + SELECT from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language + FROM unresolved_refs; + `); + }, + }, ]; /** diff --git a/src/db/queries.ts b/src/db/queries.ts index a0ac31eea..faa6fbe3c 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -9,6 +9,7 @@ import { Node, Edge, FileRecord, + ReferenceFact, UnresolvedReference, NodeKind, EdgeKind, @@ -109,6 +110,18 @@ interface UnresolvedRefRow { language: string; } +interface ReferenceFactRow { + id: number; + from_node_id: string; + reference_name: string; + reference_kind: string; + line: number; + col: number; + candidates: string | null; + file_path: string; + language: string; +} + /** * Convert database row to Node object */ @@ -199,8 +212,13 @@ export class QueryBuilder { getFileByPath?: SqliteStatement; getAllFiles?: SqliteStatement; insertUnresolved?: SqliteStatement; + insertReferenceFact?: SqliteStatement; deleteUnresolvedByNode?: SqliteStatement; + deleteUnresolvedByFile?: SqliteStatement; + deleteReferenceFactsByFile?: SqliteStatement; getUnresolvedByName?: SqliteStatement; + getReferenceFactsByFiles?: SqliteStatement; + getReferencingFilesByNames?: SqliteStatement; getNodesByName?: SqliteStatement; getNodesByQualifiedNameExact?: SqliteStatement; getNodesByLowerName?: SqliteStatement; @@ -1269,6 +1287,23 @@ export class QueryBuilder { this.stmts.deleteEdgesBySource.run(sourceId); } + deleteEdgesByProvenanceAndFiles( + provenances: ReadonlyArray>, + filePaths: readonly string[] + ): void { + if (provenances.length === 0 || filePaths.length === 0) return; + const filePlaceholders = filePaths.map(() => '?').join(','); + const provPlaceholders = provenances.map(() => '?').join(','); + this.db.prepare( + `DELETE FROM edges + WHERE provenance IN (${provPlaceholders}) + AND ( + source IN (SELECT id FROM nodes WHERE file_path IN (${filePlaceholders})) + OR target IN (SELECT id FROM nodes WHERE file_path IN (${filePlaceholders})) + )` + ).run(...provenances, ...filePaths, ...filePaths); + } + /** * Get outgoing edges from a node */ @@ -1443,6 +1478,26 @@ export class QueryBuilder { }); } + insertReferenceFact(ref: ReferenceFact): void { + if (!this.stmts.insertReferenceFact) { + this.stmts.insertReferenceFact = this.db.prepare(` + INSERT INTO reference_facts (from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language) + VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates, @filePath, @language) + `); + } + + this.stmts.insertReferenceFact.run({ + fromNodeId: ref.fromNodeId, + referenceName: ref.referenceName, + referenceKind: ref.referenceKind, + line: ref.line, + col: ref.column, + candidates: ref.candidates ? JSON.stringify(ref.candidates) : null, + filePath: ref.filePath ?? '', + language: ref.language ?? 'unknown', + }); + } + /** * Insert multiple unresolved references in a transaction */ @@ -1456,6 +1511,16 @@ export class QueryBuilder { insert(); } + insertReferenceFactsBatch(refs: ReferenceFact[]): void { + if (refs.length === 0) return; + const insert = this.db.transaction(() => { + for (const ref of refs) { + this.insertReferenceFact(ref); + } + }); + insert(); + } + /** * Delete unresolved references from a node */ @@ -1468,6 +1533,24 @@ export class QueryBuilder { this.stmts.deleteUnresolvedByNode.run(nodeId); } + deleteUnresolvedByFile(filePath: string): void { + if (!this.stmts.deleteUnresolvedByFile) { + this.stmts.deleteUnresolvedByFile = this.db.prepare( + 'DELETE FROM unresolved_refs WHERE file_path = ?' + ); + } + this.stmts.deleteUnresolvedByFile.run(filePath); + } + + deleteReferenceFactsByFile(filePath: string): void { + if (!this.stmts.deleteReferenceFactsByFile) { + this.stmts.deleteReferenceFactsByFile = this.db.prepare( + 'DELETE FROM reference_facts WHERE file_path = ?' + ); + } + this.stmts.deleteReferenceFactsByFile.run(filePath); + } + /** * Get unresolved references by name (for resolution) */ @@ -1589,11 +1672,43 @@ export class QueryBuilder { })); } + getReferenceFactsByFiles(filePaths: string[]): ReferenceFact[] { + if (filePaths.length === 0) return []; + + const placeholders = filePaths.map(() => '?').join(','); + const rows = this.db + .prepare(`SELECT * FROM reference_facts WHERE file_path IN (${placeholders})`) + .all(...filePaths) as ReferenceFactRow[]; + + return rows.map((row) => ({ + fromNodeId: row.from_node_id, + referenceName: row.reference_name, + referenceKind: row.reference_kind as EdgeKind, + line: row.line, + column: row.col, + candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined, + filePath: row.file_path, + language: row.language as Language, + })); + } + + getReferencingFilesByNames(names: readonly string[]): string[] { + if (names.length === 0) return []; + const placeholders = names.map(() => '?').join(','); + const rows = this.db + .prepare( + `SELECT DISTINCT file_path FROM reference_facts WHERE reference_name IN (${placeholders})` + ) + .all(...names) as Array<{ file_path: string }>; + return rows.map((row) => row.file_path); + } + /** * Delete all unresolved references (after resolution) */ clearUnresolvedReferences(): void { this.db.exec('DELETE FROM unresolved_refs'); + this.db.exec('DELETE FROM reference_facts'); } /** @@ -1727,6 +1842,7 @@ export class QueryBuilder { this.nodeCache.clear(); this.db.transaction(() => { this.db.exec('DELETE FROM unresolved_refs'); + this.db.exec('DELETE FROM reference_facts'); this.db.exec('DELETE FROM edges'); this.db.exec('DELETE FROM nodes'); this.db.exec('DELETE FROM files'); diff --git a/src/db/schema.sql b/src/db/schema.sql index b08c34f37..1204af729 100644 --- a/src/db/schema.sql +++ b/src/db/schema.sql @@ -80,6 +80,21 @@ CREATE TABLE IF NOT EXISTS unresolved_refs ( FOREIGN KEY (from_node_id) REFERENCES nodes(id) ON DELETE CASCADE ); +-- Persisted reference facts: extracted references that remain queryable even +-- after successful resolution, enabling future affected-file re-resolution. +CREATE TABLE IF NOT EXISTS reference_facts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + from_node_id TEXT NOT NULL, + reference_name TEXT NOT NULL, + reference_kind TEXT NOT NULL, + line INTEGER NOT NULL, + col INTEGER NOT NULL, + candidates TEXT, + file_path TEXT NOT NULL DEFAULT '', + language TEXT NOT NULL DEFAULT 'unknown', + FOREIGN KEY (from_node_id) REFERENCES nodes(id) ON DELETE CASCADE +); + -- ============================================================================= -- Indexes for Query Performance -- ============================================================================= @@ -142,6 +157,10 @@ CREATE INDEX IF NOT EXISTS idx_unresolved_name ON unresolved_refs(reference_name CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path); CREATE INDEX IF NOT EXISTS idx_unresolved_from_name ON unresolved_refs(from_node_id, reference_name); CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance); +CREATE INDEX IF NOT EXISTS idx_reference_facts_from_node ON reference_facts(from_node_id); +CREATE INDEX IF NOT EXISTS idx_reference_facts_name ON reference_facts(reference_name); +CREATE INDEX IF NOT EXISTS idx_reference_facts_file_path ON reference_facts(file_path); +CREATE INDEX IF NOT EXISTS idx_reference_facts_from_name ON reference_facts(from_node_id, reference_name); -- Project metadata for version/provenance tracking CREATE TABLE IF NOT EXISTS project_metadata ( diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 42037d7f6..04b009a41 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -14,6 +14,8 @@ import { FileRecord, ExtractionResult, ExtractionError, + Node, + ReferenceFact, } from '../types'; import { QueryBuilder } from '../db/queries'; import { extractFromSource } from './tree-sitter'; @@ -80,9 +82,14 @@ export interface SyncResult { filesAdded: number; filesModified: number; filesRemoved: number; + filesReindexed: number; + filesAffected: number; nodesUpdated: number; durationMs: number; changedFilePaths?: string[]; + affectedFilePaths?: string[]; + resolutionMode: 'changed-only' | 'affected-set' | 'full'; + detectionMode: 'fast-path' | 'full-verify'; } /** @@ -1267,6 +1274,7 @@ export class ExtractionOrchestrator { if (existingFile) { this.queries.deleteFile(filePath); } + this.queries.deleteReferenceFactsByFile(filePath); // Filter out nodes with missing required fields before insertion. // This prevents FK violations when edges reference nodes that would @@ -1301,6 +1309,7 @@ export class ExtractionOrchestrator { })); if (refsWithContext.length > 0) { this.queries.insertUnresolvedRefsBatch(refsWithContext); + this.queries.insertReferenceFactsBatch(refsWithContext as ReferenceFact[]); } } @@ -1335,6 +1344,8 @@ export class ExtractionOrchestrator { let filesRemoved = 0; let nodesUpdated = 0; const changedFilePaths: string[] = []; + let resolutionMode: SyncResult['resolutionMode'] = 'changed-only'; + const detectionMode: SyncResult['detectionMode'] = 'fast-path'; onProgress?.({ phase: 'scanning', @@ -1343,6 +1354,7 @@ export class ExtractionOrchestrator { }); const filesToIndex: string[] = []; + const oldNodesByChangedFile = new Map(); // === Filesystem reconcile (git-independent) === // The source of truth for "what changed" is the filesystem vs the indexed // state — never git. We enumerate the current source files and reconcile @@ -1367,6 +1379,8 @@ export class ExtractionOrchestrator { // file deleted from disk but not yet staged, so set membership alone misses it. for (const tracked of trackedFiles) { if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) { + this.queries.deleteUnresolvedByFile(tracked.path); + this.queries.deleteReferenceFactsByFile(tracked.path); this.queries.deleteFile(tracked.path); filesRemoved++; } @@ -1409,15 +1423,53 @@ export class ExtractionOrchestrator { changedFilePaths.push(filePath); filesAdded++; } else if (tracked.contentHash !== contentHash) { + oldNodesByChangedFile.set(filePath, this.queries.getNodesByFile(filePath)); filesToIndex.push(filePath); changedFilePaths.push(filePath); filesModified++; } } + const affectedFileSet = new Set(changedFilePaths); + if (changedFilePaths.length > 0) { + const exportedNames = new Set(); + for (const filePath of changedFilePaths) { + for (const node of oldNodesByChangedFile.get(filePath) ?? []) { + if (node.isExported) exportedNames.add(node.name); + } + } + for (const filePath of filesToIndex) { + const fullPath = path.join(this.rootDir, filePath); + try { + const content = fs.readFileSync(fullPath, 'utf-8'); + const language = detectLanguage(filePath, content); + if (!isLanguageSupported(language)) continue; + const frameworkNames = this.ensureDetectedFrameworks(); + const result = extractFromSource(filePath, content, language, frameworkNames); + for (const node of result.nodes) { + if (node.isExported) exportedNames.add(node.name); + } + } catch { + // Best-effort; affected set can stay conservative if a file is unreadable. + } + } + for (const filePath of this.queries.getReferencingFilesByNames([...exportedNames])) { + affectedFileSet.add(filePath); + } + } + + const affectedFilePaths = [...affectedFileSet] + .filter((filePath) => currentSet.has(filePath) && !filesToIndex.includes(filePath)) + .sort(); + if (affectedFilePaths.length > 0) { + resolutionMode = 'affected-set'; + } + + const reindexedFilePaths = [...new Set([...filesToIndex, ...affectedFilePaths])]; + // Load only grammars needed for changed files - if (filesToIndex.length > 0) { - const neededLanguages = [...new Set(filesToIndex.map((f) => detectLanguage(f)))]; + if (reindexedFilePaths.length > 0) { + const neededLanguages = [...new Set(reindexedFilePaths.map((f) => detectLanguage(f)))]; // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) { neededLanguages.push('cpp'); @@ -1426,9 +1478,9 @@ export class ExtractionOrchestrator { } // Index changed files - const total = filesToIndex.length; - for (let i = 0; i < filesToIndex.length; i++) { - const filePath = filesToIndex[i]!; + const total = reindexedFilePaths.length; + for (let i = 0; i < reindexedFilePaths.length; i++) { + const filePath = reindexedFilePaths[i]!; onProgress?.({ phase: 'parsing', current: i + 1, @@ -1436,6 +1488,34 @@ export class ExtractionOrchestrator { currentFile: filePath, }); + // Affected-only files keep their structural graph; only resolver/synthesized + // edges and reference facts need a refresh. + if (!filesToIndex.includes(filePath)) { + this.queries.deleteEdgesByProvenanceAndFiles(['resolver', 'heuristic'], [filePath]); + this.queries.deleteUnresolvedByFile(filePath); + this.queries.deleteReferenceFactsByFile(filePath); + const fullPath = path.join(this.rootDir, filePath); + try { + const content = fs.readFileSync(fullPath, 'utf-8'); + const language = detectLanguage(filePath, content); + if (isLanguageSupported(language)) { + const frameworkNames = this.ensureDetectedFrameworks(); + const parsed = extractFromSource(filePath, content, language, frameworkNames); + const refsWithContext = parsed.unresolvedReferences.map((ref) => ({ + ...ref, + filePath: ref.filePath ?? filePath, + language: ref.language ?? language, + })); + this.queries.insertReferenceFactsBatch(refsWithContext as ReferenceFact[]); + // Keep unresolved_refs aligned with the resolution batch input. + this.queries.insertUnresolvedRefsBatch(refsWithContext); + } + } catch { + continue; + } + continue; + } + const result = await this.indexFile(filePath); nodesUpdated += result.nodes.length; } @@ -1445,9 +1525,14 @@ export class ExtractionOrchestrator { filesAdded, filesModified, filesRemoved, + filesReindexed: reindexedFilePaths.length, + filesAffected: affectedFilePaths.length, nodesUpdated, durationMs: Date.now() - startTime, changedFilePaths: changedFilePaths.length > 0 ? changedFilePaths : undefined, + affectedFilePaths: affectedFilePaths.length > 0 ? affectedFilePaths : undefined, + resolutionMode, + detectionMode, }; } diff --git a/src/index.ts b/src/index.ts index ee3bf51fa..51a320f50 100644 --- a/src/index.ts +++ b/src/index.ts @@ -414,7 +414,18 @@ export class CodeGraph { try { this.fileLock.acquire(); } catch { - return { filesChecked: 0, filesAdded: 0, filesModified: 0, filesRemoved: 0, nodesUpdated: 0, durationMs: 0 }; + return { + filesChecked: 0, + filesAdded: 0, + filesModified: 0, + filesRemoved: 0, + filesReindexed: 0, + filesAffected: 0, + nodesUpdated: 0, + durationMs: 0, + resolutionMode: 'changed-only', + detectionMode: 'fast-path', + }; } try { const result = await this.orchestrator.sync(options.onProgress); @@ -423,15 +434,18 @@ export class CodeGraph { // every sync that touched files so edits to `app.module.ts` propagate // to controllers in unchanged files. The pass is idempotent and cheap // (regex over *.module.ts only). - if (result.filesAdded > 0 || result.filesModified > 0) { + if (result.filesAdded > 0 || result.filesModified > 0 || result.filesAffected > 0) { this.resolver.runPostExtract(); } // Resolve references if files were updated - if (result.filesAdded > 0 || result.filesModified > 0) { - if (result.changedFilePaths) { - // Scope resolution to changed files (git fast path — bounded set) - const unresolvedRefs = this.queries.getUnresolvedReferencesByFiles(result.changedFilePaths); + if (result.filesReindexed > 0) { + const resolutionFiles = result.affectedFilePaths + ? [...new Set([...(result.changedFilePaths ?? []), ...result.affectedFilePaths])] + : result.changedFilePaths; + + if (resolutionFiles && resolutionFiles.length > 0) { + const unresolvedRefs = this.queries.getReferenceFactsByFiles(resolutionFiles); options.onProgress?.({ phase: 'resolving', @@ -446,6 +460,7 @@ export class CodeGraph { total, }); }); + this.resolver.synthesizeHeuristicEdges(); } else { // No git info — use batched resolution to avoid OOM const unresolvedCount = this.queries.getUnresolvedReferencesCount(); @@ -467,7 +482,7 @@ export class CodeGraph { } // Refresh planner stats + checkpoint the WAL after bulk writes. - if (result.filesAdded > 0 || result.filesModified > 0 || result.filesRemoved > 0) { + if (result.filesAdded > 0 || result.filesModified > 0 || result.filesRemoved > 0 || result.filesAffected > 0) { this.db.runMaintenance(); } diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 09d1831d9..a99f67ba9 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -2687,6 +2687,8 @@ export class ToolHandler { ); } + lines.push(`**Sync detection:** fast-path (mtime/size prefilter + content hash confirm)`); + lines.push('', '### Nodes by Kind:'); for (const [kind, count] of Object.entries(stats.nodesByKind)) { diff --git a/src/resolution/index.ts b/src/resolution/index.ts index 5158e8301..d54a40582 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -670,6 +670,7 @@ export class ReferenceResolver { kind, line: ref.original.line, column: ref.original.column, + provenance: 'resolver', metadata: { confidence: ref.confidence, resolvedBy: ref.resolvedBy, @@ -709,6 +710,15 @@ export class ReferenceResolver { return result; } + /** + * Re-run whole-graph heuristic edge synthesis after an incremental resolve. + * The synthesizer is additive and idempotent as long as prior heuristic edges + * for the affected files were cleared first by the caller. + */ + synthesizeHeuristicEdges(): number { + return synthesizeCallbackEdges(this.queries, this.context); + } + /** * Resolve and persist in batches to keep memory bounded. * Processes unresolved references in chunks, persisting edges and cleaning diff --git a/src/types.ts b/src/types.ts index 0cfaf0bba..e3d2c918b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -189,7 +189,7 @@ export interface Edge { column?: number; /** How this edge was created */ - provenance?: 'tree-sitter' | 'scip' | 'heuristic'; + provenance?: 'tree-sitter' | 'scip' | 'heuristic' | 'resolver'; } /** @@ -295,6 +295,12 @@ export interface UnresolvedReference { candidates?: string[]; } +/** + * A persisted reference fact extracted from source, kept even after resolution + * so future syncs can re-resolve affected files when target symbols change. + */ +export interface ReferenceFact extends UnresolvedReference {} + // ============================================================================= // Query Types // =============================================================================