Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
253e3a7
fix: db version warning, barrel export tracing, quieter tsconfig, Set…
carlos-alm Mar 26, 2026
8e43e43
docs: soften EXTENSIONS/IGNORE_DIRS changelog wording
carlos-alm Mar 26, 2026
b60fbb7
fix: address review feedback — version check, Set mutation, barrel ed…
carlos-alm Mar 26, 2026
54c6c18
refactor: replace vendor.d.ts with @types/better-sqlite3
carlos-alm Mar 26, 2026
5eeb0dc
fix: resolve merge conflicts with main
carlos-alm Mar 26, 2026
9b469df
fix: address Greptile review feedback
carlos-alm Mar 26, 2026
e3eb0e5
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
c3ccbdd
fix: preserve transaction argument types via inline inference (#640)
carlos-alm Mar 27, 2026
4eceebd
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
64c1565
perf: sub-100ms 1-file incremental rebuilds (466ms → 78-90ms)
carlos-alm Mar 27, 2026
34d08ba
perf: scope node loading and skip filesystem scan for incremental builds
carlos-alm Mar 27, 2026
a6d8080
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
95626eb
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
7dd0fd6
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
0878fd1
fix: align scopedLoad gate with loadNodes to prevent missing lazy fal…
carlos-alm Mar 27, 2026
38165eb
fix: align drift detection and metadata persistence thresholds, remov…
carlos-alm Mar 27, 2026
eae0280
Merge branch 'feat/dogfood-fixes-9.1-9.4' of https://github.com/optav…
carlos-alm Mar 27, 2026
4b633be
Merge branch 'main' into feat/dogfood-fixes-9.1-9.4
carlos-alm Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 72 additions & 7 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -561,17 +561,82 @@ function buildClassHierarchyEdges(

// ── Main entry point ────────────────────────────────────────────────────

/**
 * For small incremental builds (≤5 changed files on a large codebase), scope
 * the node loading query to only files that are relevant: changed files +
 * their import targets + barrel-only files. Falls back to loading ALL nodes
 * for full builds, larger incremental changes, or small existing databases.
 *
 * @returns the loaded node rows plus a `scoped` flag so the caller knows
 *   whether a lazy name-lookup fallback is required for out-of-scope nodes.
 */
function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
  const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
  const nodeKindFilter = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;

  // Gate: only scope for small incremental builds on large codebases.
  if (!isFullBuild && fileSymbols.size <= 5) {
    const existingFileCount = (
      db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number }
    ).c;
    if (existingFileCount > 20) {
      // Collect relevant files: changed files + their import targets.
      const relevantFiles = new Set<string>(fileSymbols.keys());
      if (batchResolved) {
        for (const resolvedPath of batchResolved.values()) {
          relevantFiles.add(resolvedPath);
        }
      }
      // Also add barrel-only files so re-export tracing still sees them.
      for (const barrelPath of ctx.barrelOnlyFiles) {
        relevantFiles.add(barrelPath);
      }

      // Guard: with no relevant files the query would read `file IN ()`,
      // which is a SQL syntax error in SQLite (e.g. a removal-only change
      // set). Fall through to the unscoped full load instead.
      if (relevantFiles.size > 0) {
        const placeholders = [...relevantFiles].map(() => '?').join(',');
        const rows = db
          .prepare(
            `SELECT id, name, kind, file, line FROM nodes WHERE ${nodeKindFilter} AND file IN (${placeholders})`,
          )
          .all(...relevantFiles) as QueryNodeRow[];
        return { rows, scoped: true };
      }
    }
  }

  // Full (unscoped) load: every non-file node of the interesting kinds.
  const rows = db
    .prepare(`SELECT id, name, kind, file, line FROM nodes WHERE ${nodeKindFilter}`)
    .all() as QueryNodeRow[];
  return { rows, scoped: false };
}

/**
 * For scoped node loading, patch nodesByName.get with a lazy SQL fallback
 * so global name-only lookups (resolveByMethodOrGlobal, supplementReceiverEdges)
 * can still find nodes outside the scoped set.
 *
 * Positive hits are written back into the map; negative results are memoized
 * in a local Set so repeated lookups of an absent name don't re-run the query
 * (the map itself can only cache names that resolve to rows).
 *
 * NOTE(review): the fallback filters only `kind != 'file'`, which is broader
 * than the explicit kind list used by loadNodes — confirm that widening is
 * intentional for name-only resolution.
 */
function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
  if (!scopedLoad) return;
  const { db } = ctx;
  const fallbackStmt = db.prepare(
    `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind != 'file'`,
  );
  // Names already known to be absent from the DB.
  const knownMisses = new Set<string>();
  const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
  ctx.nodesByName.get = (name: string) => {
    const result = originalGet(name);
    if (result !== undefined) return result;
    if (knownMisses.has(name)) return undefined;
    const rows = fallbackStmt.all(name) as unknown as NodeRow[];
    if (rows.length > 0) {
      // Cache the positive hit so subsequent lookups skip SQL entirely.
      ctx.nodesByName.set(name, rows);
      return rows;
    }
    knownMisses.add(name);
    return undefined;
  };
}

export async function buildEdges(ctx: PipelineContext): Promise<void> {
const { db, engineName } = ctx;

const getNodeIdStmt = makeGetNodeIdStmt(db);

const allNodes = db
.prepare(
`SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`,
)
.all() as QueryNodeRow[];
setupNodeLookups(ctx, allNodes);
const { rows: allNodesBefore, scoped: scopedLoad } = loadNodes(ctx);
setupNodeLookups(ctx, allNodesBefore);
addLazyFallback(ctx, scopedLoad);

const t0 = performance.now();
const buildEdgesTx = db.transaction(() => {
Expand All @@ -592,7 +657,7 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {

const native = engineName === 'native' ? loadNative() : null;
if (native?.buildCallEdges) {
buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native);
buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native);
} else {
buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows);
}
Expand Down
89 changes: 83 additions & 6 deletions src/domain/graph/builder/stages/collect-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,78 @@
* Stage: collectFiles
*
* Collects all source files to process. Handles both normal and scoped rebuilds.
* For incremental builds with a valid journal, reconstructs the file list from
* the DB's file_hashes table + journal deltas, skipping the filesystem scan.
*/
import fs from 'node:fs';
import path from 'node:path';
import { info } from '../../../../infrastructure/logger.js';
import { debug, info } from '../../../../infrastructure/logger.js';
import { normalizePath } from '../../../../shared/constants.js';
import { readJournal } from '../../journal.js';
import type { PipelineContext } from '../context.js';
import { collectFiles as collectFilesUtil } from '../helpers.js';

/**
 * Reconstruct allFiles from DB file_hashes + journal deltas.
 * Returns null when the fast path isn't applicable (first build, no journal, etc).
 */
function tryFastCollect(
  ctx: PipelineContext,
): { files: string[]; directories: Set<string> } | null {
  const { db, rootDir } = ctx;

  // 1. The file_hashes table must exist and be non-empty; first builds and
  //    pre-migration databases bail out here.
  let trackedCount: number;
  try {
    trackedCount = (db.prepare('SELECT COUNT(*) as c FROM file_hashes').get() as { c: number }).c;
  } catch {
    return null;
  }
  if (trackedCount === 0) return null;

  // 2. Read the journal — only use fast path when journal has entries,
  //    proving the watcher was active and tracking changes. An empty-but-valid
  //    journal (no watcher) could miss file deletions.
  const journal = readJournal(rootDir);
  if (!journal.valid) return null;
  const changedCount = journal.changed?.length ?? 0;
  const removedCount = journal.removed?.length ?? 0;
  if (changedCount === 0 && removedCount === 0) return null;

  // 3. Pull the previously-hashed file list out of the DB (relative paths).
  const dbFiles = (db.prepare('SELECT file FROM file_hashes').all() as Array<{ file: string }>).map(
    (r) => r.file,
  );

  // 4. Apply journal deltas: drop deletions first, then add new/changed files.
  const fileSet = new Set(dbFiles);
  for (const gone of journal.removed ?? []) {
    fileSet.delete(gone);
  }
  for (const touched of journal.changed ?? []) {
    fileSet.add(touched);
  }

  // 5. Absolutize each path and record its containing directory.
  const files: string[] = [];
  const directories = new Set<string>();
  for (const rel of fileSet) {
    const abs = path.join(rootDir, rel);
    files.push(abs);
    directories.add(path.dirname(abs));
  }

  debug(
    `collectFiles fast path: ${dbFiles.length} from DB, journal: +${journal.changed?.length ?? 0}/-${journal.removed?.length ?? 0} → ${files.length} files`,
  );
  return { files, directories };
}

export async function collectFiles(ctx: PipelineContext): Promise<void> {
const { rootDir, config, opts } = ctx;

Expand All @@ -33,10 +97,23 @@ export async function collectFiles(ctx: PipelineContext): Promise<void> {
ctx.removed = missing;
ctx.isFullBuild = false;
info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
} else {
const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
ctx.allFiles = collected.files;
ctx.discoveredDirs = collected.directories;
info(`Found ${ctx.allFiles.length} files to parse`);
return;
}

// Incremental fast path: reconstruct file list from DB + journal deltas
// instead of full recursive filesystem scan (~8ms savings on 473 files).
if (ctx.incremental && !ctx.forceFullRebuild) {
const fast = tryFastCollect(ctx);
if (fast) {
ctx.allFiles = fast.files;
ctx.discoveredDirs = fast.directories;
info(`Found ${ctx.allFiles.length} files (cached)`);
return;
}
}

const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
ctx.allFiles = collected.files;
ctx.discoveredDirs = collected.directories;
info(`Found ${ctx.allFiles.length} files to parse`);
}
9 changes: 7 additions & 2 deletions src/domain/graph/builder/stages/finalize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
// built_at is only used by stale-embeddings check (skipped for incremental),
// and counts are only used by drift detection (skipped for ≤3 files).
// This avoids a transaction commit + WAL fsync (~15-30ms).
if (isFullBuild || allSymbols.size > 5) {
// Threshold aligned with drift detection gate (allSymbols.size > 3) so stored
// counts stay fresh whenever drift detection reads them.
if (isFullBuild || allSymbols.size > 3) {
try {
setBuildMeta(db, {
engine: ctx.engineName,
Expand Down Expand Up @@ -157,6 +159,10 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
}
}

// Intentionally measured before closeDb / writeJournalHeader / auto-registration:
// for the deferred-close path the close is async (setImmediate), and for full
// builds the metric captures finalize logic only — DB close cost is tracked
// separately via timing.closeDbMs when available.
ctx.timing.finalizeMs = performance.now() - t0;

// For small incremental builds, defer db.close() to the next event loop tick.
Expand All @@ -177,7 +183,6 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
// registered during the initial full build. The dynamic import + file I/O
// costs ~100ms which dominates incremental finalize time.
if (!opts.skipRegistry && isFullBuild) {
const { tmpdir } = await import('node:os');
const tmpDir = path.resolve(tmpdir());
const resolvedRoot = path.resolve(rootDir);
if (resolvedRoot.startsWith(tmpDir)) {
Expand Down
Loading