From 50ec4a070575b2eae754c63389096ddf14be3e62 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 02:44:44 -0600 Subject: [PATCH 1/6] =?UTF-8?q?perf:=20reduce=20query=20latency=20regressi?= =?UTF-8?q?on=20from=203.1.4=20=E2=86=92=203.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three targeted fixes for the +28–56% query latency regression: 1. Pin benchmark hub target to stable function names (buildGraph, openDb, loadConfig) instead of auto-selecting the most-connected node. Barrel/type files becoming the hub made version-to-version comparison meaningless. 2. Gate implementors queries in bfsTransitiveCallers — check once whether the graph has any 'implements' edges before doing per-node findNodeById + findImplementors lookups. Skips all implementor overhead for codebases without interface/trait hierarchies. 3. Cache loadConfig() results per cwd. The config file is read from disk on every fnImpactData and diffImpactData call; caching eliminates redundant fs.existsSync + readFileSync + JSON.parse per query invocation. Impact: 5 functions changed, 123 affected --- scripts/query-benchmark.js | 27 ++++++++++++++++++++++++++- src/domain/analysis/impact.js | 20 ++++++++++++++++++-- src/infrastructure/config.js | 24 +++++++++++++++++++++--- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js index b225c577..b2c091c1 100644 --- a/scripts/query-benchmark.js +++ b/scripts/query-benchmark.js @@ -111,8 +111,31 @@ function round1(n) { return Math.round(n * 10) / 10; } +// Pinned hub targets — stable function names that exist across versions. +// Auto-selecting the most-connected node makes version-to-version comparison +// meaningless when barrel/type files get added or removed. 
+const PINNED_HUB_CANDIDATES = ['buildGraph', 'openDb', 'loadConfig']; + function selectTargets() { const db = new Database(dbPath, { readonly: true }); + + // Try pinned candidates first for a stable hub across versions + let hub = null; + for (const candidate of PINNED_HUB_CANDIDATES) { + const row = db + .prepare( + `SELECT n.name FROM nodes n + JOIN edges e ON e.source_id = n.id OR e.target_id = n.id + WHERE n.name = ? AND n.file NOT LIKE '%test%' AND n.file NOT LIKE '%spec%' + LIMIT 1`, + ) + .get(candidate); + if (row) { + hub = row.name; + break; + } + } + const rows = db .prepare( `SELECT n.name, COUNT(e.id) AS cnt @@ -127,7 +150,9 @@ function selectTargets() { if (rows.length === 0) throw new Error('No nodes with edges found in graph'); - const hub = rows[0].name; + // Fall back to most-connected if no pinned candidate found + if (!hub) hub = rows[0].name; + const mid = rows[Math.floor(rows.length / 2)].name; const leaf = rows[rows.length - 1].name; return { hub, mid, leaf }; diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index c2ea3540..2ce1dbbf 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -24,6 +24,19 @@ import { findMatchingNodes } from './symbol-lookup.js'; const INTERFACE_LIKE_KINDS = new Set(['interface', 'trait']); +/** + * Check whether the graph contains any 'implements' edges. + * Cached per db handle so the query runs at most once per connection. + */ +const _hasImplementsCache = new WeakMap(); +function hasImplementsEdges(db) { + if (_hasImplementsCache.has(db)) return _hasImplementsCache.get(db); + const row = db.prepare("SELECT 1 FROM edges WHERE kind = 'implements' LIMIT 1").get(); + const result = !!row; + _hasImplementsCache.set(db, result); + return result; +} + /** * BFS traversal to find transitive callers of a node. 
* When an interface/trait node is encountered (either as the start node or @@ -40,6 +53,9 @@ export function bfsTransitiveCallers( startId, { noTests = false, maxDepth = 3, includeImplementors = true, onVisit } = {}, ) { + // Skip all implementor lookups when the graph has no implements edges + const resolveImplementors = includeImplementors && hasImplementsEdges(db); + const visited = new Set([startId]); const levels = {}; let frontier = [startId]; @@ -47,7 +63,7 @@ export function bfsTransitiveCallers( // Seed: if start node is an interface/trait, include its implementors at depth 1. // Implementors go into a separate list so their callers appear at depth 2, not depth 1. const implNextFrontier = []; - if (includeImplementors) { + if (resolveImplementors) { const startNode = findNodeById(db, startId); if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) { const impls = findImplementors(db, startId); @@ -88,7 +104,7 @@ export function bfsTransitiveCallers( // If a caller is an interface/trait, also pull in its implementors // Implementors are one extra hop away, so record at d+1 - if (includeImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { + if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { const impls = findImplementors(db, c.id); for (const impl of impls) { if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index e8439ab0..c6f12c6e 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -130,12 +130,19 @@ export const DEFAULTS = { }, }; +// Per-cwd config cache — avoids re-reading the config file on every query call. +// The config file rarely changes within a single process lifetime. +const _configCache = new Map(); + /** * Load project configuration from a .codegraphrc.json or similar file. - * Returns merged config with defaults. + * Returns merged config with defaults. Results are cached per cwd. 
*/ export function loadConfig(cwd) { cwd = cwd || process.cwd(); + const cached = _configCache.get(cwd); + if (cached) return cached; + for (const name of CONFIG_FILES) { const filePath = path.join(cwd, name); if (fs.existsSync(filePath)) { @@ -148,13 +155,24 @@ export function loadConfig(cwd) { merged.query.excludeTests = Boolean(config.excludeTests); } delete merged.excludeTests; - return resolveSecrets(applyEnvOverrides(merged)); + const result = resolveSecrets(applyEnvOverrides(merged)); + _configCache.set(cwd, result); + return result; } catch (err) { debug(`Failed to parse config ${filePath}: ${err.message}`); } } } - return resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + _configCache.set(cwd, defaults); + return defaults; +} + +/** + * Clear the config cache. Useful in tests or after config file changes. + */ +export function clearConfigCache() { + _configCache.clear(); } const ENV_LLM_MAP = { From 1468ef1a464f079bcb4df7ea0622e1d3c2f818ae Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 02:53:37 -0600 Subject: [PATCH 2/6] fix: return structuredClone from config cache and guard benchmark db handle Prevent callers from mutating the cached config object by returning a deep clone on cache hits. Add try/finally to selectTargets() so the database handle is closed even if a query throws. 
Impact: 2 functions changed, 1 affected --- scripts/query-benchmark.js | 6 +++++- src/infrastructure/config.js | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js index b2c091c1..230caf77 100644 --- a/scripts/query-benchmark.js +++ b/scripts/query-benchmark.js @@ -118,6 +118,7 @@ const PINNED_HUB_CANDIDATES = ['buildGraph', 'openDb', 'loadConfig']; function selectTargets() { const db = new Database(dbPath, { readonly: true }); + try { // Try pinned candidates first for a stable hub across versions let hub = null; @@ -146,7 +147,6 @@ function selectTargets() { ORDER BY cnt DESC`, ) .all(); - db.close(); if (rows.length === 0) throw new Error('No nodes with edges found in graph'); @@ -156,6 +156,10 @@ function selectTargets() { const mid = rows[Math.floor(rows.length / 2)].name; const leaf = rows[rows.length - 1].name; return { hub, mid, leaf }; + + } finally { + db.close(); + } } function benchDepths(fn, name, depths) { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index c6f12c6e..961e2848 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -141,7 +141,7 @@ const _configCache = new Map(); export function loadConfig(cwd) { cwd = cwd || process.cwd(); const cached = _configCache.get(cwd); - if (cached) return cached; + if (cached) return structuredClone(cached); for (const name of CONFIG_FILES) { const filePath = path.join(cwd, name); @@ -169,7 +169,9 @@ export function loadConfig(cwd) { } /** - * Clear the config cache. Useful in tests or after config file changes. + * Clear the config cache. Intended for long-running processes that need to + * pick up on-disk config changes, and for test isolation when tests share + * the same cwd. 
*/ export function clearConfigCache() { _configCache.clear(); From 749136316d7af04be01454124a1e10d214ae1866 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 03:08:23 -0600 Subject: [PATCH 3/6] fix: install @huggingface/transformers in npm-mode benchmark workers The embedding benchmark's npm mode installs codegraph into a temp dir, but @huggingface/transformers is a devDependency and not included. All 6 model workers crash on import, producing symbols: 0, models: {}. Install it explicitly from the local devDependencies version, matching the existing pattern for native platform packages. Also add a guard in update-embedding-report.js to reject empty results and fail loudly instead of silently overwriting valid benchmark data. --- scripts/lib/bench-config.js | 21 +++++++++++++++++++++ scripts/update-embedding-report.js | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/scripts/lib/bench-config.js b/scripts/lib/bench-config.js index bd354334..55306e70 100644 --- a/scripts/lib/bench-config.js +++ b/scripts/lib/bench-config.js @@ -134,6 +134,27 @@ export async function resolveBenchmarkSource() { console.error(`Warning: failed to install native package: ${err.message}`); } + // @huggingface/transformers is a devDependency (lazy-loaded for embeddings). + // It is not installed as a transitive dep in npm mode, so install it + // explicitly so the embedding benchmark workers can import it. 
+ try { + const localPkg = JSON.parse( + fs.readFileSync(path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..', 'package.json'), 'utf8'), + ); + const hfVersion = localPkg.devDependencies?.['@huggingface/transformers']; + if (hfVersion) { + console.error(`Installing @huggingface/transformers@${hfVersion} for embedding benchmarks...`); + execFileSync('npm', ['install', `@huggingface/transformers@${hfVersion}`, '--no-audit', '--no-fund', '--no-save'], { + cwd: tmpDir, + stdio: 'pipe', + timeout: 120_000, + }); + console.error('Installed @huggingface/transformers'); + } + } catch (err) { + console.error(`Warning: failed to install @huggingface/transformers: ${err.message}`); + } + const srcDir = path.join(pkgDir, 'src'); if (!fs.existsSync(srcDir)) { diff --git a/scripts/update-embedding-report.js b/scripts/update-embedding-report.js index 47e31d15..645c1844 100644 --- a/scripts/update-embedding-report.js +++ b/scripts/update-embedding-report.js @@ -26,6 +26,15 @@ if (arg) { } const entry = JSON.parse(jsonText); +// Guard: reject empty benchmark results (all workers crashed or no symbols indexed) +if (!entry.symbols || !entry.models || Object.keys(entry.models).length === 0) { + console.error( + `Embedding benchmark produced empty results (symbols=${entry.symbols}, models=${Object.keys(entry.models || {}).length}). ` + + 'Skipping report update to avoid overwriting valid data. 
Check benchmark worker logs.', + ); + process.exit(1); +} + // ── Paths ──────────────────────────────────────────────────────────────── const reportPath = path.join(root, 'generated', 'benchmarks', 'EMBEDDING-BENCHMARKS.md'); From 4b4545c70bb4dc29a48be13e4bb111b03097fe21 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 03:39:49 -0600 Subject: [PATCH 4/6] fix: free leaked WASM trees in native engine typeMap backfill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The typeMap backfill path in parseFilesAuto and backfillTypeMap called wasmExtractSymbols but never freed the returned WASM tree objects. Over repeated builds (benchmarks, watch mode), hundreds of trees accumulated in WASM linear memory, eventually corrupting V8 state and crashing the native addon with ACCESS_VIOLATION / has_exception(). Two fixes: 1. Free WASM trees immediately after extracting typeMap data in both backfillTypeMap() and the parseFilesAuto() bulk backfill loop. 2. Skip backfill entirely for JS files — only TS/TSX have type annotations that WASM can extract. The native engine already handles JS `new Expr()` patterns, so re-parsing all JS files with WASM was pure waste. 
Closes #530 Impact: 2 functions changed, 2 affected --- src/domain/parser.js | 58 ++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/src/domain/parser.js b/src/domain/parser.js index 8ccbcd3b..c2063d34 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -395,8 +395,22 @@ async function backfillTypeMap(filePath, source) { } const parsers = await createParsers(); const extracted = wasmExtractSymbols(parsers, filePath, code); - if (!extracted?.symbols?.typeMap) return { typeMap: [], backfilled: false }; + if (!extracted?.symbols?.typeMap) { + // Free the WASM tree to prevent memory accumulation across repeated builds + if (extracted?.tree && typeof extracted.tree.delete === 'function') { + try { + extracted.tree.delete(); + } catch {} + } + return { typeMap: [], backfilled: false }; + } const tm = extracted.symbols.typeMap; + // Free the WASM tree — only the typeMap data is needed + if (extracted.tree && typeof extracted.tree.delete === 'function') { + try { + extracted.tree.delete(); + } catch {} + } return { typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])), backfilled: true, @@ -486,21 +500,35 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) { } // Backfill typeMap via WASM for native binaries that predate the type-map feature if (needsTypeMap.length > 0) { - const parsers = await createParsers(); - for (const { filePath, relPath } of needsTypeMap) { - try { - const code = fs.readFileSync(filePath, 'utf-8'); - const extracted = wasmExtractSymbols(parsers, filePath, code); - if (extracted?.symbols?.typeMap) { - const symbols = result.get(relPath); - symbols.typeMap = - extracted.symbols.typeMap instanceof Map - ? 
extracted.symbols.typeMap - : new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName])); - symbols._typeMapBackfilled = true; + // Only backfill for languages where WASM extraction can produce typeMap + // (TS/TSX have type annotations; JS only has `new Expr()` which native already handles) + const TS_EXTS = new Set(['.ts', '.tsx']); + const tsFiles = needsTypeMap.filter(({ filePath }) => TS_EXTS.has(path.extname(filePath))); + if (tsFiles.length > 0) { + const parsers = await createParsers(); + for (const { filePath, relPath } of tsFiles) { + let extracted; + try { + const code = fs.readFileSync(filePath, 'utf-8'); + extracted = wasmExtractSymbols(parsers, filePath, code); + if (extracted?.symbols?.typeMap) { + const symbols = result.get(relPath); + symbols.typeMap = + extracted.symbols.typeMap instanceof Map + ? extracted.symbols.typeMap + : new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName])); + symbols._typeMapBackfilled = true; + } + } catch { + /* skip — typeMap is a best-effort backfill */ + } finally { + // Free the WASM tree to prevent memory accumulation across repeated builds + if (extracted?.tree && typeof extracted.tree.delete === 'function') { + try { + extracted.tree.delete(); + } catch {} + } } - } catch { - /* skip — typeMap is a best-effort backfill */ } } } From a5a66eed378511d39043cb529a4839a83babf38f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 03:57:00 -0600 Subject: [PATCH 5/6] fix(native): align edge builder kind filters with JS parity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rust edge builder only matched `kind == "class"` when looking up source nodes and targets for extends/implements edges. This caused all `impl Trait for Struct` relationships (and any non-class hierarchy) to be silently dropped — producing 0 implements edges for Rust sources while WASM correctly found 9. 
Align the three kind filter sets with the JS-side constants: - Source: class, struct, record, enum (was: class only) - Extends targets: class, struct, trait, record (was: class only) - Implements targets: interface, class, trait (was: interface, class) Fixes #530 (partial — implements parity gap) Impact: 1 functions changed, 0 affected --- crates/codegraph-core/src/edge_builder.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 522ce768..f702d899 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -339,13 +339,17 @@ pub fn build_call_edges( for cls in &file_input.classes { let source_row = nodes_by_name_and_file .get(&(cls.name.as_str(), rel_path.as_str())) - .and_then(|v| v.iter().find(|n| n.kind == "class")); + .and_then(|v| v.iter().find(|n| { + n.kind == "class" || n.kind == "struct" || n.kind == "record" || n.kind == "enum" + })); if let Some(source) = source_row { if let Some(ref extends_name) = cls.extends { let targets = nodes_by_name .get(extends_name.as_str()) - .map(|v| v.iter().filter(|n| n.kind == "class").collect::<Vec<_>>()) + .map(|v| v.iter().filter(|n| { + n.kind == "class" || n.kind == "struct" || n.kind == "trait" || n.kind == "record" + }).collect::<Vec<_>>()) .unwrap_or_default(); for t in targets { edges.push(ComputedEdge { @@ -362,7 +366,7 @@ pub fn build_call_edges( .get(implements_name.as_str()) .map(|v| { v.iter() - .filter(|n| n.kind == "interface" || n.kind == "class") + .filter(|n| n.kind == "interface" || n.kind == "class" || n.kind == "trait") .collect::<Vec<_>>() }) .unwrap_or_default(); From 83bbd2cc168130b9f067155cf550d5c301667d56 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 04:27:00 -0600 Subject: [PATCH 6/6] fix: address review feedback on WASM tree cleanup and JS backfill skip - Consolidate duplicated 
tree.delete() in backfillTypeMap into a single finally block, preventing future early-return paths from leaking trees - Skip WASM typeMap backfill for JS files in parseFileAuto and parseFileIncremental single-file paths, matching the bulk path behavior Impact: 3 functions changed, 2 affected --- src/domain/parser.js | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/domain/parser.js b/src/domain/parser.js index c2063d34..59a4a10c 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -395,26 +395,23 @@ async function backfillTypeMap(filePath, source) { } const parsers = await createParsers(); const extracted = wasmExtractSymbols(parsers, filePath, code); - if (!extracted?.symbols?.typeMap) { + try { + if (!extracted?.symbols?.typeMap) { + return { typeMap: [], backfilled: false }; + } + const tm = extracted.symbols.typeMap; + return { + typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])), + backfilled: true, + }; + } finally { // Free the WASM tree to prevent memory accumulation across repeated builds if (extracted?.tree && typeof extracted.tree.delete === 'function') { try { extracted.tree.delete(); } catch {} } - return { typeMap: [], backfilled: false }; - } - const tm = extracted.symbols.typeMap; - // Free the WASM tree — only the typeMap data is needed - if (extracted.tree && typeof extracted.tree.delete === 'function') { - try { - extracted.tree.delete(); - } catch {} } - return { - typeMap: tm instanceof Map ? 
tm : new Map(tm.map((e) => [e.name, e.typeName])), - backfilled: true, - }; } /** @@ -455,7 +452,13 @@ export async function parseFileAuto(filePath, source, opts = {}) { const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false); if (!result) return null; const patched = patchNativeResult(result); - if (!patched.typeMap || patched.typeMap.length === 0) { + // Only backfill typeMap for TS/TSX — JS files have no type annotations, + // and the native engine already handles `new Expr()` patterns. + const TS_BACKFILL_EXTS = new Set(['.ts', '.tsx']); + if ( + (!patched.typeMap || patched.typeMap.length === 0) && + TS_BACKFILL_EXTS.has(path.extname(filePath)) + ) { const { typeMap, backfilled } = await backfillTypeMap(filePath, source); patched.typeMap = typeMap; if (backfilled) patched._typeMapBackfilled = true; @@ -606,7 +609,13 @@ export async function parseFileIncremental(cache, filePath, source, opts = {}) { const result = cache.parseFile(filePath, source); if (!result) return null; const patched = patchNativeResult(result); - if (!patched.typeMap || patched.typeMap.length === 0) { + // Only backfill typeMap for TS/TSX — JS files have no type annotations, + // and the native engine already handles `new Expr()` patterns. + const TS_BACKFILL_EXTS = new Set(['.ts', '.tsx']); + if ( + (!patched.typeMap || patched.typeMap.length === 0) && + TS_BACKFILL_EXTS.has(path.extname(filePath)) + ) { const { typeMap, backfilled } = await backfillTypeMap(filePath, source); patched.typeMap = typeMap; if (backfilled) patched._typeMapBackfilled = true;