diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.ts similarity index 68% rename from src/domain/graph/builder/incremental.js rename to src/domain/graph/builder/incremental.ts index 2be5cefa..48034b25 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.ts @@ -9,16 +9,44 @@ */ import fs from 'node:fs'; import path from 'node:path'; +import type BetterSqlite3 from 'better-sqlite3'; import { bulkNodeIdsByFile } from '../../../db/index.js'; import { warn } from '../../../infrastructure/logger.js'; import { normalizePath } from '../../../shared/constants.js'; +import type { EngineOpts, ExtractorOutput, PathAliases } from '../../../types.js'; import { parseFileIncremental } from '../../parser.js'; import { computeConfidence, resolveImportPath } from '../resolve.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; +// ── Local types ───────────────────────────────────────────────────────── + +export interface IncrementalStmts { + insertNode: { run: (...params: unknown[]) => unknown }; + insertEdge: { run: (...params: unknown[]) => unknown }; + getNodeId: { get: (...params: unknown[]) => { id: number } | undefined }; + deleteEdgesForFile: { run: (...params: unknown[]) => unknown }; + deleteNodes: { run: (...params: unknown[]) => unknown }; + countNodes: { get: (...params: unknown[]) => { c: number } | undefined }; + listSymbols: { all: (...params: unknown[]) => unknown[] }; + findNodeInFile: { all: (...params: unknown[]) => unknown[] }; + findNodeByName: { all: (...params: unknown[]) => unknown[] }; +} + +interface RebuildResult { + file: string; + nodesAdded: number; + nodesRemoved: number; + edgesAdded: number; + deleted?: boolean; + event?: string; + symbolDiff?: unknown; + nodesBefore?: number; + nodesAfter?: number; +} + // ── Node insertion ────────────────────────────────────────────────────── -function insertFileNodes(stmts, relPath, symbols) { +function insertFileNodes(stmts: IncrementalStmts, relPath: string, symbols: ExtractorOutput): void { stmts.insertNode.run(relPath, 'file', relPath, 0, null); for (const def of symbols.definitions) { stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); @@ -35,8 +63,13 @@ function insertFileNodes(stmts, relPath, symbols) { // ── Containment edges ────────────────────────────────────────────────── -function buildContainmentEdges(db, stmts, relPath, symbols) { - const nodeIdMap = new Map(); +function buildContainmentEdges( + db: BetterSqlite3.Database, + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, +): number { + const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -68,11 +101,14 @@ function buildContainmentEdges(db, stmts, relPath, symbols) { // ── Reverse-dep cascade ──────────────────────────────────────────────── // Lazily-cached prepared statements for reverse-dep operations -let _revDepDb = null; -let _findRevDepsStmt = null; -let _deleteOutEdgesStmt = null; - -function getRevDepStmts(db) { +let _revDepDb: BetterSqlite3.Database | null = null; +let _findRevDepsStmt: BetterSqlite3.Statement | null = null; +let _deleteOutEdgesStmt: BetterSqlite3.Statement | null = null; + +function getRevDepStmts(db: BetterSqlite3.Database): { + findRevDepsStmt: BetterSqlite3.Statement; + deleteOutEdgesStmt: BetterSqlite3.Statement; +} { if (_revDepDb !== db) { _revDepDb = db; _findRevDepsStmt = db.prepare( @@ -85,24 +121,32 @@ function getRevDepStmts(db) { 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', ); } - return { findRevDepsStmt: _findRevDepsStmt, deleteOutEdgesStmt: _deleteOutEdgesStmt }; + return { + findRevDepsStmt: _findRevDepsStmt!, + deleteOutEdgesStmt: _deleteOutEdgesStmt!, + }; } -function findReverseDeps(db, relPath) { +function findReverseDeps(db: BetterSqlite3.Database, relPath: string): string[] { const { findRevDepsStmt } = getRevDepStmts(db); - return findRevDepsStmt.all(relPath, relPath).map((r) => r.file); + return (findRevDepsStmt.all(relPath, relPath) as Array<{ file: string }>).map((r) => r.file); } -function deleteOutgoingEdges(db, relPath) { +function deleteOutgoingEdges(db: BetterSqlite3.Database, relPath: string): void { const { deleteOutEdgesStmt } = getRevDepStmts(db); deleteOutEdgesStmt.run(relPath); } -async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) { +async function parseReverseDep( + rootDir: string, + depRelPath: string, + engineOpts: EngineOpts, + cache: unknown, +): Promise { const absPath = path.join(rootDir, depRelPath); if (!fs.existsSync(absPath)) return null; - let code; + let code: string; try { code = readFileSafe(absPath); } catch { @@ -112,13 +156,20 @@ async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) { return parseFileIncremental(cache, absPath, code, engineOpts); } -function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBarrel) { +function rebuildReverseDepEdges( + db: BetterSqlite3.Database, + rootDir: string, + depRelPath: string, + symbols: ExtractorOutput, + stmts: IncrementalStmts, + skipBarrel: boolean, +): number { const fileNodeRow = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0); if (!fileNodeRow) return 0; - const aliases = { baseUrl: null, paths: {} }; + const aliases: PathAliases = { baseUrl: null, paths: {} }; let edgesAdded = buildContainmentEdges(db, stmts, depRelPath, symbols); - // Don't rebuild dir→file containment for reverse-deps (it was never deleted) + // Don't rebuild dir->file containment for reverse-deps (it was never deleted) edgesAdded += buildImportEdges( stmts, depRelPath, @@ -135,7 +186,11 @@ function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBar // ── Directory containment edges ──────────────────────────────────────── -function rebuildDirContainment(_db, stmts, relPath) { +function rebuildDirContainment( + _db: BetterSqlite3.Database, + stmts: IncrementalStmts, + relPath: string, +): number { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') return 0; const dirRow = stmts.getNodeId.get(dir, 'directory', dir, 0); @@ -149,12 +204,12 @@ function rebuildDirContainment(_db, stmts, relPath) { // ── Ancillary table cleanup ──────────────────────────────────────────── -function purgeAncillaryData(db, relPath) { - const tryExec = (sql, ...args) => { +function purgeAncillaryData(db: BetterSqlite3.Database, relPath: string): void { + const tryExec = (sql: string, ...args: string[]): void => { try { db.prepare(sql).run(...args); - } catch (err) { - if (!err?.message?.includes('no such table')) throw err; + } catch (err: unknown) { + if (!(err as Error | undefined)?.message?.includes('no such table')) throw err; } }; tryExec( @@ -184,12 +239,16 @@ function purgeAncillaryData(db, relPath) { // ── Import edge building ──────────────────────────────────────────────── // Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops) -let _barrelDb = null; -let _isBarrelStmt = null; -let _reexportTargetsStmt = null; -let _hasDefStmt = null; - -function getBarrelStmts(db) { +let _barrelDb: BetterSqlite3.Database | null = null; +let _isBarrelStmt: BetterSqlite3.Statement | null = null; +let _reexportTargetsStmt: BetterSqlite3.Statement | null = null; +let _hasDefStmt: BetterSqlite3.Statement | null = null; + +function getBarrelStmts(db: BetterSqlite3.Database): { + isBarrelStmt: BetterSqlite3.Statement; + reexportTargetsStmt: BetterSqlite3.Statement; + hasDefStmt: BetterSqlite3.Statement; +} { if (_barrelDb !== db) { _barrelDb = db; _isBarrelStmt = db.prepare( @@ -208,26 +267,31 @@ function getBarrelStmts(db) { ); } return { - isBarrelStmt: _isBarrelStmt, - reexportTargetsStmt: _reexportTargetsStmt, - hasDefStmt: _hasDefStmt, + isBarrelStmt: _isBarrelStmt!, + reexportTargetsStmt: _reexportTargetsStmt!, + hasDefStmt: _hasDefStmt!, }; } -function isBarrelFile(db, relPath) { +function isBarrelFile(db: BetterSqlite3.Database, relPath: string): boolean { const { isBarrelStmt } = getBarrelStmts(db); - const reexportCount = isBarrelStmt.get(relPath)?.c; + const reexportCount = (isBarrelStmt.get(relPath) as { c: number } | undefined)?.c; return (reexportCount || 0) > 0; } -function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) { +function resolveBarrelTarget( + db: BetterSqlite3.Database, + barrelPath: string, + symbolName: string, + visited: Set = new Set(), +): string | null { if (visited.has(barrelPath)) return null; visited.add(barrelPath); const { reexportTargetsStmt, hasDefStmt } = getBarrelStmts(db); // Find re-export targets from this barrel - const reexportTargets = reexportTargetsStmt.all(barrelPath); + const reexportTargets = reexportTargetsStmt.all(barrelPath) as Array<{ file: string }>; for (const { file: targetFile } of reexportTargets) { // Check if the symbol is defined in this target file @@ -247,10 +311,16 @@ function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) { * Resolve barrel imports for a single import statement and create edges to actual source files. * Shared by buildImportEdges (primary file) and Pass 2 of the reverse-dep cascade. */ -function resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp) { +function resolveBarrelImportEdges( + db: BetterSqlite3.Database, + stmts: IncrementalStmts, + fileNodeId: number, + resolvedPath: string, + imp: ExtractorOutput['imports'][number], +): number { let edgesAdded = 0; if (!isBarrelFile(db, resolvedPath)) return edgesAdded; - const resolvedSources = new Set(); + const resolvedSources = new Set(); for (const name of imp.names) { const cleanName = name.replace(/^\*\s+as\s+/, ''); const actualSource = resolveBarrelTarget(db, resolvedPath, cleanName); @@ -267,7 +337,15 @@ function resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp) { return edgesAdded; } -function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, db) { +function buildImportEdges( + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, + fileNodeId: number, + aliases: PathAliases, + db: BetterSqlite3.Database | null, +): number { let edgesAdded = 0; for (const imp of symbols.imports) { const resolvedPath = resolveImportPath( @@ -291,8 +369,13 @@ function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, return edgesAdded; } -function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { - const importedNames = new Map(); +function buildImportedNamesMap( + symbols: ExtractorOutput, + rootDir: string, + relPath: string, + aliases: PathAliases, +): Map { + const importedNames = new Map(); for (const imp of symbols.imports) { const resolvedPath = resolveImportPath( path.join(rootDir, relPath), @@ -309,8 +392,13 @@ function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { // ── Call edge building ────────────────────────────────────────────────── -function findCaller(call, definitions, relPath, stmts) { - let caller = null; +function findCaller( + call: ExtractorOutput['calls'][number], + definitions: ExtractorOutput['definitions'], + relPath: string, + stmts: IncrementalStmts, +): { id: number } | null { + let caller: { id: number } | null = null; let callerSpan = Infinity; for (const def of definitions) { if (def.line <= call.line) { @@ -333,16 +421,25 @@ function findCaller(call, definitions, relPath, stmts) { return caller; } -function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) { +function resolveCallTargets( + stmts: IncrementalStmts, + call: ExtractorOutput['calls'][number], + relPath: string, + importedNames: Map, + typeMap: Map, +): { targets: Array<{ id: number; file: string }>; importedFrom: string | undefined } { const importedFrom = importedNames.get(call.name); - let targets; + let targets: Array<{ id: number; file: string }> | undefined; if (importedFrom) { - targets = stmts.findNodeInFile.all(call.name, importedFrom); + targets = stmts.findNodeInFile.all(call.name, importedFrom) as Array<{ + id: number; + file: string; + }>; } if (!targets || targets.length === 0) { - targets = stmts.findNodeInFile.all(call.name, relPath); + targets = stmts.findNodeInFile.all(call.name, relPath) as Array<{ id: number; file: string }>; if (targets.length === 0) { - targets = stmts.findNodeByName.all(call.name); + targets = stmts.findNodeByName.all(call.name) as Array<{ id: number; file: string }>; } } // Type-aware resolution: translate variable receiver to declared type @@ -351,23 +448,34 @@ function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) { const typeName = typeEntry ? typeof typeEntry === 'string' ? typeEntry - : typeEntry.type + : (typeEntry as { type?: string }).type : null; if (typeName) { const qualified = `${typeName}.${call.name}`; - targets = stmts.findNodeByName.all(qualified); + targets = stmts.findNodeByName.all(qualified) as Array<{ id: number; file: string }>; } } - return { targets, importedFrom }; + return { targets: targets ?? [], importedFrom }; } -function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { - const rawTM = symbols.typeMap; - const typeMap = +function buildCallEdges( + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, + fileNodeRow: { id: number }, + importedNames: Map, +): number { + const rawTM: unknown = symbols.typeMap; + const typeMap: Map = rawTM instanceof Map ? rawTM : Array.isArray(rawTM) && rawTM.length > 0 - ? new Map(rawTM.map((e) => [e.name, e.typeName ?? e.type ?? null])) + ? new Map( + (rawTM as Array<{ name: string; typeName?: string; type?: string }>).map((e) => [ + e.name, + e.typeName ?? e.type ?? null, + ]), + ) : new Map(); let edgesAdded = 0; for (const call of symbols.calls) { @@ -397,22 +505,20 @@ function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { /** * Parse a single file and update the database incrementally. - * - * @param {import('better-sqlite3').Database} db - * @param {string} rootDir - Absolute root directory - * @param {string} filePath - Absolute file path - * @param {object} stmts - Prepared DB statements - * @param {object} engineOpts - Engine options - * @param {object|null} cache - Parse tree cache (native only) - * @param {object} [options] - * @param {Function} [options.diffSymbols] - Symbol diff function - * @returns {Promise} Update result or null on failure */ -export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, options = {}) { +export async function rebuildFile( + db: BetterSqlite3.Database, + rootDir: string, + filePath: string, + stmts: IncrementalStmts, + engineOpts: EngineOpts, + cache: unknown, + options: { diffSymbols?: (old: unknown[], new_: unknown[]) => unknown } = {}, +): Promise { const { diffSymbols } = options; const relPath = normalizePath(path.relative(rootDir, filePath)); const oldNodes = stmts.countNodes.get(relPath)?.c || 0; - const oldSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : []; + const oldSymbols: unknown[] = diffSymbols ? stmts.listSymbols.all(relPath) : []; // Find reverse-deps BEFORE purging (edges still reference the old nodes) const reverseDeps = findReverseDeps(db, relPath); @@ -423,7 +529,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach stmts.deleteNodes.run(relPath); if (!fs.existsSync(filePath)) { - if (cache) cache.remove(filePath); + if (cache) (cache as { remove(p: string): void }).remove(filePath); const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, []) : null; return { file: relPath, @@ -438,11 +544,11 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach }; } - let code; + let code: string; try { code = readFileSafe(filePath); } catch (err) { - warn(`Cannot read ${relPath}: ${err.message}`); + warn(`Cannot read ${relPath}: ${(err as Error).message}`); return null; } @@ -452,13 +558,13 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach insertFileNodes(stmts, relPath, symbols); const newNodes = stmts.countNodes.get(relPath)?.c || 0; - const newSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : []; + const newSymbols: unknown[] = diffSymbols ? stmts.listSymbols.all(relPath) : []; const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0); if (!fileNodeRow) return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 }; - const aliases = { baseUrl: null, paths: {} }; + const aliases: PathAliases = { baseUrl: null, paths: {} }; let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols); edgesAdded += rebuildDirContainment(db, stmts, relPath); @@ -469,7 +575,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach // Cascade: rebuild outgoing edges for reverse-dep files. // Two-pass approach: first rebuild direct edges (creating reexports edges for barrels), // then add barrel import edges (which need reexports edges to exist for resolution). - const depSymbols = new Map(); + const depSymbols = new Map(); for (const depRelPath of reverseDeps) { const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache); if (symbols_) { @@ -485,7 +591,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach for (const [depRelPath, symbols_] of depSymbols) { const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0); if (!fileNodeRow_) continue; - const aliases_ = { baseUrl: null, paths: {} }; + const aliases_: PathAliases = { baseUrl: null, paths: {} }; for (const imp of symbols_.imports) { if (imp.reexport) continue; const resolvedPath = resolveImportPath( diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.ts similarity index 73% rename from src/domain/graph/builder/stages/build-edges.js rename to src/domain/graph/builder/stages/build-edges.ts index 1aa03471..1445709c 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -6,40 +6,99 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type BetterSqlite3 from 'better-sqlite3'; import { getNodeId } from '../../../../db/index.js'; import { loadNative } from '../../../../infrastructure/native.js'; +import type { + Call, + ClassRelation, + Definition, + ExtractorOutput, + Import, + NativeAddon, + NodeRow, + TypeMapEntry, +} from '../../../../types.js'; import { computeConfidence } from '../../resolve.js'; +import type { PipelineContext } from '../context.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExport } from './resolve-imports.js'; +// ── Local types ────────────────────────────────────────────────────────── + +type EdgeRowTuple = [number, number, string, number, number]; + +interface NodeIdStmt { + get(name: string, kind: string, file: string, line: number): { id: number } | undefined; +} + +/** Minimal node shape returned by the SELECT query. */ +interface QueryNodeRow { + id: number; + name: string; + kind: string; + file: string; + line: number; +} + +/** Shape fed to the native buildCallEdges FFI. */ +interface NativeFileEntry { + file: string; + fileNodeId: number; + definitions: Array<{ name: string; kind: string; line: number; endLine: number | null }>; + calls: Call[]; + importedNames: Array<{ name: string; file: string }>; + classes: ClassRelation[]; + typeMap: Array<{ name: string; typeName: string; confidence: number }>; +} + +/** Shape returned by native buildCallEdges. */ +interface NativeEdge { + sourceId: number; + targetId: number; + kind: string; + confidence: number; + dynamic: number; +} + +/** TypeMap entry used in receiver supplement (normalized from native format). */ +interface NormalizedTypeEntry { + type: string; + confidence: number; +} + // ── Node lookup setup ─────────────────────────────────────────────────── -function makeGetNodeIdStmt(db) { +function makeGetNodeIdStmt(db: BetterSqlite3.Database): NodeIdStmt { return { - get: (name, kind, file, line) => { + get: (name: string, kind: string, file: string, line: number) => { const id = getNodeId(db, name, kind, file, line); return id != null ? { id } : undefined; }, }; } -function setupNodeLookups(ctx, allNodes) { +function setupNodeLookups(ctx: PipelineContext, allNodes: QueryNodeRow[]): void { ctx.nodesByName = new Map(); for (const node of allNodes) { if (!ctx.nodesByName.has(node.name)) ctx.nodesByName.set(node.name, []); - ctx.nodesByName.get(node.name).push(node); + ctx.nodesByName.get(node.name)!.push(node as unknown as NodeRow); } ctx.nodesByNameAndFile = new Map(); for (const node of allNodes) { const key = `${node.name}|${node.file}`; if (!ctx.nodesByNameAndFile.has(key)) ctx.nodesByNameAndFile.set(key, []); - ctx.nodesByNameAndFile.get(key).push(node); + ctx.nodesByNameAndFile.get(key)!.push(node as unknown as NodeRow); } } // ── Import edges ──────────────────────────────────────────────────────── -function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { +function buildImportEdges( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; for (const [relPath, symbols] of fileSymbols) { @@ -69,8 +128,16 @@ function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { } } -function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, edgeRows) { - const resolvedSources = new Set(); +function buildBarrelEdges( + ctx: PipelineContext, + imp: Import, + resolvedPath: string, + fileNodeId: number, + edgeKind: string, + getNodeIdStmt: NodeIdStmt, + edgeRows: EdgeRowTuple[], +): void { + const resolvedSources = new Set(); for (const name of imp.names) { const cleanName = name.replace(/^\*\s+as\s+/, ''); const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); @@ -92,9 +159,15 @@ function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeI // ── Call edges (native engine) ────────────────────────────────────────── -function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) { +function buildCallEdgesNative( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + allNodes: QueryNodeRow[], + native: NativeAddon, +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; - const nativeFiles = []; + const nativeFiles: NativeFileEntry[] = []; for (const [relPath, symbols] of fileSymbols) { if (barrelOnlyFiles.has(relPath)) continue; @@ -102,7 +175,7 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) if (!fileNodeRow) continue; const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); - const typeMap = + const typeMap: Array<{ name: string; typeName: string; confidence: number }> = symbols.typeMap instanceof Map ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ name, @@ -110,7 +183,7 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) confidence: typeof entry === 'object' ? entry.confidence : 0.9, })) : Array.isArray(symbols.typeMap) - ? symbols.typeMap + ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) : []; nativeFiles.push({ file: relPath, @@ -128,7 +201,9 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) }); } - const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); + const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [ + ...BUILTIN_RECEIVERS, + ]) as NativeEdge[]; for (const e of nativeEdges) { allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); } @@ -142,8 +217,13 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) } } -function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { - const importedNames = []; +function buildImportedNamesForNative( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, +): Array<{ name: string; file: string }> { + const importedNames: Array<{ name: string; file: string }> = []; for (const imp of symbols.imports) { const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); for (const name of imp.names) { @@ -161,8 +241,13 @@ function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { // ── Receiver edge supplement for older native binaries ────────────────── -function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { - const seenCallEdges = new Set(); +function supplementReceiverEdges( + ctx: PipelineContext, + nativeFiles: NativeFileEntry[], + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { + const seenCallEdges = new Set(); // Collect existing edges to avoid duplicates for (const row of allEdgeRows) { seenCallEdges.add(`${row[0]}|${row[1]}|${row[2]}`); @@ -170,7 +255,7 @@ function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { for (const nf of nativeFiles) { const relPath = nf.file; - const typeMap = new Map( + const typeMap = new Map( nf.typeMap.map((t) => [t.name, { type: t.typeName, confidence: t.confidence ?? 0.9 }]), ); const fileNodeRow = { id: nf.fileNodeId }; @@ -208,7 +293,11 @@ function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { // ── Call edges (JS fallback) ──────────────────────────────────────────── -function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { +function buildCallEdgesJS( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; for (const [relPath, symbols] of fileSymbols) { @@ -217,8 +306,8 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { if (!fileNodeRow) continue; const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap = symbols.typeMap || new Map(); - const seenCallEdges = new Set(); + const typeMap: Map = symbols.typeMap || new Map(); + const seenCallEdges = new Set(); buildFileCallEdges( ctx, @@ -235,8 +324,13 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { } } -function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { - const importedNames = new Map(); +function buildImportedNamesMap( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, +): Map { + const importedNames = new Map(); for (const imp of symbols.imports) { const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); for (const name of imp.names) { @@ -246,8 +340,14 @@ function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { return importedNames; } -function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { - let caller = null; +function findCaller( + call: Call, + definitions: ReadonlyArray<{ name: string; kind: string; line: number; endLine?: number | null }>, + relPath: string, + getNodeIdStmt: NodeIdStmt, + fileNodeRow: { id: number }, +): { id: number } { + let caller: { id: number } | null = null; let callerSpan = Infinity; for (const def of definitions) { if (def.line <= call.line) { @@ -270,9 +370,15 @@ function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { return caller || fileNodeRow; } -function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { +function resolveCallTargets( + ctx: PipelineContext, + call: Call, + relPath: string, + importedNames: Map, + typeMap: Map, +): { targets: NodeRow[]; importedFrom: string | undefined } { const importedFrom = importedNames.get(call.name); - let targets; + let targets: NodeRow[] | undefined; if (importedFrom) { targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; @@ -293,8 +399,8 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { if (targets.length > 1) { targets.sort((a, b) => { - const confA = computeConfidence(relPath, a.file, importedFrom); - const confB = computeConfidence(relPath, b.file, importedFrom); + const confA = computeConfidence(relPath, a.file, importedFrom ?? null); + const confB = computeConfidence(relPath, b.file, importedFrom ?? null); return confB - confA; }); } @@ -302,7 +408,12 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { return { targets, importedFrom }; } -function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { +function resolveByMethodOrGlobal( + ctx: PipelineContext, + call: Call, + relPath: string, + typeMap: Map, +): NodeRow[] { // Type-aware resolution: translate variable receiver to its declared type if (call.receiver && typeMap) { const typeEntry = typeMap.get(call.receiver); @@ -332,21 +443,21 @@ function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { } function buildFileCallEdges( - ctx, - relPath, - symbols, - fileNodeRow, - importedNames, - seenCallEdges, - getNodeIdStmt, - allEdgeRows, - typeMap, -) { + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + fileNodeRow: { id: number }, + importedNames: Map, + seenCallEdges: Set, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + typeMap: Map, +): void { for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); - const isDynamic = call.dynamic ? 1 : 0; + const isDynamic: number = call.dynamic ? 1 : 0; const { targets, importedFrom } = resolveCallTargets( ctx, call, @@ -359,7 +470,7 @@ function buildFileCallEdges( const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { seenCallEdges.add(edgeKey); - const confidence = computeConfidence(relPath, t.file, importedFrom); + const confidence = computeConfidence(relPath, t.file, importedFrom ?? null); allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); } } @@ -377,17 +488,25 @@ function buildFileCallEdges( } } -function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap) { +function buildReceiverEdge( + ctx: PipelineContext, + call: Call, + caller: { id: number }, + relPath: string, + seenCallEdges: Set, + allEdgeRows: EdgeRowTuple[], + typeMap: Map, +): void { const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const typeEntry = typeMap?.get(call.receiver); + const typeEntry = typeMap?.get(call.receiver!); const typeName = typeEntry ? (typeof typeEntry === 'string' ? typeEntry : typeEntry.type) : null; const typeConfidence = typeEntry && typeof typeEntry === 'object' ? typeEntry.confidence : null; - const effectiveReceiver = typeName || call.receiver; + const effectiveReceiver = typeName || call.receiver!; const samefile = ctx.nodesByNameAndFile.get(`${effectiveReceiver}|${relPath}`) || []; const candidates = samefile.length > 0 ? samefile : ctx.nodesByName.get(effectiveReceiver) || []; const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); if (receiverNodes.length > 0 && caller) { - const recvTarget = receiverNodes[0]; + const recvTarget = receiverNodes[0]!; const recvKey = `recv|${caller.id}|${recvTarget.id}`; if (!seenCallEdges.has(recvKey)) { seenCallEdges.add(recvKey); @@ -404,7 +523,12 @@ const HIERARCHY_SOURCE_KINDS = new Set(['class', 'struct', 'record', 'enum']); const EXTENDS_TARGET_KINDS = new Set(['class', 'struct', 'trait', 'record']); const IMPLEMENTS_TARGET_KINDS = new Set(['interface', 'trait', 'class']); -function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { +function buildClassHierarchyEdges( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + allEdgeRows: EdgeRowTuple[], +): void { for (const cls of symbols.classes) { if (cls.extends) { const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find((n) => @@ -438,10 +562,7 @@ function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { // ── Main entry point ──────────────────────────────────────────────────── -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildEdges(ctx) { +export async function buildEdges(ctx: PipelineContext): Promise { const { db, engineName } = ctx; const getNodeIdStmt = makeGetNodeIdStmt(db); @@ -450,12 +571,12 @@ export async function buildEdges(ctx) { .prepare( `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`, ) - .all(); + .all() as QueryNodeRow[]; setupNodeLookups(ctx, allNodes); const t0 = performance.now(); const buildEdgesTx = db.transaction(() => { - const allEdgeRows = []; + const allEdgeRows: EdgeRowTuple[] = []; buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); diff --git a/src/domain/graph/builder/stages/build-structure.js b/src/domain/graph/builder/stages/build-structure.ts similarity index 65% rename from src/domain/graph/builder/stages/build-structure.js rename to src/domain/graph/builder/stages/build-structure.ts index f4737df9..0f22a694 100644 --- a/src/domain/graph/builder/stages/build-structure.js +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -7,19 +7,20 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { debug } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { ExtractorOutput } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; import { readFileSafe } from '../helpers.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildStructure(ctx) { +export async function buildStructure(ctx: PipelineContext): Promise { const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx; // Build line count map (prefer cached _lineCount from parser) ctx.lineCountMap = new Map(); for (const [relPath, symbols] of fileSymbols) { - if (symbols.lineCount ?? symbols._lineCount) { - ctx.lineCountMap.set(relPath, symbols.lineCount ?? symbols._lineCount); + const lineCount = + (symbols as ExtractorOutput & { lineCount?: number }).lineCount ?? symbols._lineCount; + if (lineCount) { + ctx.lineCountMap.set(relPath, lineCount); } else { const absPath = path.join(rootDir, relPath); try { @@ -33,7 +34,9 @@ export async function buildStructure(ctx) { // For incremental builds, load unchanged files from DB for complete structure if (!isFullBuild) { - const existingFiles = db.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'").all(); + const existingFiles = db + .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'") + .all() as Array<{ file: string }>; const defsByFile = db.prepare( "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", ); @@ -48,19 +51,20 @@ export async function buildStructure(ctx) { FROM node_metrics m JOIN nodes n ON m.node_id = n.id WHERE n.kind = 'file'`, ); - const cachedLineCounts = new Map(); - for (const row of lineCountByFile.all()) { + const cachedLineCounts = new Map(); + for (const row of lineCountByFile.all() as Array<{ file: string; line_count: number }>) { cachedLineCounts.set(row.file, row.line_count); } let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { - const importCount = importCountByFile.get(relPath)?.cnt || 0; + const importCount = + (importCountByFile.get(relPath) as { cnt: number } | undefined)?.cnt || 0; fileSymbols.set(relPath, { definitions: defsByFile.all(relPath), - imports: new Array(importCount), + imports: new Array(importCount) as unknown as ExtractorOutput['imports'], exports: [], - }); + } as unknown as ExtractorOutput); loadedFromDb++; } if (!ctx.lineCountMap.has(relPath)) { @@ -83,23 +87,36 @@ export async function buildStructure(ctx) { // Build directory structure const t0 = performance.now(); - const relDirs = new Set(); + const relDirs = new Set(); for (const absDir of discoveredDirs) { relDirs.add(normalizePath(path.relative(rootDir, absDir))); } try { - const { buildStructure: buildStructureFn } = await import('../../../../features/structure.js'); + const { buildStructure: buildStructureFn } = (await import( + '../../../../features/structure.js' + )) as { + buildStructure: ( + db: PipelineContext['db'], + fileSymbols: Map, + rootDir: string, + lineCountMap: Map, + directories: Set, + changedFiles: string[] | null, + ) => void; + }; const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()]; buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths); } catch (err) { - debug(`Structure analysis failed: ${err.message}`); + debug(`Structure analysis failed: ${(err as Error).message}`); } ctx.timing.structureMs = performance.now() - t0; // Classify node roles const t1 = performance.now(); try { - const { classifyNodeRoles } = await import('../../../../features/structure.js'); + const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { + classifyNodeRoles: (db: PipelineContext['db']) => Record; + }; const roleSummary = classifyNodeRoles(db); debug( `Roles: ${Object.entries(roleSummary) @@ -107,7 +124,7 @@ export async function buildStructure(ctx) { .join(', ')}`, ); } catch (err) { - debug(`Role classification failed: ${err.message}`); + debug(`Role classification failed: ${(err as Error).message}`); } ctx.timing.rolesMs = performance.now() - t1; } diff --git a/src/domain/graph/builder/stages/collect-files.js b/src/domain/graph/builder/stages/collect-files.ts similarity index 79% rename from src/domain/graph/builder/stages/collect-files.js rename to src/domain/graph/builder/stages/collect-files.ts index 9f3eb636..6551b598 100644 --- a/src/domain/graph/builder/stages/collect-files.js +++ b/src/domain/graph/builder/stages/collect-files.ts @@ -7,19 +7,17 @@ import fs from 'node:fs'; import path from 'node:path'; import { info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { PipelineContext } from '../context.js'; import { collectFiles as collectFilesUtil } from '../helpers.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function collectFiles(ctx) { +export async function collectFiles(ctx: PipelineContext): Promise { const { rootDir, config, opts } = ctx; if (opts.scope) { // Scoped rebuild: rebuild only specified files - const scopedFiles = opts.scope.map((f) => normalizePath(f)); - const existing = []; - const missing = []; + const scopedFiles = opts.scope.map((f: string) => normalizePath(f)); + const existing: Array<{ file: string; relPath: string }> = []; + const missing: string[] = []; for (const rel of scopedFiles) { const abs = path.join(rootDir, rel); if (fs.existsSync(abs)) { @@ -36,7 +34,7 @@ export async function collectFiles(ctx) { ctx.isFullBuild = false; info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`); } else { - const collected = collectFilesUtil(rootDir, [], config, new Set()); + const collected = collectFilesUtil(rootDir, [], config, new Set()); ctx.allFiles = collected.files; ctx.discoveredDirs = collected.directories; info(`Found ${ctx.allFiles.length} files to parse`); diff --git a/src/domain/graph/builder/stages/detect-changes.js b/src/domain/graph/builder/stages/detect-changes.ts similarity index 63% rename from src/domain/graph/builder/stages/detect-changes.js rename to src/domain/graph/builder/stages/detect-changes.ts index baf03e95..cbb18897 100644 --- a/src/domain/graph/builder/stages/detect-changes.js +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -1,27 +1,65 @@ /** * Stage: detectChanges * - * Three-tier change detection cascade + incremental reverse-dependency handling. - * Sets ctx.parseChanges, ctx.metadataUpdates, ctx.removed, ctx.isFullBuild, ctx.earlyExit. + * Determines which files have changed since the last build using a tiered + * strategy: journal → mtime+size → content hash. Handles full, incremental, + * and scoped rebuilds. */ import fs from 'node:fs'; import path from 'node:path'; +import type BetterSqlite3 from 'better-sqlite3'; import { closeDb } from '../../../../db/index.js'; import { debug, info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { EngineOpts, ExtractorOutput } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; +import type { PipelineContext } from '../context.js'; import { fileHash, fileStat, purgeFilesFromGraph, readFileSafe } from '../helpers.js'; -// ── Three-tier change detection ───────────────────────────────────────── +// ── Local types ──────────────────────────────────────────────────────── -/** - * Determine which files have changed since last build. - * Tier 0 — Journal: O(changed) when watcher was running - * Tier 1 — mtime+size: O(n) stats, O(changed) reads - * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) - */ -function getChangedFiles(db, allFiles, rootDir) { +interface FileHashRow { + file: string; + hash: string; + mtime: number; + size: number; +} + +interface FileStat { + mtimeMs: number; + size: number; +} + +interface ChangedFile { + file: string; + relPath?: string; + content?: string; + hash?: string; + stat?: FileStat; + metadataOnly?: boolean; + _reverseDepOnly?: boolean; +} + +interface ChangeResult { + changed: ChangedFile[]; + removed: string[]; + isFullBuild: boolean; +} + +interface NeedsHashItem { + file: string; + relPath: string; + stat?: FileStat; +} + +// ── Helpers ──────────────────────────────────────────────────────────── + +function getChangedFiles( + db: BetterSqlite3.Database, + allFiles: string[], + rootDir: string, +): ChangeResult { let hasTable = false; try { db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get(); @@ -38,30 +76,28 @@ function getChangedFiles(db, allFiles, rootDir) { }; } - const existing = new Map( - db - .prepare('SELECT file, hash, mtime, size FROM file_hashes') - .all() - .map((r) => [r.file, r]), + const existing = new Map( + (db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[]).map( + (r) => [r.file, r], + ), ); const removed = detectRemovedFiles(existing, allFiles, rootDir); - - // Tier 0: Journal const journalResult = tryJournalTier(db, existing, rootDir, removed); if (journalResult) return journalResult; - - // Tier 1 + 2: mtime/size fast-path → hash comparison return mtimeAndHashTiers(existing, allFiles, rootDir, removed); } -function detectRemovedFiles(existing, allFiles, rootDir) { - const currentFiles = new Set(); +function detectRemovedFiles( + existing: Map, + allFiles: string[], + rootDir: string, +): string[] { + const currentFiles = new Set(); for (const file of allFiles) { currentFiles.add(normalizePath(path.relative(rootDir, file))); } - - const removed = []; + const removed: string[] = []; for (const existingFile of existing.keys()) { if (!currentFiles.has(existingFile)) { removed.push(existingFile); @@ -70,15 +106,22 @@ function detectRemovedFiles(existing, allFiles, rootDir) { return removed; } -function tryJournalTier(db, existing, rootDir, removed) { +function tryJournalTier( + db: BetterSqlite3.Database, + existing: Map, + rootDir: string, + removed: string[], +): ChangeResult | null { const journal = readJournal(rootDir); if (!journal.valid) return null; - const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); + const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get() as + | { latest: number | null } + | undefined; const latestDbMtime = dbMtimes?.latest || 0; - const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; + const hasJournalEntries = journal.changed!.length > 0 || journal.removed!.length > 0; - if (!hasJournalEntries || journal.timestamp < latestDbMtime) { + if (!hasJournalEntries || journal.timestamp! < latestDbMtime) { debug( `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`, ); @@ -86,16 +129,15 @@ function tryJournalTier(db, existing, rootDir, removed) { } debug( - `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, + `Tier 0: journal valid, ${journal.changed!.length} changed, ${journal.removed!.length} removed`, ); - const changed = []; + const changed: ChangedFile[] = []; - for (const relPath of journal.changed) { + for (const relPath of journal.changed!) { const absPath = path.join(rootDir, relPath); - const stat = fileStat(absPath); + const stat = fileStat(absPath) as FileStat | undefined; if (!stat) continue; - - let content; + let content: string | undefined; try { content = readFileSafe(absPath); } catch { @@ -109,38 +151,37 @@ function tryJournalTier(db, existing, rootDir, removed) { } const removedSet = new Set(removed); - for (const relPath of journal.removed) { + for (const relPath of journal.removed!) { if (existing.has(relPath)) removedSet.add(relPath); } return { changed, removed: [...removedSet], isFullBuild: false }; } -function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { - // Tier 1: mtime+size fast-path - const needsHash = []; - const skipped = []; +function mtimeAndHashTiers( + existing: Map, + allFiles: string[], + rootDir: string, + removed: string[], +): ChangeResult { + const needsHash: NeedsHashItem[] = []; + const skipped: string[] = []; for (const file of allFiles) { const relPath = normalizePath(path.relative(rootDir, file)); const record = existing.get(relPath); - if (!record) { needsHash.push({ file, relPath }); continue; } - - const stat = fileStat(file); + const stat = fileStat(file) as FileStat | undefined; if (!stat) continue; - const storedMtime = record.mtime || 0; const storedSize = record.size || 0; - if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) { skipped.push(relPath); continue; } - needsHash.push({ file, relPath, stat }); } @@ -148,20 +189,17 @@ function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`); } - // Tier 2: Hash comparison - const changed = []; - + const changed: ChangedFile[] = []; for (const item of needsHash) { - let content; + let content: string | undefined; try { content = readFileSafe(item.file); } catch { continue; } const hash = fileHash(content); - const stat = item.stat || fileStat(item.file); + const stat = item.stat || (fileStat(item.file) as FileStat | undefined); const record = existing.get(item.relPath); - if (!record || record.hash !== hash) { changed.push({ file: item.file, content, hash, relPath: item.relPath, stat }); } else if (stat) { @@ -186,41 +224,44 @@ function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { return { changed, removed, isFullBuild: false }; } -// ── Pending analysis ──────────────────────────────────────────────────── - -/** - * Run pending analysis pass when no file changes but analysis tables are empty. - */ -async function runPendingAnalysis(ctx) { +async function runPendingAnalysis(ctx: PipelineContext): Promise { const { db, opts, engineOpts, allFiles, rootDir } = ctx; - const needsCfg = - opts.cfg !== false && + (opts as Record)['cfg'] !== false && (() => { try { - return db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get().c === 0; + return ( + (db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get() as { c: number } | undefined) + ?.c === 0 + ); } catch { return true; } })(); const needsDataflow = - opts.dataflow !== false && + (opts as Record)['dataflow'] !== false && (() => { try { - return db.prepare('SELECT COUNT(*) as c FROM dataflow').get().c === 0; + return ( + (db.prepare('SELECT COUNT(*) as c FROM dataflow').get() as { c: number } | undefined) + ?.c === 0 + ); } catch { return true; } })(); - if (!needsCfg && !needsDataflow) return false; info('No file changes. Running pending analysis pass...'); const analysisOpts = { ...engineOpts, - dataflow: needsDataflow && opts.dataflow !== false, + dataflow: needsDataflow && (opts as Record)['dataflow'] !== false, }; - const analysisSymbols = await parseFilesAuto(allFiles, rootDir, analysisOpts); + const analysisSymbols: Map = await parseFilesAuto( + allFiles, + rootDir, + analysisOpts, + ); if (needsCfg) { const { buildCFGData } = await import('../../../../features/cfg.js'); await buildCFGData(db, analysisSymbols, rootDir, engineOpts); @@ -232,9 +273,7 @@ async function runPendingAnalysis(ctx) { return true; } -// ── Metadata self-heal ────────────────────────────────────────────────── - -function healMetadata(ctx) { +function healMetadata(ctx: PipelineContext): void { const { db, metadataUpdates } = ctx; if (!metadataUpdates || metadataUpdates.length === 0) return; try { @@ -243,7 +282,7 @@ function healMetadata(ctx) { ); const healTx = db.transaction(() => { for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; const size = item.stat ? item.stat.size : 0; healHash.run(item.relPath, item.hash, mtime, size); } @@ -255,12 +294,13 @@ function healMetadata(ctx) { } } -// ── Reverse-dependency cascade ────────────────────────────────────────── - -function findReverseDependencies(db, changedRelPaths, rootDir) { - const reverseDeps = new Set(); +function findReverseDependencies( + db: BetterSqlite3.Database, + changedRelPaths: Set, + rootDir: string, +): Set { + const reverseDeps = new Set(); if (changedRelPaths.size === 0) return reverseDeps; - const findReverseDepsStmt = db.prepare(` SELECT DISTINCT n_src.file FROM edges e JOIN nodes n_src ON e.source_id = n_src.id @@ -268,7 +308,7 @@ function findReverseDependencies(db, changedRelPaths, rootDir) { WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory' `); for (const relPath of changedRelPaths) { - for (const row of findReverseDepsStmt.all(relPath)) { + for (const row of findReverseDepsStmt.all(relPath) as Array<{ file: string }>) { if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { const absPath = path.join(rootDir, row.file); if (fs.existsSync(absPath)) { @@ -280,13 +320,15 @@ function findReverseDependencies(db, changedRelPaths, rootDir) { return reverseDeps; } -function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { +function purgeAndAddReverseDeps( + ctx: PipelineContext, + changePaths: string[], + reverseDeps: Set, +): void { const { db, rootDir } = ctx; - if (changePaths.length > 0 || ctx.removed.length > 0) { purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); } - if (reverseDeps.size > 0) { const deleteOutgoingEdgesForFile = db.prepare( 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', @@ -301,9 +343,7 @@ function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { } } -// ── Shared helpers ─────────────────────────────────────────────────────── - -function detectHasEmbeddings(db) { +function detectHasEmbeddings(db: BetterSqlite3.Database): boolean { try { db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); return true; @@ -312,39 +352,27 @@ function detectHasEmbeddings(db) { } } -// ── Scoped build path ─────────────────────────────────────────────────── - -function handleScopedBuild(ctx) { +function handleScopedBuild(ctx: PipelineContext): void { const { db, rootDir, opts } = ctx; - ctx.hasEmbeddings = detectHasEmbeddings(db); - const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - - let reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set([...changePaths, ...ctx.removed]); + let reverseDeps = new Set(); + if (!(opts as Record)['noReverseDeps']) { + const changedRelPaths = new Set([...changePaths, ...ctx.removed]); reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } - - // Purge changed + removed files, then add reverse-deps purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); - info( `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, ); } -// ── Full/incremental build path ───────────────────────────────────────── - -function handleFullBuild(ctx) { +function handleFullBuild(ctx: PipelineContext): void { const { db } = ctx; - const hasEmbeddings = detectHasEmbeddings(db); ctx.hasEmbeddings = hasEmbeddings; - const deletions = 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; db.exec( @@ -354,14 +382,12 @@ function handleFullBuild(ctx) { ); } -function handleIncrementalBuild(ctx) { +function handleIncrementalBuild(ctx: PipelineContext): void { const { db, rootDir, opts } = ctx; - ctx.hasEmbeddings = detectHasEmbeddings(db); - - let reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set(); + let reverseDeps = new Set(); + if (!(opts as Record)['noReverseDeps']) { + const changedRelPaths = new Set(); for (const item of ctx.parseChanges) { changedRelPaths.add(item.relPath || normalizePath(path.relative(rootDir, item.file))); } @@ -370,45 +396,54 @@ function handleIncrementalBuild(ctx) { } reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } - info( `Incremental: ${ctx.parseChanges.length} changed, ${ctx.removed.length} removed${reverseDeps.size > 0 ? `, ${reverseDeps.size} reverse-deps` : ''}`, ); if (ctx.parseChanges.length > 0) debug(`Changed files: ${ctx.parseChanges.map((c) => c.relPath).join(', ')}`); if (ctx.removed.length > 0) debug(`Removed files: ${ctx.removed.join(', ')}`); - const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } -// ── Main entry point ──────────────────────────────────────────────────── - -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function detectChanges(ctx) { +export async function detectChanges(ctx: PipelineContext): Promise { const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; - - // Scoped builds already set parseChanges in collectFiles - if (opts.scope) { + if ((opts as Record)['scope']) { handleScopedBuild(ctx); return; } - const increResult = incremental && !forceFullRebuild ? getChangedFiles(db, allFiles, rootDir) - : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; - + : { + changed: allFiles.map((f): ChangedFile => ({ file: f })), + removed: [] as string[], + isFullBuild: true, + }; ctx.removed = increResult.removed; ctx.isFullBuild = increResult.isFullBuild; - ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); - ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); - - // Early exit: no changes detected + ctx.parseChanges = increResult.changed + .filter((c) => !c.metadataOnly) + .map((c) => ({ + file: c.file, + relPath: c.relPath, + content: c.content, + hash: c.hash, + stat: c.stat ? { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined, + _reverseDepOnly: c._reverseDepOnly, + })); + ctx.metadataUpdates = increResult.changed + .filter( + (c): c is ChangedFile & { relPath: string; hash: string; stat: FileStat } => + !!c.metadataOnly && !!c.relPath && !!c.hash && !!c.stat, + ) + .map((c) => ({ + relPath: c.relPath, + hash: c.hash, + stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size }, + })); if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { const ranAnalysis = await runPendingAnalysis(ctx); if (ranAnalysis) { @@ -417,7 +452,6 @@ export async function detectChanges(ctx) { ctx.earlyExit = true; return; } - healMetadata(ctx); info('No changes detected. Graph is up to date.'); closeDb(db); @@ -425,7 +459,6 @@ export async function detectChanges(ctx) { ctx.earlyExit = true; return; } - if (ctx.isFullBuild) { handleFullBuild(ctx); } else { diff --git a/src/domain/graph/builder/stages/finalize.js b/src/domain/graph/builder/stages/finalize.ts similarity index 64% rename from src/domain/graph/builder/stages/finalize.js rename to src/domain/graph/builder/stages/finalize.ts index 6b493785..90d23757 100644 --- a/src/domain/graph/builder/stages/finalize.js +++ b/src/domain/graph/builder/stages/finalize.ts @@ -9,33 +9,36 @@ import { performance } from 'node:perf_hooks'; import { closeDb, getBuildMeta, setBuildMeta } from '../../../../db/index.js'; import { debug, info, warn } from '../../../../infrastructure/logger.js'; import { writeJournalHeader } from '../../journal.js'; +import type { PipelineContext } from '../context.js'; const __builderDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1')); -const CODEGRAPH_VERSION = JSON.parse( - fs.readFileSync(path.join(__builderDir, '..', '..', '..', '..', '..', 'package.json'), 'utf-8'), +const CODEGRAPH_VERSION = ( + JSON.parse( + fs.readFileSync(path.join(__builderDir, '..', '..', '..', '..', '..', 'package.json'), 'utf-8'), + ) as { version: string } ).version; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function finalize(ctx) { +export async function finalize(ctx: PipelineContext): Promise { const { db, allSymbols, rootDir, isFullBuild, hasEmbeddings, config, opts, schemaVersion } = ctx; const t0 = performance.now(); // Release cached WASM trees for (const [, symbols] of allSymbols) { - if (symbols._tree && typeof symbols._tree.delete === 'function') { + const tree = symbols._tree as { delete?: () => void } | undefined; + if (tree && typeof tree.delete === 'function') { try { - symbols._tree.delete(); - } catch {} + tree.delete(); + } catch { + /* ignore cleanup errors */ + } } - symbols._tree = null; - symbols._langId = null; + symbols._tree = undefined; + symbols._langId = undefined; } - const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; - const actualEdgeCount = db.prepare('SELECT COUNT(*) as c FROM edges').get().c; + const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c; + const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c; info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`); info(`Stored in ${ctx.dbPath}`); @@ -49,10 +52,11 @@ export async function finalize(ctx) { if (prevN > 0) { const nodeDrift = Math.abs(nodeCount - prevN) / prevN; const edgeDrift = prevE > 0 ? Math.abs(actualEdgeCount - prevE) / prevE : 0; - const driftThreshold = config.build?.driftThreshold ?? 0.2; + const driftThreshold = + (config as { build?: { driftThreshold?: number } }).build?.driftThreshold ?? 0.2; if (nodeDrift > driftThreshold || edgeDrift > driftThreshold) { warn( - `Incremental build diverged significantly from previous counts (nodes: ${prevN}→${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}→${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). Consider rebuilding with --no-incremental.`, + `Incremental build diverged significantly from previous counts (nodes: ${prevN}\u2192${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}\u2192${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). Consider rebuilding with --no-incremental.`, ); } } @@ -62,24 +66,29 @@ export async function finalize(ctx) { // Orphaned embeddings warning if (hasEmbeddings) { try { - const orphaned = db - .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)') - .get().c; + const orphaned = ( + db + .prepare( + 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + ) + .get() as { c: number } + ).c; if (orphaned > 0) { warn( `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, ); } } catch { - /* ignore — embeddings table may have been dropped */ + /* ignore - embeddings table may have been dropped */ } } // Unused exports warning try { - const unusedCount = db - .prepare( - `SELECT COUNT(*) as c FROM nodes + const unusedCount = ( + db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE exported = 1 AND kind != 'file' AND id NOT IN ( SELECT DISTINCT e.target_id FROM edges e @@ -87,8 +96,9 @@ export async function finalize(ctx) { JOIN nodes target ON e.target_id = target.id WHERE e.kind = 'calls' AND caller.file != target.file )`, - ) - .get().c; + ) + .get() as { c: number } + ).c; if (unusedCount > 0) { warn( `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports --unused" to inspect.`, @@ -110,7 +120,7 @@ export async function finalize(ctx) { edge_count: actualEdgeCount, }); } catch (err) { - warn(`Failed to write build metadata: ${err.message}`); + warn(`Failed to write build metadata: ${(err as Error).message}`); } closeDb(db); @@ -127,10 +137,12 @@ export async function finalize(ctx) { debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`); } else { try { - const { registerRepo } = await import('../../../../infrastructure/registry.js'); + const { registerRepo } = (await import('../../../../infrastructure/registry.js')) as { + registerRepo: (rootDir: string) => void; + }; registerRepo(rootDir); } catch (err) { - debug(`Auto-registration failed: ${err.message}`); + debug(`Auto-registration failed: ${(err as Error).message}`); } } } diff --git a/src/domain/graph/builder/stages/insert-nodes.js b/src/domain/graph/builder/stages/insert-nodes.ts similarity index 74% rename from src/domain/graph/builder/stages/insert-nodes.js rename to src/domain/graph/builder/stages/insert-nodes.ts index 6e22c966..46737844 100644 --- a/src/domain/graph/builder/stages/insert-nodes.js +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -6,7 +6,10 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type BetterSqlite3 from 'better-sqlite3'; import { bulkNodeIdsByFile } from '../../../../db/index.js'; +import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; import { batchInsertEdges, batchInsertNodes, @@ -15,10 +18,23 @@ import { readFileSafe, } from '../helpers.js'; +/** Shape of precomputed file data gathered from filesToParse entries. */ +interface PrecomputedFileData { + file: string; + relPath?: string; + content?: string; + hash?: string; + stat?: { mtime: number; size: number } | null; + _reverseDepOnly?: boolean; +} + // ── Phase 1: Insert file nodes, definitions, exports ──────────────────── -function insertDefinitionsAndExports(db, allSymbols) { - const phase1Rows = []; +function insertDefinitionsAndExports( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const phase1Rows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); for (const def of symbols.definitions) { @@ -55,10 +71,13 @@ function insertDefinitionsAndExports(db, allSymbols) { // ── Phase 2: Insert children (needs parent IDs) ──────────────────────── -function insertChildren(db, allSymbols) { - const childRows = []; +function insertChildren( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const childRows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); + const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -87,10 +106,13 @@ function insertChildren(db, allSymbols) { // ── Phase 3: Insert containment + parameter_of edges ──────────────────── -function insertContainmentEdges(db, allSymbols) { - const edgeRows = []; +function insertContainmentEdges( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const edgeRows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); + const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -118,7 +140,14 @@ function insertContainmentEdges(db, allSymbols) { // ── Phase 4: Update file hashes ───────────────────────────────────────── -function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash) { +function updateFileHashes( + _db: BetterSqlite3.Database, + allSymbols: Map, + precomputedData: Map, + metadataUpdates: MetadataUpdate[], + rootDir: string, + upsertHash: BetterSqlite3.Statement | null, +): void { if (!upsertHash) return; for (const [relPath] of allSymbols) { @@ -126,13 +155,20 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo if (precomputed?._reverseDepOnly) { // no-op: file unchanged, hash already correct } else if (precomputed?.hash) { - const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; + let mtime: number; + let size: number; + if (precomputed.stat) { + mtime = precomputed.stat.mtime; + size = precomputed.stat.size; + } else { + const rawStat = fileStat(path.join(rootDir, relPath)); + mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + size = rawStat ? rawStat.size : 0; + } upsertHash.run(relPath, precomputed.hash, mtime, size); } else { const absPath = path.join(rootDir, relPath); - let code; + let code: string | null; try { code = readFileSafe(absPath); } catch { @@ -149,7 +185,7 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo // Also update metadata-only entries (self-heal mtime/size without re-parse) for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; const size = item.stat ? item.stat.size : 0; upsertHash.run(item.relPath, item.hash, mtime, size); } @@ -157,18 +193,15 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo // ── Main entry point ──────────────────────────────────────────────────── -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function insertNodes(ctx) { +export async function insertNodes(ctx: PipelineContext): Promise { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - const precomputedData = new Map(); + const precomputedData = new Map(); for (const item of filesToParse) { - if (item.relPath) precomputedData.set(item.relPath, item); + if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); } - let upsertHash; + let upsertHash: BetterSqlite3.Statement | null; try { upsertHash = db.prepare( 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)', diff --git a/src/domain/graph/builder/stages/parse-files.js b/src/domain/graph/builder/stages/parse-files.ts similarity index 87% rename from src/domain/graph/builder/stages/parse-files.js rename to src/domain/graph/builder/stages/parse-files.ts index 6690bb5f..9e8254c1 100644 --- a/src/domain/graph/builder/stages/parse-files.js +++ b/src/domain/graph/builder/stages/parse-files.ts @@ -7,11 +7,9 @@ import { performance } from 'node:perf_hooks'; import { info } from '../../../../infrastructure/logger.js'; import { parseFilesAuto } from '../../../parser.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function parseFiles(ctx) { +export async function parseFiles(ctx: PipelineContext): Promise { const { allFiles, parseChanges, isFullBuild, engineOpts, rootDir } = ctx; ctx.filesToParse = isFullBuild ? allFiles.map((f) => ({ file: f })) : parseChanges; diff --git a/src/domain/graph/builder/stages/resolve-imports.js b/src/domain/graph/builder/stages/resolve-imports.ts similarity index 73% rename from src/domain/graph/builder/stages/resolve-imports.js rename to src/domain/graph/builder/stages/resolve-imports.ts index 7d9bbe40..eb828386 100644 --- a/src/domain/graph/builder/stages/resolve-imports.js +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -1,23 +1,20 @@ -/** - * Stage: resolveImports - * - * Batch import resolution + barrel/re-export map construction. - * For incremental builds, loads unchanged barrel files for resolution. - */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type { Import } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { resolveImportPath, resolveImportsBatch } from '../../resolve.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function resolveImports(ctx) { - const { db, fileSymbols, rootDir, aliases, allFiles, isFullBuild, engineOpts } = ctx; +interface ReexportEntry { + source: string; + names: string[]; + wildcardReexport: boolean; +} - // Collect all (fromFile, importSource) pairs and resolve in one native call +export async function resolveImports(ctx: PipelineContext): Promise { + const { db, fileSymbols, rootDir, aliases, allFiles, isFullBuild, engineOpts } = ctx; const t0 = performance.now(); - const batchInputs = []; + const batchInputs: Array<{ fromFile: string; importSource: string }> = []; for (const [relPath, symbols] of fileSymbols) { const absFile = path.join(rootDir, relPath); for (const imp of symbols.imports) { @@ -27,8 +24,7 @@ export async function resolveImports(ctx) { ctx.batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases, allFiles); ctx.timing.resolveMs = performance.now() - t0; - // Build re-export map for barrel resolution - ctx.reexportMap = new Map(); + ctx.reexportMap = new Map(); for (const [relPath, symbols] of fileSymbols) { const reexports = symbols.imports.filter((imp) => imp.reexport); if (reexports.length > 0) { @@ -43,16 +39,13 @@ export async function resolveImports(ctx) { } } - // For incremental builds, load unchanged barrel files into reexportMap - ctx.barrelOnlyFiles = new Set(); + ctx.barrelOnlyFiles = new Set(); if (!isFullBuild) { const barrelCandidates = db - .prepare( - `SELECT DISTINCT n1.file FROM edges e + .prepare(`SELECT DISTINCT n1.file FROM edges e JOIN nodes n1 ON e.source_id = n1.id - WHERE e.kind = 'reexports' AND n1.kind = 'file'`, - ) - .all(); + WHERE e.kind = 'reexports' AND n1.kind = 'file'`) + .all() as Array<{ file: string }>; for (const { file: relPath } of barrelCandidates) { if (fileSymbols.has(relPath)) continue; const absPath = path.join(rootDir, relPath); @@ -62,11 +55,11 @@ export async function resolveImports(ctx) { if (fileSym) { fileSymbols.set(relPath, fileSym); ctx.barrelOnlyFiles.add(relPath); - const reexports = fileSym.imports.filter((imp) => imp.reexport); + const reexports = fileSym.imports.filter((imp: Import) => imp.reexport); if (reexports.length > 0) { ctx.reexportMap.set( relPath, - reexports.map((imp) => ({ + reexports.map((imp: Import) => ({ source: getResolved(ctx, absPath, imp.source), names: imp.names, wildcardReexport: imp.wildcardReexport || false, @@ -81,11 +74,7 @@ export async function resolveImports(ctx) { } } -/** - * Resolve an import source, preferring batch results. - * Exported so other stages (build-edges) can reuse it. - */ -export function getResolved(ctx, absFile, importSource) { +export function getResolved(ctx: PipelineContext, absFile: string, importSource: string): string { if (ctx.batchResolved) { const key = `${absFile}|${importSource}`; const hit = ctx.batchResolved.get(key); @@ -94,10 +83,7 @@ export function getResolved(ctx, absFile, importSource) { return resolveImportPath(absFile, importSource, ctx.rootDir, ctx.aliases); } -/** - * Check if a file is a barrel (re-export hub). - */ -export function isBarrelFile(ctx, relPath) { +export function isBarrelFile(ctx: PipelineContext, relPath: string): boolean { const symbols = ctx.fileSymbols.get(relPath); if (!symbols) return false; const reexports = symbols.imports.filter((imp) => imp.reexport); @@ -106,15 +92,16 @@ export function isBarrelFile(ctx, relPath) { return reexports.length >= ownDefs; } -/** - * Resolve a symbol through barrel re-export chains. - */ -export function resolveBarrelExport(ctx, barrelPath, symbolName, visited = new Set()) { +export function resolveBarrelExport( + ctx: PipelineContext, + barrelPath: string, + symbolName: string, + visited: Set = new Set(), +): string | null { if (visited.has(barrelPath)) return null; visited.add(barrelPath); - const reexports = ctx.reexportMap.get(barrelPath); + const reexports = ctx.reexportMap.get(barrelPath) as ReexportEntry[] | undefined; if (!reexports) return null; - for (const re of reexports) { if (re.names.length > 0 && !re.wildcardReexport) { if (re.names.includes(symbolName)) { diff --git a/src/domain/graph/builder/stages/run-analyses.js b/src/domain/graph/builder/stages/run-analyses.ts similarity index 71% rename from src/domain/graph/builder/stages/run-analyses.js rename to src/domain/graph/builder/stages/run-analyses.ts index 53384613..c943cdf4 100644 --- a/src/domain/graph/builder/stages/run-analyses.js +++ b/src/domain/graph/builder/stages/run-analyses.ts @@ -5,18 +5,19 @@ * Filters out reverse-dep files for incremental builds. */ import { debug, warn } from '../../../../infrastructure/logger.js'; +import type { ExtractorOutput } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function runAnalyses(ctx) { +export async function runAnalyses(ctx: PipelineContext): Promise { const { db, allSymbols, rootDir, opts, engineOpts, isFullBuild, filesToParse } = ctx; // For incremental builds, exclude reverse-dep-only files - let astComplexitySymbols = allSymbols; + let astComplexitySymbols: Map = allSymbols; if (!isFullBuild) { const reverseDepFiles = new Set( - filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath), + filesToParse + .filter((item) => (item as { _reverseDepOnly?: boolean })._reverseDepOnly) + .map((item) => item.relPath), ); if (reverseDepFiles.size > 0) { astComplexitySymbols = new Map(); @@ -39,6 +40,8 @@ export async function runAnalyses(ctx) { ctx.timing.cfgMs = analysisTiming.cfgMs; ctx.timing.dataflowMs = analysisTiming.dataflowMs; } catch (err) { - warn(`Analysis engine failed (AST/complexity/CFG/dataflow may be incomplete): ${err.message}`); + warn( + `Analysis engine failed (AST/complexity/CFG/dataflow may be incomplete): ${(err as Error).message}`, + ); } } diff --git a/src/domain/graph/change-journal.js b/src/domain/graph/change-journal.ts similarity index 60% rename from src/domain/graph/change-journal.js rename to src/domain/graph/change-journal.ts index 7589b5a6..0bfa6b4f 100644 --- a/src/domain/graph/change-journal.js +++ b/src/domain/graph/change-journal.ts @@ -5,36 +5,36 @@ import { debug, warn } from '../../infrastructure/logger.js'; export const CHANGE_EVENTS_FILENAME = 'change-events.ndjson'; export const DEFAULT_MAX_BYTES = 1024 * 1024; // 1 MB -/** - * Returns the absolute path to the NDJSON change events file. - */ -export function changeEventsPath(rootDir) { +export function changeEventsPath(rootDir: string): string { return path.join(rootDir, '.codegraph', CHANGE_EVENTS_FILENAME); } -/** - * Compare old and new symbol arrays, returning added/removed/modified sets. - * Symbols are keyed on `name\0kind`. A symbol is "modified" if the same - * name+kind exists in both but the line changed. - * - * @param {Array<{name:string, kind:string, line:number}>} oldSymbols - * @param {Array<{name:string, kind:string, line:number}>} newSymbols - * @returns {{ added: Array, removed: Array, modified: Array }} - */ -export function diffSymbols(oldSymbols, newSymbols) { - const oldMap = new Map(); +interface SymbolEntry { + name: string; + kind: string; + line: number; +} + +interface SymbolDiff { + added: Array<{ name: string; kind: string; line: number }>; + removed: Array<{ name: string; kind: string }>; + modified: Array<{ name: string; kind: string; line: number }>; +} + +export function diffSymbols(oldSymbols: SymbolEntry[], newSymbols: SymbolEntry[]): SymbolDiff { + const oldMap = new Map(); for (const s of oldSymbols) { oldMap.set(`${s.name}\0${s.kind}`, s); } - const newMap = new Map(); + const newMap = new Map(); for (const s of newSymbols) { newMap.set(`${s.name}\0${s.kind}`, s); } - const added = []; - const removed = []; - const modified = []; + const added: SymbolDiff['added'] = []; + const removed: SymbolDiff['removed'] = []; + const modified: SymbolDiff['modified'] = []; for (const [key, s] of newMap) { const old = oldMap.get(key); @@ -54,10 +54,29 @@ export function diffSymbols(oldSymbols, newSymbols) { return { added, removed, modified }; } -/** - * Assemble a single change event object. - */ -export function buildChangeEvent(file, event, symbolDiff, counts) { +interface ChangeEvent { + ts: string; + file: string; + event: string; + symbols: unknown; + counts: { + nodes: { before: number; after: number }; + edges: { added: number }; + }; +} + +interface ChangeEventCounts { + nodesBefore?: number; + nodesAfter?: number; + edgesAdded?: number; +} + +export function buildChangeEvent( + file: string, + event: string, + symbolDiff: unknown, + counts: ChangeEventCounts, +): ChangeEvent { return { ts: new Date().toISOString(), file, @@ -70,11 +89,7 @@ export function buildChangeEvent(file, event, symbolDiff, counts) { }; } -/** - * Append change events as NDJSON lines to the change events file. - * Creates the .codegraph directory if needed. Non-fatal on failure. - */ -export function appendChangeEvents(rootDir, events) { +export function appendChangeEvents(rootDir: string, events: ChangeEvent[]): void { const filePath = changeEventsPath(rootDir); const dir = path.dirname(filePath); @@ -86,7 +101,7 @@ export function appendChangeEvents(rootDir, events) { fs.appendFileSync(filePath, lines); debug(`Appended ${events.length} change event(s) to ${filePath}`); } catch (err) { - warn(`Failed to append change events: ${err.message}`); + warn(`Failed to append change events: ${(err as Error).message}`); return; } @@ -97,16 +112,12 @@ export function appendChangeEvents(rootDir, events) { } } -/** - * If the file exceeds maxBytes, keep the last ~half by finding - * the first newline at or after the midpoint and rewriting from there. - */ -export function rotateIfNeeded(filePath, maxBytes = DEFAULT_MAX_BYTES) { - let stat; +export function rotateIfNeeded(filePath: string, maxBytes: number = DEFAULT_MAX_BYTES): void { + let stat: fs.Stats; try { stat = fs.statSync(filePath); } catch { - return; // file doesn't exist, nothing to rotate + return; } if (stat.size <= maxBytes) return; @@ -125,6 +136,6 @@ export function rotateIfNeeded(filePath, maxBytes = DEFAULT_MAX_BYTES) { fs.writeFileSync(filePath, kept); debug(`Rotated change events: ${stat.size} → ${kept.length} bytes`); } catch (err) { - warn(`Failed to rotate change events: ${err.message}`); + warn(`Failed to rotate change events: ${(err as Error).message}`); } } diff --git a/src/domain/graph/cycles.js b/src/domain/graph/cycles.js deleted file mode 100644 index c7872a61..00000000 --- a/src/domain/graph/cycles.js +++ /dev/null @@ -1,82 +0,0 @@ -import { tarjan } from '../../graph/algorithms/tarjan.js'; -import { buildDependencyGraph } from '../../graph/builders/dependency.js'; -import { CodeGraph } from '../../graph/model.js'; -import { loadNative } from '../../infrastructure/native.js'; - -/** - * Detect circular dependencies in the codebase using Tarjan's SCC algorithm. - * Dispatches to native Rust implementation when available, falls back to JS. - * @param {object} db - Open SQLite database - * @param {object} opts - { fileLevel: true, noTests: false } - * @returns {string[][]} Array of cycles, each cycle is an array of file paths - */ -export function findCycles(db, opts = {}) { - const fileLevel = opts.fileLevel !== false; - const noTests = opts.noTests || false; - - const graph = buildDependencyGraph(db, { fileLevel, noTests }); - - // Build a label map: DB string ID → human-readable key - // File-level: file path; Function-level: name|file composite (for native Rust compat) - const idToLabel = new Map(); - for (const [id, attrs] of graph.nodes()) { - if (fileLevel) { - idToLabel.set(id, attrs.file); - } else { - idToLabel.set(id, `${attrs.label}|${attrs.file}`); - } - } - - // Build edge array with human-readable keys (for native engine) - const edges = graph.toEdgeArray().map((e) => ({ - source: idToLabel.get(e.source), - target: idToLabel.get(e.target), - })); - - // Try native Rust implementation - const native = loadNative(); - if (native) { - return native.detectCycles(edges); - } - - // Fallback: JS Tarjan via graph subsystem - // Re-key graph with human-readable labels for consistent output - const labelGraph = new CodeGraph(); - for (const { source, target } of edges) { - labelGraph.addEdge(source, target); - } - return tarjan(labelGraph); -} - -/** - * Pure-JS Tarjan's SCC implementation. - * Kept for backward compatibility — accepts raw {source, target}[] edges. - */ -export function findCyclesJS(edges) { - const graph = new CodeGraph(); - for (const { source, target } of edges) { - graph.addEdge(source, target); - } - return tarjan(graph); -} - -/** - * Format cycles for human-readable output. - */ -export function formatCycles(cycles) { - if (cycles.length === 0) { - return 'No circular dependencies detected.'; - } - - const lines = [`Found ${cycles.length} circular dependency cycle(s):\n`]; - for (let i = 0; i < cycles.length; i++) { - const cycle = cycles[i]; - lines.push(` Cycle ${i + 1} (${cycle.length} files):`); - for (const file of cycle) { - lines.push(` -> ${file}`); - } - lines.push(` -> ${cycle[0]} (back to start)`); - lines.push(''); - } - return lines.join('\n'); -} diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts new file mode 100644 index 00000000..9517133d --- /dev/null +++ b/src/domain/graph/cycles.ts @@ -0,0 +1,66 @@ +import { tarjan } from '../../graph/algorithms/tarjan.js'; +import { buildDependencyGraph } from '../../graph/builders/dependency.js'; +import { CodeGraph } from '../../graph/model.js'; +import { loadNative } from '../../infrastructure/native.js'; +import type { BetterSqlite3Database } from '../../types.js'; + +export function findCycles( + db: BetterSqlite3Database, + opts: { fileLevel?: boolean; noTests?: boolean } = {}, +): string[][] { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + + const graph = buildDependencyGraph(db, { fileLevel, noTests }); + + const idToLabel = new Map(); + for (const [id, attrs] of graph.nodes()) { + if (fileLevel) { + idToLabel.set(id, attrs['file'] as string); + } else { + idToLabel.set(id, `${attrs['label']}|${attrs['file']}`); + } + } + + const edges = graph.toEdgeArray().map((e) => ({ + source: idToLabel.get(e.source) ?? e.source, + target: idToLabel.get(e.target) ?? e.target, + })); + + const native = loadNative(); + if (native) { + return native.detectCycles(edges) as string[][]; + } + + const labelGraph = new CodeGraph(); + for (const { source, target } of edges) { + labelGraph.addEdge(source, target); + } + return tarjan(labelGraph); +} + +export function findCyclesJS(edges: Array<{ source: string; target: string }>): string[][] { + const graph = new CodeGraph(); + for (const { source, target } of edges) { + graph.addEdge(source, target); + } + return tarjan(graph); +} + +export function formatCycles(cycles: string[][]): string { + if (cycles.length === 0) { + return 'No circular dependencies detected.'; + } + + const lines: string[] = [`Found ${cycles.length} circular dependency cycle(s):\n`]; + for (let i = 0; i < cycles.length; i++) { + const cycle = cycles[i]!; + lines.push(` Cycle ${i + 1} (${cycle.length} files):`); + for (const file of cycle) { + lines.push(` -> ${file}`); + } + lines.push(` -> ${cycle[0]} (back to start)`); + lines.push(''); + } + return lines.join('\n'); +} diff --git a/src/domain/graph/journal.js b/src/domain/graph/journal.ts similarity index 67% rename from src/domain/graph/journal.js rename to src/domain/graph/journal.ts index 714889f2..4ad63a35 100644 --- a/src/domain/graph/journal.js +++ b/src/domain/graph/journal.ts @@ -5,13 +5,16 @@ import { debug, warn } from '../../infrastructure/logger.js'; export const JOURNAL_FILENAME = 'changes.journal'; const HEADER_PREFIX = '# codegraph-journal v1 '; -/** - * Read and validate the change journal. - * Returns { valid, timestamp, changed[], removed[] } or { valid: false }. - */ -export function readJournal(rootDir) { +interface JournalResult { + valid: boolean; + timestamp?: number; + changed?: string[]; + removed?: string[]; +} + +export function readJournal(rootDir: string): JournalResult { const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME); - let content; + let content: string; try { content = fs.readFileSync(journalPath, 'utf-8'); } catch { @@ -19,24 +22,24 @@ export function readJournal(rootDir) { } const lines = content.split('\n'); - if (lines.length === 0 || !lines[0].startsWith(HEADER_PREFIX)) { + if (lines.length === 0 || !lines[0]!.startsWith(HEADER_PREFIX)) { debug('Journal has malformed or missing header'); return { valid: false }; } - const timestamp = Number(lines[0].slice(HEADER_PREFIX.length).trim()); + const timestamp = Number(lines[0]!.slice(HEADER_PREFIX.length).trim()); if (!Number.isFinite(timestamp) || timestamp <= 0) { debug('Journal has invalid timestamp'); return { valid: false }; } - const changed = []; - const removed = []; - const seenChanged = new Set(); - const seenRemoved = new Set(); + const changed: string[] = []; + const removed: string[] = []; + const seenChanged = new Set(); + const seenRemoved = new Set(); for (let i = 1; i < lines.length; i++) { - const line = lines[i].trim(); + const line = lines[i]!.trim(); if (!line || line.startsWith('#')) continue; if (line.startsWith('DELETED ')) { @@ -56,11 +59,10 @@ export function readJournal(rootDir) { return { valid: true, timestamp, changed, removed }; } -/** - * Append changed/deleted paths to the journal. - * Creates the journal with a header if it doesn't exist. - */ -export function appendJournalEntries(rootDir, entries) { +export function appendJournalEntries( + rootDir: string, + entries: Array<{ file: string; deleted?: boolean }>, +): void { const dir = path.join(rootDir, '.codegraph'); const journalPath = path.join(dir, JOURNAL_FILENAME); @@ -68,7 +70,6 @@ export function appendJournalEntries(rootDir, entries) { fs.mkdirSync(dir, { recursive: true }); } - // If journal doesn't exist, create with a placeholder header if (!fs.existsSync(journalPath)) { fs.writeFileSync(journalPath, `${HEADER_PREFIX}0\n`); } @@ -81,11 +82,7 @@ export function appendJournalEntries(rootDir, entries) { fs.appendFileSync(journalPath, `${lines.join('\n')}\n`); } -/** - * Write a fresh journal header after a successful build. - * Atomic: write to temp file then rename. - */ -export function writeJournalHeader(rootDir, timestamp) { +export function writeJournalHeader(rootDir: string, timestamp: number): void { const dir = path.join(rootDir, '.codegraph'); const journalPath = path.join(dir, JOURNAL_FILENAME); const tmpPath = `${journalPath}.tmp`; @@ -98,8 +95,7 @@ export function writeJournalHeader(rootDir, timestamp) { fs.writeFileSync(tmpPath, `${HEADER_PREFIX}${timestamp}\n`); fs.renameSync(tmpPath, journalPath); } catch (err) { - warn(`Failed to write journal header: ${err.message}`); - // Clean up temp file if rename failed + warn(`Failed to write journal header: ${(err as Error).message}`); try { fs.unlinkSync(tmpPath); } catch { diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index c7948355..30cef5d6 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -5,7 +5,7 @@ import { info } from '../../infrastructure/logger.js'; import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; -import { rebuildFile } from './builder/incremental.js'; +import { type IncrementalStmts, rebuildFile } from './builder/incremental.js'; import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js'; import { appendJournalEntries } from './journal.js'; @@ -28,7 +28,11 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = // Alias for functions expecting the project's BetterSqlite3Database interface const typedDb = db as unknown as import('../../types.js').BetterSqlite3Database; initSchema(db); - const engineOpts = { engine: (opts.engine || 'auto') as import('../../types.js').EngineMode }; + const engineOpts: import('../../types.js').EngineOpts = { + engine: (opts.engine || 'auto') as import('../../types.js').EngineMode, + dataflow: false, + ast: false, + }; const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); console.log( `Watch mode using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`, @@ -99,9 +103,17 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = edgesAdded: number; }> = []; for (const filePath of files) { - const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, { - diffSymbols, - })) as (typeof results)[number] | null; + const result = (await rebuildFile( + db, + rootDir, + filePath, + stmts as IncrementalStmts, + engineOpts, + cache, + { + diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown, + }, + )) as (typeof results)[number] | null; if (result) results.push(result); } const updates = results; diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts index 085acbea..05cd58ea 100644 --- a/src/domain/search/generator.ts +++ b/src/domain/search/generator.ts @@ -4,6 +4,7 @@ import type BetterSqlite3 from 'better-sqlite3'; import { closeDb, findDbPath, openDb } from '../../db/index.js'; import { warn } from '../../infrastructure/logger.js'; import { DbError } from '../../shared/errors.js'; +import type { NodeRow } from '../../types.js'; import { embed, getModelConfig } from './models.js'; import { buildSourceText } from './strategies/source.js'; import { buildStructuredText } from './strategies/structured.js'; @@ -81,7 +82,7 @@ export async function buildEmbeddings( .prepare( `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`, ) - .all() as Array<{ id: number; name: string; kind: string; file: string; line: number }>; + .all() as Array; console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`); diff --git a/src/domain/search/search/cli-formatter.js b/src/domain/search/search/cli-formatter.ts similarity index 80% rename from src/domain/search/search/cli-formatter.js rename to src/domain/search/search/cli-formatter.ts index a0b45a80..1081afc7 100644 --- a/src/domain/search/search/cli-formatter.js +++ b/src/domain/search/search/cli-formatter.ts @@ -1,37 +1,41 @@ import { warn } from '../../../infrastructure/logger.js'; import { hybridSearchData } from './hybrid.js'; import { ftsSearchData } from './keyword.js'; +import type { SemanticSearchOpts } from './semantic.js'; import { multiSearchData, searchData } from './semantic.js'; -/** - * Search with mode support — CLI wrapper with multi-query detection. - * Modes: 'hybrid' (default), 'semantic', 'keyword' - */ -export async function search(query, customDbPath, opts = {}) { +interface SearchOpts extends SemanticSearchOpts { + mode?: 'hybrid' | 'semantic' | 'keyword'; + json?: boolean; +} + +export async function search( + query: string, + customDbPath: string | undefined, + opts: SearchOpts = {}, +): Promise { const mode = opts.mode || 'hybrid'; - // Split by semicolons, trim, filter empties const queries = query .split(';') .map((q) => q.trim()) .filter((q) => q.length > 0); - const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'); + const kindIcon = (kind: string): string => + kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'; - // ─── Keyword-only mode ────────────────────────────────────────────── + // Keyword-only mode if (mode === 'keyword') { - const singleQuery = queries.length === 1 ? queries[0] : query; + const singleQuery = queries.length === 1 ? queries[0]! : query; const data = ftsSearchData(singleQuery, customDbPath, opts); if (!data) { console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); return; } - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); if (data.results.length === 0) { console.log(' No results found.'); @@ -46,18 +50,16 @@ export async function search(query, customDbPath, opts = {}) { return; } - // ─── Semantic-only mode ───────────────────────────────────────────── + // Semantic-only mode if (mode === 'semantic') { if (queries.length <= 1) { const singleQuery = queries[0] || query; const data = await searchData(singleQuery, customDbPath, opts); if (!data) return; - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nSemantic search: "${singleQuery}"\n`); if (data.results.length === 0) { console.log(' No results above threshold.'); @@ -72,12 +74,10 @@ export async function search(query, customDbPath, opts = {}) { } else { const data = await multiSearchData(queries, customDbPath, opts); if (!data) return; - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); console.log(); @@ -101,11 +101,10 @@ export async function search(query, customDbPath, opts = {}) { return; } - // ─── Hybrid mode (default) ────────────────────────────────────────── + // Hybrid mode (default) const data = await hybridSearchData(query, customDbPath, opts); if (!data) { - // No FTS5 index — fall back to semantic-only warn( 'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.', ); @@ -134,12 +133,12 @@ export async function search(query, customDbPath, opts = {}) { console.log( ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, ); - const parts = []; + const parts: string[] = []; if (r.bm25Rank != null) { - parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`); + parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score!.toFixed(2)})`); } if (r.semanticRank != null) { - parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`); + parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity! * 100).toFixed(1)}%)`); } if (parts.length > 0) { console.log(` ${parts.join(' | ')}`); diff --git a/src/domain/search/search/filters.js b/src/domain/search/search/filters.ts similarity index 54% rename from src/domain/search/search/filters.js rename to src/domain/search/search/filters.ts index 47becc3a..4d91bb56 100644 --- a/src/domain/search/search/filters.js +++ b/src/domain/search/search/filters.ts @@ -1,13 +1,6 @@ -/** - * Match a file path against a glob pattern. - * Supports *, **, and ? wildcards. Zero dependencies. - */ -export function globMatch(filePath, pattern) { - // Normalize separators to forward slashes +export function globMatch(filePath: string, pattern: string): boolean { const normalized = filePath.replace(/\\/g, '/'); - // Escape regex specials except glob chars let regex = pattern.replace(/\\/g, '/').replace(/[.+^${}()|[\]\\]/g, '\\$&'); - // Replace ** first (matches any path segment), then * and ? regex = regex.replace(/\*\*/g, '\0'); regex = regex.replace(/\*/g, '[^/]*'); regex = regex.replace(/\0/g, '.*'); @@ -15,23 +8,18 @@ export function globMatch(filePath, pattern) { try { return new RegExp(`^${regex}$`).test(normalized); } catch { - // Malformed pattern — fall back to substring match return normalized.includes(pattern); } } const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; -/** - * Apply post-query filters (glob pattern, noTests) to a set of rows. - * Mutates nothing — returns a new filtered array. - * @param {Array} rows - Rows with at least a `file` property - * @param {object} opts - * @param {string} [opts.filePattern] - Glob pattern (only applied if it contains glob chars) - * @param {boolean} [opts.noTests] - Exclude test/spec files - * @returns {Array} - */ -export function applyFilters(rows, opts = {}) { +export interface FilterOpts { + filePattern?: string | string[]; + noTests?: boolean; +} + +export function applyFilters(rows: T[], opts: FilterOpts = {}): T[] { let filtered = rows; const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; diff --git a/src/domain/search/search/hybrid.js b/src/domain/search/search/hybrid.ts similarity index 55% rename from src/domain/search/search/hybrid.js rename to src/domain/search/search/hybrid.ts index 2c6cd00a..80370312 100644 --- a/src/domain/search/search/hybrid.js +++ b/src/domain/search/search/hybrid.ts @@ -1,22 +1,41 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { loadConfig } from '../../../infrastructure/config.js'; +import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js'; import { hasFtsIndex } from '../stores/fts5.js'; import { ftsSearchData } from './keyword.js'; +import type { SemanticSearchOpts } from './semantic.js'; import { searchData } from './semantic.js'; -/** - * Hybrid BM25 + semantic search with RRF fusion. - * Returns { results: [{ name, kind, file, line, rrf, bm25Score, bm25Rank, similarity, semanticRank }] } - * or null if no FTS5 index (caller should fall back to semantic-only). - */ -export async function hybridSearchData(query, customDbPath, opts = {}) { +interface HybridResult { + name: string; + kind: string; + file: string; + line: number; + endLine: number | null; + role: string | null; + fileHash: string | null; + rrf: number; + bm25Score: number | null; + bm25Rank: number | null; + similarity: number | null; + semanticRank: number | null; +} + +export interface HybridSearchResult { + results: HybridResult[]; +} + +export async function hybridSearchData( + query: string, + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const k = opts.rrfK ?? searchCfg.rrfK ?? 60; const topK = (opts.limit ?? searchCfg.topK ?? 15) * 5; - // Split semicolons for multi-query support const queries = typeof query === 'string' ? query @@ -25,30 +44,41 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { .filter((q) => q.length > 0) : [query]; - // Check FTS5 availability first (sync, cheap) - const checkDb = openReadonlyOrFail(customDbPath); + const checkDb = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; const ftsAvailable = hasFtsIndex(checkDb); checkDb.close(); if (!ftsAvailable) return null; - // Collect ranked lists: for each query, one BM25 list + one semantic list - const rankedLists = []; + interface RankedItem { + key: string; + rank: number; + source: 'bm25' | 'semantic'; + name: string; + kind: string; + file: string; + line: number; + endLine?: number | null; + role?: string | null; + fileHash?: string | null; + bm25Score?: number; + similarity?: number; + } + + const rankedLists: RankedItem[][] = []; for (const q of queries) { - // BM25 ranked list (sync) const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK }); if (bm25Data?.results) { rankedLists.push( bm25Data.results.map((r, idx) => ({ key: `${r.name}:${r.file}:${r.line}`, rank: idx + 1, - source: 'bm25', + source: 'bm25' as const, ...r, })), ); } - // Semantic ranked list (async) const semData = await searchData(q, customDbPath, { ...opts, limit: topK, @@ -59,15 +89,29 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { semData.results.map((r, idx) => ({ key: `${r.name}:${r.file}:${r.line}`, rank: idx + 1, - source: 'semantic', + source: 'semantic' as const, ...r, })), ); } } - // RRF fusion across all ranked lists - const fusionMap = new Map(); + interface FusionEntry { + name: string; + kind: string; + file: string; + line: number; + endLine: number | null; + role: string | null; + fileHash: string | null; + rrfScore: number; + bm25Score: number | null; + bm25Rank: number | null; + similarity: number | null; + semanticRank: number | null; + } + + const fusionMap = new Map(); for (const list of rankedLists) { for (const item of list) { if (!fusionMap.has(item.key)) { @@ -76,9 +120,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { kind: item.kind, file: item.file, line: item.line, - endLine: item.endLine ?? null, - role: item.role ?? null, - fileHash: item.fileHash ?? null, + endLine: (item.endLine as number | null) ?? null, + role: (item.role as string | null) ?? null, + fileHash: (item.fileHash as string | null) ?? null, rrfScore: 0, bm25Score: null, bm25Rank: null, @@ -86,23 +130,23 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { semanticRank: null, }); } - const entry = fusionMap.get(item.key); + const entry = fusionMap.get(item.key)!; entry.rrfScore += 1 / (k + item.rank); if (item.source === 'bm25') { if (entry.bm25Rank === null || item.rank < entry.bm25Rank) { - entry.bm25Score = item.bm25Score; + entry.bm25Score = (item as RankedItem & { bm25Score?: number }).bm25Score ?? null; entry.bm25Rank = item.rank; } } else { if (entry.semanticRank === null || item.rank < entry.semanticRank) { - entry.similarity = item.similarity; + entry.similarity = (item as RankedItem & { similarity?: number }).similarity ?? null; entry.semanticRank = item.rank; } } } } - const results = [...fusionMap.values()] + const results: HybridResult[] = [...fusionMap.values()] .sort((a, b) => b.rrfScore - a.rrfScore) .slice(0, limit) .map((e) => ({ diff --git a/src/domain/search/search/keyword.js b/src/domain/search/search/keyword.ts similarity index 59% rename from src/domain/search/search/keyword.js rename to src/domain/search/search/keyword.ts index 4a4e3ed7..66eda0ac 100644 --- a/src/domain/search/search/keyword.js +++ b/src/domain/search/search/keyword.ts @@ -1,17 +1,47 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { buildFileConditionSQL } from '../../../db/query-builder.js'; +import type { BetterSqlite3Database } from '../../../types.js'; import { normalizeSymbol } from '../../queries.js'; import { hasFtsIndex, sanitizeFtsQuery } from '../stores/fts5.js'; import { applyFilters } from './filters.js'; -/** - * BM25 keyword search via FTS5. - * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index. - */ -export function ftsSearchData(query, customDbPath, opts = {}) { +export interface FtsSearchOpts { + limit?: number; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; +} + +interface FtsRow { + node_id: number; + bm25_score: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; + role: string | null; +} + +export interface FtsSearchResult { + results: Array<{ + name: string; + kind: string; + file: string; + line: number; + bm25Score: number; + [key: string]: unknown; + }>; +} + +export function ftsSearchData( + query: string, + customDbPath: string | undefined, + opts: FtsSearchOpts = {}, +): FtsSearchResult | null { const limit = opts.limit || 15; - const db = openReadonlyOrFail(customDbPath); + const db = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; try { if (!hasFtsIndex(db)) { @@ -30,7 +60,7 @@ export function ftsSearchData(query, customDbPath, opts = {}) { JOIN nodes n ON f.rowid = n.id WHERE fts_index MATCH ? `; - const params = [ftsQuery]; + const params: unknown[] = [ftsQuery]; if (opts.kind) { sql += ' AND n.kind = ?'; @@ -40,9 +70,6 @@ export function ftsSearchData(query, customDbPath, opts = {}) { const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; const isGlob = fpArr.length > 0 && fpArr.some((p) => /[*?[\]]/.test(p)); - // For non-glob patterns, push filtering into SQL via buildFileConditionSQL - // (handles escapeLike + ESCAPE clause). Glob patterns are handled post-query - // by applyFilters. if (fpArr.length > 0 && !isGlob) { const fc = buildFileConditionSQL(fpArr, 'n.file'); sql += fc.sql; @@ -50,22 +77,21 @@ export function ftsSearchData(query, customDbPath, opts = {}) { } sql += ' ORDER BY rank LIMIT ?'; - params.push(limit * 5); // fetch generous set for post-filtering + params.push(limit * 5); - let rows; + let rows: FtsRow[]; try { - rows = db.prepare(sql).all(...params); + rows = db.prepare(sql).all(...params) as FtsRow[]; } catch { - // Invalid FTS5 query syntax — return empty return { results: [] }; } rows = applyFilters(rows, opts); - const hc = new Map(); + const hc = new Map(); const results = rows.slice(0, limit).map((row) => ({ ...normalizeSymbol(row, db, hc), - bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display + bm25Score: -row.bm25_score, })); return { results }; diff --git a/src/domain/search/search/prepare.js b/src/domain/search/search/prepare.ts similarity index 67% rename from src/domain/search/search/prepare.js rename to src/domain/search/search/prepare.ts index fb1552e4..3907aa5b 100644 --- a/src/domain/search/search/prepare.js +++ b/src/domain/search/search/prepare.ts @@ -1,17 +1,39 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { escapeLike } from '../../../db/query-builder.js'; import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js'; +import type { BetterSqlite3Database } from '../../../types.js'; import { MODELS } from '../models.js'; import { applyFilters } from './filters.js'; -/** - * Shared setup for search functions: opens DB, validates embeddings/model, loads rows. - * Returns { db, rows, modelKey, storedDim } or null on failure (prints error). - * On null return, the DB is closed. On exception, the DB is also closed - * (callers only need to close DB from the returned object on the happy path). - */ -export function prepareSearch(customDbPath, opts = {}) { - const db = openReadonlyOrFail(customDbPath); +export interface PreparedSearch { + db: BetterSqlite3Database; + rows: Array<{ + node_id: number; + vector: Buffer; + text_preview: string; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; + role: string | null; + }>; + modelKey: string | null; + storedDim: number | null; +} + +export interface PrepareSearchOpts { + model?: string; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; +} + +export function prepareSearch( + customDbPath: string | undefined, + opts: PrepareSearchOpts = {}, +): PreparedSearch | null { + const db = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; try { const count = getEmbeddingCount(db); @@ -35,7 +57,6 @@ export function prepareSearch(customDbPath, opts = {}) { } } - // Pre-filter: allow filtering by kind or file pattern to reduce search space const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; const isGlob = fpArr.length > 0 && fpArr.some((p) => /[*?[\]]/.test(p)); @@ -44,8 +65,8 @@ export function prepareSearch(customDbPath, opts = {}) { FROM embeddings e JOIN nodes n ON e.node_id = n.id `; - const params = []; - const conditions = []; + const params: unknown[] = []; + const conditions: string[] = []; if (opts.kind) { conditions.push('n.kind = ?'); params.push(opts.kind); @@ -53,7 +74,7 @@ export function prepareSearch(customDbPath, opts = {}) { if (fpArr.length > 0 && !isGlob) { if (fpArr.length === 1) { conditions.push("n.file LIKE ? ESCAPE '\\'"); - params.push(`%${escapeLike(fpArr[0])}%`); + params.push(`%${escapeLike(fpArr[0]!)}%`); } else { conditions.push(`(${fpArr.map(() => "n.file LIKE ? ESCAPE '\\'").join(' OR ')})`); params.push(...fpArr.map((f) => `%${escapeLike(f)}%`)); @@ -63,7 +84,7 @@ export function prepareSearch(customDbPath, opts = {}) { sql += ` WHERE ${conditions.join(' AND ')}`; } - let rows = db.prepare(sql).all(...params); + let rows = db.prepare(sql).all(...params) as PreparedSearch['rows']; rows = applyFilters(rows, opts); return { db, rows, modelKey, storedDim }; diff --git a/src/domain/search/search/semantic.js b/src/domain/search/search/semantic.ts similarity index 59% rename from src/domain/search/search/semantic.js rename to src/domain/search/search/semantic.ts index 262d5946..40e2f887 100644 --- a/src/domain/search/search/semantic.js +++ b/src/domain/search/search/semantic.ts @@ -1,17 +1,42 @@ import { loadConfig } from '../../../infrastructure/config.js'; import { warn } from '../../../infrastructure/logger.js'; +import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js'; import { normalizeSymbol } from '../../queries.js'; import { embed } from '../models.js'; import { cosineSim } from '../stores/sqlite-blob.js'; import { prepareSearch } from './prepare.js'; -/** - * Single-query semantic search — returns data instead of printing. - * Returns { results: [{ name, kind, file, line, similarity }] } or null on failure. - */ -export async function searchData(query, customDbPath, opts = {}) { +export interface SemanticSearchOpts { + config?: CodegraphConfig; + limit?: number; + minScore?: number; + model?: string; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; + rrfK?: number; +} + +interface SemanticResult { + name: string; + kind: string; + file: string; + line: number; + similarity: number; + [key: string]: unknown; +} + +export interface SearchDataResult { + results: SemanticResult[]; +} + +export async function searchData( + query: string, + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2; @@ -23,7 +48,7 @@ export async function searchData(query, customDbPath, opts = {}) { const { vectors: [queryVec], dim, - } = await embed([query], modelKey); + } = await embed([query], modelKey ?? undefined); if (storedDim && dim !== storedDim) { console.log( @@ -33,15 +58,15 @@ export async function searchData(query, customDbPath, opts = {}) { return null; } - const hc = new Map(); - const results = []; + const hc = new Map(); + const results: SemanticResult[] = []; for (const row of rows) { - const vec = new Float32Array(new Uint8Array(row.vector).buffer); - const sim = cosineSim(queryVec, vec); + const vec = new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer); + const sim = cosineSim(queryVec!, vec); if (sim >= minScore) { results.push({ - ...normalizeSymbol(row, db, hc), + ...normalizeSymbol(row, db as BetterSqlite3Database, hc), similarity: sim, }); } @@ -54,13 +79,25 @@ export async function searchData(query, customDbPath, opts = {}) { } } -/** - * Multi-query semantic search with Reciprocal Rank Fusion (RRF). - * Returns { results: [{ name, kind, file, line, rrf, queryScores }] } or null on failure. - */ -export async function multiSearchData(queries, customDbPath, opts = {}) { +export interface MultiSearchResult { + results: Array<{ + name: string; + kind: string; + file: string; + line: number; + rrf: number; + queryScores: Array<{ query: string; similarity: number; rank: number }>; + [key: string]: unknown; + }>; +} + +export async function multiSearchData( + queries: string[], + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2; const k = opts.rrfK ?? searchCfg.rrfK ?? 60; @@ -70,13 +107,12 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { const { db, rows, modelKey, storedDim } = prepared; try { - const { vectors: queryVecs, dim } = await embed(queries, modelKey); + const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined); - // Warn about similar queries that may bias RRF results const SIMILARITY_WARN_THRESHOLD = searchCfg.similarityWarnThreshold ?? 0.85; for (let i = 0; i < queryVecs.length; i++) { for (let j = i + 1; j < queryVecs.length; j++) { - const sim = cosineSim(queryVecs[i], queryVecs[j]); + const sim = cosineSim(queryVecs[i]!, queryVecs[j]!); if (sim >= SIMILARITY_WARN_THRESHOLD) { warn( `Queries "${queries[i]}" and "${queries[j]}" are very similar ` + @@ -96,47 +132,47 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { return null; } - // Parse row vectors once - const rowVecs = rows.map((row) => new Float32Array(new Uint8Array(row.vector).buffer)); + const rowVecs = rows.map( + (row) => new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer), + ); - // For each query: compute similarities, filter by minScore, rank const perQueryRanked = queries.map((_query, qi) => { - const scored = []; + const scored: Array<{ rowIndex: number; similarity: number }> = []; for (let ri = 0; ri < rows.length; ri++) { - const sim = cosineSim(queryVecs[qi], rowVecs[ri]); + const sim = cosineSim(queryVecs[qi]!, rowVecs[ri]!); if (sim >= minScore) { scored.push({ rowIndex: ri, similarity: sim }); } } scored.sort((a, b) => b.similarity - a.similarity); - // Assign 1-indexed ranks return scored.map((item, rank) => ({ ...item, rank: rank + 1 })); }); - // Fuse results using RRF: for each unique row, sum 1/(k + rank_i) across queries - const fusionMap = new Map(); // rowIndex -> { rrfScore, queryScores[] } + const fusionMap = new Map< + number, + { rrfScore: number; queryScores: Array<{ query: string; similarity: number; rank: number }> } + >(); for (let qi = 0; qi < queries.length; qi++) { - for (const item of perQueryRanked[qi]) { + for (const item of perQueryRanked[qi]!) { if (!fusionMap.has(item.rowIndex)) { fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] }); } - const entry = fusionMap.get(item.rowIndex); + const entry = fusionMap.get(item.rowIndex)!; entry.rrfScore += 1 / (k + item.rank); entry.queryScores.push({ - query: queries[qi], + query: queries[qi]!, similarity: item.similarity, rank: item.rank, }); } } - // Build results sorted by RRF score - const hc = new Map(); - const results = []; + const hc = new Map(); + const results: MultiSearchResult['results'] = []; for (const [rowIndex, entry] of fusionMap) { - const row = rows[rowIndex]; + const row = rows[rowIndex]!; results.push({ - ...normalizeSymbol(row, db, hc), + ...normalizeSymbol(row, db as BetterSqlite3Database, hc), rrf: entry.rrfScore, queryScores: entry.queryScores, }); diff --git a/src/domain/search/stores/fts5.js b/src/domain/search/stores/fts5.ts similarity index 58% rename from src/domain/search/stores/fts5.js rename to src/domain/search/stores/fts5.ts index 9b902dce..0733c0f4 100644 --- a/src/domain/search/stores/fts5.js +++ b/src/domain/search/stores/fts5.ts @@ -1,12 +1,11 @@ +import type { BetterSqlite3Database } from '../../../types.js'; + /** * Sanitize a user query for FTS5 MATCH syntax. - * Wraps each token as an implicit OR and escapes special FTS5 characters. */ -export function sanitizeFtsQuery(query) { - // Remove FTS5 special chars that could cause syntax errors +export function sanitizeFtsQuery(query: string): string | null { const cleaned = query.replace(/[*"():^{}~<>]/g, ' ').trim(); if (!cleaned) return null; - // Split into tokens, wrap with OR for multi-token queries const tokens = cleaned.split(/\s+/).filter((t) => t.length > 0); if (tokens.length === 0) return null; if (tokens.length === 1) return `"${tokens[0]}"`; @@ -15,12 +14,13 @@ export function sanitizeFtsQuery(query) { /** * Check if the FTS5 index exists in the database. - * Returns true if fts_index table exists and has rows, false otherwise. */ -export function hasFtsIndex(db) { +export function hasFtsIndex(db: BetterSqlite3Database): boolean { try { - const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get(); - return row.c > 0; + const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get() as + | { c: number } + | undefined; + return (row?.c ?? 0) > 0; } catch { return false; } diff --git a/src/domain/search/stores/sqlite-blob.js b/src/domain/search/stores/sqlite-blob.js deleted file mode 100644 index 75037ffa..00000000 --- a/src/domain/search/stores/sqlite-blob.js +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @typedef {object} VectorStore - * @property {(queryVec: Float32Array, rows: Array<{vector: Buffer}>) => Array<{index: number, score: number}>} search - * Score every row against a query vector and return scored indices. - * - * Future implementations (e.g. HNSW via `hnsw.js`) implement this same shape - * for approximate nearest-neighbor search. - */ - -/** - * Cosine similarity between two Float32Arrays. - */ -export function cosineSim(a, b) { - let dot = 0, - normA = 0, - normB = 0; - for (let i = 0; i < a.length; i++) { - dot += a[i] * b[i]; - normA += a[i] * a[i]; - normB += b[i] * b[i]; - } - const denom = Math.sqrt(normA) * Math.sqrt(normB); - return denom === 0 ? 0 : dot / denom; -} diff --git a/src/domain/search/stores/sqlite-blob.ts b/src/domain/search/stores/sqlite-blob.ts new file mode 100644 index 00000000..e4b992b1 --- /dev/null +++ b/src/domain/search/stores/sqlite-blob.ts @@ -0,0 +1,15 @@ +/** + * Cosine similarity between two Float32Arrays. + */ +export function cosineSim(a: Float32Array, b: Float32Array): number { + let dot = 0; + let normA = 0; + let normB = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i]! * b[i]!; + normA += a[i]! * a[i]!; + normB += b[i]! * b[i]!; + } + const denom = Math.sqrt(normA) * Math.sqrt(normB); + return denom === 0 ? 0 : dot / denom; +} diff --git a/src/domain/search/strategies/source.js b/src/domain/search/strategies/source.ts similarity index 71% rename from src/domain/search/strategies/source.js rename to src/domain/search/strategies/source.ts index 3b25e0f3..aa91c8ac 100644 --- a/src/domain/search/strategies/source.js +++ b/src/domain/search/strategies/source.ts @@ -1,9 +1,14 @@ +import type { NodeRow } from '../../../types.js'; import { splitIdentifier } from './text-utils.js'; /** * Build raw source-code text for a symbol (original strategy). */ -export function buildSourceText(node, file, lines) { +export function buildSourceText( + node: Pick, + file: string, + lines: string[], +): string { const startLine = Math.max(0, node.line - 1); const endLine = node.end_line ? Math.min(lines.length, node.end_line) diff --git a/src/domain/search/strategies/structured.js b/src/domain/search/strategies/structured.ts similarity index 71% rename from src/domain/search/strategies/structured.js rename to src/domain/search/strategies/structured.ts index 83f5ff0a..705492d8 100644 --- a/src/domain/search/strategies/structured.js +++ b/src/domain/search/strategies/structured.ts @@ -1,35 +1,40 @@ import { findCalleeNames, findCallerNames } from '../../../db/index.js'; +import type { BetterSqlite3Database, NodeRow } from '../../../types.js'; import { extractLeadingComment, splitIdentifier } from './text-utils.js'; +interface NodeWithId extends Pick { + id: number; +} + /** * Build graph-enriched text for a symbol using dependency context. - * Produces compact, semantic text (~100 tokens) instead of full source code. */ -export function buildStructuredText(node, file, lines, db) { +export function buildStructuredText( + node: NodeWithId, + file: string, + lines: string[], + db: BetterSqlite3Database, +): string { const readable = splitIdentifier(node.name); - const parts = [`${node.kind} ${node.name} (${readable}) in ${file}`]; + const parts: string[] = [`${node.kind} ${node.name} (${readable}) in ${file}`]; const startLine = Math.max(0, node.line - 1); - // Extract parameters from signature (best-effort, single-line) const sigLine = lines[startLine] || ''; const paramMatch = sigLine.match(/\(([^)]*)\)/); if (paramMatch?.[1]?.trim()) { parts.push(`Parameters: ${paramMatch[1].trim()}`); } - // Graph context: callees (capped at 10) const callees = findCalleeNames(db, node.id); if (callees.length > 0) { parts.push(`Calls: ${callees.slice(0, 10).join(', ')}`); } - // Graph context: callers (capped at 10) const callers = findCallerNames(db, node.id); if (callers.length > 0) { parts.push(`Called by: ${callers.slice(0, 10).join(', ')}`); } - // Leading comment (high semantic value) or first few lines of code const comment = extractLeadingComment(lines, startLine); if (comment) { parts.push(comment); diff --git a/src/domain/search/strategies/text-utils.js b/src/domain/search/strategies/text-utils.ts similarity index 53% rename from src/domain/search/strategies/text-utils.js rename to src/domain/search/strategies/text-utils.ts index fca8f29e..48e873cb 100644 --- a/src/domain/search/strategies/text-utils.js +++ b/src/domain/search/strategies/text-utils.ts @@ -1,8 +1,7 @@ /** * Split an identifier into readable words. - * camelCase/PascalCase -> "camel Case", snake_case -> "snake case", kebab-case -> "kebab case" */ -export function splitIdentifier(name) { +export function splitIdentifier(name: string): string { return name .replace(/([a-z])([A-Z])/g, '$1 $2') .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') @@ -11,15 +10,14 @@ export function splitIdentifier(name) { } /** - * Extract leading comment text (JSDoc, //, #, etc.) above a function line. - * Returns the cleaned comment text or null if none found. + * Extract leading comment text above a function line. */ -export function extractLeadingComment(lines, fnLineIndex) { +export function extractLeadingComment(lines: string[], fnLineIndex: number): string | null { if (fnLineIndex > lines.length) return null; - const raw = []; + const raw: string[] = []; for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) { if (i >= lines.length) continue; - const trimmed = lines[i].trim(); + const trimmed = lines[i]!.trim(); if (/^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/.test(trimmed)) { raw.unshift(trimmed); } else if (trimmed === '') { @@ -32,10 +30,10 @@ export function extractLeadingComment(lines, fnLineIndex) { return raw .map((line) => line - .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */ - .replace(/^\*\s?/, '') // middle * lines - .replace(/^\/\/\/?\s?/, '') // // or /// - .replace(/^#\s?/, '') // # (Python/Ruby) + .replace(/^\/\*\*?\s?|\*\/$/g, '') + .replace(/^\*\s?/, '') + .replace(/^\/\/\/?\s?/, '') + .replace(/^#\s?/, '') .trim(), ) .filter((l) => l.length > 0) diff --git a/src/types.ts b/src/types.ts index 77eb612a..4505956e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -870,6 +870,8 @@ export interface BuildGraphOpts { engine?: EngineMode; dataflow?: boolean; ast?: boolean; + scope?: string[]; + skipRegistry?: boolean; } /** Build timing result from buildGraph. */