From e09cf9230bc2c87df6c54c391a496227075fbaf1 Mon Sep 17 00:00:00 2001 From: msnandhis <45960035+msnandhis@users.noreply.github.com> Date: Fri, 29 May 2026 21:04:10 +0530 Subject: [PATCH] fix(db): chunk unresolved reference file filters --- CHANGELOG.md | 1 + __tests__/db-perf.test.ts | 87 ++++++++++++++++++++++++++++++++++++++- src/db/queries.ts | 25 ++++++++--- 3 files changed, 105 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd89e722d..0900f50c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fixes - Indexing a project that contains only config-style files (YAML, Twig, or `.properties`) no longer misleadingly reports "No files found to index" — these files are tracked at the file level and are now counted as indexed. Thanks @luojiyin1987 (#357). +- Syncing very large repositories no longer fails with "too many SQL variables" while resolving changed files; CodeGraph now batches those lookups so large codebases can complete normally. (#540) ## [0.9.7] - 2026-05-28 diff --git a/__tests__/db-perf.test.ts b/__tests__/db-perf.test.ts index 1dc3f1eb6..230164548 100644 --- a/__tests__/db-perf.test.ts +++ b/__tests__/db-perf.test.ts @@ -10,13 +10,13 @@ * 4. `insertEdges` validates endpoints from the DB, not stale node cache. */ -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; import { DatabaseConnection } from '../src/db'; import { QueryBuilder } from '../src/db/queries'; -import { Node } from '../src/types'; +import { EdgeKind, Node } from '../src/types'; function makeNode(id: string, name = id): Node { return { @@ -174,6 +174,89 @@ describe('insertEdges endpoint validation', () => { }); }); +describe('unresolved reference chunking', () => { + let dir: string; + let db: DatabaseConnection; + let q: QueryBuilder; + + beforeEach(() => { + dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-unresolved-')); + db = DatabaseConnection.initialize(path.join(dir, 'test.db')); + q = new QueryBuilder(db.getDb()); + }); + + afterEach(() => { + db.close(); + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('gets unresolved references for more files than SQLite accepts in one IN list', () => { + const count = 1500; + const nodes = Array.from({ length: count }, (_, i) => + makeNode(`node-${i}`, `node${i}`) + ); + const filePaths = nodes.map((_, i) => `src/file-${i}.ts`); + + q.insertNodes(nodes.map((node, i) => ({ ...node, filePath: filePaths[i] }))); + q.insertUnresolvedRefsBatch( + nodes.map((node, i) => ({ + fromNodeId: node.id, + referenceName: `target${i}`, + referenceKind: 'calls' as EdgeKind, + line: 1, + column: 0, + filePath: filePaths[i], + language: 'typescript', + })) + ); + + const prepareSpy = vi.spyOn(db.getDb(), 'prepare'); + const refs = q.getUnresolvedReferencesByFiles(filePaths); + const selectCalls = prepareSpy.mock.calls + .map(([sql]) => String(sql)) + .filter((sql) => sql.includes('FROM unresolved_refs WHERE file_path IN')); + const maxPlaceholders = Math.max( + ...selectCalls.map((sql) => (sql.match(/\?/g) ?? []).length) + ); + + expect(refs).toHaveLength(count); + expect(refs.map((ref) => ref.filePath).sort()).toEqual([...filePaths].sort()); + expect(maxPlaceholders).toBeLessThanOrEqual(500); + }); + + it('deletes resolved references for more nodes than SQLite accepts in one IN list', () => { + const count = 1500; + const nodes = Array.from({ length: count }, (_, i) => + makeNode(`node-${i}`, `node${i}`) + ); + + q.insertNodes(nodes); + q.insertUnresolvedRefsBatch( + nodes.map((node, i) => ({ + fromNodeId: node.id, + referenceName: `target${i}`, + referenceKind: 'calls' as EdgeKind, + line: 1, + column: 0, + filePath: `src/file-${i}.ts`, + language: 'typescript', + })) + ); + + const prepareSpy = vi.spyOn(db.getDb(), 'prepare'); + q.deleteResolvedReferences(nodes.map((node) => node.id)); + const deleteCalls = prepareSpy.mock.calls + .map(([sql]) => String(sql)) + .filter((sql) => sql.includes('DELETE FROM unresolved_refs WHERE from_node_id IN')); + const maxPlaceholders = Math.max( + ...deleteCalls.map((sql) => (sql.match(/\?/g) ?? []).length) + ); + + expect(q.getUnresolvedReferences()).toEqual([]); + expect(maxPlaceholders).toBeLessThanOrEqual(500); + }); +}); + describe('runMaintenance', () => { let dir: string; let db: DatabaseConnection; diff --git a/src/db/queries.ts b/src/db/queries.ts index a0ac31eea..271e5a0dc 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -1572,10 +1572,17 @@ export class QueryBuilder { getUnresolvedReferencesByFiles(filePaths: string[]): UnresolvedReference[] { if (filePaths.length === 0) return []; - const placeholders = filePaths.map(() => '?').join(','); - const rows = this.db - .prepare(`SELECT * FROM unresolved_refs WHERE file_path IN (${placeholders})`) - .all(...filePaths) as UnresolvedRefRow[]; + const rows: UnresolvedRefRow[] = []; + const uniqueFilePaths = [...new Set(filePaths)]; + for (let i = 0; i < uniqueFilePaths.length; i += SQLITE_PARAM_CHUNK_SIZE) { + const chunk = uniqueFilePaths.slice(i, i + SQLITE_PARAM_CHUNK_SIZE); + const placeholders = chunk.map(() => '?').join(','); + rows.push( + ...(this.db + .prepare(`SELECT * FROM unresolved_refs WHERE file_path IN (${placeholders})`) + .all(...chunk) as UnresolvedRefRow[]) + ); + } return rows.map((row) => ({ fromNodeId: row.from_node_id, @@ -1601,8 +1608,14 @@ export class QueryBuilder { */ deleteResolvedReferences(fromNodeIds: string[]): void { if (fromNodeIds.length === 0) return; - const placeholders = fromNodeIds.map(() => '?').join(','); - this.db.prepare(`DELETE FROM unresolved_refs WHERE from_node_id IN (${placeholders})`).run(...fromNodeIds); + const uniqueNodeIds = [...new Set(fromNodeIds)]; + for (let i = 0; i < uniqueNodeIds.length; i += SQLITE_PARAM_CHUNK_SIZE) { + const chunk = uniqueNodeIds.slice(i, i + SQLITE_PARAM_CHUNK_SIZE); + const placeholders = chunk.map(() => '?').join(','); + this.db + .prepare(`DELETE FROM unresolved_refs WHERE from_node_id IN (${placeholders})`) + .run(...chunk); + } } /**