Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/parse-worker-hardening.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@stainless-code/codemap": minor
---

Add per-file parse timeouts with worker recycle during full and incremental indexing; failures log to errors.log and appear in the index summary.
28 changes: 14 additions & 14 deletions docs/plans/agent-surface-delivery.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

## Quick resume

| Next action | Detail |
| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
| **Review / merge** | PR 3 — index lock + error log (branch `feat/index-lock`) when open |
| **Start next** | **PR 4** — trace recipes (`call-path`, `symbol-neighborhood`) or **PR 5** — `affected-tests-recipe` (parallel with 4 after 3 merges) |
| **Do not start yet** | PR 6 (MCP trace tools) until PR 4 lands; PR 9 (eval harness) until PR 8 |
| Next action | Detail |
| -------------------- | ------------------------------------------------------------------------------------------ |
| **Review / merge** | PR 3 stack — parse worker hardening (`feat/parse-worker-hardening`) when open |
| **Start next** | **PR 4** — trace recipes or **PR 5** — `affected-tests-recipe` (parallel after PR 3 lands) |
| **Do not start yet** | PR 6 (MCP trace tools) until PR 4 lands; PR 9 (eval harness) until PR 8 |

Update the table below when a PR merges or a new branch opens.

Expand All @@ -35,15 +35,15 @@ Merge each PR to `main` directly. No long-lived integration branch (`feat/agent-

Max **3 parallel tracks** at once.

| PR | Plans | Status | Blocked by | Parallel with |
| ----- | ----------------------------------------------------------------------------------------------------------------------------- | ------- | --------------------- | --------------------------------- |
| **3** | [`index-lock-and-error-log`](./index-lock-and-error-log.md) → [`parse-worker-hardening`](./parse-worker-hardening.md) (stack) | open | PR 2 merged | 4, 5 |
| **4** | Recipe half of [`mcp-trace-explore-tools`](./mcp-trace-explore-tools.md) (`call-path`, `symbol-neighborhood` SQL + tests) | planned | — | 3, 5 |
| **5** | [`affected-tests-recipe`](./affected-tests-recipe.md) | planned | — | 3, 4 |
| **6** | MCP half of trace (`trace` / `explore` / `node` tools) + update instructions | planned | PR 1, PR 4 | — |
| **7** | [`field-qualified-search`](./field-qualified-search.md) | planned | PR 1 | 4, 5 if `mcp-server.ts` untouched |
| **8** | [`agents-init-mcp-wiring`](./agents-init-mcp-wiring.md) | planned | PR 1 | 3–5 |
| **9** | [`agent-eval-harness`](./agent-eval-harness.md) | planned | PR 1, PR 8, allowlist | **last P1** |
| PR | Plans | Status | Blocked by | Parallel with |
| ----- | ----------------------------------------------------------------------------------------------------------------------------- | ------- | --------------------------------------------------------------------------------------------- | --------------------------------- |
| **3** | [`index-lock-and-error-log`](./index-lock-and-error-log.md) → [`parse-worker-hardening`](./parse-worker-hardening.md) (stack) | open | [#129](https://github.com/stainless-code/codemap/pull/129) merged; worker hardening in flight | 4, 5 |
| **4** | Recipe half of [`mcp-trace-explore-tools`](./mcp-trace-explore-tools.md) (`call-path`, `symbol-neighborhood` SQL + tests) | planned | — | 3, 5 |
| **5** | [`affected-tests-recipe`](./affected-tests-recipe.md) | planned | — | 3, 4 |
| **6** | MCP half of trace (`trace` / `explore` / `node` tools) + update instructions | planned | PR 1, PR 4 | — |
| **7** | [`field-qualified-search`](./field-qualified-search.md) | planned | PR 1 | 4, 5 if `mcp-server.ts` untouched |
| **8** | [`agents-init-mcp-wiring`](./agents-init-mcp-wiring.md) | planned | PR 1 | 3–5 |
| **9** | [`agent-eval-harness`](./agent-eval-harness.md) | planned | PR 1, PR 8, allowlist | **last P1** |

**Parallelization constraints**

Expand Down
192 changes: 47 additions & 145 deletions src/application/index-engine.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import { spawnSync } from "node:child_process";
import { readFileSync, statSync, writeFileSync } from "node:fs";
import { readFileSync, writeFileSync } from "node:fs";
import { extname, join } from "node:path";

import { LANG_MAP } from "../constants";
import { extractCssData } from "../css-parser";
import {
openDb,
closeDb,
Expand Down Expand Up @@ -43,14 +42,11 @@ import {
META_FTS5_ENABLED_KEY,
SCHEMA_VERSION,
} from "../db";
import type { CodemapDatabase, DynamicImportRow, FileRow } from "../db";
import { countLines } from "../extractors/offsets";
import type { CodemapDatabase, DynamicImportRow } from "../db";
import { filterRowsByChangedFiles } from "../git-changed";
import { globSync } from "../glob-sync";
import { hashContent } from "../hash";
import { extractMarkers, extractSuppressions } from "../markers";
import type { ParsedFile } from "../parse-worker";
import { extractFileData } from "../parser";
import { resolveImports, resolveModuleSpecifier } from "../resolver";
import {
getExcludeDirNames,
Expand Down Expand Up @@ -80,29 +76,6 @@ import type {

export const VALID_EXTENSIONS = new Set(Object.keys(LANG_MAP));

// Lower-case extensions (with leading dot) that `fileCategory` classifies as "ts":
// the TypeScript and JavaScript module flavours.
const TS_EXTENSIONS = new Set([
".ts",
".tsx",
".mts",
".cts",
".js",
".jsx",
".mjs",
".cjs",
]);
// Lower-case extensions that `fileCategory` classifies as "css".
const CSS_EXTENSIONS = new Set([".css"]);

/**
 * Looks up the language label for a file extension in `LANG_MAP`.
 *
 * The extension is lower-cased before the lookup; extensions without an
 * entry fall back to `"text"`.
 */
function langFromExt(ext: string): string {
  const normalized = ext.toLowerCase();
  const language = LANG_MAP[normalized];
  return language ?? "text";
}

/**
 * Buckets a path into a coarse category based on its lower-cased extension:
 * `"ts"` when the extension is in `TS_EXTENSIONS`, `"css"` when it is in
 * `CSS_EXTENSIONS`, and `"text"` for everything else.
 */
function fileCategory(path: string): "ts" | "css" | "text" {
  const extension = extname(path).toLowerCase();
  if (TS_EXTENSIONS.has(extension)) {
    return "ts";
  }
  return CSS_EXTENSIONS.has(extension) ? "css" : "text";
}

function persistTierSubstrate(
db: CodemapDatabase,
relPath: string,
Expand Down Expand Up @@ -284,6 +257,14 @@ function reportParseError(relPath: string, reason: string): void {
}
}

/**
 * Counts the parsed results that carry a failure marker, i.e. entries whose
 * `error` or `parseError` field is truthy.
 *
 * @param results Parsed files to inspect; the array is not modified.
 * @returns Number of entries flagged as failed.
 */
function countParseFailures(results: readonly ParsedFile[]): number {
  return results.reduce(
    (total, entry) => (entry.error || entry.parseError ? total + 1 : total),
    0,
  );
}

function insertParsedResults(
db: CodemapDatabase,
results: ParsedFile[],
Expand Down Expand Up @@ -480,11 +461,13 @@ export async function indexFiles(

let indexed = 0;
let skipped = 0;
let parseFailures = 0;

if (fullRebuild) {
const parseStart = performance.now();
const results = await parseFilesParallel(filePaths);
parseMs = performance.now() - parseStart;
parseFailures = countParseFailures(results);
// relPath is always POSIX-normalized ASCII (toRelativePosix upstream); byte order suffices
// for architecture.md § Sorted inserts' B-tree locality and skips the Intl-collator tax.
results.sort((a, b) =>
Expand All @@ -504,129 +487,48 @@ export async function indexFiles(
const existingHashes = options?.existingHashes ?? getAllFileHashes(db);
const root = getProjectRoot();
const sourceCache = options?.sourceCache;
const toParse: string[] = [];
const readFailed: string[] = [];

for (const relPath of filePaths) {
const absPath = join(root, relPath);
let hash: string;
const cached = sourceCache?.get(relPath);
if (cached !== undefined) {
hash = cached.hash;
} else {
try {
hash = hashContent(readFileSync(absPath, "utf-8"));
} catch {
readFailed.push(relPath);
continue;
}
}

if (existingHashes.get(relPath) === hash) {
skipped++;
} else {
toParse.push(relPath);
}
}

const parseStart = performance.now();
const parsedResults = await parseFilesParallel(toParse);
parseMs = performance.now() - parseStart;
parseFailures = countParseFailures(parsedResults);

const transaction = db.transaction(() => {
const deleted = options?.deletedPaths ?? [];
if (deleted.length > 0) {
deleteFilesFromIndex(db, deleted, quiet);
}
for (const relPath of filePaths) {
const absPath = join(root, relPath);
let source: string;
let hash: string;
// `--files` targeted reindex + cache-less callers fall through to read+hash.
const cached = sourceCache?.get(relPath);
if (cached !== undefined) {
source = cached.source;
hash = cached.hash;
} else {
try {
source = readFileSync(absPath, "utf-8");
} catch {
deleteFileData(db, relPath);
continue;
}
hash = hashContent(source);
}

if (existingHashes.get(relPath) === hash) {
skipped++;
continue;
}

for (const relPath of readFailed) {
deleteFileData(db, relPath);

const stat = statSync(absPath);
const lineCount = countLines(source);

const fileRow: FileRow = {
path: relPath,
content_hash: hash,
size: stat.size,
line_count: lineCount,
language: langFromExt(extname(relPath)),
last_modified: Math.floor(stat.mtimeMs),
indexed_at: Date.now(),
};
insertFile(db, fileRow);

if (getFts5Enabled()) {
upsertSourceFts(db, relPath, source);
}

try {
const category = fileCategory(relPath);

if (category === "text") {
const markers = extractMarkers(source, relPath);
if (markers.length) insertMarkers(db, markers);
} else if (category === "css") {
const cssData = extractCssData(absPath, source, relPath);
if (cssData.variables.length) {
insertCssVariables(db, cssData.variables);
}
if (cssData.classes.length) insertCssClasses(db, cssData.classes);
if (cssData.keyframes.length) {
insertCssKeyframes(db, cssData.keyframes);
}
if (cssData.markers.length) insertMarkers(db, cssData.markers);
if (cssData.importSources.length) {
insertImports(
db,
cssData.importSources.map((importSource) => ({
file_path: relPath,
source: importSource,
resolved_path: null,
specifiers: "[]",
is_type_only: 0,
line_number: 0,
})),
);
}
} else {
const data = extractFileData(absPath, source, relPath);
if (data.symbols.length) insertSymbols(db, data.symbols);
const deps = resolveImports(absPath, data.imports, indexedPaths);
insertImportsWithSpecifiers(
db,
data.imports,
data.importSpecifiers,
);
if (data.scopes.length) insertScopes(db, data.scopes);
if (data.references.length) insertReferences(db, data.references);
if (data.fileMetrics) insertFileMetrics(db, [data.fileMetrics]);
if (data.functionParams.length)
insertFunctionParams(db, data.functionParams);
if (data.runtimeMarkers.length)
insertRuntimeMarkers(db, data.runtimeMarkers);
if (data.testSuites.length) insertTestSuites(db, data.testSuites);
if (deps.length) insertDependencies(db, deps);
if (data.exports.length) insertExports(db, data.exports);
if (data.components.length) insertComponents(db, data.components);
if (data.markers.length) insertMarkers(db, data.markers);
if (data.typeMembers.length)
insertTypeMembers(db, data.typeMembers);
if (data.calls.length) insertCalls(db, data.calls);
persistDynamicImports(db, absPath, data.dynamicImports);
persistTierSubstrate(db, relPath, data);
if (data.hasSideEffects) {
db.run("UPDATE files SET has_side_effects = 1 WHERE path = ?", [
relPath,
]);
}
}
// Category-agnostic: one regex pass over raw source, no AST needed.
const suppressions = extractSuppressions(source, relPath);
if (suppressions.length) insertSuppressions(db, suppressions);
} catch (err) {
reportParseError(
relPath,
err instanceof Error ? err.message : String(err),
);
}

indexed++;
}
for (const parsed of parsedResults) {
deleteFileData(db, parsed.relPath);
}
indexed += insertParsedResults(db, parsedResults, indexedPaths);
});

transaction();
Expand Down Expand Up @@ -718,7 +620,7 @@ export async function indexFiles(
`\n Codemap ${fullRebuild ? "(full rebuild)" : "(incremental)"}`,
);
console.log(
` ${indexed} files indexed, ${skipped} unchanged, ${elapsed}ms`,
` ${indexed} files indexed, ${skipped} unchanged${parseFailures > 0 ? `, ${parseFailures} parse failures` : ""}, ${elapsed}ms`,
);
console.log(` ───────────────────────────────────`);
for (const [key, value] of Object.entries(stats)) {
Expand Down
33 changes: 33 additions & 0 deletions src/application/parse-timeout.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { describe, expect, test } from "bun:test";

import {
DEFAULT_PARSE_TIMEOUT_MS,
MAX_PARSE_TIMEOUT_MS,
computeParseTimeoutMs,
parseParseTimeoutMsOverride,
} from "./parse-timeout";

describe("parseParseTimeoutMsOverride", () => {
  test("accepts positive integers", () => {
    expect(parseParseTimeoutMsOverride("5000")).toBe(5000);
    // Leading zeros are still digits-only and parse to the same integer.
    expect(parseParseTimeoutMsOverride("007")).toBe(7);
  });

  test("treats unset or empty values as no override", () => {
    expect(parseParseTimeoutMsOverride(undefined)).toBeNull();
    expect(parseParseTimeoutMsOverride("")).toBeNull();
  });

  test("rejects malformed values", () => {
    expect(parseParseTimeoutMsOverride("0")).toBeNull();
    expect(parseParseTimeoutMsOverride("abc")).toBeNull();
    // Signs, decimals and surrounding whitespace are not digits-only.
    expect(parseParseTimeoutMsOverride("-1")).toBeNull();
    expect(parseParseTimeoutMsOverride("1.5")).toBeNull();
    expect(parseParseTimeoutMsOverride(" 5000")).toBeNull();
    // Digits-only but beyond Number.MAX_SAFE_INTEGER.
    expect(parseParseTimeoutMsOverride("9007199254740993")).toBeNull();
  });
});

describe("computeParseTimeoutMs", () => {
  // Passing `undefined` for `env` would trigger the default parameter and read
  // the real CODEMAP_PARSE_TIMEOUT_MS from the environment, making these tests
  // depend on the machine they run on. An empty string is treated as "no
  // override" without touching process.env.
  const NO_OVERRIDE = "";

  test("uses env override when set", () => {
    expect(computeParseTimeoutMs(1_000_000, "15000")).toBe(15_000);
  });

  test("scales with file size up to cap", () => {
    expect(computeParseTimeoutMs(0, NO_OVERRIDE)).toBe(
      DEFAULT_PARSE_TIMEOUT_MS,
    );
    expect(computeParseTimeoutMs(5_000_000, NO_OVERRIDE)).toBe(10_000 + 100);
    expect(computeParseTimeoutMs(2_000_000_000, NO_OVERRIDE)).toBe(
      MAX_PARSE_TIMEOUT_MS,
    );
  });

  test("clamps negative sizes to the floor", () => {
    expect(computeParseTimeoutMs(-1, NO_OVERRIDE)).toBe(
      DEFAULT_PARSE_TIMEOUT_MS,
    );
  });
});
50 changes: 50 additions & 0 deletions src/application/parse-timeout.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/** Default per-file parse timeout floor (ms). */
export const DEFAULT_PARSE_TIMEOUT_MS = 10_000;

/** Hard cap on the size-scaled per-file parse timeout (ms). */
export const MAX_PARSE_TIMEOUT_MS = 30_000;

/** +1 ms per this many bytes between floor and cap. */
export const PARSE_TIMEOUT_BYTES_PER_MS = 50_000;

/** Override values must be digits only: no sign, decimal point, exponent, or whitespace. */
const PARSE_TIMEOUT_ENV_RE = /^\d+$/;

/**
 * Parses a raw timeout override (typically an environment variable value).
 *
 * @param env Raw string value, or `undefined` when unset.
 * @returns The override in milliseconds, or `null` when the value is unset,
 *   empty, not digits-only, zero, or not a safe integer.
 */
export function parseParseTimeoutMsOverride(
  env: string | undefined,
): number | null {
  if (env === undefined || env === "") return null;
  if (!PARSE_TIMEOUT_ENV_RE.test(env)) return null;
  const parsed = Number(env);
  if (!Number.isSafeInteger(parsed) || parsed < 1) return null;
  return parsed;
}

/**
 * Per-file parse budget: `CODEMAP_PARSE_TIMEOUT_MS` when set, else
 * 10s + size scaling capped at 30s.
 *
 * @param fileSizeBytes Size of the file to parse. Negative and `NaN` values
 *   are treated as 0 so the result is always a usable timeout.
 * @param env Raw override value. Defaults to `process.env.CODEMAP_PARSE_TIMEOUT_MS`;
 *   note that passing `undefined` explicitly also selects that default.
 * @returns Timeout in milliseconds. A valid override is returned as-is and is
 *   not subject to the cap.
 */
export function computeParseTimeoutMs(
  fileSizeBytes: number,
  env: string | undefined = process.env.CODEMAP_PARSE_TIMEOUT_MS,
): number {
  const override = parseParseTimeoutMsOverride(env);
  if (override !== null) return override;
  // NaN would otherwise flow through max/floor/min and come back as a NaN timeout.
  const sizeBytes = Number.isNaN(fileSizeBytes)
    ? 0
    : Math.max(0, fileSizeBytes);
  const scaled =
    DEFAULT_PARSE_TIMEOUT_MS +
    Math.floor(sizeBytes / PARSE_TIMEOUT_BYTES_PER_MS);
  return Math.min(MAX_PARSE_TIMEOUT_MS, scaled);
}

/**
 * Error describing a parse that exceeded its time budget.
 *
 * The message has the form `parse timed out after <timeoutMs>ms`, and the
 * budget is also exposed on the instance as `timeoutMs`.
 */
export class ParseTimeoutError extends Error {
  /** The budget, in milliseconds, that was exceeded. */
  readonly timeoutMs: number;

  constructor(timeoutMs: number) {
    const message = `parse timed out after ${timeoutMs}ms`;
    super(message);
    this.name = "ParseTimeoutError";
    this.timeoutMs = timeoutMs;
  }
}

/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `ms` milliseconds fires.
 */
export function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
Loading
Loading