diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index aaacb65d..f8daa0ad 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -92,6 +92,7 @@ try { const INCREMENTAL_RUNS = 3; const QUERY_RUNS = 5; +const QUERY_WARMUP_RUNS = 3; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); function median(arr) { @@ -133,9 +134,14 @@ const buildStart = performance.now(); const buildResult = await buildGraph(root, { engine, incremental: false }); const buildTimeMs = performance.now() - buildStart; -const queryStart = performance.now(); -fnDepsData('buildGraph', dbPath); -const queryTimeMs = performance.now() - queryStart; +// Warmed median of QUERY_RUNS samples with `noTests: true` to match the +// methodology used by query-benchmark.ts and the per-target `queries.*Ms` +// block below (which calls `benchQuery`, also warmed). Earlier versions of +// this script measured a single cold call, which conflated steady-state +// query latency with NAPI/rusqlite/OS-page-cache init costs (~65ms on +// macOS) and inflated growth from test-fixture files pulled in by new +// native extractors. See #1113 for the methodology rationale. +const queryTimeMs = benchQuery(fnDepsData, 'buildGraph', dbPath, { depth: 3, noTests: true }); const stats = statsData(dbPath); const totalFiles = stats.files.total; @@ -191,6 +197,11 @@ const targets = workerTargets() || selectTargets(); console.error(` hub=${targets.hub}, leaf=${targets.leaf}`); function benchQuery(fn, ...args) { + // Warmup runs prime NAPI bindings, the rusqlite statement cache, and the + // OS page cache so the timed loop measures steady-state query latency + // rather than first-call init (~65ms on macOS). Each call site warms + // independently — methodology does not rely on call ordering elsewhere. + for (let i = 0; i < QUERY_WARMUP_RUNS; i++) fn(...args); const timings = []; for (let i = 0; i < QUERY_RUNS; i++) { const start = performance.now(); diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 1694c0d5..54b3f5ae 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -176,16 +176,19 @@ const SKIP_VERSIONS = new Set(['3.8.0']); * absolute delta 10.4ms exactly at the MIN_ABSOLUTE_DELTA floor. Exempt * this release; remove once 3.11.0+ data confirms stabilization. * - * - 3.10.0:Query time — cumulative effect of adding two native extractors - * (Solidity #1100 + R #1102) in quick succession. Neither tripped the - * threshold individually (Solidity PR's Query time stayed at 49ms, R PR - * showed no warning), but the combined +110% (49.6 → ~105ms) on the - * `fnDepsData('buildGraph', dbPath)` measurement reflects natural graph - * growth: ~1100 LoC of new extractor code + 9 fixture files added to the - * self-build benchmark expand `buildGraph`'s transitive callee count and - * DB row counts. Tracked in #1113 — exempt this release; remove once - * 3.11.0+ data captures the new steady-state and the per-language - * fixture footprint has been evaluated. + * - 3.10.0:Query time — methodology artifact, not a real regression. The + * metric was a single-shot cold call to `fnDepsData('buildGraph', dbPath)` + * with no warmup, no median, and `noTests: false` — so it captured ~65ms + * of NAPI/rusqlite/OS-page-cache init plus the cost of walking through + * fixture files added by new language extractors. Local v3.9.6 vs HEAD + * on the same corpus measured 78.8ms vs 67.5ms single-shot (HEAD faster), + * while the warmed `queries.fnDepsMs` in the same benchmark showed 4.0ms + * vs 2.8ms — confirming no underlying regression. Methodology fixed in + * #1113: queryTimeMs now uses 3 warmup runs + median of 5 with + * `noTests: true`, matching query-benchmark.ts hygiene. Exemption kept + * in place until 3.11.0+ data captures the new steady-state under the + * updated methodology (expected ~36ms native on this corpus); remove + * the entry then. * * - 3.10.0:fnDeps depth 5 — same cause as Query time above. Merging main * into #1102 added the Erlang extractor (#1103) on top of the existing