From 12294cafb6523f56c064aa6845d66bf9acab5a88 Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Wed, 4 Mar 2026 10:46:44 +0530 Subject: [PATCH 1/2] fix looping with JS/TS --- codeflash/languages/javascript/mocha_runner.py | 4 +++- codeflash/languages/javascript/support.py | 13 +++++++++---- codeflash/languages/javascript/test_runner.py | 6 +++--- codeflash/languages/javascript/vitest_runner.py | 6 ++++-- codeflash/models/models.py | 8 +++++++- tests/languages/javascript/test_support_dispatch.py | 4 ++-- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/codeflash/languages/javascript/mocha_runner.py b/codeflash/languages/javascript/mocha_runner.py index 4e1644011..b742b4e48 100644 --- a/codeflash/languages/javascript/mocha_runner.py +++ b/codeflash/languages/javascript/mocha_runner.py @@ -536,7 +536,9 @@ def run_mocha_benchmarking_tests( ) mocha_env["CODEFLASH_TEST_MODULE"] = test_module_path - total_timeout = max(120, (target_duration_ms // 1000) + 60, timeout or 120) + # Total timeout: allow headroom for Mocha startup. Behavioral tests use 600s; + # benchmarking should be comparably generous. + total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) logger.debug(f"Running Mocha benchmarking tests: {' '.join(mocha_cmd)}") logger.debug( diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py index 1d1387e16..a0b77a5db 100644 --- a/codeflash/languages/javascript/support.py +++ b/codeflash/languages/javascript/support.py @@ -2372,9 +2372,14 @@ def run_behavioral_tests( candidate_index=candidate_index, ) - # JavaScript/TypeScript benchmarking uses high max_loops like Python (100,000) - # The actual loop count is limited by target_duration_seconds, not max_loops - JS_BENCHMARKING_MAX_LOOPS = 100_000 + # Max iterations per capturePerf call site. Each iteration writes a timing + # marker to stdout (~200 bytes). 
With N perf test cases this produces + # N * max_loops markers flowing through Vitest's fork IPC pipe + Python + # subprocess pipe. 100k caused 20-200 MB of stdout for micro-functions, + # creating pipe backpressure that inflated wall-clock time past the + # subprocess timeout. 5000 keeps stdout under 10 MB while still providing + # enough data points for stable timing measurements. + JS_BENCHMARKING_MAX_LOOPS = 5_000 def run_benchmarking_tests( self, @@ -2384,7 +2389,7 @@ def run_benchmarking_tests( timeout: int | None = None, project_root: Path | None = None, min_loops: int = 5, - max_loops: int = 100_000, + max_loops: int = 5_000, target_duration_seconds: float = 10.0, test_framework: str | None = None, ) -> tuple[Path, Any]: diff --git a/codeflash/languages/javascript/test_runner.py b/codeflash/languages/javascript/test_runner.py index f60232ab6..68fd1e3f7 100644 --- a/codeflash/languages/javascript/test_runner.py +++ b/codeflash/languages/javascript/test_runner.py @@ -1025,9 +1025,9 @@ def run_jest_benchmarking_tests( if "--max-old-space-size" not in existing_node_options: jest_env["NODE_OPTIONS"] = f"{existing_node_options} --max-old-space-size=4096".strip() - # Total timeout for the entire benchmark run (longer than single-loop timeout) - # Account for startup overhead + target duration + buffer - total_timeout = max(120, (target_duration_ms // 1000) + 60, timeout or 120) + # Total timeout: allow headroom for Jest startup and TS compilation. + # Behavioral tests use 600s; benchmarking should be comparably generous. 
+ total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) logger.debug(f"Running Jest benchmarking tests with in-process loop runner: {' '.join(jest_cmd)}") logger.debug( diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py index 67b307ba4..fa80b2c73 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -616,8 +616,10 @@ def run_vitest_benchmarking_tests( vitest_env["CODEFLASH_TEST_MODULE"] = test_module_path logger.debug(f"[VITEST-BENCH] Set CODEFLASH_TEST_MODULE={test_module_path}") - # Total timeout for the entire benchmark run - total_timeout = max(120, (target_duration_ms // 1000) + 60, timeout or 120) + # Total timeout: allow headroom for Vitest startup (TS compilation, module resolution) + # which can take 30-60s in monorepos. Behavioral tests use 600s; benchmarking + # should be at least as generous. + total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) logger.debug(f"[VITEST-BENCH] Running Vitest benchmarking tests: {' '.join(vitest_cmd)}") logger.debug( diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 10e0f0252..9dacad6bf 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -773,7 +773,13 @@ def get_src_code(self, test_path: Path) -> Optional[str]: test_src = test_path.read_text(encoding="utf-8") module_node = cst.parse_module(test_src) except Exception: - return None + # libcst can't parse non-Python files (JS/TS) — return a descriptive string + # so the code repair API receives a non-None test_src_code. 
+ return ( + f"// Test: {self.test_function_name}\n" + f"// File: {test_path.name}\n" + f"// Testing function: {self.function_getting_tested}" + ) if self.test_class_name: for stmt in module_node.body: diff --git a/tests/languages/javascript/test_support_dispatch.py b/tests/languages/javascript/test_support_dispatch.py index 46f08e913..3eee21017 100644 --- a/tests/languages/javascript/test_support_dispatch.py +++ b/tests/languages/javascript/test_support_dispatch.py @@ -182,9 +182,9 @@ def test_passes_loop_parameters(self, mock_vitest_runner: MagicMock, js_support: call_kwargs = mock_vitest_runner.call_args.kwargs assert call_kwargs["min_loops"] == 10 - # JS/TS always uses high max_loops (100_000) regardless of passed value + # JS/TS uses JS_BENCHMARKING_MAX_LOOPS (5_000) regardless of passed value # Actual loop count is limited by target_duration, not max_loops - assert call_kwargs["max_loops"] == 100_000 + assert call_kwargs["max_loops"] == 5_000 assert call_kwargs["target_duration_ms"] == 5000 From f0bf7dcdcb1e0c26525f261e6d5e989c0f783a82 Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Wed, 4 Mar 2026 11:09:39 +0530 Subject: [PATCH 2/2] reduce time --- codeflash/languages/javascript/mocha_runner.py | 6 +++--- codeflash/languages/javascript/support.py | 16 +++++++--------- codeflash/languages/javascript/test_runner.py | 6 +++--- codeflash/languages/javascript/vitest_runner.py | 8 ++++---- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/codeflash/languages/javascript/mocha_runner.py b/codeflash/languages/javascript/mocha_runner.py index b742b4e48..911151d43 100644 --- a/codeflash/languages/javascript/mocha_runner.py +++ b/codeflash/languages/javascript/mocha_runner.py @@ -536,9 +536,9 @@ def run_mocha_benchmarking_tests( ) mocha_env["CODEFLASH_TEST_MODULE"] = test_module_path - # Total timeout: allow headroom for Mocha startup. Behavioral tests use 600s; - # benchmarking should be comparably generous. 
- total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) + # Subprocess timeout: target_duration + 120s headroom for Mocha startup. + # capturePerf's time budget governs actual looping. + total_timeout = max(120, (target_duration_ms // 1000) + 120) logger.debug(f"Running Mocha benchmarking tests: {' '.join(mocha_cmd)}") logger.debug( diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py index a0b77a5db..69fd5ac96 100644 --- a/codeflash/languages/javascript/support.py +++ b/codeflash/languages/javascript/support.py @@ -2372,14 +2372,12 @@ def run_behavioral_tests( candidate_index=candidate_index, ) - # Max iterations per capturePerf call site. Each iteration writes a timing - # marker to stdout (~200 bytes). With N perf test cases this produces - # N * max_loops markers flowing through Vitest's fork IPC pipe + Python - # subprocess pipe. 100k caused 20-200 MB of stdout for micro-functions, - # creating pipe backpressure that inflated wall-clock time past the - # subprocess timeout. 5000 keeps stdout under 10 MB while still providing - # enough data points for stable timing measurements. - JS_BENCHMARKING_MAX_LOOPS = 5_000 + # Max iterations per capturePerf call site. Each iteration writes a ~200-byte + # timing marker to stdout. The actual loop count is governed by the 10s time + # budget (CODEFLASH_PERF_TARGET_DURATION_MS) — this constant is just a ceiling. + # Python uses max_loops=250; JS iterations are lighter (no pytest overhead) so + # 1000 gives comparable statistical power while keeping stdout to ~200 KB per call site. 
+ JS_BENCHMARKING_MAX_LOOPS = 1_000 def run_benchmarking_tests( self, @@ -2389,7 +2387,7 @@ def run_benchmarking_tests( timeout: int | None = None, project_root: Path | None = None, min_loops: int = 5, - max_loops: int = 5_000, + max_loops: int = 1_000, target_duration_seconds: float = 10.0, test_framework: str | None = None, ) -> tuple[Path, Any]: diff --git a/codeflash/languages/javascript/test_runner.py b/codeflash/languages/javascript/test_runner.py index 68fd1e3f7..d19cd7a1a 100644 --- a/codeflash/languages/javascript/test_runner.py +++ b/codeflash/languages/javascript/test_runner.py @@ -1025,9 +1025,9 @@ def run_jest_benchmarking_tests( if "--max-old-space-size" not in existing_node_options: jest_env["NODE_OPTIONS"] = f"{existing_node_options} --max-old-space-size=4096".strip() - # Total timeout: allow headroom for Jest startup and TS compilation. - # Behavioral tests use 600s; benchmarking should be comparably generous. - total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) + # Subprocess timeout: target_duration + 120s headroom for Jest startup + # and TS compilation. capturePerf's time budget governs actual looping. + total_timeout = max(120, (target_duration_ms // 1000) + 120) logger.debug(f"Running Jest benchmarking tests with in-process loop runner: {' '.join(jest_cmd)}") logger.debug( diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py index fa80b2c73..ef8e5bc11 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -616,10 +616,10 @@ def run_vitest_benchmarking_tests( vitest_env["CODEFLASH_TEST_MODULE"] = test_module_path logger.debug(f"[VITEST-BENCH] Set CODEFLASH_TEST_MODULE={test_module_path}") - # Total timeout: allow headroom for Vitest startup (TS compilation, module resolution) - # which can take 30-60s in monorepos. Behavioral tests use 600s; benchmarking - # should be at least as generous. 
- total_timeout = max(120, (target_duration_ms // 1000) + 120, (timeout or 60) * 5) + # Subprocess timeout: target_duration + 120s headroom for Vitest startup + # (TS compilation, module resolution). The capturePerf time budget (10s default) + # governs actual looping; this is just a safety net for process-level hangs. + total_timeout = max(120, (target_duration_ms // 1000) + 120) logger.debug(f"[VITEST-BENCH] Running Vitest benchmarking tests: {' '.join(vitest_cmd)}") logger.debug(