From be5b4de6cdb2ad4bcc9d3c255d5098bb1397f62f Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Fri, 6 Feb 2026 03:44:34 +0530 Subject: [PATCH 1/4] ESM config compatibility for vitest --- codeflash/languages/javascript/instrument.py | 112 ++++++++++++++++++ .../languages/javascript/vitest_runner.py | 4 +- codeflash/verification/verifier.py | 7 ++ 3 files changed, 121 insertions(+), 2 deletions(-) diff --git a/codeflash/languages/javascript/instrument.py b/codeflash/languages/javascript/instrument.py index 30e7fff7a..f76cf7967 100644 --- a/codeflash/languages/javascript/instrument.py +++ b/codeflash/languages/javascript/instrument.py @@ -901,6 +901,118 @@ def is_relevant_import(module_path: str) -> bool: return test_code +def fix_import_path_for_test_location( + test_code: str, + source_file_path: Path, + test_file_path: Path, + module_root: Path, +) -> str: + """Fix import paths in generated test code to be relative to test file location. + + The AI may generate tests with import paths that are relative to the module root + (e.g., 'apps/web/app/file') instead of relative to where the test file is located + (e.g., '../../app/file'). This function fixes such imports. + + Args: + test_code: The generated test code. + source_file_path: Absolute path to the source file being tested. + test_file_path: Absolute path to where the test file will be written. + module_root: Root directory of the module/project. + + Returns: + Test code with corrected import paths. + + """ + import os + + # Calculate the correct relative import path from test file to source file + test_dir = test_file_path.parent + try: + correct_rel_path = os.path.relpath(source_file_path, test_dir) + correct_rel_path = correct_rel_path.replace("\\", "/") + # Remove file extension for JS/TS imports + for ext in [".tsx", ".ts", ".jsx", ".js", ".mjs", ".cjs"]: + if correct_rel_path.endswith(ext): + correct_rel_path = correct_rel_path[: -len(ext)] + break + # Ensure it starts with ./ or ../ + if not correct_rel_path.startswith("."): + correct_rel_path = "./" + correct_rel_path + except ValueError: + # Can't compute relative path (different drives on Windows) + return test_code + + # Try to compute what incorrect path the AI might have generated + # The AI often uses module_root-relative paths like 'apps/web/app/...' 
+ try: + source_rel_to_module = os.path.relpath(source_file_path, module_root) + source_rel_to_module = source_rel_to_module.replace("\\", "/") + # Remove extension + for ext in [".tsx", ".ts", ".jsx", ".js", ".mjs", ".cjs"]: + if source_rel_to_module.endswith(ext): + source_rel_to_module = source_rel_to_module[: -len(ext)] + break + except ValueError: + return test_code + + # Also check for project root-relative paths (including module_root in path) + try: + project_root = module_root.parent if module_root.name in ["src", "lib", "app", "web", "apps"] else module_root + source_rel_to_project = os.path.relpath(source_file_path, project_root) + source_rel_to_project = source_rel_to_project.replace("\\", "/") + for ext in [".tsx", ".ts", ".jsx", ".js", ".mjs", ".cjs"]: + if source_rel_to_project.endswith(ext): + source_rel_to_project = source_rel_to_project[: -len(ext)] + break + except ValueError: + source_rel_to_project = None + + # Source file name (for matching module paths that end with the file name) + source_name = source_file_path.stem + + # Patterns to find import statements + # ESM: import { func } from 'path' or import func from 'path' + esm_import_pattern = re.compile(r"(import\s+(?:{[^}]+}|\w+)\s+from\s+['\"])([^'\"]+)(['\"])") + # CommonJS: const { func } = require('path') or const func = require('path') + cjs_require_pattern = re.compile(r"((?:const|let|var)\s+(?:{[^}]+}|\w+)\s*=\s*require\s*\(\s*['\"])([^'\"]+)(['\"])") + + def should_fix_path(import_path: str) -> bool: + """Check if this import path looks like it should point to our source file.""" + # Skip relative imports that already look correct + if import_path.startswith("./") or import_path.startswith("../"): + return False + # Skip package imports (no path separators or start with @) + if "/" not in import_path and "\\" not in import_path: + return False + if import_path.startswith("@") and "/" in import_path: + # Could be an alias like @/utils - skip these + return False + # Check if it looks like it points to our source file + if import_path == source_rel_to_module: + return True + if source_rel_to_project and import_path == source_rel_to_project: + return True + if import_path.endswith(source_name) or import_path.endswith("/" + source_name): + return True + return False + + def fix_import(match: re.Match) -> str: + """Replace incorrect import path with correct relative path.""" + prefix = match.group(1) + import_path = match.group(2) + suffix = match.group(3) + + if should_fix_path(import_path): + logger.debug(f"Fixing import path: {import_path} -> {correct_rel_path}") + return f"{prefix}{correct_rel_path}{suffix}" + return match.group(0) + + test_code = esm_import_pattern.sub(fix_import, test_code) + test_code = cjs_require_pattern.sub(fix_import, test_code) + + return test_code + + def get_instrumented_test_path(original_path: Path, mode: str) -> Path: """Generate path for instrumented test file. 
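Note: a minimal usage sketch of the fix_import_path_for_test_location helper introduced in the hunk above. Only the module path, function name, and signature are taken from this patch; the repository layout, file paths, and the formatDate import are illustrative assumptions (the helper does pure path arithmetic, so none of the files need to exist on disk):

    from pathlib import Path

    from codeflash.languages.javascript.instrument import fix_import_path_for_test_location

    # Generated test imports the source module relative to the module root ...
    generated = "import { formatDate } from 'app/utils/format';\n"

    fixed = fix_import_path_for_test_location(
        test_code=generated,
        source_file_path=Path("/repo/apps/web/app/utils/format.ts"),
        test_file_path=Path("/repo/apps/web/tests/unit/format.test.ts"),
        module_root=Path("/repo/apps/web"),
    )
    # ... and comes back rewritten relative to the test file location:
    # import { formatDate } from '../../app/utils/format';
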
diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py index 65ade975d..e6ce972e7 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -192,7 +192,7 @@ def _ensure_codeflash_vitest_config(project_root: Path) -> Path | None: logger.debug("Detected vitest workspace configuration - skipping custom config") return None - codeflash_config_path = project_root / "codeflash.vitest.config.js" + codeflash_config_path = project_root / "codeflash.vitest.config.mjs" # If already exists, use it if codeflash_config_path.exists(): @@ -281,7 +281,7 @@ def _build_vitest_behavioral_command( # For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts), # we need to create a custom config that allows all test patterns. - # This is done by creating a codeflash.vitest.config.js file. + # This is done by creating a codeflash.vitest.config.mjs file. if project_root: codeflash_vitest_config = _ensure_codeflash_vitest_config(project_root) if codeflash_vitest_config: diff --git a/codeflash/verification/verifier.py b/codeflash/verification/verifier.py index f351bd262..4d49c94c2 100644 --- a/codeflash/verification/verifier.py +++ b/codeflash/verification/verifier.py @@ -66,6 +66,7 @@ def generate_tests( if is_javascript(): from codeflash.languages.javascript.instrument import ( TestingMode, + fix_import_path_for_test_location, instrument_generated_js_test, validate_and_fix_import_style, ) @@ -76,6 +77,12 @@ def generate_tests( source_file = Path(function_to_optimize.file_path) + # Fix import paths to be relative to test file location + # AI may generate imports like 'apps/web/app/file' instead of '../../app/file' + generated_test_source = fix_import_path_for_test_location( + generated_test_source, source_file, test_path, module_path + ) + # Validate and fix import styles (default vs named exports) generated_test_source = validate_and_fix_import_style( generated_test_source, source_file, function_to_optimize.function_name From c233a371a975f92f5632f988aa841d14a61f4a35 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 6 Feb 2026 07:45:20 +0000 Subject: [PATCH 2/4] style: auto-fix linting issues --- codeflash/languages/javascript/instrument.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/codeflash/languages/javascript/instrument.py b/codeflash/languages/javascript/instrument.py index f76cf7967..028209326 100644 --- a/codeflash/languages/javascript/instrument.py +++ b/codeflash/languages/javascript/instrument.py @@ -902,10 +902,7 @@ def is_relevant_import(module_path: str) -> bool: def fix_import_path_for_test_location( - test_code: str, - source_file_path: Path, - test_file_path: Path, - module_root: Path, + test_code: str, source_file_path: Path, test_file_path: Path, module_root: Path ) -> str: """Fix import paths in generated test code to be relative to test file location. 
@@ -974,12 +971,14 @@ def fix_import_path_for_test_location( # ESM: import { func } from 'path' or import func from 'path' esm_import_pattern = re.compile(r"(import\s+(?:{[^}]+}|\w+)\s+from\s+['\"])([^'\"]+)(['\"])") # CommonJS: const { func } = require('path') or const func = require('path') - cjs_require_pattern = re.compile(r"((?:const|let|var)\s+(?:{[^}]+}|\w+)\s*=\s*require\s*\(\s*['\"])([^'\"]+)(['\"])") + cjs_require_pattern = re.compile( + r"((?:const|let|var)\s+(?:{[^}]+}|\w+)\s*=\s*require\s*\(\s*['\"])([^'\"]+)(['\"])" + ) def should_fix_path(import_path: str) -> bool: """Check if this import path looks like it should point to our source file.""" # Skip relative imports that already look correct - if import_path.startswith("./") or import_path.startswith("../"): + if import_path.startswith(("./", "../")): return False # Skip package imports (no path separators or start with @) if "/" not in import_path and "\\" not in import_path: @@ -992,11 +991,11 @@ def should_fix_path(import_path: str) -> bool: return True if source_rel_to_project and import_path == source_rel_to_project: return True - if import_path.endswith(source_name) or import_path.endswith("/" + source_name): + if import_path.endswith((source_name, "/" + source_name)): return True return False - def fix_import(match: re.Match) -> str: + def fix_import(match: re.Match[str]) -> str: """Replace incorrect import path with correct relative path.""" prefix = match.group(1) import_path = match.group(2) @@ -1008,9 +1007,7 @@ def fix_import(match: re.Match) -> str: return match.group(0) test_code = esm_import_pattern.sub(fix_import, test_code) - test_code = cjs_require_pattern.sub(fix_import, test_code) - - return test_code + return cjs_require_pattern.sub(fix_import, test_code) def get_instrumented_test_path(original_path: Path, mode: str) -> Path: From 410aca7b832b60e23a24edc935b9b46fa3d398e2 Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Mon, 9 Feb 2026 20:21:37 +0530 Subject: [PATCH 3/4] fix loop count issue among subsequent tests --- codeflash/languages/javascript/parse.py | 68 +++++++++++++++++++ codeflash/languages/javascript/support.py | 2 + .../languages/javascript/vitest_runner.py | 51 +++++++++++++- codeflash/optimization/function_optimizer.py | 5 ++ codeflash/verification/test_runner.py | 1 + packages/codeflash/runtime/capture.js | 13 +++- 6 files changed, 136 insertions(+), 4 deletions(-) diff --git a/codeflash/languages/javascript/parse.py b/codeflash/languages/javascript/parse.py index 0d62b50b4..869e2592e 100644 --- a/codeflash/languages/javascript/parse.py +++ b/codeflash/languages/javascript/parse.py @@ -175,6 +175,17 @@ def parse_jest_test_xml( logger.debug(f"Found {marker_count} timing start markers in Jest stdout") else: logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})") + # Check for END markers with duration (perf test markers) + end_marker_count = len(jest_end_pattern.findall(global_stdout)) + if end_marker_count > 0: + logger.debug(f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout") + # Sample a few markers to verify loop indices + end_samples = list(jest_end_pattern.finditer(global_stdout))[:5] + for sample in end_samples: + groups = sample.groups() + logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}") + else: + logger.debug(f"[PERF-DEBUG] No END markers with duration found in Jest stdout") except (AttributeError, UnicodeDecodeError): global_stdout = "" @@ -197,6 +208,12 @@ def 
parse_jest_test_xml( key = match.groups()[:5] end_matches_dict[key] = match + # Debug: log suite-level END marker parsing for perf tests + if end_matches_dict: + # Get unique loop indices from the parsed END markers + loop_indices = sorted(set(int(k[3]) if k[3].isdigit() else 1 for k in end_matches_dict.keys())) + logger.debug(f"[PERF-DEBUG] Suite {suite_count}: parsed {len(end_matches_dict)} END markers from suite_stdout, loop_index range: {min(loop_indices)}-{max(loop_indices)}") + # Also collect timing markers from testcase-level system-out (Vitest puts output at testcase level) for tc in suite: tc_system_out = tc._elem.find("system-out") # noqa: SLF001 @@ -327,6 +344,13 @@ def parse_jest_test_xml( sanitized_test_name = re.sub(r"[!#: ()\[\]{}|\\/*?^$.+\-]", "_", test_name) matching_starts = [m for m in start_matches if sanitized_test_name in m.group(2)] + # Debug: log which branch we're taking + logger.debug( + f"[FLOW-DEBUG] Testcase '{test_name[:50]}': " + f"total_start_matches={len(start_matches)}, matching_starts={len(matching_starts)}, " + f"total_end_matches={len(end_matches_dict)}" + ) + # For performance tests (capturePerf), there are no START markers - only END markers with duration # Check for END markers directly if no START markers found matching_ends_direct = [] @@ -337,6 +361,28 @@ def parse_jest_test_xml( # end_key is (module, testName, funcName, loopIndex, invocationId) if len(end_key) >= 2 and sanitized_test_name in end_key[1]: matching_ends_direct.append(end_match) + # Debug: log matching results for perf tests + if matching_ends_direct: + loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct] + logger.debug( + f"[PERF-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} END markers, " + f"loop_index range: {min(loop_indices)}-{max(loop_indices)}" + ) + elif end_matches_dict: + # No matches but we have END markers - check why + sample_keys = list(end_matches_dict.keys())[:3] + logger.debug( + f"[PERF-MISMATCH] Testcase '{test_name[:40]}': no matches found. 
" + f"sanitized_test_name='{sanitized_test_name[:50]}', " + f"sample end_keys={[k[1][:30] if len(k) >= 2 else k for k in sample_keys]}" + ) + + # Log if we're skipping the matching_ends_direct branch + if matching_starts and end_matches_dict: + logger.debug( + f"[FLOW-SKIP] Testcase '{test_name[:40]}': has {len(matching_starts)} START markers, " + f"skipping {len(end_matches_dict)} END markers (behavior test mode)" + ) if not matching_starts and not matching_ends_direct: # No timing markers found - use JUnit XML time attribute as fallback @@ -373,11 +419,13 @@ def parse_jest_test_xml( ) elif matching_ends_direct: # Performance test format: process END markers directly (no START markers) + loop_indices_found = [] for end_match in matching_ends_direct: groups = end_match.groups() # groups: (module, testName, funcName, loopIndex, invocationId, durationNs) func_name = groups[2] loop_index = int(groups[3]) if groups[3].isdigit() else 1 + loop_indices_found.append(loop_index) line_id = groups[4] try: runtime = int(groups[5]) @@ -403,6 +451,12 @@ def parse_jest_test_xml( stdout="", ) ) + if loop_indices_found: + logger.debug( + f"[LOOP-DEBUG] Testcase '{test_name}': processed {len(matching_ends_direct)} END markers, " + f"loop_index range: {min(loop_indices_found)}-{max(loop_indices_found)}, " + f"total results so far: {len(test_results.test_results)}" + ) else: # Process each timing marker for match in matching_starts: @@ -454,5 +508,19 @@ def parse_jest_test_xml( f"Jest XML parsing complete: {len(test_results.test_results)} results " f"from {suite_count} suites, {testcase_count} testcases" ) + # Debug: show loop_index distribution for perf analysis + if test_results.test_results: + loop_indices = [r.loop_index for r in test_results.test_results] + unique_loop_indices = sorted(set(loop_indices)) + min_idx, max_idx = min(unique_loop_indices), max(unique_loop_indices) + logger.debug( + f"[LOOP-SUMMARY] Results loop_index: min={min_idx}, max={max_idx}, " + f"unique_count={len(unique_loop_indices)}, total_results={len(loop_indices)}" + ) + if max_idx == 1 and len(loop_indices) > 1: + logger.warning( + f"[LOOP-WARNING] All {len(loop_indices)} results have loop_index=1. " + "Perf test markers may not have been parsed correctly." 
+ ) return test_results diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py index 17c3b1021..9cb8d97c3 100644 --- a/codeflash/languages/javascript/support.py +++ b/codeflash/languages/javascript/support.py @@ -2134,6 +2134,7 @@ def run_benchmarking_tests( from codeflash.languages.test_framework import get_js_test_framework_or_default framework = test_framework or get_js_test_framework_or_default() + logger.debug(f"run_benchmarking_tests called with framework={framework}") # Use JS-specific high max_loops - actual loop count is limited by target_duration effective_max_loops = self.JS_BENCHMARKING_MAX_LOOPS @@ -2141,6 +2142,7 @@ def run_benchmarking_tests( if framework == "vitest": from codeflash.languages.javascript.vitest_runner import run_vitest_benchmarking_tests + logger.debug("Dispatching to run_vitest_benchmarking_tests") return run_vitest_benchmarking_tests( test_paths=test_paths, test_env=test_env, diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py index e6ce972e7..9d6389563 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -520,6 +520,9 @@ def run_vitest_benchmarking_tests( ) -> tuple[Path, subprocess.CompletedProcess]: """Run Vitest benchmarking tests with external looping from Python. + NOTE: This function MUST use benchmarking_file_path (perf tests with capturePerf), + NOT instrumented_behavior_file_path (behavior tests with capture). + Uses external process-level looping to run tests multiple times and collect timing data. This matches the Python pytest approach where looping is controlled externally for simplicity. @@ -544,6 +547,22 @@ def run_vitest_benchmarking_tests( # Get performance test files test_files = [Path(file.benchmarking_file_path) for file in test_paths.test_files if file.benchmarking_file_path] + # Log test file selection + total_test_files = len(test_paths.test_files) + perf_test_files = len(test_files) + logger.debug(f"Vitest benchmark test file selection: {perf_test_files}/{total_test_files} have benchmarking_file_path") + if perf_test_files == 0: + logger.warning("No perf test files found! 
Cannot run benchmarking tests.") + for tf in test_paths.test_files: + logger.warning(f"Test file: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}") + elif perf_test_files < total_test_files: + for tf in test_paths.test_files: + if not tf.benchmarking_file_path: + logger.warning(f"Missing benchmarking_file_path: behavior={tf.instrumented_behavior_file_path}") + else: + for tf in test_files[:3]: # Log first 3 perf test files + logger.debug(f"Using perf test file: {tf}") + # Use provided project_root, or detect it as fallback if project_root is None and test_files: project_root = _find_vitest_project_root(test_files[0]) @@ -574,14 +593,21 @@ def run_vitest_benchmarking_tests( vitest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false" vitest_env["CODEFLASH_LOOP_INDEX"] = "1" + # Set test module for marker identification (use first test file as reference) + if test_files: + test_module_path = str(test_files[0].relative_to(effective_cwd) if test_files[0].is_relative_to(effective_cwd) else test_files[0].name) + vitest_env["CODEFLASH_TEST_MODULE"] = test_module_path + logger.debug(f"[VITEST-BENCH] Set CODEFLASH_TEST_MODULE={test_module_path}") + # Total timeout for the entire benchmark run total_timeout = max(120, (target_duration_ms // 1000) + 60, timeout or 120) - logger.debug(f"Running Vitest benchmarking tests: {' '.join(vitest_cmd)}") + logger.debug(f"[VITEST-BENCH] Running Vitest benchmarking tests: {' '.join(vitest_cmd)}") logger.debug( - f"Vitest benchmarking config: min_loops={min_loops}, max_loops={max_loops}, " + f"[VITEST-BENCH] Config: min_loops={min_loops}, max_loops={max_loops}, " f"target_duration={target_duration_ms}ms, stability_check={stability_check}" ) + logger.debug(f"[VITEST-BENCH] Environment: CODEFLASH_PERF_LOOP_COUNT={vitest_env.get('CODEFLASH_PERF_LOOP_COUNT')}") total_start_time = time.time() @@ -606,7 +632,26 @@ def run_vitest_benchmarking_tests( result = subprocess.CompletedProcess(args=vitest_cmd, returncode=-1, stdout="", stderr="Vitest not found") wall_clock_seconds = time.time() - total_start_time - logger.debug(f"Vitest benchmarking completed in {wall_clock_seconds:.2f}s") + logger.debug(f"[VITEST-BENCH] Completed in {wall_clock_seconds:.2f}s, returncode={result.returncode}") + + # Debug: Check for END markers with duration (perf test format) + if result.stdout: + import re + perf_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:(\d+):[^:]+:(\d+)######!") + perf_matches = list(perf_end_pattern.finditer(result.stdout)) + if perf_matches: + loop_indices = [int(m.group(1)) for m in perf_matches] + logger.debug( + f"[VITEST-BENCH] Found {len(perf_matches)} perf END markers in stdout, " + f"loop_index range: {min(loop_indices)}-{max(loop_indices)}" + ) + else: + logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})") + # Check if there are behavior END markers instead + behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!") + behavior_matches = list(behavior_end_pattern.finditer(result.stdout)) + if behavior_matches: + logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)") return result_file_path, result diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 08f78ba58..c6cc66664 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2363,6 +2363,10 @@ def 
establish_original_code_baseline( ) console.rule() with progress_bar("Running performance benchmarks..."): + logger.debug(f"[BENCHMARK-START] Starting benchmarking tests with {len(self.test_files.test_files)} test files") + for idx, tf in enumerate(self.test_files.test_files): + logger.debug(f"[BENCHMARK-FILES] Test file {idx}: perf_file={tf.benchmarking_file_path}") + if self.function_to_optimize.is_async and is_python(): from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function @@ -2380,6 +2384,7 @@ def establish_original_code_baseline( enable_coverage=False, code_context=code_context, ) + logger.debug(f"[BENCHMARK-DONE] Got {len(benchmarking_results.test_results)} benchmark results") finally: if self.function_to_optimize.is_async: self.write_code_and_helpers( diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 2a05c9fda..c0ecdc03d 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -325,6 +325,7 @@ def run_benchmarking_tests( pytest_max_loops: int = 100_000, js_project_root: Path | None = None, ) -> tuple[Path, subprocess.CompletedProcess]: + logger.debug(f"run_benchmarking_tests called: framework={test_framework}, num_files={len(test_paths.test_files)}") # Check if there's a language support for this test framework that implements run_benchmarking_tests language_support = get_language_support_by_framework(test_framework) if language_support is not None and hasattr(language_support, "run_benchmarking_tests"): diff --git a/packages/codeflash/runtime/capture.js b/packages/codeflash/runtime/capture.js index eabcee539..616e2907c 100644 --- a/packages/codeflash/runtime/capture.js +++ b/packages/codeflash/runtime/capture.js @@ -839,7 +839,7 @@ function setTestName(name) { resetInvocationCounters(); } -// Jest lifecycle hooks - these run automatically when this module is imported +// Jest/Vitest lifecycle hooks - these run automatically when this module is imported if (typeof beforeEach !== 'undefined') { beforeEach(() => { // Get current test name and path from Jest's expect state @@ -854,6 +854,17 @@ if (typeof beforeEach !== 'undefined') { } // Reset invocation counters for each test resetInvocationCounters(); + + // For Vitest (no external loop-runner), reset perf state for each test + // so each test gets its own time budget for internal looping. + // For Jest with loop-runner, CODEFLASH_PERF_CURRENT_BATCH is set, + // and we want shared state across the test file. 
+ const hasExternalLoopRunner = process.env.CODEFLASH_PERF_CURRENT_BATCH !== undefined; + if (!hasExternalLoopRunner) { + resetPerfState(); + // Also reset invocation loop counts so each test starts fresh + sharedPerfState.invocationLoopCounts = {}; + } }); } From 5926949b40f6f9ccaa9bd796b9bdd6a62e9664ee Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:54:20 +0000 Subject: [PATCH 4/4] style: auto-fix linting issues --- codeflash/languages/javascript/parse.py | 12 ++++++++---- codeflash/languages/javascript/support.py | 2 +- codeflash/languages/javascript/vitest_runner.py | 15 ++++++++++++--- codeflash/optimization/function_optimizer.py | 4 +++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/codeflash/languages/javascript/parse.py b/codeflash/languages/javascript/parse.py index 869e2592e..1bfda8bca 100644 --- a/codeflash/languages/javascript/parse.py +++ b/codeflash/languages/javascript/parse.py @@ -178,14 +178,16 @@ def parse_jest_test_xml( # Check for END markers with duration (perf test markers) end_marker_count = len(jest_end_pattern.findall(global_stdout)) if end_marker_count > 0: - logger.debug(f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout") + logger.debug( + f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout" + ) # Sample a few markers to verify loop indices end_samples = list(jest_end_pattern.finditer(global_stdout))[:5] for sample in end_samples: groups = sample.groups() logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}") else: - logger.debug(f"[PERF-DEBUG] No END markers with duration found in Jest stdout") + logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout") except (AttributeError, UnicodeDecodeError): global_stdout = "" @@ -211,8 +213,10 @@ def parse_jest_test_xml( # Debug: log suite-level END marker parsing for perf tests if end_matches_dict: # Get unique loop indices from the parsed END markers - loop_indices = sorted(set(int(k[3]) if k[3].isdigit() else 1 for k in end_matches_dict.keys())) - logger.debug(f"[PERF-DEBUG] Suite {suite_count}: parsed {len(end_matches_dict)} END markers from suite_stdout, loop_index range: {min(loop_indices)}-{max(loop_indices)}") + loop_indices = sorted({int(k[3]) if k[3].isdigit() else 1 for k in end_matches_dict}) + logger.debug( + f"[PERF-DEBUG] Suite {suite_count}: parsed {len(end_matches_dict)} END markers from suite_stdout, loop_index range: {min(loop_indices)}-{max(loop_indices)}" + ) # Also collect timing markers from testcase-level system-out (Vitest puts output at testcase level) for tc in suite: diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py index 9cb8d97c3..0a12f48a7 100644 --- a/codeflash/languages/javascript/support.py +++ b/codeflash/languages/javascript/support.py @@ -2134,7 +2134,7 @@ def run_benchmarking_tests( from codeflash.languages.test_framework import get_js_test_framework_or_default framework = test_framework or get_js_test_framework_or_default() - logger.debug(f"run_benchmarking_tests called with framework={framework}") + logger.debug("run_benchmarking_tests called with framework=%s", framework) # Use JS-specific high max_loops - actual loop count is limited by target_duration effective_max_loops = self.JS_BENCHMARKING_MAX_LOOPS diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py 
index 9d6389563..d169752bc 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -550,11 +550,15 @@ def run_vitest_benchmarking_tests( # Log test file selection total_test_files = len(test_paths.test_files) perf_test_files = len(test_files) - logger.debug(f"Vitest benchmark test file selection: {perf_test_files}/{total_test_files} have benchmarking_file_path") + logger.debug( + f"Vitest benchmark test file selection: {perf_test_files}/{total_test_files} have benchmarking_file_path" + ) if perf_test_files == 0: logger.warning("No perf test files found! Cannot run benchmarking tests.") for tf in test_paths.test_files: - logger.warning(f"Test file: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}") + logger.warning( + f"Test file: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}" + ) elif perf_test_files < total_test_files: for tf in test_paths.test_files: if not tf.benchmarking_file_path: @@ -595,7 +599,11 @@ def run_vitest_benchmarking_tests( # Set test module for marker identification (use first test file as reference) if test_files: - test_module_path = str(test_files[0].relative_to(effective_cwd) if test_files[0].is_relative_to(effective_cwd) else test_files[0].name) + test_module_path = str( + test_files[0].relative_to(effective_cwd) + if test_files[0].is_relative_to(effective_cwd) + else test_files[0].name + ) vitest_env["CODEFLASH_TEST_MODULE"] = test_module_path logger.debug(f"[VITEST-BENCH] Set CODEFLASH_TEST_MODULE={test_module_path}") @@ -637,6 +645,7 @@ def run_vitest_benchmarking_tests( # Debug: Check for END markers with duration (perf test format) if result.stdout: import re + perf_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:(\d+):[^:]+:(\d+)######!") perf_matches = list(perf_end_pattern.finditer(result.stdout)) if perf_matches: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index c6cc66664..cac81fc92 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2363,7 +2363,9 @@ def establish_original_code_baseline( ) console.rule() with progress_bar("Running performance benchmarks..."): - logger.debug(f"[BENCHMARK-START] Starting benchmarking tests with {len(self.test_files.test_files)} test files") + logger.debug( + f"[BENCHMARK-START] Starting benchmarking tests with {len(self.test_files.test_files)} test files" + ) for idx, tf in enumerate(self.test_files.test_files): logger.debug(f"[BENCHMARK-FILES] Test file {idx}: perf_file={tf.benchmarking_file_path}")
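
Note: the [VITEST-BENCH] debug check added above scans stdout for perf END markers. A small self-contained sketch of that scan follows; the regex is copied verbatim from the patch, while the sample marker strings are assumptions inferred from the group layout documented in parse.py (module:testName:funcName:loopIndex:invocationId:durationNs):

    import re

    perf_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:(\d+):[^:]+:(\d+)######!")

    # Two hypothetical markers for the same invocation, one per timing loop.
    stdout = (
        "!######tests/format.test.ts:formats dates:formatDate:1:0_0:183452######!\n"
        "!######tests/format.test.ts:formats dates:formatDate:2:0_0:179310######!\n"
    )

    loop_indices = [int(m.group(1)) for m in perf_end_pattern.finditer(stdout)]
    durations_ns = [int(m.group(2)) for m in perf_end_pattern.finditer(stdout)]
    print(f"loop_index range: {min(loop_indices)}-{max(loop_indices)}")  # 1-2
    print(f"durations (ns): {durations_ns}")                             # [183452, 179310]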
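
Note: the capture.js beforeEach change keys the per-test reset off a single signal: CODEFLASH_PERF_CURRENT_BATCH is only present when the Jest external loop-runner drives the process. A rough Python mirror of that decision, for readers following the Python side of the runner; the helper name is hypothetical and only the environment variable name and the rationale come from the patch:

    import os

    def vitest_should_reset_perf_state_per_test() -> bool:
        # No external loop-runner (the Vitest case): each test resets perf state so
        # it gets its own time budget for internal looping. With the Jest loop-runner
        # (CODEFLASH_PERF_CURRENT_BATCH set), perf state stays shared across the file.
        return "CODEFLASH_PERF_CURRENT_BATCH" not in os.environ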