From 1b3fa91a5c3c55752eabc8868db5ff2ff61f8312 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-39-200.ec2.internal>
Date: Wed, 4 Mar 2026 01:06:37 +0000
Subject: [PATCH 1/5] feat: Java testgen class name fix, remove per-test
 @Timeout, and wire language_version

- Add class_name and qualified_name to /testgen API payload so the backend
  has explicit access to computed FunctionToOptimize properties
- Add client-side _fix_java_test_class_name() to correct wrong class name
  references in LLM-generated Java test code
- Remove per-test @Timeout annotation from Java instrumentation (causes
  timing instability on CI runners; Maven Surefire handles timeouts)
- Remove redundant default_language_version, use language_version as canonical

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/rules/architecture.md                 |  2 +-
 codeflash/api/aiservice.py                    | 63 ++++++++-----------
 codeflash/api/schemas.py                      | 22 ++++---
 codeflash/languages/base.py                   | 10 +--
 codeflash/languages/java/instrumentation.py   | 31 ++++++++-
 codeflash/languages/java/support.py           | 31 +++++++++
 codeflash/languages/javascript/support.py     | 18 ++++++
 codeflash/languages/python/support.py         |  5 ++
 codeflash/optimization/function_optimizer.py  | 55 +++++++++++++++-
 codeflash/verification/verifier.py            |  3 +-
 .../test_java/test_instrumentation.py         |  3 +-
 11 files changed, 183 insertions(+), 60 deletions(-)

diff --git a/.claude/rules/architecture.md b/.claude/rules/architecture.md
index 96eb3a43c..5439b1ce2 100644
--- a/.claude/rules/architecture.md
+++ b/.claude/rules/architecture.md
@@ -64,7 +64,7 @@ Core protocol in `languages/base.py`. Each language (`PythonSupport`, `JavaScrip
 |----------|----------------|---------|
 | Identity | `language`, `file_extensions`, `default_file_extension` | Language identification |
 | Identity | `comment_prefix`, `dir_excludes` | Language conventions |
-| AI service | `default_language_version` | Language version for API payloads (`None` for Python, `"ES2022"` for JS) |
+| AI service | `language_version` | Detected language version for API payloads (e.g., `"3.11.0"` for Python, `"17"` for Java) |
 | AI service | `valid_test_frameworks` | Allowed test frameworks for validation |
 | Discovery | `discover_functions`, `discover_tests` | Find optimizable functions and their tests |
 | Discovery | `adjust_test_config_for_discovery` | Pre-discovery config adjustment (no-op default) |
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index cc59aadfb..7e6e67000 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -129,8 +129,7 @@ def optimize_code(
         experiment_metadata: ExperimentMetadata | None = None,
         *,
         language: str = "python",
-        language_version: str
-        | None = None,  # TODO:{claude} add language version to the language support and it should be cached
+        language_version: str | None = None,
         module_system: str | None = None,
         is_async: bool = False,
         n_candidates: int = 5,
@@ -177,16 +176,12 @@ def optimize_code(
             "is_numerical_code": is_numerical_code,
         }
 
-        # Add language-specific version fields
-        # Always include python_version for backward compatibility with older backend
-        payload["python_version"] = platform.python_version()
-        if is_python():
-            pass  # python_version already set
-        elif is_java():
-            payload["language_version"] = language_version or "17"  # Default Java version
-        else:
-            payload["language_version"] = language_version or "ES2022"
-            # Add module system for JavaScript/TypeScript (esm or commonjs)
+        # Add language version (canonical for all languages)
+        payload["language_version"] = language_version
+        # Backward compat: backend still expects python_version
+        payload["python_version"] = language_version if is_python() else platform.python_version()
+
+        if not is_python():
             if module_system:
                 payload["module_system"] = module_system
 
@@ -262,7 +257,8 @@ def get_jit_rewritten_code(self, source_code: str, trace_id: str) -> list[Optimi
             "source_code": source_code,
             "trace_id": trace_id,
             "dependency_code": "",  # dummy value to please the api endpoint
-            "python_version": "3.12.1",  # dummy value to please the api endpoint
+            "language_version": platform.python_version(),
+            "python_version": platform.python_version(),  # backward compat
             "current_username": get_last_commit_author_if_pr_exists(None),
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
@@ -329,18 +325,15 @@ def optimize_python_code_line_profiler(
         logger.info("Generating optimized candidates with line profiler…")
         console.rule()
 
-        # Set python_version for backward compatibility with Python, or use language_version
-        python_version = language_version if language_version else platform.python_version()
-
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
             "n_candidates": n_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
-            "python_version": python_version,
             "language": language,
             "language_version": language_version,
+            "python_version": language_version if is_python() else platform.python_version(),  # backward compat
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
             "call_sequence": self.get_next_sequence(),
@@ -434,14 +427,10 @@ def optimize_code_refinement(self, request: list[AIServiceRefinerRequest]) -> li
                 "language": opt.language,
             }
 
-            # Add language version - always include python_version for backward compatibility
-            item["python_version"] = platform.python_version()
-            if is_python():
-                pass  # python_version already set
-            elif opt.language_version:
-                item["language_version"] = opt.language_version
-            else:
-                item["language_version"] = "ES2022"  # Default for JS/TS
+            # Add language version (canonical for all languages)
+            item["language_version"] = opt.language_version
+            # Backward compat: backend still expects python_version
+            item["python_version"] = opt.language_version if is_python() else platform.python_version()
 
             # Add multi-file context if provided
             if opt.additional_context_files:
@@ -649,7 +638,8 @@ def generate_ranking(
             "diffs": diffs,
             "speedups": speedups,
             "optimization_ids": optimization_ids,
-            "python_version": platform.python_version(),
+            "language_version": platform.python_version(),
+            "python_version": platform.python_version(),  # backward compat
             "function_references": function_references,
         }
         logger.info("loading|Generating ranking")
@@ -785,18 +775,16 @@ def generate_regression_tests(
             "is_async": function_to_optimize.is_async,
             "call_sequence": self.get_next_sequence(),
             "is_numerical_code": is_numerical_code,
+            "class_name": function_to_optimize.class_name,
+            "qualified_name": function_to_optimize.qualified_name,
         }
 
-        # Add language-specific version fields
-        # Always include python_version for backward compatibility with older backend
-        payload["python_version"] = platform.python_version()
-        if is_python():
-            pass  # python_version already set
-        elif is_java():
-            payload["language_version"] = language_version or "17"  # Default Java version
-        else:
-            payload["language_version"] = language_version or "ES2022"
-            # Add module system for JavaScript/TypeScript (esm or commonjs)
+        # Add language version (canonical for all languages)
+        payload["language_version"] = language_version
+        # Backward compat: backend still expects python_version
+        payload["python_version"] = language_version if is_python() else platform.python_version()
+
+        if not is_python():
             if module_system:
                 payload["module_system"] = module_system
 
@@ -884,7 +872,8 @@ def get_optimization_review(
             "codeflash_version": codeflash_version,
             "calling_fn_details": calling_fn_details,
             "language": language,
-            "python_version": platform.python_version() if is_python() else None,
+            "language_version": platform.python_version() if is_python() else None,
+            "python_version": platform.python_version() if is_python() else None,  # backward compat
             "call_sequence": self.get_next_sequence(),
         }
         console.rule()
diff --git a/codeflash/api/schemas.py b/codeflash/api/schemas.py
index 37e2c72a5..30d6390e4 100644
--- a/codeflash/api/schemas.py
+++ b/codeflash/api/schemas.py
@@ -137,14 +137,15 @@ def to_payload(self) -> dict[str, Any]:
             "is_numerical_code": self.is_numerical_code,
         }
 
-        # Add language-specific fields
-        if self.language_info.version:
-            payload["language_version"] = self.language_info.version
+        # Add language version (canonical for all languages)
+        payload["language_version"] = self.language_info.version
 
-        # Backward compat: always include python_version
+        # Backward compat: backend still expects python_version
         import platform
 
-        payload["python_version"] = platform.python_version()
+        payload["python_version"] = (
+            self.language_info.version if self.language_info.name == "python" else platform.python_version()
+        )
 
         # Module system for JS/TS
         if self.language_info.module_system != ModuleSystem.UNKNOWN:
@@ -205,14 +206,15 @@ def to_payload(self) -> dict[str, Any]:
             "is_numerical_code": self.is_numerical_code,
         }
 
-        # Add language version
-        if self.language_info.version:
-            payload["language_version"] = self.language_info.version
+        # Add language version (canonical for all languages)
+        payload["language_version"] = self.language_info.version
 
-        # Backward compat: always include python_version
+        # Backward compat: backend still expects python_version
         import platform
 
-        payload["python_version"] = platform.python_version()
+        payload["python_version"] = (
+            self.language_info.version if self.language_info.name == "python" else platform.python_version()
+        )
 
         # Module system for JS/TS
         if self.language_info.module_system != ModuleSystem.UNKNOWN:
diff --git a/codeflash/languages/base.py b/codeflash/languages/base.py
index 92ae95e63..b217a8dee 100644
--- a/codeflash/languages/base.py
+++ b/codeflash/languages/base.py
@@ -325,13 +325,9 @@ def dir_excludes(self) -> frozenset[str]:
         ...
 
     @property
-    def default_language_version(self) -> str | None:
-        """Default language version string sent to AI service.
-
-        Returns None for languages where the runtime version is auto-detected (e.g. Python).
-        Returns a version string (e.g. "ES2022") for languages that need an explicit default.
-        """
-        return None
+    def language_version(self) -> str | None:
+        """The detected language version (e.g., "17" for Java, "ES2022" for JS)."""
+        ...
 
     @property
     def valid_test_frameworks(self) -> tuple[str, ...]:
diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index ee7700f5e..043054b2e 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -258,6 +258,7 @@ def wrap_target_calls_with_treesitter(
     precise_call_timing: bool = False,
     class_name: str = "",
     test_method_name: str = "",
+    target_return_type: str = "",
 ) -> tuple[list[str], int]:
     """Replace target method calls in body_lines with capture + serialize using tree-sitter.
 
@@ -327,6 +328,8 @@ def wrap_target_calls_with_treesitter(
             call_counter += 1
             var_name = f"_cf_result{iter_id}_{call_counter}"
             cast_type = _infer_array_cast_type(body_line)
+            if not cast_type and target_return_type and target_return_type != "void":
+                cast_type = target_return_type
             var_with_cast = f"({cast_type}){var_name}" if cast_type else var_name
 
             # Use per-call unique variables (with call_counter suffix) for behavior mode
@@ -524,6 +527,26 @@ def _infer_array_cast_type(line: str) -> str | None:
     return None
 
 
+def _extract_return_type(function_to_optimize: Any) -> str:
+    """Extract the return type of a Java function from its source file using tree-sitter."""
+    file_path = getattr(function_to_optimize, "file_path", None)
+    func_name = _get_function_name(function_to_optimize)
+    if not file_path or not file_path.exists():
+        return ""
+    try:
+        from codeflash.languages.java.parser import get_java_analyzer
+
+        analyzer = get_java_analyzer()
+        source_text = file_path.read_text(encoding="utf-8")
+        methods = analyzer.find_methods(source_text)
+        for method in methods:
+            if method.name == func_name and method.return_type:
+                return method.return_type
+    except Exception:
+        logger.debug("Could not extract return type for %s", func_name)
+    return ""
+
+
 def _get_qualified_name(func: Any) -> str:
     """Get the qualified name from FunctionToOptimize."""
     if hasattr(func, "qualified_name"):
@@ -617,6 +640,7 @@ def instrument_existing_test(
     """
     source = test_string
     func_name = _get_function_name(function_to_optimize)
+    target_return_type = _extract_return_type(function_to_optimize)
 
     # Get the original class name from the file name
     if test_path:
@@ -648,14 +672,16 @@ def instrument_existing_test(
         )
     else:
         # Behavior mode: add timing instrumentation that also writes to SQLite
-        modified_source = _add_behavior_instrumentation(modified_source, original_class_name, func_name)
+        modified_source = _add_behavior_instrumentation(
+            modified_source, original_class_name, func_name, target_return_type
+        )
 
     logger.debug("Java %s testing for %s: renamed class %s -> %s", mode, func_name, original_class_name, new_class_name)
     # Why return True here?
     return True, modified_source
 
 
-def _add_behavior_instrumentation(source: str, class_name: str, func_name: str) -> str:
+def _add_behavior_instrumentation(source: str, class_name: str, func_name: str, target_return_type: str = "") -> str:
     """Add behavior instrumentation to test methods.
 
     For behavior mode, this adds:
@@ -796,6 +822,7 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 precise_call_timing=True,
                 class_name=class_name,
                 test_method_name=test_method_name,
+                target_return_type=target_return_type,
             )
 
             # Add behavior instrumentation setup code (shared variables for all calls in the method)
diff --git a/codeflash/languages/java/support.py b/codeflash/languages/java/support.py
index a9cdbf8e8..5fd0d72ad 100644
--- a/codeflash/languages/java/support.py
+++ b/codeflash/languages/java/support.py
@@ -64,6 +64,7 @@ def __init__(self) -> None:
         self._analyzer = get_java_analyzer()
         self.line_profiler_agent_arg: str | None = None
         self.line_profiler_warmup_iterations: int = 0
+        self._language_version: str | None = None
 
     @property
     def language(self) -> Language:
@@ -93,6 +94,10 @@ def default_file_extension(self) -> str:
     def dir_excludes(self) -> frozenset[str]:
         return frozenset({"target", "build", ".gradle", ".mvn", ".idea", "apidocs", "javadoc"})
 
+    @property
+    def language_version(self) -> str | None:
+        return self._language_version
+
     def postprocess_generated_tests(
         self, generated_tests: GeneratedTestsList, test_framework: str, project_root: Path, source_file_path: Path
     ) -> GeneratedTestsList:
@@ -364,10 +369,36 @@ def ensure_runtime_environment(self, project_root: Path) -> bool:
         if config is None:
             return False
 
+        self._language_version = config.java_version
+        if self._language_version is None:
+            self._detect_java_version()
+
         # For now, assume the runtime is available
         # A full implementation would check/install the JAR
         return True
 
+    def _detect_java_version(self) -> None:
+        """Detect and cache the Java runtime version."""
+        import subprocess
+
+        try:
+            result = subprocess.run(["java", "-version"], check=False, capture_output=True, text=True, timeout=10)
+            # java -version outputs to stderr, e.g. 'openjdk version "17.0.2"'
+            output = result.stderr or result.stdout
+            for line in output.splitlines():
+                if "version" in line:
+                    # Extract version between quotes: "17.0.2" -> "17"
+                    start = line.find('"')
+                    end = line.find('"', start + 1)
+                    if start != -1 and end != -1:
+                        full_version = line[start + 1 : end]
+                        # Use major version only: "17.0.2" -> "17", "1.8.0_292" -> "8"
+                        major = full_version.split(".")[0]
+                        self._language_version = "8" if major == "1" else major
+                        return
+        except Exception:
+            pass
+
     def instrument_existing_test(
         self,
         test_string: str,
diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py
index 51526f94e..e2fce71cf 100644
--- a/codeflash/languages/javascript/support.py
+++ b/codeflash/languages/javascript/support.py
@@ -36,6 +36,9 @@ class JavaScriptSupport:
     using tree-sitter for code analysis and Jest for test execution.
     """
 
+    def __init__(self) -> None:
+        self._language_version: str | None = None
+
     # === Properties ===
 
     @property
@@ -68,6 +71,10 @@ def comment_prefix(self) -> str:
     def dir_excludes(self) -> frozenset[str]:
         return frozenset({"node_modules", "dist", "build", ".next", ".nuxt", "coverage", ".cache", ".turbo", ".vercel"})
 
+    @property
+    def language_version(self) -> str | None:
+        return self._language_version
+
     # === Discovery ===
 
     def discover_functions(
@@ -2077,6 +2084,15 @@ def verify_requirements(self, project_root: Path, test_framework: str = "jest")
 
         return len(errors) == 0, errors
 
+    def _detect_node_version(self) -> None:
+        """Detect and cache the Node.js runtime version."""
+        try:
+            result = subprocess.run(["node", "--version"], check=False, capture_output=True, text=True, timeout=10)
+            if result.returncode == 0 and result.stdout.strip():
+                self._language_version = result.stdout.strip().lstrip("v")
+        except Exception:
+            pass
+
     def ensure_runtime_environment(self, project_root: Path) -> bool:
         """Ensure codeflash npm package is installed.
 
@@ -2091,6 +2107,8 @@ def ensure_runtime_environment(self, project_root: Path) -> bool:
         """
         from codeflash.cli_cmds.console import logger
 
+        self._detect_node_version()
+
         node_modules_pkg = project_root / "node_modules" / "codeflash"
         if node_modules_pkg.exists():
             logger.debug("codeflash already installed")
diff --git a/codeflash/languages/python/support.py b/codeflash/languages/python/support.py
index b0e6926c1..0e3aaf8b3 100644
--- a/codeflash/languages/python/support.py
+++ b/codeflash/languages/python/support.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import platform
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
@@ -107,6 +108,10 @@ def dir_excludes(self) -> frozenset[str]:
             }
         )
 
+    @property
+    def language_version(self) -> str | None:
+        return platform.python_version()
+
     # === Discovery ===
 
     def discover_functions(
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 795476837..18a116e25 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -662,8 +662,19 @@ def generate_and_instrument_tests(
             behavior_path = generated_test.behavior_file_path
             perf_path = generated_test.perf_file_path
 
-            # For Java, fix paths to match package structure
+            # For Java, fix class name references and paths to match package structure
             if is_java():
+                correct_class = self.function_to_optimize.class_name
+                if correct_class:
+                    generated_test.generated_original_test_source = self._fix_java_test_class_name(
+                        generated_test.generated_original_test_source, correct_class
+                    )
+                    generated_test.instrumented_behavior_test_source = self._fix_java_test_class_name(
+                        generated_test.instrumented_behavior_test_source, correct_class
+                    )
+                    generated_test.instrumented_perf_test_source = self._fix_java_test_class_name(
+                        generated_test.instrumented_perf_test_source, correct_class
+                    )
                 behavior_path, perf_path, modified_behavior_source, modified_perf_source = self._fix_java_test_paths(
                     generated_test.instrumented_behavior_test_source,
                     generated_test.instrumented_perf_test_source,
@@ -833,6 +844,44 @@ def _get_java_sources_root(self) -> Path:
         logger.debug(f"[JAVA-ROOT] Returning Java sources root: {tests_root}, tests_root was: {tests_root}")
         return tests_root
 
+    @staticmethod
+    def _fix_java_test_class_name(source: str, correct_class_name: str) -> str:
+        """Fix incorrect class name references in generated Java test code.
+
+        The backend LLM sometimes generates tests that reference a wrong class name
+        (e.g. 'with' instead of 'DataCalculator'). This method detects the class being
+        tested from the test source and replaces it with the correct class name.
+        """
+        import re
+
+        # Extract the package from the test source
+        pkg_match = re.search(r"^\s*package\s+([\w.]+)\s*;", source, re.MULTILINE)
+        if not pkg_match:
+            return source
+
+        package = pkg_match.group(1)
+
+        # Find imports from the same package that aren't the correct class
+        # Pattern: import <package>.<ClassName>;
+        import_pattern = re.compile(rf"^\s*import\s+{re.escape(package)}\.(\w+)\s*;", re.MULTILINE)
+        wrong_class = None
+        for m in import_pattern.finditer(source):
+            imported = m.group(1)
+            if imported != correct_class_name and not imported.endswith("Test"):
+                wrong_class = imported
+                break
+
+        if not wrong_class or wrong_class == correct_class_name:
+            return source
+
+        logger.debug(
+            f"[JAVA] Fixing class name: replacing '{wrong_class}' with '{correct_class_name}' in generated test"
+        )
+        # Replace all occurrences of the wrong class name with the correct one
+        # Use word boundary to avoid partial replacements
+        source = re.sub(rf"\b{re.escape(wrong_class)}\b", correct_class_name, source)
+        return source
+
     def _fix_java_test_paths(
         self, behavior_source: str, perf_source: str, used_paths: set[Path]
     ) -> tuple[Path, Path, str, str]:
@@ -1451,6 +1500,7 @@ def process_single_candidate(
                             optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
                             function_references=function_references,
                             language=self.function_to_optimize.language,
+                            language_version=self.language_support.language_version,
                         )
                     ],
                 )
@@ -1512,6 +1562,7 @@ def determine_best_candidate(
             else None,
             is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
             language=self.function_to_optimize.language,
+            language_version=self.language_support.language_version,
         )
 
         processor = CandidateProcessor(
@@ -2139,6 +2190,7 @@ def generate_optimizations(
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
             language=self.function_to_optimize.language,
+            language_version=self.language_support.language_version,
             is_async=self.function_to_optimize.is_async,
             n_candidates=n_candidates,
             is_numerical_code=is_numerical_code,
@@ -2165,6 +2217,7 @@ def generate_optimizations(
                 self.function_trace_id[:-4] + "EXP1",
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
                 language=self.function_to_optimize.language,
+                language_version=self.language_support.language_version,
                 is_async=self.function_to_optimize.is_async,
                 n_candidates=n_candidates,
             )
diff --git a/codeflash/verification/verifier.py b/codeflash/verification/verifier.py
index b00700607..3850919be 100644
--- a/codeflash/verification/verifier.py
+++ b/codeflash/verification/verifier.py
@@ -7,7 +7,7 @@
 
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.code_utils import get_run_tmp_file, module_name_from_file_path
-from codeflash.languages import is_java, is_javascript
+from codeflash.languages import current_language_support, is_java, is_javascript
 from codeflash.verification.verification_utils import ModifyInspiredTests, delete_multiple_if_name_main
 
 if TYPE_CHECKING:
@@ -68,6 +68,7 @@ def generate_tests(
         trace_id=function_trace_id,
         test_index=test_index,
         language=function_to_optimize.language,
+        language_version=current_language_support().language_version,
         module_system=project_module_system,
         is_numerical_code=is_numerical_code,
     )
diff --git a/tests/test_languages/test_java/test_instrumentation.py b/tests/test_languages/test_java/test_instrumentation.py
index a7e1e769f..3637aeea2 100644
--- a/tests/test_languages/test_java/test_instrumentation.py
+++ b/tests/test_languages/test_java/test_instrumentation.py
@@ -221,6 +221,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
     }
 }
 """
+
         test_file.write_text(source)
 
         func = FunctionToOptimize(
@@ -2704,7 +2705,7 @@ def test_behavior_mode_writes_to_sqlite(self, java_project):
                 }
             }
         }
-        assertEquals(1, _cf_result1_1);
+        assertEquals(1, (int)_cf_result1_1);
     }
 }
 """

From 292b6a6b79cfce6ad4e2ec8355045ee529c23422 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Wed, 4 Mar 2026 01:09:25 +0000
Subject: [PATCH 2/5] style: auto-fix linting issues (RET504 in
 function_optimizer, D413 in base)

---
 codeflash/languages/base.py                  | 1 +
 codeflash/optimization/function_optimizer.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/languages/base.py b/codeflash/languages/base.py
index b217a8dee..6ffa9c316 100644
--- a/codeflash/languages/base.py
+++ b/codeflash/languages/base.py
@@ -859,6 +859,7 @@ def run_line_profile_tests(
 
         Returns:
             Tuple of (result_file_path, subprocess_result).
+
         """
         ...
 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 18a116e25..b2ed5ecc7 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -879,8 +879,7 @@ def _fix_java_test_class_name(source: str, correct_class_name: str) -> str:
         )
         # Replace all occurrences of the wrong class name with the correct one
         # Use word boundary to avoid partial replacements
-        source = re.sub(rf"\b{re.escape(wrong_class)}\b", correct_class_name, source)
-        return source
+        return re.sub(rf"\b{re.escape(wrong_class)}\b", correct_class_name, source)
 
     def _fix_java_test_paths(
         self, behavior_source: str, perf_source: str, used_paths: set[Path]

From ade855874252cc16d8708b74e409acb7e781e504 Mon Sep 17 00:00:00 2001
From: HeshamHM28 <HeshamMohamedFathy@outlook.com>
Date: Tue, 3 Mar 2026 17:40:11 -0800
Subject: [PATCH 3/5] Update codeflash/languages/java/support.py

Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com>
---
 codeflash/languages/java/support.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/codeflash/languages/java/support.py b/codeflash/languages/java/support.py
index 5fd0d72ad..12338d4c9 100644
--- a/codeflash/languages/java/support.py
+++ b/codeflash/languages/java/support.py
@@ -379,6 +379,9 @@ def ensure_runtime_environment(self, project_root: Path) -> bool:
 
     def _detect_java_version(self) -> None:
         """Detect and cache the Java runtime version."""
+        if self._language_version is not None:
+            return
+
         import subprocess
 
         try:

From da038169d201358935b560262a6ccf93d3d7d00e Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-39-200.ec2.internal>
Date: Wed, 4 Mar 2026 05:26:27 +0000
Subject: [PATCH 4/5] refactor: remove brittle _fix_java_test_class_name
 workaround

The method guessed wrong class names by scanning same-package imports
and blindly replaced all word-boundary matches, risking corruption of
unrelated identifiers. The proper fix belongs in the testgen prompt or
postprocess_generated_tests, not regex surgery on generated code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/optimization/function_optimizer.py | 50 +-------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index b2ed5ecc7..269eb1c0a 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -662,19 +662,8 @@ def generate_and_instrument_tests(
             behavior_path = generated_test.behavior_file_path
             perf_path = generated_test.perf_file_path
 
-            # For Java, fix class name references and paths to match package structure
+            # For Java, fix paths to match package structure
             if is_java():
-                correct_class = self.function_to_optimize.class_name
-                if correct_class:
-                    generated_test.generated_original_test_source = self._fix_java_test_class_name(
-                        generated_test.generated_original_test_source, correct_class
-                    )
-                    generated_test.instrumented_behavior_test_source = self._fix_java_test_class_name(
-                        generated_test.instrumented_behavior_test_source, correct_class
-                    )
-                    generated_test.instrumented_perf_test_source = self._fix_java_test_class_name(
-                        generated_test.instrumented_perf_test_source, correct_class
-                    )
                 behavior_path, perf_path, modified_behavior_source, modified_perf_source = self._fix_java_test_paths(
                     generated_test.instrumented_behavior_test_source,
                     generated_test.instrumented_perf_test_source,
@@ -844,43 +833,6 @@ def _get_java_sources_root(self) -> Path:
         logger.debug(f"[JAVA-ROOT] Returning Java sources root: {tests_root}, tests_root was: {tests_root}")
         return tests_root
 
-    @staticmethod
-    def _fix_java_test_class_name(source: str, correct_class_name: str) -> str:
-        """Fix incorrect class name references in generated Java test code.
-
-        The backend LLM sometimes generates tests that reference a wrong class name
-        (e.g. 'with' instead of 'DataCalculator'). This method detects the class being
-        tested from the test source and replaces it with the correct class name.
-        """
-        import re
-
-        # Extract the package from the test source
-        pkg_match = re.search(r"^\s*package\s+([\w.]+)\s*;", source, re.MULTILINE)
-        if not pkg_match:
-            return source
-
-        package = pkg_match.group(1)
-
-        # Find imports from the same package that aren't the correct class
-        # Pattern: import <package>.<ClassName>;
-        import_pattern = re.compile(rf"^\s*import\s+{re.escape(package)}\.(\w+)\s*;", re.MULTILINE)
-        wrong_class = None
-        for m in import_pattern.finditer(source):
-            imported = m.group(1)
-            if imported != correct_class_name and not imported.endswith("Test"):
-                wrong_class = imported
-                break
-
-        if not wrong_class or wrong_class == correct_class_name:
-            return source
-
-        logger.debug(
-            f"[JAVA] Fixing class name: replacing '{wrong_class}' with '{correct_class_name}' in generated test"
-        )
-        # Replace all occurrences of the wrong class name with the correct one
-        # Use word boundary to avoid partial replacements
-        return re.sub(rf"\b{re.escape(wrong_class)}\b", correct_class_name, source)
-
     def _fix_java_test_paths(
         self, behavior_source: str, perf_source: str, used_paths: set[Path]
     ) -> tuple[Path, Path, str, str]:

From b0670309e14fed5204f045ccd392cd413010ee32 Mon Sep 17 00:00:00 2001
From: HeshamHM28 <HeshamMohamedFathy@outlook.com>
Date: Wed, 4 Mar 2026 07:53:31 +0200
Subject: [PATCH 5/5] fix: update python_version handling for backward
 compatibility with language_version

---
 codeflash/api/aiservice.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 7e6e67000..b301424ad 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -178,8 +178,8 @@ def optimize_code(
 
         # Add language version (canonical for all languages)
         payload["language_version"] = language_version
-        # Backward compat: backend still expects python_version
-        payload["python_version"] = language_version if is_python() else platform.python_version()
+        # Backward compat: Python backend still expects python_version
+        payload["python_version"] = language_version if is_python() else None
 
         if not is_python():
             if module_system:
@@ -257,7 +257,6 @@ def get_jit_rewritten_code(self, source_code: str, trace_id: str) -> list[Optimi
             "source_code": source_code,
             "trace_id": trace_id,
             "dependency_code": "",  # dummy value to please the api endpoint
-            "language_version": platform.python_version(),
             "python_version": platform.python_version(),  # backward compat
             "current_username": get_last_commit_author_if_pr_exists(None),
             "repo_owner": git_repo_owner,
@@ -333,7 +332,7 @@ def optimize_python_code_line_profiler(
             "trace_id": trace_id,
             "language": language,
             "language_version": language_version,
-            "python_version": language_version if is_python() else platform.python_version(),  # backward compat
+            "python_version": language_version if is_python() else None,  # backward compat
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
             "call_sequence": self.get_next_sequence(),
@@ -429,8 +428,8 @@ def optimize_code_refinement(self, request: list[AIServiceRefinerRequest]) -> li
 
             # Add language version (canonical for all languages)
             item["language_version"] = opt.language_version
-            # Backward compat: backend still expects python_version
-            item["python_version"] = opt.language_version if is_python() else platform.python_version()
+            # Backward compat: Python backend still expects python_version
+            item["python_version"] = opt.language_version if is_python() else None
 
             # Add multi-file context if provided
             if opt.additional_context_files:
@@ -638,7 +637,6 @@ def generate_ranking(
             "diffs": diffs,
             "speedups": speedups,
             "optimization_ids": optimization_ids,
-            "language_version": platform.python_version(),
             "python_version": platform.python_version(),  # backward compat
             "function_references": function_references,
         }
@@ -781,8 +779,8 @@ def generate_regression_tests(
 
         # Add language version (canonical for all languages)
         payload["language_version"] = language_version
-        # Backward compat: backend still expects python_version
-        payload["python_version"] = language_version if is_python() else platform.python_version()
+        # Backward compat: Python backend still expects python_version
+        payload["python_version"] = language_version if is_python() else None
 
         if not is_python():
             if module_system: