diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index 0cad9e451..c6bf40c34 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -148,6 +148,7 @@ def parse_args() -> Namespace:
args.yes = True
args.no_pr = True
args.worktree = True
+ args.effort = "low"
return process_and_validate_cmd_args(args)
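
Note on `is_subagent_mode()`, which gates most of the changes below: its definition is not part of this diff. A minimal sketch of the pattern, assuming (hypothetically) an environment-variable flag named `CODEFLASH_SUBAGENT`, and mirroring the defaults forced in `parse_args()` above:

```python
import os
from argparse import Namespace


def is_subagent_mode() -> bool:
    # Hypothetical detection: the real codebase may key off a CLI flag
    # or a different env var; "CODEFLASH_SUBAGENT" is an assumption.
    return os.environ.get("CODEFLASH_SUBAGENT", "").lower() in {"1", "true"}


def apply_subagent_defaults(args: Namespace) -> Namespace:
    # Mirror of the defaults forced in parse_args(): run non-interactively,
    # skip PR creation, work in a throwaway worktree, and use low effort.
    args.yes = True
    args.no_pr = True
    args.worktree = True
    args.effort = "low"
    return args
```

Forcing `yes`/`no_pr`/`worktree`/`effort` keeps the subagent run non-interactive and confined to its own worktree.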
diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index 64b1b2e87..98c54f358 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import contextlib
import logging
from collections import deque
from contextlib import contextmanager
@@ -408,6 +409,7 @@ def subagent_log_optimization_result(
new_code: dict[Path, str],
review: str,
test_results: TestResults,
+ project_root: Path | None = None,
) -> None:
import sys
from xml.sax.saxutils import escape
@@ -421,7 +423,11 @@ def subagent_log_optimization_result(
old = original_code.get(path, "")
new = new_code.get(path, "")
if old != new:
- diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
+ display_path = str(path)
+ if project_root is not None:
+ with contextlib.suppress(ValueError):
+ display_path = str(path.relative_to(project_root))
+ diff = unified_diff_strings(old, new, fromfile=display_path, tofile=display_path)
if diff:
diff_parts.append(diff)
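
`Path.relative_to()` raises `ValueError` when the path is not under `project_root`, so `contextlib.suppress` keeps the absolute path as the fallback. A self-contained illustration of the same idiom:

```python
import contextlib
from pathlib import Path

project_root = Path("/repo")

for path in (Path("/repo/src/app.py"), Path("/tmp/scratch.py")):
    display_path = str(path)
    with contextlib.suppress(ValueError):
        # relative_to() raises ValueError for paths outside project_root,
        # in which case the absolute path is kept.
        display_path = str(path.relative_to(project_root))
    print(display_path)  # -> "src/app.py", then "/tmp/scratch.py"
```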
@@ -468,18 +474,17 @@ def subagent_log_optimization_result(
if new_code[path] != original_code.get(path, ""):
            xml.append(f'    <optimized-code file="{escape(str(path))}">{escape(new_code[path])}</optimized-code>')
xml.append(" ")
- xml.append(" First, present a short summary: function name, performance improvement, and explanation.")
- xml.append(" If you are running inside an IDE (VS Code, Cursor, Windsurf, etc.):")
- xml.append(" For each optimized-code element, use the Write tool to write the optimized code to the file.")
- xml.append(" This will surface the diff in the IDE's native diff view for the user to accept or reject.")
- xml.append(" If you are running in a terminal without an IDE connection:")
- xml.append(" Present this optimization to the user using AskUserQuestion.")
- xml.append(" Show the function name, performance improvement, and explanation as the question text.")
- xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
+ xml.append(" 1. Review the diff and optimized code yourself. Write a brief assessment (2-3 sentences) covering:")
+ xml.append(" - Whether the optimization is correct and preserves behavior")
+ xml.append(" - What technique was used (e.g. algorithmic improvement, caching, vectorization)")
+ xml.append(" - Any risks or trade-offs (e.g. increased memory usage, reduced readability)")
+ xml.append(" 2. Present this optimization to the user using AskUserQuestion.")
+ xml.append(" Show the function name, performance improvement, and your review as the question text.")
+ xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
xml.append(
- " Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
+ " Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
)
- xml.append(" If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
+ xml.append(" 3. If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
xml.append(" ")
xml.append("")
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 4556176e9..26dbd3b48 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1018,16 +1018,18 @@ def select_best_optimization(
runtimes_list.append(new_best_opt.runtime)
if len(optimization_ids) > 1:
- future_ranking = self.executor.submit(
- ai_service_client.generate_ranking,
- diffs=diff_strs,
- optimization_ids=optimization_ids,
- speedups=speedups_list,
- trace_id=self.get_trace_id(exp_type),
- function_references=function_references,
- )
- concurrent.futures.wait([future_ranking])
- ranking = future_ranking.result()
+ ranking = None
+ if not is_subagent_mode():
+ future_ranking = self.executor.submit(
+ ai_service_client.generate_ranking,
+ diffs=diff_strs,
+ optimization_ids=optimization_ids,
+ speedups=speedups_list,
+ trace_id=self.get_trace_id(exp_type),
+ function_references=function_references,
+ )
+ concurrent.futures.wait([future_ranking])
+ ranking = future_ranking.result()
if ranking:
min_key = ranking[0]
else:
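
With the LLM ranking skipped in subagent mode, `ranking` stays `None` and the `else:` branch (outside this hunk's context) must pick a winner locally. A hedged, self-contained sketch of a plausible fallback by measured runtime:

```python
def pick_best(ranking: list[int] | None, runtimes_list: list[int]) -> int:
    # Hedged sketch: the real else-branch is not shown in this hunk,
    # so this mirrors only a plausible fallback.
    if ranking:
        return ranking[0]
    # Fall back to the candidate with the smallest measured runtime.
    return min(range(len(runtimes_list)), key=runtimes_list.__getitem__)


assert pick_best(None, [900, 450, 700]) == 1
assert pick_best([2, 0, 1], [900, 450, 700]) == 2
```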
@@ -2390,6 +2392,25 @@ def process_review(
code_context: CodeOptimizationContext,
function_references: str,
) -> None:
+ if is_subagent_mode():
+ subagent_log_optimization_result(
+ function_name=explanation.function_name,
+ file_path=explanation.file_path,
+ perf_improvement_line=explanation.perf_improvement_line,
+ original_runtime_ns=explanation.original_runtime_ns,
+ best_runtime_ns=explanation.best_runtime_ns,
+ raw_explanation=explanation.raw_explanation_message,
+ original_code=original_code_combined,
+ new_code=new_code_combined,
+ review="",
+ test_results=explanation.winning_behavior_test_results,
+ project_root=self.project_root,
+ )
+ mark_optimization_success(
+ trace_id=self.function_trace_id, is_optimization_found=best_optimization is not None
+ )
+ return
+
coverage_message = (
original_code_baseline.coverage_results.build_message()
if original_code_baseline.coverage_results
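
The early return above emits the structured result before any reviewer verdict exists, hence `review=""`. For orientation, a self-contained sketch of what an emitter like `subagent_log_optimization_result` produces; the tag names and structure here are illustrative placeholders, not the real output format:

```python
import sys
from pathlib import Path
from xml.sax.saxutils import escape


def emit_result_sketch(
    function_name: str,
    perf_line: str,
    explanation: str,
    new_code: dict[Path, str],
    project_root: Path,
) -> None:
    # Placeholder tag names; the real format lives in subagent_log_optimization_result.
    parts = ["<optimization-result>"]
    parts.append(f"  <function>{escape(function_name)}</function>")
    parts.append(f"  <improvement>{escape(perf_line)}</improvement>")
    parts.append(f"  <explanation>{escape(explanation)}</explanation>")
    for path, code in new_code.items():
        try:
            # Repo-relative paths keep the output stable across machines.
            display = path.relative_to(project_root)
        except ValueError:
            display = path
        parts.append(f'  <optimized-code file="{display}">{escape(code)}</optimized-code>')
    parts.append("</optimization-result>")
    sys.stdout.write("\n".join(parts) + "\n")


emit_result_sketch("compute", "2.1x faster", "Replaced a nested loop with a dict lookup.",
                   {Path("/repo/src/mod.py"): "def compute(): ..."}, Path("/repo"))
```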
@@ -2537,20 +2558,7 @@ def process_review(
self.optimization_review = opt_review_result.review
# Display the reviewer result to the user
- if is_subagent_mode():
- subagent_log_optimization_result(
- function_name=new_explanation.function_name,
- file_path=new_explanation.file_path,
- perf_improvement_line=new_explanation.perf_improvement_line,
- original_runtime_ns=new_explanation.original_runtime_ns,
- best_runtime_ns=new_explanation.best_runtime_ns,
- raw_explanation=new_explanation.raw_explanation_message,
- original_code=original_code_combined,
- new_code=new_code_combined,
- review=opt_review_result.review,
- test_results=new_explanation.winning_behavior_test_results,
- )
- elif opt_review_result.review:
+ if opt_review_result.review:
review_display = {
"high": ("[bold green]High[/bold green]", "green", "Recommended to merge"),
"medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"),
@@ -2667,12 +2675,15 @@ def establish_original_code_baseline(
logger.debug(
f"[PIPELINE] Test file {idx}: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}"
)
+ total_looping_time = (
+ TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
+ )
behavioral_results, coverage_results = self.run_and_parse_tests(
testing_type=TestingMode.BEHAVIOR,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
- testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
+ testing_time=total_looping_time,
enable_coverage=True,
code_context=code_context,
)
@@ -2713,6 +2724,7 @@ def establish_original_code_baseline(
self.instrument_async_for_mode(TestingMode.PERFORMANCE)
try:
+ subagent = is_subagent_mode()
benchmarking_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
@@ -2721,6 +2733,7 @@ def establish_original_code_baseline(
testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
enable_coverage=False,
code_context=code_context,
+ **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
)
logger.debug(f"[BENCHMARK-DONE] Got {len(benchmarking_results.test_results)} benchmark results")
finally:
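
The `**({...} if subagent else {})` idiom forwards the loop bounds only in subagent mode, so `run_and_parse_tests` keeps its own defaults otherwise. A self-contained illustration of the unpacking pattern (default values below are placeholders, not codeflash's):

```python
def run(*, testing_time: float, pytest_min_loops: int = 5, pytest_max_loops: int = 100_000) -> dict:
    return {"time": testing_time, "min": pytest_min_loops, "max": pytest_max_loops}


subagent = True
result = run(
    testing_time=10.0,
    # An empty dict unpacks to no extra kwargs, so the function's own
    # defaults apply when subagent is False.
    **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
)
assert result == {"time": 10.0, "min": 3, "max": 100}
```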
@@ -2871,6 +2884,10 @@ def run_optimized_candidate(
try:
self.instrument_capture(file_path_to_helper_classes)
+
+ total_looping_time = (
+ TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
+ )
candidate_behavior_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.BEHAVIOR,
test_env=test_env,
@@ -2911,6 +2928,7 @@ def run_optimized_candidate(
self.instrument_async_for_mode(TestingMode.PERFORMANCE)
try:
+ subagent = is_subagent_mode()
candidate_benchmarking_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
@@ -2918,6 +2936,7 @@ def run_optimized_candidate(
optimization_iteration=optimization_candidate_index,
testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
enable_coverage=False,
+ **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
)
finally:
if self.function_to_optimize.is_async:
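
The `pytest_min_loops`/`pytest_max_loops` bounds suggest a benchmarking loop that runs each test at least a floor number of times and stops at a time budget or ceiling. A generic sketch of that strategy, not codeflash's actual runner:

```python
import time


def benchmark(fn, budget_s: float, min_loops: int = 3, max_loops: int = 100) -> float:
    """Run fn at least min_loops times, then keep looping until the time
    budget or max_loops is reached; return the best observed duration in ns."""
    best = float("inf")
    start = time.perf_counter()
    loops = 0
    while loops < min_loops or (time.perf_counter() - start < budget_s and loops < max_loops):
        t0 = time.perf_counter_ns()
        fn()
        best = min(best, time.perf_counter_ns() - t0)
        loops += 1
    return best


print(benchmark(lambda: sum(range(1000)), budget_s=0.05))
```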
diff --git a/tests/test_languages/test_javascript_test_runner.py b/tests/test_languages/test_javascript_test_runner.py
index 7dccaa332..10a84999e 100644
--- a/tests/test_languages/test_javascript_test_runner.py
+++ b/tests/test_languages/test_javascript_test_runner.py
@@ -1,5 +1,6 @@
"""Tests for JavaScript/Jest test runner functionality."""
+import sys
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock
@@ -896,6 +897,7 @@ def test_line_profile_command_uses_bundled_reporter(self):
reporter_args = [a for a in cmd if "--reporters=codeflash/jest-reporter" in a]
assert len(reporter_args) == 1
+ @pytest.mark.skipif(sys.platform == "win32", reason="Node.js subprocess pipe behavior unreliable on Windows CI")
def test_reporter_produces_valid_junit_xml(self):
"""The reporter JS should produce JUnit XML parseable by junitparser."""
import subprocess
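
The skipped test shells out to Node and checks the reporter's output with junitparser, which is why it is flaky on Windows pipes. A condensed sketch of just the validation half, substituting a stand-in XML file for the Jest run (so no Node invocation flags are assumed):

```python
import sys

import pytest
from junitparser import JUnitXml


@pytest.mark.skipif(sys.platform == "win32", reason="Node.js subprocess pipe behavior unreliable on Windows CI")
def test_reporter_xml_is_parseable(tmp_path):
    # Stand-in for the reporter's output; the real test produces this by
    # running Jest with the bundled codeflash/jest-reporter.
    xml_path = tmp_path / "results.xml"
    xml_path.write_text(
        '<?xml version="1.0"?>'
        '<testsuites><testsuite name="suite" tests="1">'
        '<testcase classname="a" name="passes" time="0.01"/>'
        "</testsuite></testsuites>"
    )
    report = JUnitXml.fromfile(str(xml_path))
    assert sum(suite.tests for suite in report) == 1
```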