From 8474c36fa730bebea8906bfc1c791646e6667d17 Mon Sep 17 00:00:00 2001
From: ali <mohammed18200118@gmail.com>
Date: Thu, 12 Mar 2026 17:04:28 +0200
Subject: [PATCH] fix: raise JS/TS noise floor to 3x Python to reduce false
 positive speedups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Separate V8 processes have significant JIT/GC variance (15%+), which causes
false-positive speedups at the current 5% threshold. This raises the JS/TS
noise floor to 15% (45% for <10μs functions) via a 3x multiplier. The
existing 2x CI multiplier is still applied on top of these values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/result/critic.py | 26 +++++++++++++-----
 tests/test_critic.py       | 55 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py
index 600c4a537..d43d3890c 100644
--- a/codeflash/result/critic.py
+++ b/codeflash/result/critic.py
@@ -11,6 +11,7 @@
     MIN_TESTCASE_PASSED_THRESHOLD,
     MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
 )
+from codeflash.languages.current import is_javascript
 from codeflash.models.test_type import TestType
 
 if TYPE_CHECKING:
@@ -24,6 +25,23 @@ class AcceptanceReason(Enum):
     NONE = "none"
 
 
+JS_NOISE_MULTIPLIER = 3
+
+
+def compute_noise_floor(original_code_runtime: int, *, disable_gh_action_noise: bool = False) -> float:
+    """Compute the noise floor for speedup acceptance based on runtime, language, and CI environment.
+
+    JavaScript/TypeScript gets a higher noise floor because separate V8 processes
+    have significant JIT/GC variance that creates false positive speedups.
+    """
+    noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
+    if is_javascript():
+        noise_floor *= JS_NOISE_MULTIPLIER
+    if not disable_gh_action_noise and env_utils.is_ci():
+        noise_floor *= 2
+    return noise_floor
+
+
 def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
     """Calculate the performance gain of an optimized code over the original code.
 
@@ -91,9 +109,7 @@ def speedup_critic(
     - Concurrency improvements detect when blocking calls are replaced with non-blocking equivalents
     """
     # Runtime performance evaluation
-    noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
-    if not disable_gh_action_noise and env_utils.is_ci():
-        noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode
+    noise_floor = compute_noise_floor(original_code_runtime, disable_gh_action_noise=disable_gh_action_noise)
 
     perf_gain = performance_gain(
         original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
@@ -151,9 +167,7 @@ def get_acceptance_reason(
     Returns the primary reason for acceptance, with priority:
     concurrency > throughput > runtime (for async code).
     """
-    noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_runtime_ns < 10000 else MIN_IMPROVEMENT_THRESHOLD
-    if env_utils.is_ci():
-        noise_floor = noise_floor * 2
+    noise_floor = compute_noise_floor(original_runtime_ns)
 
     perf_gain = performance_gain(original_runtime_ns=original_runtime_ns, optimized_runtime_ns=optimized_runtime_ns)
     runtime_improved = perf_gain > noise_floor
diff --git a/tests/test_critic.py b/tests/test_critic.py
index b6a871d47..1d61fc046 100644
--- a/tests/test_critic.py
+++ b/tests/test_critic.py
@@ -2,6 +2,8 @@
 from pathlib import Path
 from unittest.mock import Mock
 
+import pytest
+
 from codeflash.code_utils.env_utils import get_pr_number
 from codeflash.models.models import (
     CodeOptimizationContext,
@@ -15,7 +17,9 @@
     TestResults,
     TestType,
 )
+from codeflash.languages.current import reset_current_language, set_current_language
 from codeflash.result.critic import (
+    compute_noise_floor,
     concurrency_gain,
     coverage_critic,
     performance_gain,
@@ -799,3 +803,54 @@ def test_parse_concurrency_metrics() -> None:
     metrics_no_class = parse_concurrency_metrics(test_results_no_class, "my_function")
     assert metrics_no_class is not None
     assert metrics_no_class.concurrency_ratio == 2.0  # 5000000 / 2500000
+
+
+def test_compute_noise_floor_python() -> None:
+    """Python noise floor: 5% for >=10μs, 15% for <10μs."""
+    reset_current_language()
+    assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.05)
+    assert compute_noise_floor(9_999, disable_gh_action_noise=True) == pytest.approx(0.15)
+
+
+def test_compute_noise_floor_javascript() -> None:
+    """JS noise floor is 3x Python: 15% for >=10μs, 45% for <10μs."""
+    set_current_language("javascript")
+    try:
+        assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.15)
+        assert compute_noise_floor(9_999, disable_gh_action_noise=True) == pytest.approx(0.45)
+    finally:
+        reset_current_language()
+
+
+def test_compute_noise_floor_typescript() -> None:
+    """TypeScript gets the same JS multiplier."""
+    set_current_language("typescript")
+    try:
+        assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.15)
+    finally:
+        reset_current_language()
+
+
+def test_speedup_critic_rejects_js_false_positive() -> None:
+    """A ~10.5% speedup that passes for Python should be rejected for JS (noise floor 15%)."""
+    original_code_runtime = 100_000  # 100μs — above the 10μs fast-function threshold
+
+    candidate_result = OptimizedCandidateResult(
+        max_loop_count=5,
+        best_test_runtime=90_500,  # ~10.5% improvement
+        behavior_test_results=TestResults(),
+        benchmarking_test_results=TestResults(),
+        optimization_candidate_index=0,
+        total_candidate_timing=12,
+    )
+
+    # Python: 10.5% > 5% noise floor → accepted
+    reset_current_language()
+    assert speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)
+
+    # JavaScript: 10.5% < 15% noise floor → rejected
+    set_current_language("javascript")
+    try:
+        assert not speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)
+    finally:
+        reset_current_language()