From 8474c36fa730bebea8906bfc1c791646e6667d17 Mon Sep 17 00:00:00 2001 From: ali Date: Thu, 12 Mar 2026 17:04:28 +0200 Subject: [PATCH] fix: raise JS/TS noise floor to 3x Python to reduce false positive speedups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate V8 processes have significant JIT/GC variance (15%+) that causes false positive speedups at the current 5% threshold. This raises the JS/TS noise floor to 15% (45% for <10μs functions) via a 3x multiplier. Co-Authored-By: Claude Opus 4.6 --- codeflash/result/critic.py | 26 +++++++++++++----- tests/test_critic.py | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 600c4a537..d43d3890c 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -11,6 +11,7 @@ MIN_TESTCASE_PASSED_THRESHOLD, MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD, ) +from codeflash.languages.current import is_javascript from codeflash.models.test_type import TestType if TYPE_CHECKING: @@ -24,6 +25,23 @@ class AcceptanceReason(Enum): NONE = "none" +JS_NOISE_MULTIPLIER = 3 + + +def compute_noise_floor(original_code_runtime: int, *, disable_gh_action_noise: bool = False) -> float: + """Compute the noise floor for speedup acceptance based on runtime and language. + + JavaScript/TypeScript gets a higher noise floor because separate V8 processes + have significant JIT/GC variance that creates false positive speedups. + """ + noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD + if is_javascript(): + noise_floor *= JS_NOISE_MULTIPLIER + if not disable_gh_action_noise and env_utils.is_ci(): + noise_floor *= 2 + return noise_floor + + def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float: """Calculate the performance gain of an optimized code over the original code. 
@@ -91,9 +109,7 @@ def speedup_critic( - Concurrency improvements detect when blocking calls are replaced with non-blocking equivalents """ # Runtime performance evaluation - noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD - if not disable_gh_action_noise and env_utils.is_ci(): - noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode + noise_floor = compute_noise_floor(original_code_runtime, disable_gh_action_noise=disable_gh_action_noise) perf_gain = performance_gain( original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime @@ -151,9 +167,7 @@ def get_acceptance_reason( Returns the primary reason for acceptance, with priority: concurrency > throughput > runtime (for async code). """ - noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_runtime_ns < 10000 else MIN_IMPROVEMENT_THRESHOLD - if env_utils.is_ci(): - noise_floor = noise_floor * 2 + noise_floor = compute_noise_floor(original_runtime_ns) perf_gain = performance_gain(original_runtime_ns=original_runtime_ns, optimized_runtime_ns=optimized_runtime_ns) runtime_improved = perf_gain > noise_floor diff --git a/tests/test_critic.py b/tests/test_critic.py index b6a871d47..1d61fc046 100644 --- a/tests/test_critic.py +++ b/tests/test_critic.py @@ -2,6 +2,8 @@ from pathlib import Path from unittest.mock import Mock +import pytest + from codeflash.code_utils.env_utils import get_pr_number from codeflash.models.models import ( CodeOptimizationContext, @@ -15,7 +17,9 @@ TestResults, TestType, ) +from codeflash.languages.current import reset_current_language, set_current_language from codeflash.result.critic import ( + compute_noise_floor, concurrency_gain, coverage_critic, performance_gain, @@ -799,3 +803,54 @@ def test_parse_concurrency_metrics() -> None: metrics_no_class = parse_concurrency_metrics(test_results_no_class, "my_function") assert metrics_no_class is not None assert 
metrics_no_class.concurrency_ratio == 2.0 # 5000000 / 2500000 + + +def test_compute_noise_floor_python() -> None: + """Python noise floor: 5% for >=10μs, 15% for <10μs.""" + reset_current_language() + assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.05) + assert compute_noise_floor(9_999, disable_gh_action_noise=True) == pytest.approx(0.15) + + +def test_compute_noise_floor_javascript() -> None: + """JS noise floor is 3x Python: 15% for >=10μs, 45% for <10μs.""" + set_current_language("javascript") + try: + assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.15) + assert compute_noise_floor(9_999, disable_gh_action_noise=True) == pytest.approx(0.45) + finally: + reset_current_language() + + +def test_compute_noise_floor_typescript() -> None: + """TypeScript gets the same JS multiplier.""" + set_current_language("typescript") + try: + assert compute_noise_floor(100_000, disable_gh_action_noise=True) == pytest.approx(0.15) + finally: + reset_current_language() + + +def test_speedup_critic_rejects_js_false_positive() -> None: + """A ~10.5% speedup that passes for Python should be rejected for JS (noise floor 15%).""" + original_code_runtime = 100_000 # 100μs — above the 10μs fast-function threshold + + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=90_500, # ~10.5% improvement + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=12, + ) + + # Python: 10.5% > 5% noise floor → accepted + reset_current_language() + assert speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True) + + # JavaScript: 10.5% < 15% noise floor → rejected + set_current_language("javascript") + try: + assert not speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True) + finally: + reset_current_language()