From ac9397975a67ee37893b184efa09ee3e85f1ead3 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Wed, 15 Apr 2026 17:03:43 +0800
Subject: [PATCH] Add IQR-based environment fluctuation detection to benchmark timing stats

This commit introduces IQR (Interquartile Range) based environment fluctuation
detection to the timing statistics calculation in test_compiler_util.py. The
feature helps detect unstable benchmarking environments by measuring the
relative variation in timing results.

Key Changes:
- Enhanced get_timing_stats() to compute median, Q1, Q3, and IQR
- Added environment variable GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD for
  configurable fluctuation detection sensitivity
- RuntimeError is raised when IQR/median exceeds the threshold
- Extended return stats dictionary with new fields: median, iqr
- Updated print_times_and_speedup() to use median for speedup calculation

IQR/median Ratio:
- Measures relative variability of timing measurements
- Lower values indicate more consistent timing
- Higher values indicate environment instability or interference

Environment Variable Configuration:
- GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD (default: 0.2)
- Controls the sensitivity of fluctuation detection

Detection Algorithm:
1. Calculate median, Q1 (25th percentile), Q3 (75th percentile)
2. Compute IQR = Q3 - Q1
3. Calculate relative IQR = IQR / median
4. Compare against threshold
5. Raise RuntimeError with detailed diagnostics if exceeded

Error Message Format:
When fluctuation is detected, the error message includes:
- IQR/median ratio and threshold
- Raw timing values for manual inspection

Use Cases:
- Multi-user GPU environments where timing variance is common
- CI/CD pipeline monitoring for performance regression detection
- Manual benchmark verification in shared resources
- Identifying external workload interference

Co-Authored-By: Claude Opus 4.6
---
 graph_net_bench/test_compiler_util.py | 50 ++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py
index 44ccc703e..44a01de0e 100644
--- a/graph_net_bench/test_compiler_util.py
+++ b/graph_net_bench/test_compiler_util.py
@@ -108,11 +108,45 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
 
 
 def get_timing_stats(elapsed_times):
+    """Compute timing statistics and detect environment fluctuation via IQR/median.
+
+    If IQR/median exceeds a threshold, the environment is considered unstable and
+    a RuntimeError is raised to request re-evaluation. The threshold is configured
+    via the environment variable GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD (default: 0.2).
+
+    Args:
+        elapsed_times: List of elapsed times in ms.
+    Returns:
+        dict: Statistics containing median, iqr, mean, std, min, max.
+    Raises:
+        RuntimeError: If IQR/median exceeds threshold, indicating excessive fluctuation.
+ """ + rel_iqr_threshold = float( + os.getenv("GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD", "0.2") + ) + arr = np.array(elapsed_times) + median = float(np.median(arr)) + q1 = float(np.percentile(arr, 25)) + q3 = float(np.percentile(arr, 75)) + iqr = q3 - q1 + + if median > 0: + rel_iqr = iqr / median + if rel_iqr > rel_iqr_threshold: + raise RuntimeError( + f"Environment fluctuation detected.\n" + f" IQR/median = {rel_iqr:.1%} (threshold: {rel_iqr_threshold:.0%})\n" + f" Raw times (ms): {elapsed_times}\n" + f"Please re-run evaluation." + ) + stats = { - "mean": float(f"{np.mean(elapsed_times):.6g}"), - "std": float(f"{np.std(elapsed_times):.6g}"), - "min": float(f"{np.min(elapsed_times):.6g}"), - "max": float(f"{np.max(elapsed_times):.6g}"), + "median": float(f"{median:.6g}"), + "iqr": float(f"{iqr:.6g}"), + "mean": float(f"{np.mean(arr):.6g}"), + "std": float(f"{np.std(arr):.6g}"), + "min": float(f"{np.min(arr):.6g}"), + "max": float(f"{np.max(arr):.6g}"), } return stats @@ -206,15 +240,15 @@ def print_times_and_speedup(args, eager_stats, compiled_stats): e2e_speedup = 0 gpu_speedup = 0 - eager_e2e_time_ms = eager_stats.get("e2e", {}).get("mean", 0) - compiled_e2e_time_ms = compiled_stats.get("e2e", {}).get("mean", 0) + eager_e2e_time_ms = eager_stats.get("e2e", {}).get("median", 0) + compiled_e2e_time_ms = compiled_stats.get("e2e", {}).get("median", 0) if eager_e2e_time_ms > 0 and compiled_e2e_time_ms > 0: e2e_speedup = eager_e2e_time_ms / compiled_e2e_time_ms if is_gpu_device(args.device): - eager_gpu_time_ms = eager_stats.get("gpu", {}).get("mean", 0) - compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("mean", 0) + eager_gpu_time_ms = eager_stats.get("gpu", {}).get("median", 0) + compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("median", 0) if eager_gpu_time_ms > 0 and compiled_gpu_time_ms > 0: gpu_speedup = eager_gpu_time_ms / compiled_gpu_time_ms