Skip to content

Commit adc9037

Browse files
JewelRoam and claude authored
Add IQR-based environment fluctuation detection to benchmark timing stats (#691)
This commit introduces IQR (interquartile range) based environment fluctuation detection to the timing statistics calculation in test_compiler_util.py. The feature helps detect unstable benchmarking environments by measuring the relative variation in timing results.

Key Changes:
- Enhanced get_timing_stats() to compute median, Q1, Q3, and IQR
- Added environment variable GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD for configurable fluctuation detection sensitivity
- RuntimeError is raised when IQR/median exceeds the threshold
- Extended return stats dictionary with new fields: median, iqr
- Updated print_times_and_speedup() to use median for speedup calculation

IQR/median Ratio:
- Measures relative variability of timing measurements
- Lower values indicate more consistent timing
- Higher values indicate environment instability or interference

Environment Variable Configuration:
- GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD (default: 0.2)
- Controls the sensitivity of fluctuation detection

Detection Algorithm:
1. Calculate median, Q1 (25th percentile), Q3 (75th percentile)
2. Compute IQR = Q3 - Q1
3. Calculate relative IQR = IQR / median
4. Compare against threshold
5. Raise RuntimeError with detailed diagnostics if exceeded

Error Message Format:
When fluctuation is detected, the error message includes:
- IQR/median ratio and threshold
- Raw timing values for manual inspection

Use Cases:
- Multi-user GPU environments where timing variance is common
- CI/CD pipeline monitoring for performance regression detection
- Manual benchmark verification in shared resources
- Identifying external workload interference

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8ce68eb commit adc9037

1 file changed

Lines changed: 42 additions & 8 deletions

File tree

graph_net_bench/test_compiler_util.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,45 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
108108

109109

110110
def get_timing_stats(elapsed_times):
    """Compute timing statistics and detect environment fluctuation via IQR/median.

    The relative interquartile range (IQR / median) is compared against a
    threshold read from the environment variable
    GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD (default: 0.2). When the ratio
    exceeds the threshold, the environment is considered unstable and a
    RuntimeError is raised to request re-evaluation.

    Args:
        elapsed_times: Non-empty sequence of elapsed times in ms.

    Returns:
        dict: Statistics with keys median, iqr, mean, std, min, max, each
        rounded to 6 significant digits.

    Raises:
        ValueError: If elapsed_times is empty, or if the threshold environment
            variable is not a non-negative number.
        RuntimeError: If IQR/median exceeds threshold, indicating excessive
            fluctuation.
    """
    env_name = "GRAPH_NET_FLUCTUATION_DETECT_THRESHOLD"
    raw_threshold = os.getenv(env_name, "0.2")
    try:
        rel_iqr_threshold = float(raw_threshold)
    except ValueError:
        # Name the variable so a misconfiguration is easy to track down.
        raise ValueError(
            f"{env_name} must be a number, got {raw_threshold!r}"
        ) from None
    # `not (x >= 0)` is also true for NaN, which would otherwise make every
    # comparison below False and silently disable the detection.
    if not rel_iqr_threshold >= 0:
        raise ValueError(
            f"{env_name} must be a non-negative number, got {raw_threshold!r}"
        )

    arr = np.asarray(elapsed_times, dtype=float)
    if arr.size == 0:
        # Fail early with a clear message instead of an unrelated NumPy
        # reduction error further down.
        raise ValueError(
            "elapsed_times is empty; cannot compute timing statistics."
        )

    median = float(np.median(arr))
    q1, q3 = (float(q) for q in np.percentile(arr, [25, 75]))
    iqr = q3 - q1

    # The ratio is only meaningful for a positive median; otherwise the
    # fluctuation check is skipped.
    if median > 0:
        rel_iqr = iqr / median
        if rel_iqr > rel_iqr_threshold:
            raise RuntimeError(
                "Environment fluctuation detected.\n"
                f"  IQR/median = {rel_iqr:.1%} (threshold: {rel_iqr_threshold:.0%})\n"
                f"  Raw times (ms): {elapsed_times}\n"
                "Please re-run evaluation."
            )

    # Round every statistic to 6 significant digits via the "g" format.
    stats = {
        "median": float(f"{median:.6g}"),
        "iqr": float(f"{iqr:.6g}"),
        "mean": float(f"{np.mean(arr):.6g}"),
        "std": float(f"{np.std(arr):.6g}"),
        "min": float(f"{np.min(arr):.6g}"),
        "max": float(f"{np.max(arr):.6g}"),
    }
    return stats
118152

@@ -206,15 +240,15 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
206240
e2e_speedup = 0
207241
gpu_speedup = 0
208242

209-
eager_e2e_time_ms = eager_stats.get("e2e", {}).get("mean", 0)
210-
compiled_e2e_time_ms = compiled_stats.get("e2e", {}).get("mean", 0)
243+
eager_e2e_time_ms = eager_stats.get("e2e", {}).get("median", 0)
244+
compiled_e2e_time_ms = compiled_stats.get("e2e", {}).get("median", 0)
211245

212246
if eager_e2e_time_ms > 0 and compiled_e2e_time_ms > 0:
213247
e2e_speedup = eager_e2e_time_ms / compiled_e2e_time_ms
214248

215249
if is_gpu_device(args.device):
216-
eager_gpu_time_ms = eager_stats.get("gpu", {}).get("mean", 0)
217-
compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("mean", 0)
250+
eager_gpu_time_ms = eager_stats.get("gpu", {}).get("median", 0)
251+
compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("median", 0)
218252

219253
if eager_gpu_time_ms > 0 and compiled_gpu_time_ms > 0:
220254
gpu_speedup = eager_gpu_time_ms / compiled_gpu_time_ms

0 commit comments

Comments
 (0)