From 13eb0c696a6044656720f4ebe10da8c57c652c33 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:17:03 +0000 Subject: [PATCH 1/2] Initial plan From 3b239b7a1a4523ccdfc906657776235cb3a03003 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:20:03 +0000 Subject: [PATCH 2/2] Fix test_throughput_consistency: use time.perf_counter_ns() with best-of-N sampling --- prototype/test_correctness_suite.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/prototype/test_correctness_suite.py b/prototype/test_correctness_suite.py index c77da7c..e19125b 100644 --- a/prototype/test_correctness_suite.py +++ b/prototype/test_correctness_suite.py @@ -12,6 +12,7 @@ """ import sys +import time import numpy as np import pytest from prototype.model_tools import ToyModel @@ -319,19 +320,28 @@ def test_throughput_consistency(self): for bs in batch_sizes: x = np.random.randn(bs, 8).astype(np.float32) - # Measure time for single-node - start = np.datetime64('now') - _ = single_node_forward(model, x[0]) # First element only - end = np.datetime64('now') + # Warm up to avoid first-run JIT/cache effects + for _ in range(3): + _ = single_node_forward(model, x[0]) - latency = (end - start).astype(np.float64) * 1e9 # nanoseconds + # Take best-of-N to reduce OS scheduling noise + samples = [] + for _ in range(10): + start = time.perf_counter_ns() + _ = single_node_forward(model, x[0]) # First element only + end = time.perf_counter_ns() + samples.append(end - start) - latencies.append(latency) + best_latency = float(min(samples)) # nanoseconds + latencies.append(best_latency) # Latencies should be within 20% of each other (allowing for variance) min_latency = min(latencies) max_latency = max(latencies) + if min_latency < 100: + pytest.skip("Timer resolution too low to measure latency reliably") + assert (max_latency - min_latency) / min_latency < 0.2, \ f"Latency variance too high: {min_latency} vs {max_latency}"